Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,30 @@ on:
jobs:
build:
name: Build distributions
runs-on: ubuntu-latest
runs-on: macos-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Build sdist and wheel
- name: Build sdist and universal2 wheel
run: |
python -m pip install --upgrade build
python -m build
- name: Check metadata
run: |
python -m pip install --upgrade twine
twine check dist/*
- name: Verify the wheel bundles the OCR binary
run: |
python - <<'PY'
import glob, zipfile
wheel = glob.glob("dist/*.whl")[0]
assert wheel.endswith("macosx_11_0_universal2.whl"), wheel
names = zipfile.ZipFile(wheel).namelist()
assert "localcontextrouter/_bin/lcr-ocr" in names, names
print("ok:", wheel)
PY
- uses: actions/upload-artifact@v4
with:
name: dist
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ ocr/.swiftpm/
*.xcodeproj/
.DS_Store

# OCR binary compiled into the wheel by the build hook
src/localcontextrouter/_bin/

# Editors
.idea/
.vscode/
Expand Down
5 changes: 2 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ First release.
### Notes

- macOS only; OCR uses the Apple Vision framework.
- The `lcr-ocr` binary is built from the bundled Swift package and located via
`LCR_OCR_BIN`, `PATH`, or the in-repo build. Pages that need OCR require it;
text extraction, classification, and token estimation work without it.
- The macOS wheel is a `universal2` platform wheel that bundles the `lcr-ocr`
binary, so OCR works out of the box. `LCR_OCR_BIN` overrides the bundled copy.

[0.1.0]: https://github.com/sid732/LocalContextRouter/releases/tag/v0.1.0
11 changes: 3 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,9 @@ to vision when the page genuinely needs it.
pip install localcontextrouter
```

OCR uses an on-device Swift binary (`lcr-ocr`). Build it from the bundled package
and point the library at it (text extraction, classification, and token estimation
work without it; only pages that need OCR require it):

```sh
swift build -c release --package-path ocr
export LCR_OCR_BIN="$PWD/ocr/.build/release/lcr-ocr"
```
The macOS wheel bundles the on-device OCR binary (`lcr-ocr`, a universal2 build),
so OCR works out of the box — no extra setup. To override it (e.g. a locally built
binary), set `LCR_OCR_BIN` to its path.

## Use

Expand Down
55 changes: 55 additions & 0 deletions hatch_build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Build hook: compile the universal lcr-ocr binary into the wheel.

Runs only for the wheel target and only where Swift is available (macOS). The
result is a platform wheel (``macosx_11_0_universal2``) carrying the on-device
OCR binary, so ``pip install`` gives users working OCR with no extra steps.
"""

from __future__ import annotations

import shutil
import subprocess
from pathlib import Path
from typing import Any

from hatchling.builders.hooks.plugin.interface import BuildHookInterface

_WHEEL_TAG = "py3-none-macosx_11_0_universal2"
_TARGET = "localcontextrouter/_bin/lcr-ocr"


class CustomBuildHook(BuildHookInterface):
    """Hatch build hook that compiles ``lcr-ocr`` and bundles it into the wheel.

    When the hook runs for a regular (non-editable) wheel build on macOS with a
    Swift toolchain available, it builds the Swift package in ``ocr/`` for both
    ``arm64`` and ``x86_64``, copies the resulting binary into the package's
    ``_bin`` directory, and retags the wheel as a platform wheel. In every
    other situation it leaves ``build_data`` untouched.
    """

    PLUGIN_NAME = "custom"

    def initialize(self, version: str, build_data: dict[str, Any]) -> None:
        """Build the universal OCR binary and mark the wheel as platform-specific.

        Args:
            version: Build version passed by Hatch; ``"editable"`` skips the
                Swift build entirely.
            build_data: Mutable build settings; updated in place with the wheel
                tag and the binary to force-include.

        Raises:
            subprocess.CalledProcessError: If ``swift build`` exits non-zero.
            FileNotFoundError: If the build finishes but the expected binary
                is not where the multi-arch build should have put it.
        """
        # Function-scope import keeps this block self-contained.
        import sys

        if self.target_name != "wheel":
            return
        if version == "editable":
            # Editable installs (pip install -e) don't need the bundled binary;
            # skip the Swift build so dev installs work without a toolchain.
            return
        if sys.platform != "darwin" or shutil.which("swift") is None:
            # Not macOS, or no Swift toolchain. A Swift install on another OS
            # cannot produce the arm64 + x86_64 macOS binary, and tagging its
            # output as macosx_11_0_universal2 would be wrong. Leave the build
            # data alone so a pure wheel is produced; OCR then relies on
            # LCR_OCR_BIN or PATH.
            return

        root = Path(self.root)
        ocr_dir = root / "ocr"
        subprocess.run(
            ["swift", "build", "-c", "release", "--arch", "arm64", "--arch", "x86_64"],
            cwd=ocr_dir,
            check=True,
        )
        # Fail loudly if the binary is not at the expected multi-arch output
        # path, rather than shipping a platform wheel without it.
        built = ocr_dir / ".build" / "apple" / "Products" / "Release" / "lcr-ocr"
        if not built.is_file():
            raise FileNotFoundError(f"universal lcr-ocr not found at {built}")

        dest = root / "src" / "localcontextrouter" / "_bin" / "lcr-ocr"
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(built, dest)
        dest.chmod(0o755)

        # The wheel now carries a native binary: use the fixed platform tag
        # instead of an inferred one.
        build_data["pure_python"] = False
        build_data["infer_tag"] = False
        build_data["tag"] = _WHEEL_TAG
        build_data["force_include"][str(dest)] = _TARGET
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ path = "src/localcontextrouter/__init__.py"

[tool.hatch.build.targets.wheel]
packages = ["src/localcontextrouter"]
artifacts = ["src/localcontextrouter/_bin/lcr-ocr"]

[tool.hatch.build.targets.wheel.hooks.custom]
path = "hatch_build.py"

[tool.hatch.build.targets.sdist]
exclude = ["src/localcontextrouter/_bin"]

[tool.ruff]
line-length = 100
Expand Down
22 changes: 18 additions & 4 deletions src/localcontextrouter/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from __future__ import annotations

import contextlib
import json
import os
import shutil
Expand All @@ -19,6 +20,8 @@
BINARY_ENV_VAR = "LCR_OCR_BIN"

_BINARY_NAME = "lcr-ocr"
# Shipped in the wheel by the build hook.
_BUNDLED_BINARY = Path(__file__).resolve().parent / "_bin" / _BINARY_NAME
# Dev fallback: the binary built from the bundled Swift package in this repo.
_DEV_BINARY = Path(__file__).resolve().parents[2] / "ocr" / ".build" / "release" / _BINARY_NAME

Expand All @@ -31,25 +34,36 @@ class OcrError(RuntimeError):
"""Raised when the ``lcr-ocr`` binary exits with an error."""


def _executable(path: Path) -> Path:
    """Hand ``path`` back, first trying to restore a missing execute bit.

    If the file is already executable nothing is touched. Otherwise the
    execute bits are OR-ed into its current mode. Any ``OSError`` from
    ``stat``/``chmod`` is ignored, so the caller always gets ``path`` back.
    """
    if os.access(path, os.X_OK):
        return path

    try:
        current_mode = path.stat().st_mode
        path.chmod(current_mode | 0o111)
    except OSError:
        # Best effort only: the path is returned even if the mode can't change.
        pass
    return path


def locate_binary() -> Path:
"""Locate the ``lcr-ocr`` binary.

Resolution order: the ``LCR_OCR_BIN`` environment variable, then ``PATH``,
then the binary built from the bundled Swift package.
Resolution order: the ``LCR_OCR_BIN`` environment variable, the copy bundled
in the installed wheel, then ``PATH``, then the in-repo dev build.
"""
override = os.environ.get(BINARY_ENV_VAR)
if override:
path = Path(override)
if not path.exists():
raise OcrBinaryNotFound(f"{BINARY_ENV_VAR} points to a missing file: {path}")
return path
return _executable(path)

if _BUNDLED_BINARY.is_file():
return _executable(_BUNDLED_BINARY)

on_path = shutil.which(_BINARY_NAME)
if on_path:
return Path(on_path)

if _DEV_BINARY.exists():
return _DEV_BINARY
return _executable(_DEV_BINARY)

raise OcrBinaryNotFound(
f"could not find '{_BINARY_NAME}'. Build it with 'swift build -c release' in "
Expand Down
10 changes: 10 additions & 0 deletions tests/test_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ def test_locate_binary_prefers_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Pa
assert locate_binary() == fake


def test_locate_binary_uses_bundled_copy(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Without an env override, ``locate_binary`` returns the bundled binary path."""
    from localcontextrouter import ocr as ocr_module

    # Clear the override so resolution moves past the environment variable.
    monkeypatch.delenv(BINARY_ENV_VAR, raising=False)

    # Stand-in for the binary shipped inside the installed wheel.
    fake_bundled = tmp_path / "lcr-ocr"
    fake_bundled.write_text("#!/bin/sh\n")
    monkeypatch.setattr(ocr_module, "_BUNDLED_BINARY", fake_bundled)

    located = locate_binary()
    assert located == fake_bundled


@pytest.mark.integration
def test_run_ocr_reads_text(lcr_binary: Path, tmp_path: Path) -> None:
image = Image.new("RGB", (700, 180), "white")
Expand Down
Loading