From fc7549cc2cc49a659c5a25f66bb9537b5169f9cb Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar
Date: Mon, 22 Jun 2026 20:55:59 -0400
Subject: [PATCH 1/6] build: compile the OCR binary into a universal2 wheel

Add a hatch build hook that compiles a universal (arm64 + x86_64) lcr-ocr,
bundles it under the package, and tags the wheel macosx_11_0_universal2. The
hook skips when Swift is unavailable (e.g. the sdist), and the built binary is
git-ignored since it is produced at build time.
---
 .gitignore     |  3 +++
 hatch_build.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml |  7 +++++++
 3 files changed, 61 insertions(+)
 create mode 100644 hatch_build.py

diff --git a/.gitignore b/.gitignore
index 706f1a9..8e80295 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,9 @@ ocr/.swiftpm/
 *.xcodeproj/
 .DS_Store
 
+# OCR binary compiled into the wheel by the build hook
+src/localcontextrouter/_bin/
+
 # Editors
 .idea/
 .vscode/
diff --git a/hatch_build.py b/hatch_build.py
new file mode 100644
index 0000000..3ed84e4
--- /dev/null
+++ b/hatch_build.py
@@ -0,0 +1,51 @@
+"""Build hook: compile the universal lcr-ocr binary into the wheel.
+
+Runs only for the wheel target and only where Swift is available (macOS). The
+result is a platform wheel (``macosx_11_0_universal2``) carrying the on-device
+OCR binary, so ``pip install`` gives users working OCR with no extra steps.
+"""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+from typing import Any
+
+from hatchling.builders.hooks.plugin.interface import BuildHookInterface
+
+_WHEEL_TAG = "py3-none-macosx_11_0_universal2"
+_TARGET = "localcontextrouter/_bin/lcr-ocr"
+
+
+class CustomBuildHook(BuildHookInterface):
+    PLUGIN_NAME = "custom"
+
+    def initialize(self, version: str, build_data: dict[str, Any]) -> None:
+        if self.target_name != "wheel":
+            return
+        if shutil.which("swift") is None:
+            # No Swift toolchain (e.g. building the sdist on a non-macOS host):
+            # produce a pure wheel; OCR then relies on LCR_OCR_BIN or PATH.
+            return
+
+        root = Path(self.root)
+        ocr_dir = root / "ocr"
+        subprocess.run(
+            ["swift", "build", "-c", "release", "--arch", "arm64", "--arch", "x86_64"],
+            cwd=ocr_dir,
+            check=True,
+        )
+        built = ocr_dir / ".build" / "apple" / "Products" / "Release" / "lcr-ocr"
+        if not built.is_file():
+            raise FileNotFoundError(f"universal lcr-ocr not found at {built}")
+
+        dest = root / "src" / "localcontextrouter" / "_bin" / "lcr-ocr"
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(built, dest)
+        dest.chmod(0o755)
+
+        build_data["pure_python"] = False
+        build_data["infer_tag"] = False
+        build_data["tag"] = _WHEEL_TAG
+        build_data["force_include"][str(dest)] = _TARGET
diff --git a/pyproject.toml b/pyproject.toml
index 0f9f75c..7aaf2c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,13 @@ path = "src/localcontextrouter/__init__.py"
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/localcontextrouter"]
+artifacts = ["src/localcontextrouter/_bin/lcr-ocr"]
+
+[tool.hatch.build.targets.wheel.hooks.custom]
+path = "hatch_build.py"
+
+[tool.hatch.build.targets.sdist]
+exclude = ["src/localcontextrouter/_bin"]
 
 [tool.ruff]
 line-length = 100

From 8830b80155d6159792f5e7ba6e718c0ce3ddb2c6 Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar
Date: Mon, 22 Jun 2026 20:55:59 -0400
Subject: [PATCH 2/6] feat(ocr): prefer the bundled binary and fix its exec bit

Resolve the binary as LCR_OCR_BIN, then the copy bundled in the wheel, then
PATH, then the in-repo dev build, restoring the executable bit if installation
dropped it.
---
 src/localcontextrouter/ocr.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/localcontextrouter/ocr.py b/src/localcontextrouter/ocr.py
index bf56e62..45ffdc6 100644
--- a/src/localcontextrouter/ocr.py
+++ b/src/localcontextrouter/ocr.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+import contextlib
 import json
 import os
 import shutil
@@ -19,6 +20,8 @@
 BINARY_ENV_VAR = "LCR_OCR_BIN"
 _BINARY_NAME = "lcr-ocr"
 
+# Shipped in the wheel by the build hook.
+_BUNDLED_BINARY = Path(__file__).resolve().parent / "_bin" / _BINARY_NAME
 # Dev fallback: the binary built from the bundled Swift package in this repo.
 _DEV_BINARY = Path(__file__).resolve().parents[2] / "ocr" / ".build" / "release" / _BINARY_NAME
 
@@ -31,25 +34,36 @@ class OcrError(RuntimeError):
     """Raised when the ``lcr-ocr`` binary exits with an error."""
 
 
+def _executable(path: Path) -> Path:
+    """Return ``path``, making it executable if installation dropped the bit."""
+    if not os.access(path, os.X_OK):
+        with contextlib.suppress(OSError):
+            path.chmod(path.stat().st_mode | 0o111)
+    return path
+
+
 def locate_binary() -> Path:
     """Locate the ``lcr-ocr`` binary.
 
-    Resolution order: the ``LCR_OCR_BIN`` environment variable, then ``PATH``,
-    then the binary built from the bundled Swift package.
+    Resolution order: the ``LCR_OCR_BIN`` environment variable, the copy bundled
+    in the installed wheel, then ``PATH``, then the in-repo dev build.
     """
     override = os.environ.get(BINARY_ENV_VAR)
     if override:
         path = Path(override)
         if not path.exists():
             raise OcrBinaryNotFound(f"{BINARY_ENV_VAR} points to a missing file: {path}")
-        return path
+        return _executable(path)
+
+    if _BUNDLED_BINARY.is_file():
+        return _executable(_BUNDLED_BINARY)
 
     on_path = shutil.which(_BINARY_NAME)
     if on_path:
         return Path(on_path)
 
     if _DEV_BINARY.exists():
-        return _DEV_BINARY
+        return _executable(_DEV_BINARY)
 
     raise OcrBinaryNotFound(
         f"could not find '{_BINARY_NAME}'. Build it with 'swift build -c release' in "

From 059867f554ac102017b69c29e3432fe006e38881 Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar
Date: Mon, 22 Jun 2026 20:55:59 -0400
Subject: [PATCH 3/6] test(ocr): cover bundled-binary resolution

Assert the bundled copy is used when no override is set.
---
 tests/test_ocr.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test_ocr.py b/tests/test_ocr.py
index c5e08f8..59e6929 100644
--- a/tests/test_ocr.py
+++ b/tests/test_ocr.py
@@ -45,6 +45,16 @@ def test_locate_binary_prefers_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Pa
     assert locate_binary() == fake
 
 
+def test_locate_binary_uses_bundled_copy(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    from localcontextrouter import ocr
+
+    bundled = tmp_path / "lcr-ocr"
+    bundled.write_text("#!/bin/sh\n")
+    monkeypatch.delenv(BINARY_ENV_VAR, raising=False)
+    monkeypatch.setattr(ocr, "_BUNDLED_BINARY", bundled)
+    assert locate_binary() == bundled
+
+
 @pytest.mark.integration
 def test_run_ocr_reads_text(lcr_binary: Path, tmp_path: Path) -> None:
     image = Image.new("RGB", (700, 180), "white")

From 4ae5f776be84e958aa54edd228a595e1281134eb Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar
Date: Mon, 22 Jun 2026 20:55:59 -0400
Subject: [PATCH 4/6] ci: build the wheel on macOS and verify the bundled binary

Move the release build to macOS so the Swift hook runs, and assert the wheel is
universal2 and contains the OCR binary.
---
 .github/workflows/release.yml | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index db4eaee..b73b562 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -7,13 +7,13 @@ on:
 jobs:
   build:
     name: Build distributions
-    runs-on: ubuntu-latest
+    runs-on: macos-latest
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
-      - name: Build sdist and wheel
+      - name: Build sdist and universal2 wheel
         run: |
           python -m pip install --upgrade build
           python -m build
@@ -21,6 +21,16 @@ jobs:
         run: |
           python -m pip install --upgrade twine
           twine check dist/*
+      - name: Verify the wheel bundles the OCR binary
+        run: |
+          python - <<'PY'
+          import glob, zipfile
+          wheel = glob.glob("dist/*.whl")[0]
+          assert wheel.endswith("macosx_11_0_universal2.whl"), wheel
+          names = zipfile.ZipFile(wheel).namelist()
+          assert "localcontextrouter/_bin/lcr-ocr" in names, names
+          print("ok:", wheel)
+          PY
       - uses: actions/upload-artifact@v4
         with:
           name: dist

From f9b4540e3decadd792072ce35b97d706e7ec306c Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar
Date: Mon, 22 Jun 2026 20:55:59 -0400
Subject: [PATCH 5/6] docs: document bundled OCR for pip install

The macOS wheel ships the OCR binary, so OCR works without building it or
setting LCR_OCR_BIN.
---
 CHANGELOG.md |  5 ++---
 README.md    | 11 +++--------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 733d7e9..30ecd0b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,8 +30,7 @@ First release.
 ### Notes
 
 - macOS only; OCR uses the Apple Vision framework.
-- The `lcr-ocr` binary is built from the bundled Swift package and located via
-  `LCR_OCR_BIN`, `PATH`, or the in-repo build. Pages that need OCR require it;
-  text extraction, classification, and token estimation work without it.
+- The macOS wheel is a `universal2` platform wheel that bundles the `lcr-ocr`
+  binary, so OCR works out of the box. `LCR_OCR_BIN` overrides the bundled copy.
 
 [0.1.0]: https://github.com/sid732/LocalContextRouter/releases/tag/v0.1.0
diff --git a/README.md b/README.md
index 5d4f823..d64ee92 100644
--- a/README.md
+++ b/README.md
@@ -30,14 +30,9 @@ to vision when the page genuinely needs it.
 pip install localcontextrouter
 ```
 
-OCR uses an on-device Swift binary (`lcr-ocr`). Build it from the bundled package
-and point the library at it (text extraction, classification, and token estimation
-work without it; only pages that need OCR require it):
-
-```sh
-swift build -c release --package-path ocr
-export LCR_OCR_BIN="$PWD/ocr/.build/release/lcr-ocr"
-```
+The macOS wheel bundles the on-device OCR binary (`lcr-ocr`, a universal2 build),
+so OCR works out of the box — no extra setup. To override it (e.g. a locally built
+binary), set `LCR_OCR_BIN` to its path.
 
 ## Use
 

From 44fd3186266987979ecbc179aca647ca04f605d9 Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar
Date: Mon, 22 Jun 2026 21:02:09 -0400
Subject: [PATCH 6/6] fix(build): skip the OCR build for editable installs

Editable installs invoke the build hook with version 'editable'; some hosts
(e.g. CI without Xcode) have a non-functional swift shim that the which-guard
does not catch. Editable installs never need the bundled binary, so return early
for them.
---
 hatch_build.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hatch_build.py b/hatch_build.py
index 3ed84e4..2e8c9b5 100644
--- a/hatch_build.py
+++ b/hatch_build.py
@@ -24,6 +24,10 @@ class CustomBuildHook(BuildHookInterface):
     def initialize(self, version: str, build_data: dict[str, Any]) -> None:
         if self.target_name != "wheel":
             return
+        if version == "editable":
+            # Editable installs (pip install -e) don't need the bundled binary;
+            # skip the Swift build so dev installs work without a toolchain.
+            return
         if shutil.which("swift") is None:
             # No Swift toolchain (e.g. building the sdist on a non-macOS host):
             # produce a pure wheel; OCR then relies on LCR_OCR_BIN or PATH.