diff --git a/.claude/skills/local-context-router/scripts/preflight.py b/.claude/skills/local-context-router/scripts/preflight.py index 2a2546f..f3ecc89 100644 --- a/.claude/skills/local-context-router/scripts/preflight.py +++ b/.claude/skills/local-context-router/scripts/preflight.py @@ -1,19 +1,17 @@ #!/usr/bin/env python3 """Preflight a document and report the cheapest faithful source for each page. -PDFs run through the full router (text / OCR / vision); a bare image is OCR'd. -Output is human-readable by default, or JSON with ``--json``. +Thin wrapper over ``localcontextrouter``'s report logic so the skill and the +installed CLI stay in lockstep. PDFs run through the full router (text / OCR / +vision); a bare image is OCR'd. """ from __future__ import annotations import argparse -import json import sys from pathlib import Path -_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".gif", ".heic"} - def _ensure_importable() -> None: """Make localcontextrouter importable, falling back to the repo source tree.""" @@ -25,84 +23,6 @@ def _ensure_importable() -> None: sys.path.insert(0, str(repo_src)) -def _preflight_pdf(path: Path, vision_dir: Path | None) -> dict[str, object]: - from localcontextrouter import Pdf, Source, route_pdf - - result = route_pdf(path) - pages: list[dict[str, object]] = [] - rendered: Pdf | None = None - try: - for page in result.pages: - image_path: str | None = None - if page.source is Source.VISION and vision_dir is not None: - vision_dir.mkdir(parents=True, exist_ok=True) - if rendered is None: - rendered = Pdf(path) - out = vision_dir / f"{path.stem}-page-{page.index + 1}.png" - out.write_bytes(rendered.render_page_png(page.index)) - image_path = str(out) - pages.append( - { - "index": page.index, - "source": page.source.value, - "text": page.text, - "text_tokens": page.tokens.text_tokens, - "image_tokens": page.tokens.image_tokens, - "image": image_path, - } - ) - finally: - if rendered is not None: - rendered.close() 
- - return { - "path": str(path), - "page_count": len(result.pages), - "tokens_saved": result.tokens_saved, - "pages": pages, - } - - -def _preflight_image(path: Path) -> dict[str, object]: - from localcontextrouter import estimate_text_tokens - from localcontextrouter.ocr import run_ocr - - text = "\n".join(line.text for line in run_ocr(path)) - return { - "path": str(path), - "page_count": 1, - "tokens_saved": 0, - "pages": [ - { - "index": 0, - "source": "ocr", - "text": text, - "text_tokens": estimate_text_tokens(text), - "image_tokens": None, - "image": None, - } - ], - } - - -def _print_human(report: dict[str, object]) -> None: - pages = report["pages"] - assert isinstance(pages, list) - print(f"Document: {report['path']} ({report['page_count']} pages)") - print(f"Tokens saved vs sending every page as an image: {report['tokens_saved']}\n") - for page in pages: - header = f"Page {page['index'] + 1} [{page['source']}]" - if page["image"]: - print(f"{header} -> attach image: {page['image']}") - else: - print(header) - text = str(page["text"]).strip() - if text: - preview = text if len(text) <= 500 else text[:500] + "..." 
- print(preview) - print() - - def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(description="Preflight a document for an LLM.") parser.add_argument("path", help="PDF or image to analyze") @@ -115,19 +35,14 @@ def main(argv: list[str] | None = None) -> int: parser.error(f"no such file: {path}") _ensure_importable() - vision_dir = Path(args.vision_dir) if args.vision_dir else None + from localcontextrouter.cli import build_report, render_report - if path.suffix.lower() == ".pdf": - report = _preflight_pdf(path, vision_dir) - elif path.suffix.lower() in _IMAGE_SUFFIXES: - report = _preflight_image(path) - else: - parser.error(f"unsupported file type: {path.suffix or '(none)'}") + try: + report = build_report(path, args.vision_dir) + except ValueError as error: + parser.error(str(error)) - if args.json: - print(json.dumps(report, indent=2)) - else: - _print_human(report) + print(render_report(report, as_json=args.json)) return 0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..db4eaee --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,41 @@ +name: Release + +on: + push: + tags: ["v*"] + +jobs: + build: + name: Build distributions + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Build sdist and wheel + run: | + python -m pip install --upgrade build + python -m build + - name: Check metadata + run: | + python -m pip install --upgrade twine + twine check dist/* + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/* + + publish: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist + - uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..b02f87c --- 
/dev/null +++ b/CHANGELOG.md @@ -0,0 +1,34 @@ +# Changelog + +All notable changes to this project are documented here. The format follows +[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the project aims +to follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.1.0] + +First release. + +### Added + +- On-device OCR binary (`lcr-ocr`) wrapping Apple Vision: image to text with + bounding boxes and confidence, offline and without entitlements. +- Page classification (`digital` / `scanned` / `garbled`) from cheap text + signals. +- PDF text extraction and page rendering via pypdfium2. +- Vision-worthy detection (tables, charts, diagrams, figure-heavy layouts) from + image and vector-path signals. +- Token-cost estimation for Claude and OpenAI image inputs, plus a text + estimate, following each provider's documented tokenization. +- `route_pdf`, which routes each page to text, OCR, or vision and reports the + tokens saved versus sending every page as an image. +- `localctx` command-line interface. +- A `local-context-router` Agent Skill for Claude Code and Codex. + +### Notes + +- macOS only; OCR uses the Apple Vision framework. +- The `lcr-ocr` binary is built from the bundled Swift package and located via + `LCR_OCR_BIN`, `PATH`, or the in-repo build. Pages that need OCR require it; + text extraction, classification, and token estimation work without it. + +[0.1.0]: https://github.com/sid732/LocalContextRouter/releases/tag/v0.1.0 diff --git a/README.md b/README.md index cb7252c..5d4f823 100644 --- a/README.md +++ b/README.md @@ -24,14 +24,53 @@ text-dominant documents that is a 2–10× tax for zero added signal. LocalContextRouter spends cheap local compute to avoid that tax — and only escalates to vision when the page genuinely needs it. +## Install + +```sh +pip install localcontextrouter +``` + +OCR uses an on-device Swift binary (`lcr-ocr`). 
Build it from the bundled package +and point the library at it (text extraction, classification, and token estimation +work without it; only pages that need OCR require it): + +```sh +swift build -c release --package-path ocr +export LCR_OCR_BIN="$PWD/ocr/.build/release/lcr-ocr" +``` + ## Use -LocalContextRouter ships in two forms that share one core engine and one on-device OCR -binary. There is no server and no background process — everything runs on demand and -exits. +There is no server and no background process — everything runs on demand and exits. + +### Command line + +```sh +localctx report.pdf # human summary + tokens saved +localctx report.pdf --json # machine-readable +localctx report.pdf --vision-dir ./out # render visual pages to ./out +``` + +### Library + +```python +from localcontextrouter import route_pdf, Source + +result = route_pdf("report.pdf") +for page in result.pages: + if page.source is Source.VISION: + ... # send the rendered page image to the model + else: + ... # use page.text (extracted or OCR'd) + +print(result.text) # all text-routable pages joined +print(result.tokens_saved) # tokens avoided vs sending every page as an image +``` + +### Agent Skill -- **Python library + `localctx` CLI** — `pip install localcontextrouter` -- **Agent Skill** — a `SKILL.md` that runs the same preflight step inside Claude Code or Codex +The `local-context-router` skill (in `.claude/skills/`) runs the same preflight +inside Claude Code or Codex — copy it into your `.claude/skills/` (or `~/.claude/skills/`). 
## Requirements
 
diff --git a/pyproject.toml b/pyproject.toml
index 52fc952..0f9f75c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ license = "MIT"
 authors = [{ name = "Siddharth Nashikkar" }]
 keywords = ["llm", "ocr", "pdf", "vision", "tokens", "preprocessing"]
 classifiers = [
-    "Development Status :: 3 - Alpha",
+    "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
     "License :: OSI Approved :: MIT License",
     "Operating System :: MacOS",
@@ -26,6 +26,9 @@ dependencies = [
     "pillow>=10.1",
 ]
 
+[project.scripts]
+localctx = "localcontextrouter.cli:main"
+
 [project.urls]
 Homepage = "https://github.com/sid732/LocalContextRouter"
 Repository = "https://github.com/sid732/LocalContextRouter"
diff --git a/src/localcontextrouter/__init__.py b/src/localcontextrouter/__init__.py
index 767f375..4d555d2 100644
--- a/src/localcontextrouter/__init__.py
+++ b/src/localcontextrouter/__init__.py
@@ -23,7 +23,7 @@
     openai_image_tokens,
 )
 
-__version__ = "0.0.0"
+__version__ = "0.1.0"
 
 __all__ = [
     "BoundingBox",
diff --git a/src/localcontextrouter/cli.py b/src/localcontextrouter/cli.py
new file mode 100644
index 0000000..3744142
--- /dev/null
+++ b/src/localcontextrouter/cli.py
@@ -0,0 +1,150 @@
+"""``localctx`` — route a document and report the cheapest faithful source per page."""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+from . import __version__
+from .models import Source
+from .ocr import run_ocr
+from .pdf import Pdf
+from .router import route_pdf
+from .tokens import estimate_text_tokens
+
+IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".gif", ".heic"}
+
+
+def build_report(path: str | Path, vision_dir: str | Path | None = None) -> dict[str, object]:
+    """Route a PDF (or OCR a bare image) and return a serializable report.
+
+    For PDFs whose visual pages should go to a vision model, the page images are
+    rendered into ``vision_dir`` (when given) and referenced in the report. An
+    empty ``vision_dir`` is treated as not given, so nothing is rendered.
+
+    Raises ``ValueError`` for unsupported file types.
+    """
+    path = Path(path)
+    suffix = path.suffix.lower()
+    if suffix == ".pdf":
+        # Truthiness rather than ``is not None``: ``Path("")`` is ".", which would
+        # silently render page images into the current directory.
+        return _report_pdf(path, Path(vision_dir) if vision_dir else None)
+    if suffix in IMAGE_SUFFIXES:
+        return _report_image(path)
+    raise ValueError(f"unsupported file type: {path.suffix or '(none)'}")
+
+
+def _report_pdf(path: Path, vision_dir: Path | None) -> dict[str, object]:
+    """Route each page of ``path``; render vision pages into ``vision_dir`` if set."""
+    result = route_pdf(path)
+    pages: list[dict[str, object]] = []
+    rendered: Pdf | None = None  # opened lazily, on the first page that is rendered
+    try:
+        for page in result.pages:
+            image_path: str | None = None
+            if page.source is Source.VISION and vision_dir is not None:
+                if rendered is None:
+                    # One-time setup on the first vision page, not once per page.
+                    vision_dir.mkdir(parents=True, exist_ok=True)
+                    rendered = Pdf(path)
+                out = vision_dir / f"{path.stem}-page-{page.index + 1}.png"
+                out.write_bytes(rendered.render_page_png(page.index))
+                image_path = str(out)
+            pages.append(
+                {
+                    "index": page.index,
+                    "source": page.source.value,
+                    "text": page.text,
+                    "text_tokens": page.tokens.text_tokens,
+                    "image_tokens": page.tokens.image_tokens,
+                    "image": image_path,
+                }
+            )
+    finally:
+        if rendered is not None:
+            rendered.close()
+
+    return {
+        "path": str(path),
+        "page_count": len(result.pages),
+        "tokens_saved": result.tokens_saved,
+        "pages": pages,
+    }
+
+
+def _report_image(path: Path) -> dict[str, object]:
+    """OCR a single image and report it as a one-page, OCR-sourced document."""
+    text = "\n".join(line.text for line in run_ocr(path))
+    return {
+        "path": str(path),
+        "page_count": 1,
+        "tokens_saved": 0,
+        "pages": [
+            {
+                "index": 0,
+                "source": "ocr",
+                "text": text,
+                "text_tokens": estimate_text_tokens(text),
+                "image_tokens": None,
+                "image": None,
+            }
+        ],
+    }
+
+
+def render_report(report: dict[str, object], *, as_json: bool) -> str:
+    """Render a report from :func:`build_report` as JSON or a human summary."""
+    if as_json:
+        return json.dumps(report, indent=2)
+
+    pages = report["pages"]
+    assert isinstance(pages, list)  # narrows ``object`` for the type checker
+    lines = [
+        f"Document: {report['path']} ({report['page_count']} pages)",
+        f"Tokens saved vs sending every page as an image: {report['tokens_saved']}",
+        "",
+    ]
+    for page in pages:
+        header = f"Page {page['index'] + 1} [{page['source']}]"
+        if page["image"]:
+            header += f" -> attach image: {page['image']}"
+        lines.append(header)
+        text = str(page["text"]).strip()
+        if text:
+            # Preview only: page text is capped at 500 characters.
+            lines.append(text if len(text) <= 500 else text[:500] + "...")
+        lines.append("")
+    return "\n".join(lines).rstrip()
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run the ``localctx`` CLI; return 0 on success, exit via argparse on bad input."""
+    parser = argparse.ArgumentParser(
+        prog="localctx",
+        description="Route a PDF or image to the cheapest faithful source for an LLM.",
+    )
+    parser.add_argument("path", help="PDF or image to analyze")
+    parser.add_argument("--json", action="store_true", help="emit JSON instead of a summary")
+    parser.add_argument("--vision-dir", help="directory for rendered images of visual pages")
+    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
+    args = parser.parse_args(argv)
+
+    path = Path(args.path)
+    # ``is_file`` rather than ``exists``: a directory (even one named ``x.pdf``)
+    # is rejected here instead of being handed to the router.
+    if not path.is_file():
+        parser.error(f"no such file: {path}")
+
+    try:
+        report = build_report(path, args.vision_dir)
+    except ValueError as error:
+        parser.error(str(error))
+
+    print(render_report(report, as_json=args.json))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..4af8985
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,61 @@
+"""Tests for the localctx command-line interface."""
+
+import json
+from collections.abc import Callable
+from pathlib import Path
+
+import pytest
+
+from localcontextrouter import __version__
+from localcontextrouter.cli import build_report, main, render_report
+
+PROSE = "Annual report. Revenue rose across every region this year. 
" * 3 + + +def test_build_report_for_text_pdf(make_text_pdf: Callable[..., Path]) -> None: + report = build_report(make_text_pdf(PROSE)) + assert report["page_count"] == 1 + pages = report["pages"] + assert isinstance(pages, list) + assert pages[0]["source"] == "text" + assert isinstance(report["tokens_saved"], int) + + +def test_build_report_rejects_unsupported_type(tmp_path: Path) -> None: + bad = tmp_path / "notes.txt" + bad.write_text("hello") + with pytest.raises(ValueError, match="unsupported file type"): + build_report(bad) + + +def test_render_report_json_roundtrips(make_text_pdf: Callable[..., Path]) -> None: + report = build_report(make_text_pdf(PROSE)) + assert json.loads(render_report(report, as_json=True)) == report + + +def test_render_report_human_mentions_savings(make_text_pdf: Callable[..., Path]) -> None: + text = render_report(build_report(make_text_pdf(PROSE)), as_json=False) + assert "Tokens saved" in text + assert "Page 1 [text]" in text + + +def test_main_routes_and_prints( + make_text_pdf: Callable[..., Path], capsys: pytest.CaptureFixture[str] +) -> None: + assert main([str(make_text_pdf(PROSE)), "--json"]) == 0 + report = json.loads(capsys.readouterr().out) + assert report["pages"][0]["source"] == "text" + + +def test_main_missing_file_errors(capsys: pytest.CaptureFixture[str]) -> None: + with pytest.raises(SystemExit) as exc: + main(["/no/such/file.pdf"]) + assert exc.value.code != 0 + assert "no such file" in capsys.readouterr().err + + +def test_main_version(capsys: pytest.CaptureFixture[str]) -> None: + with pytest.raises(SystemExit) as exc: + main(["--version"]) + assert exc.value.code == 0 + assert __version__ in capsys.readouterr().out