diff --git a/.claude/skills/local-context-router/SKILL.md b/.claude/skills/local-context-router/SKILL.md new file mode 100644 index 0000000..e7d47b8 --- /dev/null +++ b/.claude/skills/local-context-router/SKILL.md @@ -0,0 +1,56 @@ +--- +name: local-context-router +description: >- + Preflight a PDF, scan, or screenshot locally before sending it to the model. + Extracts the embedded text layer for free, OCRs image-only pages on-device + with Apple Vision, and flags only genuinely visual pages (tables, charts, + diagrams) for the vision model — cutting vision-token cost. Use whenever the + user shares a PDF or image to read, summarize, or extract from. +--- + +# Local Context Router + +Multimodal models read a PDF by extracting its text *and* rendering every page +to an image, billing for both. For text-heavy pages that is a 2–10× token tax +for no added signal. This skill spends cheap local compute first and only pays +for vision when a page's meaning actually lives in its pixels. + +## When to use + +Use this **before** attaching a PDF, scan, or screenshot to the conversation — +whenever the user wants you to read, summarize, or extract from a document. + +## How to run + +Run the preflight script on the file. It picks the cheapest faithful source per +page and prints the result as JSON: + +```sh +python "${CLAUDE_SKILL_DIR}/scripts/preflight.py" FILE --json --vision-dir "${CLAUDE_SKILL_DIR}/.cache" +``` + +- `FILE` is the PDF or image to analyze. +- `--vision-dir` is where rendered images of visual pages are written. + +## How to use the result + +The JSON has a `pages` array and a `tokens_saved` total. For each page: + +- **`source: "text"`** — use the page's `text` directly. Do **not** attach the + image; it adds cost without information. +- **`source: "ocr"`** — the page was image-only and has been OCR'd on-device; + use the returned `text`. +- **`source: "vision"`** — the page is a table, chart, or diagram whose meaning + is visual. 
Attach the rendered image at `image` to the conversation so the + vision model can read it. The `text` is a rough fallback only. + +Assemble the per-page text in order for the parts you can read as text, and +attach images only for the `vision` pages. Mention `tokens_saved` if the user +cares about cost. + +## Notes + +- Everything runs locally and offline; no document leaves the machine during + preflight. +- Requires macOS (on-device OCR uses Apple Vision) and the `localcontextrouter` + package importable by the Python interpreter. diff --git a/.claude/skills/local-context-router/scripts/preflight.py b/.claude/skills/local-context-router/scripts/preflight.py new file mode 100644 index 0000000..2a2546f --- /dev/null +++ b/.claude/skills/local-context-router/scripts/preflight.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +"""Preflight a document and report the cheapest faithful source for each page. + +PDFs run through the full router (text / OCR / vision); a bare image is OCR'd. +Output is human-readable by default, or JSON with ``--json``. 
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

# Lower-case file suffixes that are handled as a single bare image and OCR'd.
_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".gif", ".heic"}

# Number of directory levels between this script and the repository root
# (<repo>/.claude/skills/local-context-router/scripts/preflight.py).
_REPO_ROOT_DEPTH = 4


def _ensure_importable() -> None:
    """Make localcontextrouter importable, falling back to the repo source tree.

    If the package cannot be imported, ``<repo>/src`` is put at the front of
    ``sys.path`` when that directory exists. Nothing is raised here when the
    fallback is unavailable; the later import reports the missing package.
    """
    try:
        import localcontextrouter  # noqa: F401
    except ModuleNotFoundError:
        ancestors = Path(__file__).resolve().parents
        # A copy of this script run from a shallow path has fewer ancestors
        # than the in-repo layout; skip the fallback instead of raising
        # IndexError.
        if len(ancestors) <= _REPO_ROOT_DEPTH:
            return
        repo_src = ancestors[_REPO_ROOT_DEPTH] / "src"
        if repo_src.is_dir():
            sys.path.insert(0, str(repo_src))


def _preflight_pdf(path: Path, vision_dir: Path | None) -> dict[str, object]:
    """Route every page of a PDF and build a JSON-serialisable report.

    Args:
        path: PDF file to analyze.
        vision_dir: Directory that receives a PNG render of each page routed
            to vision, or ``None`` to skip rendering.

    Returns:
        A dict with ``path``, ``page_count``, ``tokens_saved`` and ``pages``.
        Each page entry carries ``index``, ``source``, ``text``,
        ``text_tokens``, ``image_tokens`` and ``image`` (the rendered PNG
        path, or ``None`` when nothing was rendered for that page).
    """
    from localcontextrouter import Pdf, Source, route_pdf

    result = route_pdf(path)
    pages: list[dict[str, object]] = []
    rendered: Pdf | None = None
    try:
        for page in result.pages:
            image_path: str | None = None
            if page.source is Source.VISION and vision_dir is not None:
                if rendered is None:
                    # First vision page: create the output directory and open
                    # the PDF once, then reuse both for later pages.
                    vision_dir.mkdir(parents=True, exist_ok=True)
                    rendered = Pdf(path)
                out = vision_dir / f"{path.stem}-page-{page.index + 1}.png"
                out.write_bytes(rendered.render_page_png(page.index))
                image_path = str(out)
            pages.append(
                {
                    "index": page.index,
                    "source": page.source.value,
                    "text": page.text,
                    "text_tokens": page.tokens.text_tokens,
                    "image_tokens": page.tokens.image_tokens,
                    "image": image_path,
                }
            )
    finally:
        if rendered is not None:
            rendered.close()

    return {
        "path": str(path),
        "page_count": len(result.pages),
        "tokens_saved": result.tokens_saved,
        "pages": pages,
    }


def _preflight_image(path: Path) -> dict[str, object]:
    """OCR a single image and wrap the text in a one-page report.

    The report has the same keys as the PDF report. The only page has
    ``source`` set to ``"ocr"``, ``tokens_saved`` is reported as 0, and both
    ``image_tokens`` and ``image`` are ``None``.
    """
    from localcontextrouter import estimate_text_tokens
    from localcontextrouter.ocr import run_ocr

    text = "\n".join(line.text for line in run_ocr(path))
    return {
        "path": str(path),
        "page_count": 1,
        "tokens_saved": 0,
        "pages": [
            {
                "index": 0,
                "source": "ocr",
                "text": text,
                "text_tokens": estimate_text_tokens(text),
                "image_tokens": None,
                "image": None,
            }
        ],
    }


def _print_human(report: dict[str, object]) -> None:
    """Print a report as plain text: a summary, then one section per page.

    Page numbers are shown 1-based. A page with a rendered image shows the
    path to attach. Page text longer than 500 characters is truncated with a
    trailing ``...``.
    """
    pages = report["pages"]
    assert isinstance(pages, list)
    page_count = report["page_count"]
    noun = "page" if page_count == 1 else "pages"
    print(f"Document: {report['path']} ({page_count} {noun})")
    print(f"Tokens saved vs sending every page as an image: {report['tokens_saved']}\n")
    for page in pages:
        header = f"Page {page['index'] + 1} [{page['source']}]"
        if page["image"]:
            print(f"{header} -> attach image: {page['image']}")
        else:
            print(header)
        text = str(page["text"]).strip()
        if text:
            preview = text if len(text) <= 500 else text[:500] + "..."
            print(preview)
        print()


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 after the report has been printed.

    Raises:
        SystemExit: Raised by ``parser.error`` when the path is not an
            existing regular file or has an unsupported suffix.
    """
    parser = argparse.ArgumentParser(description="Preflight a document for an LLM.")
    parser.add_argument("path", help="PDF or image to analyze")
    parser.add_argument("--json", action="store_true", help="emit JSON instead of text")
    parser.add_argument("--vision-dir", help="directory for rendered images of visual pages")
    args = parser.parse_args(argv)

    path = Path(args.path)
    # is_file() rather than exists(): a directory that happens to be named
    # "something.pdf" must be rejected here, not crash inside the router.
    if not path.is_file():
        parser.error(f"no such file: {path}")

    _ensure_importable()
    vision_dir = Path(args.vision_dir) if args.vision_dir else None

    suffix = path.suffix.lower()
    if suffix == ".pdf":
        report = _preflight_pdf(path, vision_dir)
    elif suffix in _IMAGE_SUFFIXES:
        report = _preflight_image(path)
    else:
        parser.error(f"unsupported file type: {path.suffix or '(none)'}")

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        _print_human(report)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
"""Tests for the local-context-router Agent Skill."""

import json
import subprocess
import sys
from collections.abc import Callable
from pathlib import Path

SKILL_DIR = Path(__file__).resolve().parents[1] / ".claude" / "skills" / "local-context-router"
SKILL_MD = SKILL_DIR / "SKILL.md"
PREFLIGHT = SKILL_DIR / "scripts" / "preflight.py"


def _read_skill_md() -> str:
    """Read SKILL.md as UTF-8.

    The file contains non-ASCII punctuation, so the encoding is given
    explicitly rather than taken from the locale.
    """
    return SKILL_MD.read_text(encoding="utf-8")


def _frontmatter(markdown: str) -> str:
    """Return the text between the opening and closing ``---`` fences."""
    assert markdown.startswith("---\n"), "SKILL.md must open with YAML frontmatter"
    # Search from offset 3 (the newline ending the opening fence) so that an
    # empty frontmatter, where the closing fence follows immediately, is found.
    end = markdown.find("\n---", 3)
    assert end != -1, "SKILL.md frontmatter has no closing '---' fence"
    return markdown[4:end]


def test_skill_files_exist() -> None:
    """SKILL.md and the preflight script are both present."""
    assert SKILL_MD.is_file()
    assert PREFLIGHT.is_file()


def test_frontmatter_name_matches_directory() -> None:
    """The frontmatter ``name`` equals the skill directory name."""
    front_lines = [line.rstrip() for line in _frontmatter(_read_skill_md()).splitlines()]
    assert SKILL_DIR.name == "local-context-router"
    # Exact line match: a substring check would also accept a longer name.
    assert f"name: {SKILL_DIR.name}" in front_lines


def test_frontmatter_description_is_present_and_bounded() -> None:
    """The description exists, stays under 1024 characters, and has key terms."""
    front = _frontmatter(_read_skill_md())
    assert "description:" in front, "frontmatter must declare a description"
    description = front.split("description:", 1)[1]
    assert len(description) < 1024
    for keyword in ("PDF", "OCR", "vision"):
        assert keyword in description


def test_preflight_runs_on_text_pdf(make_text_pdf: Callable[..., Path]) -> None:
    """A text PDF is reported as one page with ``source`` set to ``"text"``."""
    pdf = make_text_pdf("Annual report. Revenue rose across every region this year. " * 3)
    completed = subprocess.run(
        [sys.executable, str(PREFLIGHT), str(pdf), "--json"],
        capture_output=True,
        text=True,
        check=True,
    )
    report = json.loads(completed.stdout)
    assert report["page_count"] == 1
    page = report["pages"][0]
    assert page["source"] == "text"
    assert "revenue" in page["text"].lower()
    assert isinstance(report["tokens_saved"], int)


def test_preflight_rejects_unsupported_type(tmp_path: Path) -> None:
    """An unsupported suffix exits non-zero with an explanatory message."""
    bad = tmp_path / "notes.txt"
    bad.write_text("hello")
    completed = subprocess.run(
        [sys.executable, str(PREFLIGHT), str(bad)],
        capture_output=True,
        text=True,
        check=False,
    )
    assert completed.returncode != 0
    assert "unsupported file type" in completed.stderr