sid732 · sid732 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/.claude/skills/local-context-router/SKILL.md b/.claude/skills/local-context-router/SKILL.md
@@ -0,0 +1,56 @@
+---
+name: local-context-router
+description: >-
+  Preflight a PDF, scan, or screenshot locally before sending it to the model.
+  Extracts the embedded text layer for free, OCRs image-only pages on-device
+  with Apple Vision, and flags only genuinely visual pages (tables, charts,
+  diagrams) for the vision model — cutting vision-token cost. Use whenever the
+  user shares a PDF or image to read, summarize, or extract from.
+---
+
+# Local Context Router
+
+Multimodal models read a PDF by extracting its text *and* rendering every page
+to an image, billing for both. For text-heavy pages that is a 2–10× token tax
+for no added signal. This skill spends cheap local compute first and only pays
+for vision when a page's meaning actually lives in its pixels.
+
+## When to use
+
+Use this **before** attaching a PDF, scan, or screenshot to the conversation —
+whenever the user wants you to read, summarize, or extract from a document.
+
+## How to run
+
+Run the preflight script on the file. It picks the cheapest faithful source per
+page and prints the result as JSON:
+
+```sh
+python3 "${CLAUDE_SKILL_DIR}/scripts/preflight.py" <path-to-document> --json --vision-dir "${CLAUDE_SKILL_DIR}/.cache"
+```
+
+- `<path-to-document>` is the PDF or image to analyze.
+- `--vision-dir` is where rendered images of visual pages are written.
+
+## How to use the result
+
+The JSON has a `pages` array and a `tokens_saved` total. For each page:
+
+- **`source: "text"`** — use the page's `text` directly. Do **not** attach the
+  image; it adds cost without information.
+- **`source: "ocr"`** — the page (or standalone image) had no text layer and
+  was OCR'd on-device; use the returned `text`.
+- **`source: "vision"`** — the page is a table, chart, or diagram whose meaning
+  is visual. Attach the rendered image at `image` to the conversation so the
+  vision model can read it. The `text` is a rough fallback only.
+
+Assemble the `text` of the `text` and `ocr` pages in page order (`index` is
+zero-based), and attach images only for the `vision` pages. Mention
+`tokens_saved` if the user cares about cost.
+
+## Notes
+
+- Everything runs locally and offline; no document leaves the machine during
+  preflight.
+- Requires macOS (on-device OCR uses Apple Vision) and the `localcontextrouter`
+  package importable by the Python interpreter.
diff --git a/.claude/skills/local-context-router/scripts/preflight.py b/.claude/skills/local-context-router/scripts/preflight.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+"""Preflight a document and report the cheapest faithful source for each page.
+
+PDFs run through the full router (text / OCR / vision); a bare image is OCR'd.
+Output is human-readable by default, or JSON with ``--json``.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".gif", ".heic"}
+
+
+def _ensure_importable() -> None:
+    """Make localcontextrouter importable, falling back to the repo source tree."""
+    try:
+        import localcontextrouter  # noqa: F401
+    except ModuleNotFoundError:
+        ancestors = Path(__file__).resolve().parents  # [4] is the repo root in a checkout
+        if len(ancestors) > 4 and (ancestors[4] / "src").is_dir():  # too shallow: no fallback
+            sys.path.insert(0, str(ancestors[4] / "src"))
+
+
+def _preflight_pdf(path: Path, vision_dir: Path | None) -> dict[str, object]:
+    """Route each page of the PDF at *path*; render vision pages into *vision_dir*."""
+    from localcontextrouter import Pdf, Source, route_pdf
+
+    result = route_pdf(path)
+    pages: list[dict[str, object]] = []
+    rendered: Pdf | None = None  # opened on the first vision page, closed in finally
+    try:
+        for page in result.pages:
+            image_path: str | None = None  # stays None unless the page is rendered
+            if page.source is Source.VISION and vision_dir is not None:
+                if rendered is None:  # one-time setup for the first vision page
+                    vision_dir.mkdir(parents=True, exist_ok=True)
+                    rendered = Pdf(path)
+                out = vision_dir / f"{path.stem}-page-{page.index + 1}.png"  # 1-based name
+                out.write_bytes(rendered.render_page_png(page.index))
+                image_path = str(out)
+            pages.append(
+                {
+                    "index": page.index,
+                    "source": page.source.value,
+                    "text": page.text,
+                    "text_tokens": page.tokens.text_tokens,
+                    "image_tokens": page.tokens.image_tokens,
+                    "image": image_path,
+                }
+            )
+    finally:
+        if rendered is not None:
+            rendered.close()
+    return {
+        "path": str(path),
+        "page_count": len(result.pages),
+        "tokens_saved": result.tokens_saved,
+        "pages": pages,
+    }
+
+
+def _preflight_image(path: Path) -> dict[str, object]:
+    """OCR a standalone image and report it as a one-page ``ocr`` document."""
+    from localcontextrouter import estimate_text_tokens
+    from localcontextrouter.ocr import run_ocr
+
+    recognized = "\n".join(line.text for line in run_ocr(path))
+    only_page: dict[str, object] = {
+        "index": 0,
+        "source": "ocr",
+        "text": recognized,
+        "text_tokens": estimate_text_tokens(recognized),
+        "image_tokens": None,
+        "image": None,
+    }
+    return {
+        "path": str(path),
+        "page_count": 1,
+        "tokens_saved": 0,
+        "pages": [only_page],
+    }
+
+
+def _print_human(report: dict[str, object]) -> None:
+    """Print a document header, then a short summary of every page, to stdout."""
+    entries = report["pages"]
+    assert isinstance(entries, list)
+    print(f"Document: {report['path']} ({report['page_count']} pages)")
+    print(f"Tokens saved vs sending every page as an image: {report['tokens_saved']}\n")
+    for entry in entries:
+        title = f"Page {entry['index'] + 1} [{entry['source']}]"
+        if entry["image"]:
+            title = f"{title} -> attach image: {entry['image']}"
+        print(title)
+        # Show at most the first 500 characters of the page text as a preview.
+        body = str(entry["text"]).strip()
+        if body:
+            print(body if len(body) <= 500 else f"{body[:500]}...")
+        print()
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run the preflight CLI on *argv* (default: sys.argv[1:]) and return exit code 0."""
+    parser = argparse.ArgumentParser(description="Preflight a document for an LLM.")
+    parser.add_argument("path", help="PDF or image to analyze")
+    parser.add_argument("--json", action="store_true", help="emit JSON instead of text")
+    parser.add_argument("--vision-dir", help="directory for rendered images of visual pages")
+    args = parser.parse_args(argv)
+
+    path = Path(args.path)
+    if not path.is_file():  # is_file() also turns away directories, unlike exists()
+        parser.error(f"no such file: {path}")
+    suffix = path.suffix.lower()
+    if suffix != ".pdf" and suffix not in _IMAGE_SUFFIXES:  # reject before importing
+        parser.error(f"unsupported file type: {path.suffix or '(none)'}")
+
+    _ensure_importable()
+    if suffix == ".pdf":
+        report = _preflight_pdf(path, Path(args.vision_dir) if args.vision_dir else None)
+    else:
+        report = _preflight_image(path)
+
+    if args.json:
+        print(json.dumps(report, indent=2))
+    else:
+        _print_human(report)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/tests/test_skill.py b/tests/test_skill.py
@@ -0,0 +1,65 @@
+"""Tests for the local-context-router Agent Skill."""
+
+import json
+import subprocess
+import sys
+from collections.abc import Callable
+from pathlib import Path
+
+SKILL_DIR = Path(__file__).resolve().parents[1] / ".claude" / "skills" / "local-context-router"
+SKILL_MD = SKILL_DIR / "SKILL.md"
+PREFLIGHT = SKILL_DIR / "scripts" / "preflight.py"
+
+
+def _frontmatter(markdown: str) -> str:
+    """Return the text between the opening and closing ``---`` fences."""
+    assert markdown.startswith("---\n"), "SKILL.md must open with YAML frontmatter"
+    return markdown[4 : markdown.index("\n---", 4)]
+
+
+def test_skill_files_exist() -> None:
+    assert SKILL_MD.is_file()  # the skill manifest
+    assert PREFLIGHT.is_file()  # the script the subprocess tests below execute
+
+
+def test_frontmatter_name_matches_directory() -> None:
+    front = _frontmatter(SKILL_MD.read_text(encoding="utf-8"))
+    names = [ln.split(":", 1)[1].strip() for ln in front.splitlines() if ln.startswith("name:")]
+    assert names == [SKILL_DIR.name]  # exactly one name key, equal to the folder name
+
+
+def test_frontmatter_description_is_present_and_bounded() -> None:
+    """The description stays under 1024 characters and names its trigger terms."""
+    description = _frontmatter(SKILL_MD.read_text()).split("description:", 1)[1]
+    # Everything after the key counts toward the bound, folding marker included.
+    assert len(description) < 1024
+    assert all(term in description for term in ("PDF", "OCR", "vision"))
+
+
+def test_preflight_runs_on_text_pdf(make_text_pdf: Callable[..., Path]) -> None:
+    pdf = make_text_pdf("Annual report. Revenue rose across every region this year. " * 3)
+    completed = subprocess.run(
+        [sys.executable, str(PREFLIGHT), str(pdf), "--json"],  # same interpreter as pytest
+        capture_output=True,
+        text=True,
+        check=True,  # a non-zero exit fails the test via CalledProcessError
+    )
+    report = json.loads(completed.stdout)
+    assert report["page_count"] == 1
+    page = report["pages"][0]
+    assert page["source"] == "text"  # expects the router to pick the embedded text layer
+    assert "revenue" in page["text"].lower()
+    assert isinstance(report["tokens_saved"], int)
+
+
+def test_preflight_rejects_unsupported_type(tmp_path: Path) -> None:
+    bad = tmp_path / "notes.txt"  # .txt is neither .pdf nor a listed image suffix
+    bad.write_text("hello")
+    completed = subprocess.run(
+        [sys.executable, str(PREFLIGHT), str(bad)],
+        capture_output=True,
+        text=True,
+        check=False,  # failure is expected; inspect the exit status ourselves
+    )
+    assert completed.returncode != 0
+    assert "unsupported file type" in completed.stderr  # message passed to parser.error()