Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .claude/skills/local-context-router/SKILL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
---
name: local-context-router
description: >-
Preflight a PDF, scan, or screenshot locally before sending it to the model.
Extracts the embedded text layer for free, OCRs image-only pages on-device
with Apple Vision, and flags only genuinely visual pages (tables, charts,
diagrams) for the vision model — cutting vision-token cost. Use whenever the
user shares a PDF or image to read, summarize, or extract from.
---

# Local Context Router

Multimodal models read a PDF by extracting its text *and* rendering every page
to an image, billing for both. For text-heavy pages that is a 2–10× token tax
for no added signal. This skill spends cheap local compute first and only pays
for vision when a page's meaning actually lives in its pixels.

## When to use

Use this **before** attaching a PDF, scan, or screenshot to the conversation —
whenever the user wants you to read, summarize, or extract from a document.

## How to run

Run the preflight script on the file. It picks the cheapest faithful source per
page and prints the result as JSON:

```sh
python "${CLAUDE_SKILL_DIR}/scripts/preflight.py" <path-to-document> --json --vision-dir "${CLAUDE_SKILL_DIR}/.cache"
```

- `<path-to-document>` is the PDF or image to analyze.
- `--vision-dir` is where rendered images of visual pages are written. It is
optional, but without it no images are rendered and every page's `image` is
`null`.

## How to use the result

The JSON has a `pages` array and a `tokens_saved` total, alongside the
document `path` and its `page_count`. For each page:

- **`source: "text"`** — use the page's `text` directly. Do **not** attach the
image; it adds cost without information.
- **`source: "ocr"`** — the page was image-only and has been OCR'd on-device;
use the returned `text`.
- **`source: "vision"`** — the page is a table, chart, or diagram whose meaning
is visual. Attach the rendered image at `image` to the conversation so the
vision model can read it. The `text` is a rough fallback only.

Assemble the per-page text in order for the parts you can read as text, and
attach images only for the `vision` pages. Mention `tokens_saved` if the user
cares about cost; it is always `0` for a bare image, which is only OCR'd.

## Notes

- Everything runs locally and offline; no document leaves the machine during
preflight.
- Requires macOS (on-device OCR uses Apple Vision) and the `localcontextrouter`
package importable by the Python interpreter.
135 changes: 135 additions & 0 deletions .claude/skills/local-context-router/scripts/preflight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#!/usr/bin/env python3
"""Preflight a document and report the cheapest faithful source for each page.

PDFs run through the full router (text / OCR / vision); a bare image is OCR'd.
Output is human-readable by default, or JSON with ``--json``.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

# Lower-case file suffixes (dot included) that main() treats as a bare image.
_IMAGE_SUFFIXES = {
    ".bmp",
    ".gif",
    ".heic",
    ".jpeg",
    ".jpg",
    ".png",
    ".tif",
    ".tiff",
}


def _ensure_importable() -> None:
    """Make ``localcontextrouter`` importable, falling back to the repo source tree.

    If the package cannot be imported, the ``src`` directory four levels above
    this script's directory is prepended to ``sys.path`` when it exists. When
    the script is not nested deeply enough for that directory to exist, nothing
    is changed and the later import reports the missing package itself.
    """
    try:
        import localcontextrouter  # noqa: F401
    except ModuleNotFoundError:
        ancestors = Path(__file__).resolve().parents
        # parents[4] raises IndexError for a shallow install location; treat
        # that the same as "no repo checkout to fall back to".
        if len(ancestors) <= 4:
            return
        repo_src = ancestors[4] / "src"
        if repo_src.is_dir():
            sys.path.insert(0, str(repo_src))


def _preflight_pdf(path: Path, vision_dir: Path | None) -> dict[str, object]:
    """Route every page of a PDF and describe the source chosen for each one.

    Pages routed to ``Source.VISION`` are rendered to
    ``<vision_dir>/<stem>-page-<n>.png`` (``n`` is ``page.index + 1``) when
    *vision_dir* is given; all other pages report ``image`` as ``None``.

    Returns a report dict with ``path``, ``page_count``, ``tokens_saved`` and
    a ``pages`` list holding one dict per routed page.
    """
    from localcontextrouter import Pdf, Source, route_pdf

    routed = route_pdf(path)
    entries: list[dict[str, object]] = []
    # Opened on the first vision page only, then reused for later ones.
    renderer: Pdf | None = None

    try:
        for routed_page in routed.pages:
            attachment: str | None = None
            if routed_page.source is Source.VISION and vision_dir is not None:
                vision_dir.mkdir(parents=True, exist_ok=True)
                if renderer is None:
                    renderer = Pdf(path)
                page_number = routed_page.index + 1
                target = vision_dir / f"{path.stem}-page-{page_number}.png"
                png_bytes = renderer.render_page_png(routed_page.index)
                target.write_bytes(png_bytes)
                attachment = str(target)

            token_counts = routed_page.tokens
            entries.append(
                dict(
                    index=routed_page.index,
                    source=routed_page.source.value,
                    text=routed_page.text,
                    text_tokens=token_counts.text_tokens,
                    image_tokens=token_counts.image_tokens,
                    image=attachment,
                )
            )
    finally:
        # Close the rendering handle even when rendering or writing failed.
        if renderer is not None:
            renderer.close()

    report: dict[str, object] = {
        "path": str(path),
        "page_count": len(routed.pages),
        "tokens_saved": routed.tokens_saved,
        "pages": entries,
    }
    return report


def _preflight_image(path: Path) -> dict[str, object]:
    """Build a single-page report for a bare image from its OCR output.

    The text of every line returned by ``run_ocr`` is joined with newlines.
    The report always has one page with source ``"ocr"``, no image tokens or
    image path, and a ``tokens_saved`` of 0.
    """
    from localcontextrouter import estimate_text_tokens
    from localcontextrouter.ocr import run_ocr

    recognized_lines = [line.text for line in run_ocr(path)]
    text = "\n".join(recognized_lines)

    only_page: dict[str, object] = {
        "index": 0,
        "source": "ocr",
        "text": text,
        "text_tokens": estimate_text_tokens(text),
        "image_tokens": None,
        "image": None,
    }
    return {
        "path": str(path),
        "page_count": 1,
        "tokens_saved": 0,
        "pages": [only_page],
    }


def _print_human(report: dict[str, object]) -> None:
    """Print a plain-text summary of *report* to stdout.

    Writes a two-line document header, then one stanza per page: the page
    number (``index + 1``) with its source, the image path to attach when the
    page has one, and at most the first 500 characters of its stripped text.
    Each stanza ends with a blank line.
    """
    pages = report["pages"]
    assert isinstance(pages, list)

    preview_limit = 500
    print(f"Document: {report['path']} ({report['page_count']} pages)")
    print(f"Tokens saved vs sending every page as an image: {report['tokens_saved']}\n")

    for entry in pages:
        label = f"Page {entry['index'] + 1} [{entry['source']}]"
        print(f"{label} -> attach image: {entry['image']}" if entry["image"] else label)

        body = str(entry["text"]).strip()
        if body:
            # Long pages are cut to a preview so the summary stays scannable.
            print(body if len(body) <= preview_limit else body[:preview_limit] + "...")
        print()


def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(description="Preflight a document for an LLM.")
parser.add_argument("path", help="PDF or image to analyze")
parser.add_argument("--json", action="store_true", help="emit JSON instead of text")
parser.add_argument("--vision-dir", help="directory for rendered images of visual pages")
args = parser.parse_args(argv)

path = Path(args.path)
if not path.exists():
parser.error(f"no such file: {path}")

_ensure_importable()
vision_dir = Path(args.vision_dir) if args.vision_dir else None

if path.suffix.lower() == ".pdf":
report = _preflight_pdf(path, vision_dir)
elif path.suffix.lower() in _IMAGE_SUFFIXES:
report = _preflight_image(path)
else:
parser.error(f"unsupported file type: {path.suffix or '(none)'}")

if args.json:
print(json.dumps(report, indent=2))
else:
_print_human(report)
return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
65 changes: 65 additions & 0 deletions tests/test_skill.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Tests for the local-context-router Agent Skill."""

import json
import subprocess
import sys
from collections.abc import Callable
from pathlib import Path

# parents[1] is the directory that contains this file's own directory; the
# skill lives under it at .claude/skills/local-context-router.
SKILL_DIR = Path(__file__).resolve().parents[1].joinpath(
    ".claude", "skills", "local-context-router"
)
SKILL_MD = SKILL_DIR.joinpath("SKILL.md")
PREFLIGHT = SKILL_DIR.joinpath("scripts", "preflight.py")


def _frontmatter(markdown: str) -> str:
    """Return the YAML frontmatter of *markdown* without its ``---`` fences.

    The document must start with a ``---`` line. The frontmatter ends at the
    next line that begins with ``---``, and an empty frontmatter block yields
    an empty string.

    Raises:
        AssertionError: if the opening fence or the closing fence is missing.
    """
    opener = "---\n"
    assert markdown.startswith(opener), "SKILL.md must open with YAML frontmatter"
    # Begin the search on the opener's own trailing newline so that an empty
    # block ("---\n---") is found instead of being skipped over.
    end = markdown.find("\n---", len(opener) - 1)
    assert end != -1, "SKILL.md frontmatter is missing its closing '---'"
    return markdown[len(opener):end]


def test_skill_files_exist() -> None:
    """Both the skill manifest and its preflight script exist as regular files."""
    manifest_present = SKILL_MD.is_file()
    script_present = PREFLIGHT.is_file()
    assert manifest_present
    assert script_present


def test_frontmatter_name_matches_directory() -> None:
    """The frontmatter ``name`` must equal the skill's directory name."""
    # SKILL.md contains non-ASCII punctuation, so decode it explicitly rather
    # than relying on the platform's default encoding.
    front = _frontmatter(SKILL_MD.read_text(encoding="utf-8"))
    entries = [line.strip() for line in front.splitlines()]
    # Compare whole lines so a longer name sharing this prefix cannot pass.
    assert f"name: {SKILL_DIR.name}" in entries
    assert SKILL_DIR.name == "local-context-router"


def test_frontmatter_description_is_present_and_bounded() -> None:
    """The frontmatter has a description under 1024 chars naming its key terms."""
    # SKILL.md contains non-ASCII punctuation, so decode it explicitly rather
    # than relying on the platform's default encoding.
    front = _frontmatter(SKILL_MD.read_text(encoding="utf-8"))
    _, key, description = front.partition("description:")
    # Fail with a readable assertion instead of an IndexError when the key
    # is absent.
    assert key, "frontmatter has no description"
    assert len(description) < 1024
    for keyword in ("PDF", "OCR", "vision"):
        assert keyword in description


def test_preflight_runs_on_text_pdf(make_text_pdf: Callable[..., Path]) -> None:
    """Run the script on a one-page text PDF and check the JSON it prints."""
    sentence = "Annual report. Revenue rose across every region this year. "
    pdf = make_text_pdf(sentence * 3)

    command = [sys.executable, str(PREFLIGHT), str(pdf), "--json"]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)

    report = json.loads(completed.stdout)
    assert report["page_count"] == 1
    first_page = report["pages"][0]
    assert first_page["source"] == "text"
    assert "revenue" in first_page["text"].lower()
    assert isinstance(report["tokens_saved"], int)


def test_preflight_rejects_unsupported_type(tmp_path: Path) -> None:
    """A file with an unsupported suffix makes the script exit non-zero."""
    bad = tmp_path / "notes.txt"
    bad.write_text("hello")

    command = [sys.executable, str(PREFLIGHT), str(bad)]
    # check=False: a failing exit status is the outcome under test.
    completed = subprocess.run(command, capture_output=True, text=True, check=False)

    assert completed.returncode != 0
    assert "unsupported file type" in completed.stderr
Loading