From 42f105fdd740371d24cb65a2dd6150735f1decdb Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar <siddharth.nashikkar@yahoo.com>
Date: Fri, 19 Jun 2026 13:03:58 -0400
Subject: [PATCH 1/6] feat(core): add layout features and token types

Add PageFeatures (image/path counts and coverage), TokenEstimate with a
saved property, the Source.VISION case, the tokens field on PageRoute, and
RouteResult.tokens_saved.
---
 src/localcontextrouter/models.py | 53 ++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/src/localcontextrouter/models.py b/src/localcontextrouter/models.py
index ad2aaf7..daa9dd2 100644
--- a/src/localcontextrouter/models.py
+++ b/src/localcontextrouter/models.py
@@ -65,7 +65,7 @@ class OcrLine:
 
 
 class Source(str, Enum):
-    """Where a page's final text came from."""
+    """Where a page's final content should come from."""
 
     TEXT = "text"
     """Extracted directly from the embedded text layer."""
@@ -73,15 +73,55 @@ class Source(str, Enum):
     OCR = "ocr"
     """Produced by on-device OCR after rendering the page."""
 
+    VISION = "vision"
+    """Send the page to a vision model — its meaning lives in the visuals."""
+
+
+@dataclass(frozen=True)
+class PageFeatures:
+    """Immutable layout signals for one page, derived from its content objects."""
+
+    width: float
+    """Page width in PDF points."""
+
+    height: float
+    """Page height in PDF points."""
+
+    image_count: int
+    """Number of raster image objects on the page."""
+
+    image_coverage: float  # summed image bounding-box area divided by page area
+    """Fraction of the page area covered by raster images, in 0...1."""
+
+    path_count: int
+    """Number of vector path objects (lines, curves, fills)."""
+
+    path_coverage: float  # summed path bounding-box area divided by page area
+    """Fraction of the page area covered by vector paths, in 0...1."""
+
+
+@dataclass(frozen=True)
+class TokenEstimate:
+    """Estimated token cost of one page sent as extracted text versus as an image."""
+
+    text_tokens: int  # estimated cost of sending the page's text
+    image_tokens: int  # estimated cost of sending the page as an image
+
+    @property
+    def saved(self) -> int:
+        """Tokens avoided by sending text instead of the page image (never negative)."""
+        return max(0, self.image_tokens - self.text_tokens)  # clamp: text may cost more
+
 
 @dataclass(frozen=True)
 class PageRoute:
-    """The routing outcome for one page: its classification, source, and text."""
+    """The routing outcome for one page: classification, source, text, and token estimate."""
 
     index: int
     classification: Classification
     source: Source
     text: str
+    tokens: TokenEstimate
 
 
 @dataclass(frozen=True)
@@ -94,3 +134,12 @@ class RouteResult:
     def text(self) -> str:
         """All page text joined in reading order."""
         return "\n\n".join(page.text for page in self.pages)
+
+    @property
+    def tokens_saved(self) -> int:
+        """Sum the per-page savings relative to sending every page as an image.
+
+        Vision pages go out as images and therefore contribute zero; text and
+        OCR pages each contribute their ``tokens.saved``.
+        """
+        return sum(0 if p.source is Source.VISION else p.tokens.saved for p in self.pages)

From a4191e62a008222ff93454296c6fb7df6d022be6 Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar <siddharth.nashikkar@yahoo.com>
Date: Fri, 19 Jun 2026 13:03:58 -0400
Subject: [PATCH 2/6] feat(core): extract page layout features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Pdf.page_features, which counts raster image and vector path objects and
their page coverage via pypdfium2 — the signals that flag charts, tables, and
diagrams without rendering.
---
 src/localcontextrouter/pdf.py | 36 ++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/localcontextrouter/pdf.py b/src/localcontextrouter/pdf.py
index 25f2486..cd2af72 100644
--- a/src/localcontextrouter/pdf.py
+++ b/src/localcontextrouter/pdf.py
@@ -12,9 +12,10 @@
 from pathlib import Path
 
 import pypdfium2 as pdfium
+import pypdfium2.raw as pdfium_c
 
 from .classify import classify_text
-from .models import Classification
+from .models import Classification, PageFeatures
 
 
 class Pdf:
@@ -49,6 +50,39 @@ def page_texts(self) -> Iterator[str]:
         for index in range(len(self)):
             yield self.page_text(index)
 
+    def page_features(self, index: int) -> PageFeatures:
+        """Summarize the page's image and vector-path content for routing.
+
+        Counts raster image and vector path objects and the page fraction their
+        bounding boxes cover (clamped to 1.0, since overlapping boxes can over-count).
+        Charts and diagrams are mostly vector paths, so path signals catch them.
+        """
+        page = self._doc[index]
+        try:
+            width, height = page.get_size()
+            page_area = width * height
+            image_count = path_count = 0
+            image_area = path_area = 0.0
+            for obj in page.get_objects():
+                left, bottom, right, top = obj.get_bounds()
+                area = abs((right - left) * (top - bottom))  # bounding box, not painted area
+                if obj.type == pdfium_c.FPDF_PAGEOBJ_IMAGE:
+                    image_count += 1
+                    image_area += area
+                elif obj.type == pdfium_c.FPDF_PAGEOBJ_PATH:
+                    path_count += 1
+                    path_area += area
+            return PageFeatures(
+                width=width,
+                height=height,
+                image_count=image_count,
+                image_coverage=min(1.0, image_area / page_area) if page_area else 0.0,
+                path_count=path_count,
+                path_coverage=min(1.0, path_area / page_area) if page_area else 0.0,
+            )
+        finally:
+            page.close()
+
     def render_page_png(self, index: int, scale: float = 2.0) -> bytes:
         """Render the page at ``index`` to PNG bytes.
 

From 17cbc286d4bcb2bea526630ad28f945377ae21f2 Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar <siddharth.nashikkar@yahoo.com>
Date: Fri, 19 Jun 2026 13:03:58 -0400
Subject: [PATCH 3/6] feat(core): detect vision-worthy pages

Add is_vision_worthy: route a page to a vision model when images cover much of
it, vectors cover a large area, or many vector paths suggest a table or chart.
---
 src/localcontextrouter/detect.py | 39 ++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 src/localcontextrouter/detect.py

diff --git a/src/localcontextrouter/detect.py b/src/localcontextrouter/detect.py
new file mode 100644
index 0000000..76014c9
--- /dev/null
+++ b/src/localcontextrouter/detect.py
@@ -0,0 +1,39 @@
+"""Decide whether a page should go to a vision model rather than as text.
+
+Some pages carry a perfectly good text layer yet still lose their meaning when
+flattened to text: tables, charts, diagrams, and figure-heavy layouts. Those are
+worth the vision-token cost. This module decides that from cheap layout features
+(:class:`~.models.PageFeatures`) — no rendering and no ML.
+"""
+
+from __future__ import annotations
+
+from .models import PageFeatures
+
+#: A page with at least this many vector paths is treated as a table or diagram.
+#: Charts and ruled tables emit many line/curve objects.
+MIN_VISION_PATHS = 25
+
+#: A page with at least this fraction covered by raster images is figure-heavy.
+MIN_VISION_IMAGE_COVERAGE = 0.40
+
+#: A page with at least this fraction covered by vector paths holds a large
+#: filled chart or diagram.
+MIN_VISION_PATH_COVERAGE = 0.30
+
+
+def is_vision_worthy(features: PageFeatures) -> tuple[bool, str]:
+    """Decide whether a page belongs with a vision model.
+
+    Returns ``(worthy, reason)``; the first threshold met supplies the reason.
+    """
+    if features.image_coverage >= MIN_VISION_IMAGE_COVERAGE:
+        share, floor = features.image_coverage, MIN_VISION_IMAGE_COVERAGE
+        return True, f"{share:.0%} image coverage (>= {floor:.0%}); figure-heavy"
+    if features.path_coverage >= MIN_VISION_PATH_COVERAGE:
+        share, floor = features.path_coverage, MIN_VISION_PATH_COVERAGE
+        return True, f"{share:.0%} vector coverage (>= {floor:.0%}); large chart or diagram"
+    if features.path_count >= MIN_VISION_PATHS:
+        count = features.path_count
+        return True, f"{count} vector paths (>= {MIN_VISION_PATHS}); table or diagram"
+    return False, "no dominant visual structure; text is faithful"

From 106356d1d16a4d1d73055e8b7e691d29a21c86fd Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar <siddharth.nashikkar@yahoo.com>
Date: Fri, 19 Jun 2026 13:03:58 -0400
Subject: [PATCH 4/6] feat(core): estimate image vs text token cost

Add token estimators following each provider's documented tokenization: Claude
28px patches with resolution caps, OpenAI tile counting, and a text estimate.
---
 src/localcontextrouter/tokens.py | 65 ++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 src/localcontextrouter/tokens.py

diff --git a/src/localcontextrouter/tokens.py b/src/localcontextrouter/tokens.py
new file mode 100644
index 0000000..b16b99e
--- /dev/null
+++ b/src/localcontextrouter/tokens.py
@@ -0,0 +1,65 @@
+"""Estimate the token cost of a page as extracted text versus as an image.
+
+The numbers are estimates that follow each provider's documented tokenization so
+the router can report the savings of routing a page to text instead of vision.
+
+- Claude tokenizes images in 28x28 pixel patches, downscaling so the long edge
+  fits a cap (1568 px / 1568 tokens for most models; 2576 px / 4784 tokens for
+  the high-resolution models).
+- OpenAI's tile models bill a flat 85 tokens at ``detail="low"``; at ``"high"``
+  they fit the image to 2048x2048, shrink the short side to at most 768, and
+  bill 85 + 170 per 512x512 tile.
+- Text is approximated at ~4 characters per token.
+"""
+
+from __future__ import annotations
+
+import math
+
+_PATCH = 28
+_CLAUDE_MAX_TOKENS = 1568
+_CLAUDE_MAX_EDGE = 1568
+_CLAUDE_HIRES_MAX_TOKENS = 4784
+_CLAUDE_HIRES_MAX_EDGE = 2576
+
+_CHARS_PER_TOKEN = 4
+
+
+def estimate_text_tokens(text: str) -> int:
+    """Approximate the token count of ``text`` at ~4 characters per token, rounded up."""
+    return (len(text) + _CHARS_PER_TOKEN - 1) // _CHARS_PER_TOKEN
+
+
+def _fit_long_edge(width: float, height: float, max_edge: int) -> tuple[float, float]:
+    """Shrink (never enlarge) a size so its longer side is at most ``max_edge``."""
+    longest = max(width, height)
+    if longest > max_edge:
+        return width * (max_edge / longest), height * (max_edge / longest)
+    return width, height
+
+
+def claude_image_tokens(width: float, height: float, *, high_res: bool = False) -> int:
+    """Estimate Claude's token charge for an image of ``width`` x ``height`` pixels."""
+    if width <= 0 or height <= 0:
+        return 0
+    # Each tier pairs a long-edge pixel limit with a ceiling on billed tokens.
+    hires = (_CLAUDE_HIRES_MAX_EDGE, _CLAUDE_HIRES_MAX_TOKENS)
+    max_edge, cap = hires if high_res else (_CLAUDE_MAX_EDGE, _CLAUDE_MAX_TOKENS)
+    cols, rows = (math.ceil(side / _PATCH) for side in _fit_long_edge(width, height, max_edge))
+    return min(cols * rows, cap)
+
+
+def openai_image_tokens(width: float, height: float, *, detail: str = "high") -> int:
+    """Estimate image tokens for OpenAI's tile-billed models (GPT-4o / GPT-4.1 family)."""
+    if detail == "low":
+        return 85
+    if width <= 0 or height <= 0:
+        return 0
+    # High detail: shrink into a 2048x2048 box, then bring the short side down to 768.
+    w, h = _fit_long_edge(width, height, 2048)
+    shortest = min(w, h)
+    if shortest > 768:
+        w, h = w * (768 / shortest), h * (768 / shortest)
+    # Billing is a flat 85 base plus 170 for every 512x512 tile needed to cover the image.
+    tile_count = math.ceil(w / 512) * math.ceil(h / 512)
+    return 85 + 170 * tile_count

From 925d1cb5371374b047665cf3cc4c45a36e71459d Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar <siddharth.nashikkar@yahoo.com>
Date: Fri, 19 Jun 2026 13:03:58 -0400
Subject: [PATCH 5/6] feat(core): route vision-worthy pages and report savings

route_pdf now sends visually-dominant pages to vision and attaches a token
estimate to every page, so RouteResult.tokens_saved shows the cost avoided.
---
 src/localcontextrouter/__init__.py | 14 ++++++++++++
 src/localcontextrouter/router.py   | 35 +++++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/src/localcontextrouter/__init__.py b/src/localcontextrouter/__init__.py
index e8a2f1e..767f375 100644
--- a/src/localcontextrouter/__init__.py
+++ b/src/localcontextrouter/__init__.py
@@ -1,19 +1,27 @@
 """LocalContextRouter — cheapest faithful path for documents bound for a multimodal LLM."""
 
 from .classify import classify_text, compute_signals
+from .detect import is_vision_worthy
 from .models import (
     BoundingBox,
     Classification,
     OcrLine,
     PageClass,
+    PageFeatures,
     PageRoute,
     PageSignals,
     RouteResult,
     Source,
+    TokenEstimate,
 )
 from .ocr import ocr_png_text, run_ocr
 from .pdf import Pdf, classify_pdf
 from .router import route_pdf
+from .tokens import (
+    claude_image_tokens,
+    estimate_text_tokens,
+    openai_image_tokens,
+)
 
 __version__ = "0.0.0"
 
@@ -22,15 +30,21 @@
     "Classification",
     "OcrLine",
     "PageClass",
+    "PageFeatures",
     "PageRoute",
     "PageSignals",
     "Pdf",
     "RouteResult",
     "Source",
+    "TokenEstimate",
+    "claude_image_tokens",
     "classify_pdf",
     "classify_text",
     "compute_signals",
+    "estimate_text_tokens",
+    "is_vision_worthy",
     "ocr_png_text",
+    "openai_image_tokens",
     "route_pdf",
     "run_ocr",
     "__version__",
diff --git a/src/localcontextrouter/router.py b/src/localcontextrouter/router.py
index f682934..d6a0fdf 100644
--- a/src/localcontextrouter/router.py
+++ b/src/localcontextrouter/router.py
@@ -1,7 +1,11 @@
-"""Route each PDF page to the cheapest faithful text source.
+"""Route each PDF page to the cheapest faithful source: text, OCR, or vision.
 
-Digital pages keep their extracted text; scanned or garbled pages are rendered
-and sent to OCR. Vision routing and token accounting are added in a later phase.
+- Digital pages keep their extracted text, unless their meaning lives in visuals
+  (tables, charts, diagrams) — those go to a vision model.
+- Scanned or garbled pages are rendered and sent to OCR.
+
+Every page carries a token estimate so the savings of avoiding the image path
+are visible.
 """
 
 from __future__ import annotations
@@ -9,21 +13,36 @@
 from pathlib import Path
 
 from .classify import classify_text
-from .models import PageClass, PageRoute, RouteResult, Source
+from .detect import is_vision_worthy
+from .models import PageClass, PageRoute, RouteResult, Source, TokenEstimate
 from .ocr import ocr_png_text
 from .pdf import Pdf
+from .tokens import claude_image_tokens, estimate_text_tokens
 
 
 def route_pdf(path: str | Path, *, render_scale: float = 2.0) -> RouteResult:
-    """Route every page of a PDF and return per-page text with its source."""
+    """Route every page of a PDF and return per-page content, source, and tokens."""
     pages: list[PageRoute] = []
     with Pdf(path) as pdf:
         for index in range(len(pdf)):
             text = pdf.page_text(index)
             classification = classify_text(text)
+            features = pdf.page_features(index)  # page size feeds the estimate on every route
+
             if classification.page_class is PageClass.DIGITAL:
-                pages.append(PageRoute(index, classification, Source.TEXT, text))
+                source = Source.VISION if is_vision_worthy(features)[0] else Source.TEXT
+                page_text = text
             else:
-                png = pdf.render_page_png(index, scale=render_scale)
-                pages.append(PageRoute(index, classification, Source.OCR, ocr_png_text(png)))
+                source = Source.OCR
+                page_text = ocr_png_text(pdf.render_page_png(index, scale=render_scale))
+
+            # text_tokens uses the text we would send (OCR output for non-digital pages) and
+            # image_tokens the page size at render_scale, so the reported savings are honest.
+            estimate = TokenEstimate(
+                text_tokens=estimate_text_tokens(page_text),
+                image_tokens=claude_image_tokens(
+                    features.width * render_scale, features.height * render_scale
+                ),
+            )
+            pages.append(PageRoute(index, classification, source, page_text, estimate))
     return RouteResult(pages)

From a915c63154cdaafef17dadc0bd188a8db5b8af71 Mon Sep 17 00:00:00 2001
From: Siddharth Nashikkar <siddharth.nashikkar@yahoo.com>
Date: Fri, 19 Jun 2026 13:03:58 -0400
Subject: [PATCH 6/6] test(core): cover detection, tokens, and vision routing

Test the detector on synthetic and real page features, the token formulas
against documented provider examples, and routing of a table page to vision.
---
 tests/conftest.py    | 25 ++++++++++++++++++
 tests/test_detect.py | 61 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_router.py | 16 ++++++++++++
 tests/test_tokens.py | 42 ++++++++++++++++++++++++++++++
 4 files changed, 144 insertions(+)
 create mode 100644 tests/test_detect.py
 create mode 100644 tests/test_tokens.py

diff --git a/tests/conftest.py b/tests/conftest.py
index 128a0ee..9bfa3f5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -36,6 +36,31 @@ def _make(text: str, pages: int = 1, name: str = "text.pdf") -> Path:
     return _make
 
 
+@pytest.fixture
+def make_table_pdf(tmp_path: Path) -> Callable[..., Path]:
+    """Return a factory for a page with a real text layer and a ruled table.
+
+    The body text keeps the page ``DIGITAL`` while the ruling lines (28 with the
+    default grid) make it vision-worthy.
+    """
+
+    def _make(rows: int = 20, cols: int = 6, name: str = "table.pdf") -> Path:
+        pdf = FPDF()
+        pdf.add_page()
+        pdf.set_font("Helvetica", size=10)
+        pdf.multi_cell(0, 6, "Financial summary table with quarterly figures follows. " * 2)
+        top = 60  # y coordinate of the table's first horizontal rule
+        for i in range(rows + 1):  # rows + 1 horizontal rules (21 by default)
+            pdf.line(10, top + i * 8, 200, top + i * 8)
+        for j in range(cols + 1):  # cols + 1 vertical rules (7 by default)
+            pdf.line(10 + j * 30, top, 10 + j * 30, top + rows * 8)
+        path = tmp_path / name
+        pdf.output(str(path))
+        return path
+
+    return _make
+
+
 @pytest.fixture
 def make_image_pdf(tmp_path: Path) -> Callable[..., Path]:
     """Return a factory that writes an image-only PDF (no text layer)."""
diff --git a/tests/test_detect.py b/tests/test_detect.py
new file mode 100644
index 0000000..f6c0f21
--- /dev/null
+++ b/tests/test_detect.py
@@ -0,0 +1,61 @@
+"""Tests for the vision-worthy page detector."""
+
+from collections.abc import Callable
+from pathlib import Path
+
+from localcontextrouter.detect import is_vision_worthy
+from localcontextrouter.models import PageFeatures
+from localcontextrouter.pdf import Pdf
+
+
+def _features(
+    *,  # keyword-only, so each test names the signal it sets
+    image_count: int = 0,
+    image_coverage: float = 0.0,
+    path_count: int = 0,
+    path_coverage: float = 0.0,
+) -> PageFeatures:
+    return PageFeatures(
+        width=600,  # fixed page size; is_vision_worthy does not read width/height
+        height=800,
+        image_count=image_count,
+        image_coverage=image_coverage,
+        path_count=path_count,
+        path_coverage=path_coverage,
+    )
+
+
+def test_plain_page_is_not_vision_worthy() -> None:
+    worthy, _ = is_vision_worthy(_features(path_count=3))  # below all three thresholds
+    assert worthy is False
+
+
+def test_figure_heavy_page_is_vision_worthy() -> None:
+    worthy, reason = is_vision_worthy(_features(image_count=1, image_coverage=0.6))
+    assert worthy is True  # 0.6 >= MIN_VISION_IMAGE_COVERAGE (0.40)
+    assert "image" in reason
+
+
+def test_large_vector_chart_is_vision_worthy() -> None:
+    worthy, reason = is_vision_worthy(_features(path_count=5, path_coverage=0.5))
+    assert worthy is True  # 0.5 >= MIN_VISION_PATH_COVERAGE (0.30); 5 paths alone would not
+    assert "chart" in reason or "diagram" in reason
+
+
+def test_many_paths_is_vision_worthy() -> None:
+    worthy, reason = is_vision_worthy(_features(path_count=40))  # 40 >= MIN_VISION_PATHS (25)
+    assert worthy is True
+    assert "table" in reason or "diagram" in reason
+
+
+def test_table_pdf_features_trip_detector(make_table_pdf: Callable[..., Path]) -> None:
+    with Pdf(make_table_pdf()) as pdf:
+        features = pdf.page_features(0)
+    assert features.path_count >= 25  # literal mirrors detect.MIN_VISION_PATHS
+    assert is_vision_worthy(features)[0] is True
+
+
+def test_prose_pdf_is_not_vision_worthy(make_text_pdf: Callable[..., Path]) -> None:
+    with Pdf(make_text_pdf("Plain prose with several sentences of body text. " * 3)) as pdf:
+        features = pdf.page_features(0)  # features extracted from a real text-only PDF
+    assert is_vision_worthy(features)[0] is False
diff --git a/tests/test_router.py b/tests/test_router.py
index 2869748..1b0270f 100644
--- a/tests/test_router.py
+++ b/tests/test_router.py
@@ -23,6 +23,22 @@ def test_routes_digital_page_to_text(make_text_pdf: Callable[..., Path]) -> None
     assert "revenue" in result.text
 
 
+def test_text_page_reports_token_savings(make_text_pdf: Callable[..., Path]) -> None:
+    result = route_pdf(make_text_pdf(PROSE))
+    page = result.pages[0]
+    # A short prose page is far cheaper as text than as a full-page image.
+    assert page.tokens.image_tokens > page.tokens.text_tokens
+    assert result.tokens_saved == page.tokens.saved > 0  # one page: total equals its saving
+
+
+def test_routes_table_page_to_vision(make_table_pdf: Callable[..., Path]) -> None:
+    result = route_pdf(make_table_pdf())
+    page = result.pages[0]
+    assert page.source is Source.VISION  # DIGITAL text layer plus many ruling lines
+    # Vision pages are sent as images, so they contribute no savings.
+    assert result.tokens_saved == 0
+
+
 @pytest.mark.integration
 def test_routes_scanned_page_to_ocr(lcr_binary: Path, make_image_pdf: Callable[..., Path]) -> None:
     result = route_pdf(make_image_pdf("SCANNED INVOICE 2026"))
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
new file mode 100644
index 0000000..fbea762
--- /dev/null
+++ b/tests/test_tokens.py
@@ -0,0 +1,42 @@
+"""Tests for token estimation, checked against documented provider examples."""
+
+from localcontextrouter.tokens import (
+    claude_image_tokens,
+    estimate_text_tokens,
+    openai_image_tokens,
+)
+
+
+def test_estimate_text_tokens() -> None:
+    assert estimate_text_tokens("") == 0
+    assert estimate_text_tokens("abcd") == 1  # exactly 4 characters
+    assert estimate_text_tokens("abcde") == 2  # 5 / 4 rounds up
+
+
+def test_claude_tokens_match_documented_patch_counts() -> None:
+    # 28x28 patches: ceil(w/28) * ceil(h/28).
+    assert claude_image_tokens(1000, 1000) == 1296  # 36 * 36
+    assert claude_image_tokens(1092, 1092) == 1521  # 39 * 39
+
+
+def test_claude_tokens_capped_for_large_images() -> None:
+    assert claude_image_tokens(4000, 4000) == 1568  # fits to 1568 px: 56 * 56 = 3136, capped
+    assert claude_image_tokens(8000, 8000, high_res=True) == 4784  # 92 * 92 = 8464, capped
+
+
+def test_claude_high_res_three_megapixel_page() -> None:
+    # 2000x1500 fits under the 2576 px high-res edge, so no downscale.
+    assert claude_image_tokens(2000, 1500, high_res=True) == 3888  # 72 * 54, under the cap
+
+
+def test_claude_tokens_zero_for_empty() -> None:
+    assert claude_image_tokens(0, 100) == 0  # non-positive width short-circuits to 0
+
+
+def test_openai_low_detail_is_flat() -> None:
+    assert openai_image_tokens(4000, 4000, detail="low") == 85  # size is ignored at low detail
+
+
+def test_openai_high_detail_match_documented_examples() -> None:
+    assert openai_image_tokens(1024, 1024) == 765  # 768x768 -> 2x2 tiles: 85 + 170 * 4
+    assert openai_image_tokens(2048, 4096) == 1105  # 768x1536 -> 2x3 tiles: 85 + 170 * 6