sid732 · sid732 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/src/localcontextrouter/__init__.py b/src/localcontextrouter/__init__.py
@@ -1,19 +1,27 @@
 """LocalContextRouter — cheapest faithful path for documents bound for a multimodal LLM."""
 
 from .classify import classify_text, compute_signals
+from .detect import is_vision_worthy
 from .models import (
     BoundingBox,
     Classification,
     OcrLine,
     PageClass,
+    PageFeatures,
     PageRoute,
     PageSignals,
     RouteResult,
     Source,
+    TokenEstimate,
 )
 from .ocr import ocr_png_text, run_ocr
 from .pdf import Pdf, classify_pdf
 from .router import route_pdf
+from .tokens import (
+    claude_image_tokens,
+    estimate_text_tokens,
+    openai_image_tokens,
+)
 
 __version__ = "0.0.0"
 
@@ -22,15 +30,21 @@
     "Classification",
     "OcrLine",
     "PageClass",
+    "PageFeatures",
     "PageRoute",
     "PageSignals",
     "Pdf",
     "RouteResult",
     "Source",
+    "TokenEstimate",
+    "claude_image_tokens",
     "classify_pdf",
     "classify_text",
     "compute_signals",
+    "estimate_text_tokens",
+    "is_vision_worthy",
     "ocr_png_text",
+    "openai_image_tokens",
     "route_pdf",
     "run_ocr",
     "__version__",

diff --git a/src/localcontextrouter/detect.py b/src/localcontextrouter/detect.py
@@ -0,0 +1,39 @@
+"""Decide whether a page should go to a vision model rather than as text.
+
+Some pages carry a perfectly good text layer yet still lose their meaning when
+flattened to text: tables, charts, diagrams, and figure-heavy layouts. Those are
+worth the vision-token cost. This module decides that from cheap layout features
+(:class:`~.models.PageFeatures`) — no rendering and no ML.
+"""
+
+from __future__ import annotations
+
+from .models import PageFeatures
+
+#: Inclusive lower bound on a page's vector-path count for vision routing:
+#: charts and ruled tables emit many line/curve objects.
+MIN_VISION_PATHS = 25
+
+#: Inclusive lower bound on the raster-image page fraction (figure-heavy pages).
+MIN_VISION_IMAGE_COVERAGE = 0.40
+
+#: Inclusive lower bound on the vector-path page fraction (a large filled
+#: chart or diagram).
+MIN_VISION_PATH_COVERAGE = 0.30
+
+
+def is_vision_worthy(features: PageFeatures) -> tuple[bool, str]:
+    """Decide vision routing for a page; the first signal that fires supplies the reason."""
+    # Signals are tested in order: raster coverage, vector coverage, raw path count.
+    if (images := features.image_coverage) >= MIN_VISION_IMAGE_COVERAGE:
+        floor = f"{MIN_VISION_IMAGE_COVERAGE:.0%}"
+        reason = f"{images:.0%} image coverage (>= {floor}); figure-heavy"
+    elif (fills := features.path_coverage) >= MIN_VISION_PATH_COVERAGE:
+        floor = f"{MIN_VISION_PATH_COVERAGE:.0%}"
+        reason = f"{fills:.0%} vector coverage (>= {floor}); large chart or diagram"
+    elif (paths := features.path_count) >= MIN_VISION_PATHS:
+        reason = f"{paths} vector paths (>= {MIN_VISION_PATHS}); table or diagram"
+    else:
+        # No signal fired: the extracted text is treated as faithful.
+        return False, "no dominant visual structure; text is faithful"
+    return True, reason
diff --git a/src/localcontextrouter/models.py b/src/localcontextrouter/models.py
@@ -65,23 +65,63 @@ class OcrLine:
 
 
 class Source(str, Enum):
-    """Where a page's final text came from."""
+    """Which path supplies a page's final content: text layer, OCR, or a vision model."""
 
     TEXT = "text"
     """Extracted directly from the embedded text layer."""
 
     OCR = "ocr"
     """Produced by on-device OCR after rendering the page."""
 
+    VISION = "vision"
+    """Send the page to a vision model — its meaning lives in the visuals."""
+
+
+@dataclass(frozen=True)
+class PageFeatures:
+    """Layout signals for one page: its size plus raster-image and vector-path totals."""
+
+    width: float
+    """Page width in PDF points."""
+
+    height: float
+    """Page height in PDF points."""
+
+    image_count: int
+    """Number of raster image objects on the page."""
+
+    image_coverage: float
+    """Fraction of the page area covered by raster images, in 0...1."""
+
+    path_count: int
+    """Number of vector path objects (lines, curves, fills)."""
+
+    path_coverage: float
+    """Fraction of the page area covered by vector paths, in 0...1."""
+
+
+@dataclass(frozen=True)
+class TokenEstimate:
+    """Token cost of one page sent as extracted text compared with sent as an image."""
+
+    text_tokens: int
+    image_tokens: int
+
+    @property
+    def saved(self) -> int:
+        """Return how many tokens text saves over the image, floored at zero."""
+        return excess if (excess := self.image_tokens - self.text_tokens) > 0 else 0
+
 
 @dataclass(frozen=True)
 class PageRoute:
-    """The routing outcome for one page: its classification, source, and text."""
+    """The routing outcome for one page: classification, source, text, and token estimate."""
 
     index: int
     classification: Classification
     source: Source
     text: str
+    tokens: TokenEstimate
 
 
 @dataclass(frozen=True)
@@ -94,3 +134,12 @@ class RouteResult:
     def text(self) -> str:
         """All page text joined in reading order."""
         return "\n\n".join(page.text for page in self.pages)
+
+    @property
+    def tokens_saved(self) -> int:
+        """Sum ``tokens.saved`` over every page that is not routed to vision.
+
+        Vision pages are sent as images anyway, so they are left out of the total.
+        """
+        kept = (route.tokens for route in self.pages if route.source is not Source.VISION)
+        return sum(estimate.saved for estimate in kept)
diff --git a/src/localcontextrouter/pdf.py b/src/localcontextrouter/pdf.py
@@ -12,9 +12,10 @@
 from pathlib import Path
 
 import pypdfium2 as pdfium
+import pypdfium2.raw as pdfium_c
 
 from .classify import classify_text
-from .models import Classification
+from .models import Classification, PageFeatures
 
 
 class Pdf:
@@ -49,6 +50,39 @@ def page_texts(self) -> Iterator[str]:
         for index in range(len(self)):
             yield self.page_text(index)
 
+    def page_features(self, index: int) -> PageFeatures:
+        """Summarize the page's image and vector-path content for routing.
+
+        Counts raster image and vector path objects and sums their bounding-box
+        areas as a fraction of the page. Boxes may overlap or extend past the
+        page, so each coverage is clamped to 1.0 (the documented upper bound).
+        """
+        page = self._doc[index]
+        try:
+            width, height = page.get_size()
+            page_area = width * height
+            image_count = path_count = 0
+            image_area = path_area = 0.0
+            for obj in page.get_objects():
+                left, bottom, right, top = obj.get_bounds()
+                area = abs((right - left) * (top - bottom))  # bounding-box area, not ink area
+                if obj.type == pdfium_c.FPDF_PAGEOBJ_IMAGE:
+                    image_count += 1
+                    image_area += area
+                elif obj.type == pdfium_c.FPDF_PAGEOBJ_PATH:  # charts and diagrams land here
+                    path_count += 1
+                    path_area += area
+            return PageFeatures(
+                width=width,
+                height=height,
+                image_count=image_count,
+                image_coverage=min(1.0, image_area / page_area) if page_area else 0.0,
+                path_count=path_count,
+                path_coverage=min(1.0, path_area / page_area) if page_area else 0.0,
+            )
+        finally:
+            page.close()
+
     def render_page_png(self, index: int, scale: float = 2.0) -> bytes:
         """Render the page at ``index`` to PNG bytes.
 

diff --git a/src/localcontextrouter/router.py b/src/localcontextrouter/router.py
@@ -1,29 +1,48 @@
-"""Route each PDF page to the cheapest faithful text source.
+"""Route each PDF page to the cheapest faithful source: text, OCR, or vision.
 
-Digital pages keep their extracted text; scanned or garbled pages are rendered
-and sent to OCR. Vision routing and token accounting are added in a later phase.
+- Digital pages keep their extracted text, unless their meaning lives in visuals
+  (tables, charts, diagrams) — those go to a vision model.
+- Scanned or garbled pages are rendered and sent to OCR.
+
+Every page carries a token estimate so the savings of avoiding the image path
+are visible.
 """
 
 from __future__ import annotations
 
 from pathlib import Path
 
 from .classify import classify_text
-from .models import PageClass, PageRoute, RouteResult, Source
+from .detect import is_vision_worthy
+from .models import PageClass, PageRoute, RouteResult, Source, TokenEstimate
 from .ocr import ocr_png_text
 from .pdf import Pdf
+from .tokens import claude_image_tokens, estimate_text_tokens
 
 
 def route_pdf(path: str | Path, *, render_scale: float = 2.0) -> RouteResult:
-    """Route every page of a PDF and return per-page text with its source."""
+    """Route every page of a PDF; return each page's text, source, and token estimate."""
     pages: list[PageRoute] = []
     with Pdf(path) as pdf:
         for index in range(len(pdf)):
             text = pdf.page_text(index)
             classification = classify_text(text)
+            features = pdf.page_features(index)
+
             if classification.page_class is PageClass.DIGITAL:
-                pages.append(PageRoute(index, classification, Source.TEXT, text))
+                source = Source.VISION if is_vision_worthy(features)[0] else Source.TEXT
+                page_text = text  # a vision-routed page still carries its extracted text
             else:
-                png = pdf.render_page_png(index, scale=render_scale)
-                pages.append(PageRoute(index, classification, Source.OCR, ocr_png_text(png)))
+                source = Source.OCR
+                page_text = ocr_png_text(pdf.render_page_png(index, scale=render_scale))
+
+            # text_tokens is measured on the text actually sent (OCR output for scanned
+            # pages); the image size is the page size in points times render_scale.
+            estimate = TokenEstimate(
+                text_tokens=estimate_text_tokens(page_text),
+                image_tokens=claude_image_tokens(
+                    features.width * render_scale, features.height * render_scale
+                ),
+            )
+            pages.append(PageRoute(index, classification, source, page_text, estimate))
     return RouteResult(pages)
diff --git a/src/localcontextrouter/tokens.py b/src/localcontextrouter/tokens.py
@@ -0,0 +1,65 @@
+"""Estimate the token cost of a page as extracted text versus as an image.
+
+The numbers are estimates that follow each provider's documented tokenization so
+the router can report the savings of routing a page to text instead of vision.
+
+- Claude tokenizes images in 28x28 pixel patches, downscaling so the long edge
+  fits a cap (1568 px / 1568 tokens for most models; 2576 px / 4784 tokens for
+  the high-resolution models).
+- OpenAI's tile models bill a flat 85 tokens at ``detail="low"``; at ``"high"``
+  they fit the image to 2048x2048, scale the short side to 768, and bill
+  85 + 170 per 512x512 tile.
+- Text is approximated at ~4 characters per token.
+"""
+
+from __future__ import annotations
+
+import math
+
+_PATCH = 28  # side length, in pixels, of one square Claude image patch
+_CLAUDE_MAX_TOKENS = 1568  # token cap for most models
+_CLAUDE_MAX_EDGE = 1568  # long-edge cap in pixels for most models
+_CLAUDE_HIRES_MAX_TOKENS = 4784  # token cap for the high-resolution models
+_CLAUDE_HIRES_MAX_EDGE = 2576  # long-edge cap in pixels for the high-resolution models
+
+_CHARS_PER_TOKEN = 4  # rough characters-per-token ratio used for text estimates
+
+
+def estimate_text_tokens(text: str) -> int:
+    """Approximate the token count of ``text`` from its length, rounding up."""
+    return -(-len(text) // _CHARS_PER_TOKEN)
+
+
+def _fit_long_edge(width: float, height: float, max_edge: int) -> tuple[float, float]:
+    """Downscale (never upscale) to fit ``max_edge``; min() absorbs float overshoot of the cap."""
+    if (long_edge := max(width, height)) <= max_edge:
+        return width, height
+    scale = max_edge / long_edge
+    return min(width * scale, max_edge), min(height * scale, max_edge)
+
+
+def claude_image_tokens(width: float, height: float, *, high_res: bool = False) -> int:
+    """Estimate Claude image tokens: patch count of the fitted image, capped per model tier."""
+    if width <= 0 or height <= 0:
+        return 0
+    hires = (_CLAUDE_HIRES_MAX_EDGE, _CLAUDE_HIRES_MAX_TOKENS)
+    edge_limit, token_limit = hires if high_res else (_CLAUDE_MAX_EDGE, _CLAUDE_MAX_TOKENS)
+    # Shrink to the edge limit first, then count whole patches along each side.
+    cols, rows = (math.ceil(side / _PATCH) for side in _fit_long_edge(width, height, edge_limit))
+    return min(cols * rows, token_limit)
+
+
+def openai_image_tokens(width: float, height: float, *, detail: str = "high") -> int:
+    """Estimate OpenAI tile-model image tokens (GPT-4o / GPT-4.1 family) at the given detail."""
+    base, per_tile = 85, 170
+    if detail == "low":
+        return base  # flat price, independent of the image size
+    if width <= 0 or height <= 0:
+        return 0
+    # First bound the long edge by 2048, then shrink further if the short edge tops 768.
+    w, h = _fit_long_edge(width, height, 2048)
+    shortest = min(w, h)
+    if shortest > 768:
+        w, h = w * (768 / shortest), h * (768 / shortest)
+    across, down = math.ceil(w / 512), math.ceil(h / 512)
+    return base + per_tile * across * down
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -36,6 +36,31 @@ def _make(text: str, pages: int = 1, name: str = "text.pdf") -> Path:
     return _make
 
 
+@pytest.fixture
+def make_table_pdf(tmp_path: Path) -> Callable[..., Path]:
+    """Return a factory that writes a one-page PDF with body text above a ruled grid.
+
+    The body text is meant to keep the page ``DIGITAL``, while the ``rows + 1``
+    horizontal and ``cols + 1`` vertical ruling lines make it vision-worthy.
+    """
+
+    def _make(rows: int = 20, cols: int = 6, name: str = "table.pdf") -> Path:
+        doc = FPDF()
+        doc.add_page()
+        doc.set_font("Helvetica", size=10)
+        doc.multi_cell(0, 6, "Financial summary table with quarterly figures follows. " * 2)
+        grid_top, row_height, col_width = 60, 8, 30
+        for y in (grid_top + r * row_height for r in range(rows + 1)):
+            doc.line(10, y, 200, y)
+        for x in (10 + c * col_width for c in range(cols + 1)):
+            doc.line(x, grid_top, x, grid_top + rows * row_height)
+        target = tmp_path / name
+        doc.output(str(target))
+        return target
+
+    return _make
+
+
 @pytest.fixture
 def make_image_pdf(tmp_path: Path) -> Callable[..., Path]:
     """Return a factory that writes an image-only PDF (no text layer)."""