diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 0512786b8..f41e857e7 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -13,7 +13,9 @@ concurrency:
 jobs:
   lint:
     runs-on: windows-latest
-    timeout-minutes: 5
+    # Bumped from 5: combined mypy on 12 packages cold-starts at ~3-4 min on
+    # Windows runners; the original 5-min ceiling cancelled mid-run.
+    timeout-minutes: 10
 
     steps:
       - uses: actions/checkout@v4
@@ -34,9 +36,15 @@ jobs:
       - name: Lint
         run: uv run ruff check src/ tests/
 
-      # Required type check: these folders are clean against the strict
+      # Required type check: these packages are clean against the strict
       # config in pyproject.toml. Any new mypy error here blocks the PR.
-      # Expand the package list as more folders are cleaned up.
+      # Expand the package list as more packages are cleaned up.
+      #
+      # Single mypy invocation across all packages — a per-package loop pays
+      # cold typeshed/plugin startup per package and tipped the job past the
+      # 5-minute timeout once the list grew to 12. The combined summary still
+      # reports total error/file counts; error lines include file paths so
+      # the failing package is identifiable without per-package groups.
       - name: Type check (required)
         run: >-
           uv run mypy
@@ -48,9 +56,7 @@ jobs:
           -p winml.modelkit.config
           -p winml.modelkit.core
           -p winml.modelkit.data
-
-      # Advisory type check for the rest of the tree: surfaces type issues
-      # in CI logs without blocking PRs while the backlog is worked down.
-      - name: Type check (advisory, full package)
-        continue-on-error: true
-        run: uv run mypy -p winml.modelkit
+          -p winml.modelkit.datasets
+          -p winml.modelkit.eval
+          -p winml.modelkit.export
+          -p winml.modelkit.inference
diff --git a/pyproject.toml b/pyproject.toml
index 64a80427b..72f36f10f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,8 +111,10 @@ dev = [
   "pre-commit>=4.5.1",
   "pytest-cov>=7",
   "pytest-timeout>=2.4.0",
+  "scipy-stubs>=1.17.1.5",
   "types-jsonschema>=4.26.0.20260518",
   "types-protobuf>=7.34.1.20260518",
+  "types-psutil>=7.2.2.20260518",
   "types-pyyaml>=6.0.12.20260518",
   "types-tqdm>=4.67.3.20260518",
 ]
@@ -478,6 +480,10 @@ module = [
   "openvino",
   "openvino.*",
   "plotext",
+  "soundfile",       # audio I/O in inference/engine.py; no community stubs
+  "sklearn.*",       # used in eval/metrics; no community stubs
+  "evaluate",        # HF evaluate, used in eval/; no community stubs
+  "evaluate.*",
 ]
 ignore_missing_imports = true
 
diff --git a/src/winml/modelkit/build/hf.py b/src/winml/modelkit/build/hf.py
index eaae70ef4..99851462c 100644
--- a/src/winml/modelkit/build/hf.py
+++ b/src/winml/modelkit/build/hf.py
@@ -218,6 +218,9 @@ def _name(base: str) -> str:
     # =========================================================================
     logger.info("Exporting to ONNX...")
     t0 = time.monotonic()
+    # config.export is None only for the ONNX build path (build_onnx_model);
+    # this is the HF path so the field must be populated.
+    assert config.export is not None, "build_hf_model requires config.export"
     export_onnx(
         model=pytorch_model,
         output_path=export_path,
diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py
index 7266620cb..e519892aa 100644
--- a/src/winml/modelkit/commands/build.py
+++ b/src/winml/modelkit/commands/build.py
@@ -1356,6 +1356,8 @@ def _name(base: str) -> str:
             config, model_id, trust_remote_code=False, hf_config=preloaded_hf_config
         )
         t0 = time.monotonic()
+        # config.export is None only for the ONNX build path; this is the HF path.
+        assert config.export is not None, "HF build path requires config.export"
         export_onnx(
             model=pytorch_model,
             output_path=export_path,
diff --git a/src/winml/modelkit/commands/config.py b/src/winml/modelkit/commands/config.py
index 373db06f5..a3c3a5cdd 100644
--- a/src/winml/modelkit/commands/config.py
+++ b/src/winml/modelkit/commands/config.py
@@ -327,26 +327,25 @@ def config(
                 )
                 return
 
-            # Generate config(s) - module parameter selects overload:
-            # module=str → list[WinMLBuildConfig], module=None → WinMLBuildConfig.
-            # ``module`` is the only differing kwarg, so build a shared dict
-            # once and add it only on the list-returning branch. This keeps
-            # the overload dispatch but avoids repeating the other 10 kwargs.
-            _shared_kwargs: dict[str, Any] = {
-                "model_id": hf_model,
-                "task": task,
-                "model_class": model_class,
-                "model_type": model_type,
-                "override": override,
-                "shape_config": shape_config,
-                "library_name": library_name,
-                "device": device,
-                "precision": precision,
-                "trust_remote_code": trust_remote_code,
-                "ep": ep,
-            }
-            if module:
-                configs = generate_hf_build_config(module=module, **_shared_kwargs)
+            # Generate config(s). ``module or None`` keeps the previous ``if module:``
+            # dispatch (an empty string selects the single-config path). The result
+            # is WinMLBuildConfig | list[...]; isinstance(result, list) narrows it.
+            result = generate_hf_build_config(
+                model_id=hf_model,
+                task=task,
+                model_class=model_class,
+                model_type=model_type,
+                module=module or None,
+                override=override,
+                shape_config=shape_config,
+                library_name=library_name,
+                device=device,
+                precision=precision,
+                trust_remote_code=trust_remote_code,
+                ep=ep,
+            )
+            if isinstance(result, list):
+                configs = result
                 for cfg in configs:
                     _apply_stage_overrides(cfg, no_quant=not quant, no_compile=no_compile)
                 output_data = [cfg.to_dict() for cfg in configs]
@@ -354,7 +353,7 @@ def config(
                 # Use first config for display metadata
                 config_obj = configs[0] if configs else None
             else:
-                config_obj = generate_hf_build_config(**_shared_kwargs)
+                config_obj = result
                 configs = []
                 _apply_stage_overrides(config_obj, no_quant=not quant, no_compile=no_compile)
                 output_data = config_obj.to_dict()
diff --git a/src/winml/modelkit/config/build.py b/src/winml/modelkit/config/build.py
index 16cde11f2..9bdbb06e0 100644
--- a/src/winml/modelkit/config/build.py
+++ b/src/winml/modelkit/config/build.py
@@ -489,6 +489,29 @@ def generate_hf_build_config(
 ) -> list[WinMLBuildConfig]: ...
 
 
+@overload
+def generate_hf_build_config(
+    model_id: str | None = None,
+    *,
+    task: str | None = None,
+    model_class: str | None = None,
+    model_type: str | None = None,
+    # Catch-all for callers that hold ``module`` as ``str | None`` (e.g. the
+    # ``generate_build_config`` dispatcher). Without this overload, mypy can't
+    # resolve the call against the two narrower overloads above and fails with
+    # "too many union combinations".
+    module: str | None,
+    override: WinMLBuildConfig | None = None,
+    shape_config: dict | None = None,
+    library_name: str = "transformers",
+    device: str = "auto",
+    precision: str = "auto",
+    trust_remote_code: bool = False,
+    ep: EPNameOrAlias | None = None,
+    no_compile: bool = False,
+) -> WinMLBuildConfig | list[WinMLBuildConfig]: ...
+
+
 def generate_hf_build_config(
     model_id: str | None = None,
     *,
@@ -804,24 +827,24 @@ class name (HF path only).
             ep=ep,
             override=override,
         )
-    # Split branches so mypy can pick the matching overload of generate_hf_build_config.
-    # Typed as dict[str, Any] so per-kwarg type checks happen at the callee, not on the
-    # widened Union mypy would otherwise infer from this heterogeneous literal.
-    common_kwargs: dict[str, Any] = {
-        "task": task,
-        "model_class": model_class,
-        "model_type": model_type,
-        "override": override,
-        "shape_config": shape_config,
-        "library_name": library_name,
-        "device": device,
-        "precision": precision,
-        "trust_remote_code": trust_remote_code,
-        "ep": ep,
-    }
-    if module is None:
-        return generate_hf_build_config(model_id, module=None, **common_kwargs)
-    return generate_hf_build_config(model_id, module=module, **common_kwargs)
+    # Single call resolves against generate_hf_build_config's `module: str | None`
+    # overload, which returns WinMLBuildConfig | list[WinMLBuildConfig] — matching
+    # this dispatcher's implementation return type. The dispatcher's own
+    # narrowing overloads above still tighten the return type for its callers.
+    return generate_hf_build_config(
+        model_id,
+        task=task,
+        model_class=model_class,
+        model_type=model_type,
+        module=module,
+        override=override,
+        shape_config=shape_config,
+        library_name=library_name,
+        device=device,
+        precision=precision,
+        trust_remote_code=trust_remote_code,
+        ep=ep,
+    )
 
 
 # =============================================================================
diff --git a/src/winml/modelkit/core/time_utils.py b/src/winml/modelkit/core/time_utils.py
index 9fc8b8b4c..77b1baf28 100644
--- a/src/winml/modelkit/core/time_utils.py
+++ b/src/winml/modelkit/core/time_utils.py
@@ -5,8 +5,15 @@
 """Simple timestamp formatting utility."""
 
 from datetime import datetime, timezone
+from typing import overload
 
 
+@overload
+def format_timestamp_iso(epoch_time: float) -> str: ...
+@overload
+def format_timestamp_iso(epoch_time: None) -> None: ...
+@overload
+def format_timestamp_iso(epoch_time: float | None) -> str | None: ...
 def format_timestamp_iso(epoch_time: float | None) -> str | None:
     """Format Unix epoch timestamp to ISO 8601 with Z suffix.
 
diff --git a/src/winml/modelkit/datasets/__init__.py b/src/winml/modelkit/datasets/__init__.py
index 54ac73239..ebbde0cfb 100644
--- a/src/winml/modelkit/datasets/__init__.py
+++ b/src/winml/modelkit/datasets/__init__.py
@@ -136,7 +136,7 @@ def universal_calib_dataset(
         raise RuntimeError(f"Failed to create {task} dataset: {e}") from e
 
 
-class DatasetCalibrationReader(CalibrationDataReader):
+class DatasetCalibrationReader(CalibrationDataReader):  # type: ignore[misc]
     """Calibration data reader that wraps universal_calib_dataset.
 
     Bridges HuggingFace-style datasets to ORT's calibration API by:
diff --git a/src/winml/modelkit/datasets/base.py b/src/winml/modelkit/datasets/base.py
index 8d86af57e..ea73e7bf6 100644
--- a/src/winml/modelkit/datasets/base.py
+++ b/src/winml/modelkit/datasets/base.py
@@ -39,7 +39,7 @@ def __init__(
         dataset_name: str | None = None,
         max_samples: int | None = None,
         data_split: str | None = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         """Initialize dataset with readonly properties.
 
@@ -59,8 +59,10 @@ def __init__(
         # Store additional kwargs for subclass use
         self._config = kwargs
 
-        # Subclasses should populate these during initialization
-        self._dataset = None  # The actual dataset object
+        # Subclasses should populate these during initialization.
+        # Typed as Any because each subclass uses a different dataset library
+        # (HF datasets.Dataset, torch DataLoader, plain list[dict], ...).
+        self._dataset: Any = None
         self._metadata: dict[str, Any] = {}  # Dataset metadata
 
         # Initialize subclass-specific data
diff --git a/src/winml/modelkit/datasets/depth_estimation.py b/src/winml/modelkit/datasets/depth_estimation.py
index 6aa3f7d62..2b268a6ec 100644
--- a/src/winml/modelkit/datasets/depth_estimation.py
+++ b/src/winml/modelkit/datasets/depth_estimation.py
@@ -123,7 +123,7 @@ def _initialize(self) -> None:
 
         # Convert raw images into model-ready tensors.
         def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]:
-            return processor(example[self._image_col].convert("RGB"), return_tensors="pt")
+            return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt"))
 
         self._dataset = dataset.map(
             preprocess_single_sample, remove_columns=[self._image_col]
@@ -142,7 +142,7 @@ def _detect_image_column(self, dataset: Any) -> None:
 
         features = dataset.features
 
-        self._image_col = None
+        self._image_col = ""
         for col_name, feature in features.items():
             if isinstance(feature, Image):
                 self._image_col = col_name
diff --git a/src/winml/modelkit/datasets/image.py b/src/winml/modelkit/datasets/image.py
index 251eaec9f..20b05de20 100644
--- a/src/winml/modelkit/datasets/image.py
+++ b/src/winml/modelkit/datasets/image.py
@@ -12,7 +12,7 @@
 
 import logging
 from random import Random
-from typing import Any
+from typing import Any, cast
 
 from datasets import load_dataset
 from datasets.features import ClassLabel, Image
@@ -35,6 +35,10 @@ class ImageDataset(BaseTaskDataset):
     - HuggingFace Features API for metadata discovery
     """
 
+    # Populated by _detect_columns(); empty string / None until then.
+    _image_col: str = ""
+    _label_col: str = ""
+    _label_feature: ClassLabel | None = None
 
     def _get_default_dataset(self) -> None:
         """Set default dataset configuration if none specified.
@@ -130,13 +134,13 @@ def _initialize(self) -> None:
         processor = AutoImageProcessor.from_pretrained(self._model_name, use_fast=True)
 
         # 5. Conditional label alignment using should_align_labels()
-        if should_align_labels(self._dataset_name):
+        if self._dataset_name and should_align_labels(self._dataset_name):
             dataset = dataset.align_labels_with_mapping(get_imagenet_label_map(), self._label_col)
 
         # 6. Apply image processing with proper batch dimension
-        def preprocess_single_sample(example):
+        def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]:
             # Process single image and add batch dimension
-            return processor(example[self._image_col].convert("RGB"), return_tensors="pt")
+            return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt"))
 
         self._dataset = (
             dataset
@@ -146,7 +150,7 @@ def preprocess_single_sample(example):
 
         logger.info(f"Dataset initialized with {len(self._dataset)} samples")
 
-    def _detect_columns(self, dataset) -> None:
+    def _detect_columns(self, dataset: Any) -> None:
         """Detect image and label columns using HuggingFace Features API.
 
         Uses proper type checking with HuggingFace Features API to reliably
@@ -158,8 +162,8 @@ def _detect_columns(self, dataset) -> None:
         features = dataset.features
 
         # Detect columns using proper type checking
-        self._image_col = None
-        self._label_col = None
+        self._image_col = ""
+        self._label_col = ""
         self._label_feature = None  # Store ClassLabel feature for mapping
 
         for col_name, feature in features.items():
@@ -210,7 +214,7 @@ def __getitem__(self, idx: int) -> dict[str, Any]:
         Returns:
             Dictionary containing preprocessed tensors
         """
-        return self._dataset[idx]
+        return cast("dict[str, Any]", self._dataset[idx])
 
     @property
     def label_names(self) -> list[str]:
diff --git a/src/winml/modelkit/datasets/image_segmentation.py b/src/winml/modelkit/datasets/image_segmentation.py
index ad4a1d174..84d81aa61 100644
--- a/src/winml/modelkit/datasets/image_segmentation.py
+++ b/src/winml/modelkit/datasets/image_segmentation.py
@@ -13,7 +13,7 @@
 
 import logging
 from random import Random
-from typing import Any
+from typing import Any, cast
 
 from datasets import load_dataset
 from datasets.features import Image
@@ -39,6 +39,11 @@ class ImageSegmentationDataset(BaseTaskDataset):
     DEFAULT_DATASET = "nielsr/ade20k-demo"
     DEFAULT_SPLIT = "train"
 
+    # Populated by _detect_columns(); empty string until then.
+    _image_col: str = ""
+    _label_col: str = ""
+    _mask_col: str = ""
+
     def __init__(
         self,
         model_name: str,
@@ -46,7 +51,7 @@ def __init__(
         max_samples: int | None = None,
         data_split: str | None = None,
         do_reduce_labels: bool = True,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         """Initialize image segmentation dataset.
 
@@ -119,7 +124,7 @@ def _initialize(self) -> None:
         processor = AutoImageProcessor.from_pretrained(self._model_name, use_fast=False)
 
         # 6. Apply image + mask processing
-        def preprocess_single_sample(example):
+        def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]:
             """Preprocess a single image + mask sample for segmentation models."""
             # Get image and mask
             image = example[self._image_col].convert("RGB")
@@ -146,7 +151,7 @@ def preprocess_single_sample(example):
         logger.info(f"Image column: {self._image_col}")
         logger.info(f"Mask column: {self._mask_col}")
 
-    def _detect_columns(self, dataset) -> None:
+    def _detect_columns(self, dataset: Any) -> None:
         """Detect image and mask columns using HuggingFace Features API.
 
         Uses proper type checking to identify Image features and applies
@@ -158,8 +163,8 @@ def _detect_columns(self, dataset) -> None:
         features = dataset.features
 
         # Initialize column detection
-        self._image_col = None
-        self._mask_col = None
+        self._image_col = ""
+        self._mask_col = ""
 
         # Detect columns using proper type checking and naming patterns
         image_candidates = []
@@ -256,7 +261,7 @@ def __getitem__(self, idx: int) -> dict[str, Any]:
         Returns:
             Dictionary containing preprocessed tensors for segmentation models
         """
-        return self._dataset[idx]
+        return cast("dict[str, Any]", self._dataset[idx])
 
     @property
     def mask_col(self) -> str:
diff --git a/src/winml/modelkit/datasets/object_detection.py b/src/winml/modelkit/datasets/object_detection.py
index 628204f35..94530a19c 100644
--- a/src/winml/modelkit/datasets/object_detection.py
+++ b/src/winml/modelkit/datasets/object_detection.py
@@ -123,7 +123,7 @@ def _initialize(self) -> None:
 
         # Apply image processing
         def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]:
-            return processor(example[self._image_col].convert("RGB"), return_tensors="pt")
+            return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt"))
 
         self._dataset = (
             dataset
@@ -151,8 +151,8 @@ def _detect_image_column(self, dataset: Any) -> None:
         features = dataset.features
 
         # Find image column
-        self._image_col = None
-        self._label_col = None  # May not have simple label column
+        self._image_col = ""
+        self._label_col = ""  # May not have simple label column
         self._label_feature = None
 
         for col_name, feature in features.items():
@@ -174,7 +174,7 @@ def _detect_image_column(self, dataset: Any) -> None:
                 self._label_col = col_name
                 break
 
-        if self._label_col is None:
+        if not self._label_col:
             # Use first non-image column as fallback
             for col_name in features:
                 if col_name != self._image_col:
@@ -200,5 +200,5 @@ def label_names(self) -> list[str]:
         so this returns an empty list unless explicitly set.
         """
         if self._label_feature is not None and hasattr(self._label_feature, "names"):
-            return self._label_feature.names
+            return list(self._label_feature.names)
         return []
diff --git a/src/winml/modelkit/datasets/processor_utils.py b/src/winml/modelkit/datasets/processor_utils.py
index 136a9c106..cfecb2c30 100644
--- a/src/winml/modelkit/datasets/processor_utils.py
+++ b/src/winml/modelkit/datasets/processor_utils.py
@@ -13,7 +13,9 @@
 import logging
 from typing import Any
 
-from transformers.image_processing_utils import ImageProcessingMixin
+from transformers.image_processing_utils import (  # type: ignore[attr-defined]
+    ImageProcessingMixin,
+)
 
 
 logger = logging.getLogger(__name__)
diff --git a/src/winml/modelkit/datasets/random_dataset.py b/src/winml/modelkit/datasets/random_dataset.py
index 6cb0e9411..a8da3d029 100644
--- a/src/winml/modelkit/datasets/random_dataset.py
+++ b/src/winml/modelkit/datasets/random_dataset.py
@@ -12,7 +12,7 @@
 
 import logging
 import random
-from typing import Any, ClassVar
+from typing import Any, ClassVar, cast
 
 import numpy as np
 import torch
@@ -44,7 +44,7 @@ def __init__(
         model_path: str,
         max_samples: int = 100,
         seed: int = 42,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         self.model_path = model_path
         self.max_samples = max_samples
@@ -91,7 +91,7 @@ def __len__(self) -> int:
 
     def __getitem__(self, idx: int) -> dict[str, Any]:
         """Get a single preprocessed sample."""
-        return self.dataset[idx]
+        return cast("dict[str, Any]", self.dataset[idx])
 
     @property
     def label_col(self) -> str:
@@ -104,7 +104,7 @@ def _generate_random_sample(self) -> dict[str, Any]:
         Uses cached InputTensorSpec list built from ONNX model I/O config.
         Each spec's to_tensor() handles value_range, dtype, and shape correctly.
         """
-        return {spec.name: spec.to_tensor() for spec in self._input_specs}
+        return {spec.name: spec.to_tensor() for spec in self._input_specs if spec.name}
 
     def _load_dataset(self) -> Dataset:
         """Generate synthetic dataset with random samples as tensors."""
diff --git a/src/winml/modelkit/datasets/text.py b/src/winml/modelkit/datasets/text.py
index 5c5c9cb1b..e44f115e2 100644
--- a/src/winml/modelkit/datasets/text.py
+++ b/src/winml/modelkit/datasets/text.py
@@ -16,7 +16,7 @@
 
 import logging
 from random import Random
-from typing import Any
+from typing import Any, cast
 
 from datasets import load_dataset
 from datasets.features import ClassLabel, Value
@@ -50,7 +50,7 @@ def __init__(
         max_length: int | None = None,
         io_config: dict | None = None,
         io_mapping: dict | None = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         """Initialize text classification dataset.
 
@@ -159,13 +159,13 @@ def _initialize(self) -> None:
 
         def tokenize(example: dict) -> dict:
             texts = [example[col] for col in self._text_cols]
-            return tokenizer(
+            return dict(tokenizer(
                 *texts,
                 padding="max_length",
                 truncation=True,
                 max_length=self._max_length,
                 return_tensors="pt",
-            )
+            ))
 
         # 7. Apply tokenization, remove text columns
         self._dataset = (
@@ -180,7 +180,7 @@ def tokenize(example: dict) -> dict:
         logger.info("Initialized: %d samples, max_length=%d, text=%s, label=%s",
                     len(self._dataset), self._max_length, self._text_cols, self._label_col)
 
-    def _detect_columns(self, dataset) -> None:
+    def _detect_columns(self, dataset: Any) -> None:
         """Detect text and label columns by feature type."""
         if not hasattr(dataset, "features"):
             raise ValueError(f"Dataset {self._dataset_name} has no features")
@@ -210,12 +210,13 @@ def _detect_columns(self, dataset) -> None:
 
     def __getitem__(self, idx: int) -> dict[str, Any]:
         """Get tokenized sample."""
-        return self._dataset[idx]
+        return cast("dict[str, Any]", self._dataset[idx])
 
     # Readonly properties
     @property
     def max_length(self) -> int:
         """Sequence length."""
+        assert self._max_length is not None, "max_length not resolved"
         return self._max_length
 
     @property
diff --git a/src/winml/modelkit/eval/base_evaluator.py b/src/winml/modelkit/eval/base_evaluator.py
index 6eec3f595..30eba265d 100644
--- a/src/winml/modelkit/eval/base_evaluator.py
+++ b/src/winml/modelkit/eval/base_evaluator.py
@@ -8,7 +8,7 @@
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from ..utils.eval_utils import DatasetValidationError, validate_dataset_columns
 
@@ -72,7 +72,7 @@ def compute(self) -> dict[str, Any]:
                     )
                 kwargs.pop(key)
 
-        return task_evaluator.compute(**kwargs)
+        return cast("dict[str, Any]", task_evaluator.compute(**kwargs))
 
     def prepare_data(self) -> Dataset:
         """Load dataset, shuffle, sample, and align labels."""
@@ -122,6 +122,7 @@ def prepare_data(self) -> Dataset:
                 )
             dataset = dataset.select(range(actual_samples))
 
+        assert self.config.task is not None, "config.task is required for evaluation"
         validate_dataset_columns(
             dataset, self.config.task, self.config.dataset.columns_mapping,
         )
@@ -131,18 +132,24 @@ def prepare_pipeline(self) -> Pipeline:
         """Create HF pipeline for inference. Subclasses override to configure."""
         from transformers import pipeline
 
+        assert self.config.task is not None, "config.task is required to build pipeline"
         pipeline_task = _PIPELINE_TASK_MAP.get(self.config.task, self.config.task)
-        return pipeline(
-            pipeline_task,
-            model=self.model,
-            framework="pt",
-            tokenizer=self.config.model_id,
-            feature_extractor=self.config.model_id,
-            image_processor=self.config.model_id,
-            processor=self.config.model_id,
-            # "device" is for HF pipeline pytorch tensors, not ORT EP.
-            # WinMLSession handles device delegation for ORT.
-            device="cpu",
+        # transformers.pipeline has 60+ Literal overloads — runtime task strings
+        # can't be statically matched. The string-task fallback handles unknown tasks.
+        return cast(
+            "Pipeline",
+            pipeline(  # type: ignore[call-overload]
+                pipeline_task,
+                model=self.model,
+                framework="pt",
+                tokenizer=self.config.model_id,
+                feature_extractor=self.config.model_id,
+                image_processor=self.config.model_id,
+                processor=self.config.model_id,
+                # "device" is for HF pipeline pytorch tensors, not ORT EP.
+                # WinMLSession handles device delegation for ORT.
+                device="cpu",
+            ),
         )
 
     def _fixed_seq_length(self) -> int | None:
diff --git a/src/winml/modelkit/eval/depth_estimation_evaluator.py b/src/winml/modelkit/eval/depth_estimation_evaluator.py
index 7b0bd0333..40b903ad9 100644
--- a/src/winml/modelkit/eval/depth_estimation_evaluator.py
+++ b/src/winml/modelkit/eval/depth_estimation_evaluator.py
@@ -41,12 +41,20 @@ def __init__(
         task = "depth-estimation"
         self._input_col = mapping.get("input_column", get_default(task, "input_column"))
         self._depth_col = mapping.get("depth_column", get_default(task, "depth_column"))
-        self._align = mapping.get("align", get_default(task, "align"))
-        self._depth_kind = mapping.get("depth_kind", get_default(task, "depth_kind"))
-        self._min_depth = float(mapping.get("min_depth", get_default(task, "min_depth")))
+        align_raw = mapping.get("align", get_default(task, "align"))
+        depth_kind_raw = mapping.get("depth_kind", get_default(task, "depth_kind"))
+        assert align_raw is not None, "align has no default for depth-estimation"
+        assert depth_kind_raw is not None, "depth_kind has no default for depth-estimation"
+        self._align: str = align_raw
+        self._depth_kind: str = depth_kind_raw
+        min_depth_raw = mapping.get("min_depth", get_default(task, "min_depth"))
+        assert min_depth_raw is not None, "min_depth has no default for depth-estimation"
+        self._min_depth = float(min_depth_raw)
         max_depth_raw = mapping.get("max_depth", get_default(task, "max_depth"))
         self._max_depth: float | None
-        if isinstance(max_depth_raw, str) and max_depth_raw.lower() == "none":
+        if max_depth_raw is None or (
+            isinstance(max_depth_raw, str) and max_depth_raw.lower() == "none"
+        ):
             self._max_depth = None
         else:
             self._max_depth = float(max_depth_raw)
@@ -68,13 +76,15 @@ def prepare_pipeline(self) -> Pipeline:
 
         io_config = getattr(self.model, "io_config", None) or {}
         input_shapes = io_config.get("input_shapes", [])
-        if input_shapes and len(input_shapes[0]) == 4:
+        image_processor = pipe.image_processor
+        if image_processor is not None and input_shapes and len(input_shapes[0]) == 4:
             _, _, h, w = input_shapes[0]
-            pipe.image_processor.size = {"height": h, "width": w}
-            if hasattr(pipe.image_processor, "keep_aspect_ratio"):
-                pipe.image_processor.keep_aspect_ratio = False
-            if hasattr(pipe.image_processor, "do_pad"):
-                pipe.image_processor.do_pad = False
+            # Runtime-settable processor attribute; not on the base class.
+            image_processor.size = {"height": h, "width": w}  # type: ignore[attr-defined]
+            if hasattr(image_processor, "keep_aspect_ratio"):
+                image_processor.keep_aspect_ratio = False
+            if hasattr(image_processor, "do_pad"):
+                image_processor.do_pad = False
 
         return pipe
 
diff --git a/src/winml/modelkit/eval/evaluate.py b/src/winml/modelkit/eval/evaluate.py
index 69fda09c8..cab10d26e 100644
--- a/src/winml/modelkit/eval/evaluate.py
+++ b/src/winml/modelkit/eval/evaluate.py
@@ -11,7 +11,7 @@
 import logging
 from copy import deepcopy
 from dataclasses import dataclass, field, replace
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from rich.console import Console
 
@@ -71,7 +71,7 @@
 def get_evaluator_class(config: WinMLEvaluationConfig) -> type[WinMLEvaluator]:
     """Return the evaluator class for *task*, or raise ValueError if unsupported."""
     key = "compare-tensor" if config.mode == "compare" else config.task
-    spec = _EVALUATOR_REGISTRY.get(key)
+    spec = _EVALUATOR_REGISTRY.get(key) if key is not None else None
     if spec is None:
         supported = ", ".join(sorted(_EVALUATOR_REGISTRY))
         raise ValueError(
@@ -79,7 +79,7 @@ def get_evaluator_class(config: WinMLEvaluationConfig) -> type[WinMLEvaluator]:
         )
     module_path, class_name = spec.rsplit(":", 1)
     module = importlib.import_module(module_path)
-    return getattr(module, class_name)
+    return cast("type[WinMLEvaluator]", getattr(module, class_name))
 
 
 _FE_DEFAULT = {
@@ -273,7 +273,7 @@ def evaluate(config: WinMLEvaluationConfig) -> EvalResult:
         config, mode=mode, task=_resolve_task(config), dataset=deepcopy(config.dataset)
     )
     if config.mode != "compare" and config.dataset.path is None:
-        default = _DEFAULT_DATASETS.get(config.task)
+        default = _DEFAULT_DATASETS.get(config.task) if config.task is not None else None
         if default is None:
             raise ValueError(
                 f"No dataset provided and no default for task '{config.task}'. Use --dataset."
diff --git a/src/winml/modelkit/eval/fill_mask_evaluator.py b/src/winml/modelkit/eval/fill_mask_evaluator.py
index f1c5c4496..c771f2a6a 100644
--- a/src/winml/modelkit/eval/fill_mask_evaluator.py
+++ b/src/winml/modelkit/eval/fill_mask_evaluator.py
@@ -13,7 +13,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from tqdm import tqdm
 
@@ -70,10 +70,10 @@ def _max_length(self) -> int | None:
 
     def _logits(self, outputs: Any) -> torch.Tensor:
         if not isinstance(outputs, dict):
-            return outputs.logits
+            return cast("torch.Tensor", outputs.logits)
         if "logits" not in outputs:
             raise KeyError(f"Model output dict has no 'logits' key; got keys {list(outputs)}.")
-        return outputs["logits"]
+        return cast("torch.Tensor", outputs["logits"])
 
     def _score(
         self,
diff --git a/src/winml/modelkit/eval/image_feature_extraction_evaluator.py b/src/winml/modelkit/eval/image_feature_extraction_evaluator.py
index aad6a553c..fe250c8c6 100644
--- a/src/winml/modelkit/eval/image_feature_extraction_evaluator.py
+++ b/src/winml/modelkit/eval/image_feature_extraction_evaluator.py
@@ -20,7 +20,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 import numpy as np
 from tqdm import tqdm
@@ -58,9 +58,10 @@ def prepare_pipeline(self) -> Pipeline:
 
         io_config = getattr(self.model, "io_config", None) or {}
         input_shapes = io_config.get("input_shapes", [])
-        if input_shapes and len(input_shapes[0]) == 4:
+        if pipe.image_processor is not None and input_shapes and len(input_shapes[0]) == 4:
             _, _, h, w = input_shapes[0]
-            pipe.image_processor.size = {"height": h, "width": w}
+            # Runtime-settable processor attribute; not on the base class.
+            pipe.image_processor.size = {"height": h, "width": w}  # type: ignore[attr-defined]
 
         return pipe
 
@@ -113,7 +114,7 @@ def _extract_image_embedding(raw: Any) -> np.ndarray:
             return tokens
         if tokens.ndim == 2:
             # CLS token (index 0) — standard image-level embedding for ViT/DINOv2.
-            return tokens[0]
+            return cast("np.ndarray", tokens[0])
         raise ValueError(
             f"Unsupported image-feature-extraction output shape: {np.asarray(raw).shape}. "
             "Expected [1, hidden] (pooled) or [1, num_tokens, hidden] (token sequence)."
diff --git a/src/winml/modelkit/eval/image_segmentation_evaluator.py b/src/winml/modelkit/eval/image_segmentation_evaluator.py
index 19d5b3452..c9a429e13 100644
--- a/src/winml/modelkit/eval/image_segmentation_evaluator.py
+++ b/src/winml/modelkit/eval/image_segmentation_evaluator.py
@@ -63,9 +63,10 @@ def prepare_pipeline(self) -> Pipeline:
 
         io_config = getattr(self.model, "io_config", None) or {}
         input_shapes = io_config.get("input_shapes", [])
-        if input_shapes and len(input_shapes[0]) == 4:
+        if pipe.image_processor is not None and input_shapes and len(input_shapes[0]) == 4:
             _, _, h, w = input_shapes[0]
-            pipe.image_processor.size = {"height": h, "width": w}
+            # Runtime-settable processor attribute; not on the base class.
+            pipe.image_processor.size = {"height": h, "width": w}  # type: ignore[attr-defined]
 
         return pipe
 
diff --git a/src/winml/modelkit/eval/image_to_text_evaluator.py b/src/winml/modelkit/eval/image_to_text_evaluator.py
index 828d5b0c8..9f541f173 100644
--- a/src/winml/modelkit/eval/image_to_text_evaluator.py
+++ b/src/winml/modelkit/eval/image_to_text_evaluator.py
@@ -28,8 +28,10 @@
 
 
 if TYPE_CHECKING:
+    from datasets import Dataset
+
     from ..models.winml.base import WinMLPreTrainedModel
-    from .config import WinMLEvaluationConfig
+    from .config import DatasetConfig, WinMLEvaluationConfig
 
 
 logger = logging.getLogger(__name__)
@@ -50,7 +52,7 @@ def __init__(
         self._label_col = cm.get("label_column", get_default("image-to-text", "label_column"))
         super().__init__(config, model)
 
-    def align_labels(self, dataset, ds_config):  # type: ignore[override]
+    def align_labels(self, dataset: Dataset, ds_config: DatasetConfig) -> Dataset:
         """No-op: free-text labels need no ClassLabel alignment."""
         return dataset
 
diff --git a/src/winml/modelkit/eval/metrics/depth.py b/src/winml/modelkit/eval/metrics/depth.py
index d89474440..c366c2015 100644
--- a/src/winml/modelkit/eval/metrics/depth.py
+++ b/src/winml/modelkit/eval/metrics/depth.py
@@ -12,7 +12,7 @@
 
 from __future__ import annotations
 
-from typing import Any
+from typing import Any, cast
 
 import numpy as np
 import torch
@@ -170,7 +170,7 @@ def _valid_mask(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
         if self._max_depth is not None:
             mask &= gt <= self._max_depth
         mask &= np.isfinite(pred) & (pred > 0)
-        return mask
+        return cast("np.ndarray", mask)
 
     @staticmethod
     def _to_numpy(arr: Any) -> np.ndarray:
diff --git a/src/winml/modelkit/eval/metrics/spearman_correlation.py b/src/winml/modelkit/eval/metrics/spearman_correlation.py
index 3d93825ab..9b643ba42 100644
--- a/src/winml/modelkit/eval/metrics/spearman_correlation.py
+++ b/src/winml/modelkit/eval/metrics/spearman_correlation.py
@@ -65,6 +65,6 @@ def compute(
                 "Spearman correlation is NaN. This typically means the model "
                 "produced constant outputs (zero variance). Returning 0.0.",
             )
-            corr = 0.0
+            return {"cosine_spearman": 0.0}
 
         return {"cosine_spearman": round(float(corr) * 100, 4)}
diff --git a/src/winml/modelkit/eval/metrics/text_similarity.py b/src/winml/modelkit/eval/metrics/text_similarity.py
index 37af65947..add146542 100644
--- a/src/winml/modelkit/eval/metrics/text_similarity.py
+++ b/src/winml/modelkit/eval/metrics/text_similarity.py
@@ -19,7 +19,11 @@
 from __future__ import annotations
 
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+
+if TYPE_CHECKING:
+    from collections.abc import Hashable
 
 
 logger = logging.getLogger(__name__)
@@ -91,8 +95,12 @@ def _cider(self) -> float | None:
         from .cider import Cider
 
         try:
-            refs_dict = {str(i): refs for i, refs in enumerate(self._references)}
-            preds_dict = {str(i): [pred] for i, pred in enumerate(self._predictions)}
+            refs_dict: dict[Hashable, list[str]] = {
+                str(i): refs for i, refs in enumerate(self._references)
+            }
+            preds_dict: dict[Hashable, list[str]] = {
+                str(i): [pred] for i, pred in enumerate(self._predictions)
+            }
             score, _ = Cider().compute_score(refs_dict, preds_dict)
             return float(score)
         except Exception as e:
diff --git a/src/winml/modelkit/eval/object_detection_evaluator.py b/src/winml/modelkit/eval/object_detection_evaluator.py
index 6d6223fc5..baa578664 100644
--- a/src/winml/modelkit/eval/object_detection_evaluator.py
+++ b/src/winml/modelkit/eval/object_detection_evaluator.py
@@ -46,13 +46,21 @@ def __init__(
         mapping = config.dataset.columns_mapping
         task = "object-detection"
         self._image_col = mapping.get("input_column", get_default(task, "input_column"))
-        self._annotation_col = mapping.get(
-            "annotation_column", get_default(task, "annotation_column"),
-        )
-        self._bbox_key = mapping.get("bbox_key", get_default(task, "bbox_key"))
-        self._category_key = mapping.get("category_key", get_default(task, "category_key"))
-        self._box_format = mapping.get("box_format", get_default(task, "box_format"))
-        self._box_coords = mapping.get("box_coords", get_default(task, "box_coords"))
+        ann_col_raw = mapping.get("annotation_column", get_default(task, "annotation_column"))
+        bbox_key_raw = mapping.get("bbox_key", get_default(task, "bbox_key"))
+        category_key_raw = mapping.get("category_key", get_default(task, "category_key"))
+        assert ann_col_raw is not None, "annotation_column has no default for object-detection"
+        assert bbox_key_raw is not None, "bbox_key has no default for object-detection"
+        assert category_key_raw is not None, "category_key has no default for object-detection"
+        self._annotation_col: str = ann_col_raw
+        self._bbox_key: str = bbox_key_raw
+        self._category_key: str = category_key_raw
+        box_format_raw = mapping.get("box_format", get_default(task, "box_format"))
+        box_coords_raw = mapping.get("box_coords", get_default(task, "box_coords"))
+        assert box_format_raw is not None, "box_format has no default for object-detection"
+        assert box_coords_raw is not None, "box_coords has no default for object-detection"
+        self._box_format: str = box_format_raw
+        self._box_coords: str = box_coords_raw
 
         super().__init__(config, model)
 
@@ -63,21 +71,23 @@ def prepare_pipeline(self) -> Pipeline:
         io_config = getattr(self.model, "io_config", None) or {}
         input_shapes = io_config.get("input_shapes", [[]])
         input_names = io_config.get("input_names", [])
-        if input_shapes and len(input_shapes[0]) == 4:
+        image_processor = pipe.image_processor
+        if image_processor is not None and input_shapes and len(input_shapes[0]) == 4:
             _, _, h, w = input_shapes[0]
+            # Runtime-settable processor attributes; not on the base class.
             if "pixel_mask" in input_names:
-                pipe.image_processor.size = {
+                image_processor.size = {  # type: ignore[attr-defined]
                     "shortest_edge": min(h, w),
                     "longest_edge": max(h, w),
                 }
-                if hasattr(pipe.image_processor, "pad_size"):
-                    pipe.image_processor.pad_size = {"height": h, "width": w}
-                if hasattr(pipe.image_processor, "do_pad"):
-                    pipe.image_processor.do_pad = True
+                if hasattr(image_processor, "pad_size"):
+                    image_processor.pad_size = {"height": h, "width": w}
+                if hasattr(image_processor, "do_pad"):
+                    image_processor.do_pad = True
             else:
-                pipe.image_processor.size = {"height": h, "width": w}
-                if hasattr(pipe.image_processor, "do_pad"):
-                    pipe.image_processor.do_pad = False
+                image_processor.size = {"height": h, "width": w}  # type: ignore[attr-defined]
+                if hasattr(image_processor, "do_pad"):
+                    image_processor.do_pad = False
 
         return pipe
 
@@ -132,7 +142,7 @@ def align_labels(
         ann_feat[cat_key] = Sequence(Value("int64"))
         new_features[ann_col] = ann_feat
 
-        def remap(sample):
+        def remap(sample: dict[str, Any]) -> dict[str, Any]:
             ann = sample[ann_col]
             ann[cat_key] = [id_map[lbl] for lbl in ann[cat_key]]
             return sample
@@ -151,8 +161,8 @@ def compute(self) -> dict[str, Any]:
 
         label2id = getattr(self.model.config, "label2id", {})
 
-        predictions = []
-        references = []
+        predictions: list[dict[str, Any]] = []
+        references: list[dict[str, Any]] = []
 
         for i, sample in enumerate(self.data):
             # --- Ground truth ---
diff --git a/src/winml/modelkit/eval/question_answering_evaluator.py b/src/winml/modelkit/eval/question_answering_evaluator.py
index e024638dc..362325536 100644
--- a/src/winml/modelkit/eval/question_answering_evaluator.py
+++ b/src/winml/modelkit/eval/question_answering_evaluator.py
@@ -8,7 +8,7 @@
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from .base_evaluator import WinMLEvaluator
 
@@ -102,4 +102,4 @@ def compute(self) -> dict[str, Any]:
             **self.config.dataset.columns_mapping,
         }
 
-        return task_evaluator.compute(**kwargs)
+        return cast("dict[str, Any]", task_evaluator.compute(**kwargs))
diff --git a/src/winml/modelkit/eval/tensor_similarity_evaluator.py b/src/winml/modelkit/eval/tensor_similarity_evaluator.py
index 7b483e8f1..d1321c3ea 100644
--- a/src/winml/modelkit/eval/tensor_similarity_evaluator.py
+++ b/src/winml/modelkit/eval/tensor_similarity_evaluator.py
@@ -22,6 +22,7 @@
 
 if TYPE_CHECKING:
     from ..models.winml.base import WinMLPreTrainedModel
+    from ..models.winml.composite_model import WinMLCompositeModel
     from .config import WinMLEvaluationConfig
 
 
@@ -34,10 +35,13 @@ class TensorSimilarityEvaluator:
     def __init__(
         self,
         config: WinMLEvaluationConfig,
-        model: WinMLPreTrainedModel,
+        model: WinMLPreTrainedModel | WinMLCompositeModel,
     ) -> None:
         from ..models.winml.composite_model import WinMLCompositeModel
 
+        # Composite models must be split into their sub-components before
+        # tensor-similarity comparison. Annotating `model` as a union keeps the
+        # isinstance guard below reachable from a type checker's point of view.
         if isinstance(model, WinMLCompositeModel):
             sub_tasks = list(getattr(type(model), "_SUB_MODEL_CONFIG", {}).values())
             raise TypeError(
@@ -71,7 +75,8 @@ def _load_reference_model(self) -> Any:
         hf_config = AutoConfig.from_pretrained(self.config.model_id)
         _, cls = resolve_task_and_model_class(hf_config, task=self.config.task)
         logger.info("Loading HF reference %s on CPU/fp32", cls.__name__)
-        return cls.from_pretrained(
+        # cls is an HF model class; `from_pretrained` is not declared on plain `type`.
+        return cls.from_pretrained(  # type: ignore[attr-defined]
             self.config.model_id, dtype=torch.float32
         ).eval()
 
diff --git a/src/winml/modelkit/eval/zero_shot_classification_evaluator.py b/src/winml/modelkit/eval/zero_shot_classification_evaluator.py
index 262a168f9..f9e0ca509 100644
--- a/src/winml/modelkit/eval/zero_shot_classification_evaluator.py
+++ b/src/winml/modelkit/eval/zero_shot_classification_evaluator.py
@@ -20,7 +20,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from tqdm import tqdm
 from transformers.pipelines.zero_shot_classification import ZeroShotClassificationPipeline
@@ -46,10 +46,10 @@ class _FixedShapeZeroShotPipeline(ZeroShotClassificationPipeline):
 
     _winml_evaluator: WinMLEvaluator | None = None
 
-    def _parse_and_tokenize(self, sequence_pairs: Any, **kwargs: Any) -> Any:
+    def _parse_and_tokenize(self, *args: Any, **kwargs: Any) -> Any:
         kwargs.setdefault("padding", True)
         kwargs.setdefault("truncation", True)
-        encoding = super()._parse_and_tokenize(sequence_pairs, **kwargs)
+        encoding = super()._parse_and_tokenize(*args, **kwargs)
         if self._winml_evaluator is None or self.tokenizer is None:
             return encoding
         return self._winml_evaluator._pad_or_truncate(encoding, self.tokenizer)
@@ -79,7 +79,9 @@ def prepare_pipeline(self) -> Pipeline:
 
         max_length = self._fixed_seq_length()
 
-        pipe = pipeline(
+        # WinMLPreTrainedModel isn't in transformers' Pipeline model union;
+        # the pipeline_class override is also outside the Literal overloads.
+        pipe = pipeline(  # type: ignore[call-overload]
             "zero-shot-classification",
             model=self.model,
             framework="pt",
@@ -101,7 +103,7 @@ def prepare_pipeline(self) -> Pipeline:
                 if filtered:
                     pipe.tokenizer.model_input_names = filtered
 
-        return pipe
+        return cast("Pipeline", pipe)
 
     def align_labels(
         self,
diff --git a/src/winml/modelkit/export/__init__.py b/src/winml/modelkit/export/__init__.py
index 868c8a925..f935f24d8 100644
--- a/src/winml/modelkit/export/__init__.py
+++ b/src/winml/modelkit/export/__init__.py
@@ -11,6 +11,8 @@
 - export_pytorch / export_onnx for ONNX export
 """
 
+from typing import TYPE_CHECKING, Any
+
 from .config import (
     InputTensorSpec,
     OutputTensorSpec,
@@ -19,6 +21,22 @@
 )
 
 
+# Static type re-exports for the names exposed by ``__getattr__`` below.
+# At runtime these are loaded lazily (see _LAZY_IMPORTS); at type-check time
+# we want mypy to see real types so callers like ``build.hf.export_onnx(...)``
+# get checked instead of resolving to ``Any``.
+if TYPE_CHECKING:
+    from .io import (
+        MaxLengthTextInputGenerator,
+        ONNXConfigNotFoundError,
+        generate_dummy_inputs,
+        register_onnx_overwrite,
+        resolve_io_specs,
+    )
+    from .pytorch import export_pytorch
+    from .pytorch import export_pytorch as export_onnx
+
+
 __version__ = "2.1.0"
 
 __all__ = [
@@ -47,7 +65,7 @@
 }
 
 
-def __getattr__(name: str):
+def __getattr__(name: str) -> Any:
     """Lazy-load heavy exports to avoid importing optimum at package init."""
     if name in _LAZY_IMPORTS:
         module_path, attr_name = _LAZY_IMPORTS[name]
diff --git a/src/winml/modelkit/export/config.py b/src/winml/modelkit/export/config.py
index a37b99b9c..4bee0907f 100644
--- a/src/winml/modelkit/export/config.py
+++ b/src/winml/modelkit/export/config.py
@@ -435,6 +435,9 @@ def resolve_export_config(
         trust_remote_code=trust_remote_code,
         library_name=library_name,
     )
+    # resolve_loader_config guarantees both fields are populated (it raises otherwise).
+    assert loader_config.model_type is not None
+    assert loader_config.task is not None
 
     export_config = _resolve_export_config_from_specs(
         model_type=loader_config.model_type,
diff --git a/src/winml/modelkit/export/htp/__init__.py b/src/winml/modelkit/export/htp/__init__.py
index 97bdcb33d..2ea63c8cb 100644
--- a/src/winml/modelkit/export/htp/__init__.py
+++ b/src/winml/modelkit/export/htp/__init__.py
@@ -26,8 +26,8 @@
 """
 
 # HTP strategy version (defined before imports to avoid circular dependencies)
-__version__ = "1.0.0"  # HTP strategy version
-__spec_version__ = ".".join(__version__.split(".")[:2])  # "1.0"
+__version__: str = "1.0.0"  # HTP strategy version
+__spec_version__: str = ".".join(__version__.split(".")[:2])  # "1.0"
 
 from .base_writer import ExportStep
 from .exporter import HTPExporter
diff --git a/src/winml/modelkit/export/htp/base_writer.py b/src/winml/modelkit/export/htp/base_writer.py
index 3bbb9ef15..0e9295caa 100644
--- a/src/winml/modelkit/export/htp/base_writer.py
+++ b/src/winml/modelkit/export/htp/base_writer.py
@@ -19,8 +19,7 @@
 
 # datetime imports removed - following ADR-006 to use float timestamps only
 from enum import Enum
-from functools import wraps
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, TypeVar
 
 from .step_data import (
     HierarchyData,
@@ -92,17 +91,25 @@ def elapsed_time(self) -> float:
         return time.time() - self.start_time
 
 
-def step(export_step: ExportStep) -> Any:
-    """Decorator to mark step-specific handler methods."""
+# Bound to `Callable[..., int]` to match StepAwareWriter.write()'s IOBase
+# contract (returns "bytes written"). All @step handlers must return int —
+# a handler typed `-> None` will fail mypy here rather than silently
+# breaking the writer's return value.
+F = TypeVar("F", bound="Callable[..., int]")
 
-    def decorator(func: Callable) -> Callable:
-        func._handles_step = export_step
 
-        @wraps(func)
-        def wrapper(*args: Any, **kwargs: Any) -> int:
-            return func(*args, **kwargs)
+def step(export_step: ExportStep) -> Callable[[F], F]:
+    """Decorator to mark step-specific handler methods.
 
-        return wrapper
+    Attaches ``_handles_step`` on the function so ``StepAwareWriter``'s
+    discovery loop can map each handler to its declared step. The function is
+    returned unchanged, so the original signature is preserved for callers and
+    type checkers.
+    """
+
+    def decorator(func: F) -> F:
+        func._handles_step = export_step  # type: ignore[attr-defined]
+        return func
 
     return decorator
 
@@ -113,7 +120,7 @@ class StepAwareWriter(io.IOBase, ABC):
     def __init__(self) -> None:
         """Initialize the writer and discover step handlers."""
         super().__init__()
-        self._step_handlers: dict[ExportStep, Callable] = {}
+        self._step_handlers: dict[ExportStep, Callable[..., int]] = {}
         self._discover_handlers()
 
     def _discover_handlers(self) -> None:
diff --git a/src/winml/modelkit/export/htp/config_generator.py b/src/winml/modelkit/export/htp/config_generator.py
index 7dbfa0e3a..060a12499 100644
--- a/src/winml/modelkit/export/htp/config_generator.py
+++ b/src/winml/modelkit/export/htp/config_generator.py
@@ -203,7 +203,8 @@ def _generate_input_specs(
         Uses InputSpecGenerator patterns (universal approach).
         """
         try:
-            from ...inference.onnx_config.input_generator import (
+            # Tracked: inference.onnx_config.* doesn't exist; #859 decides delete-vs-restore.
+            from ...inference.onnx_config.input_generator import (  # type: ignore[import-not-found]
                 InputSpecGenerator,
             )
 
@@ -265,10 +266,13 @@ def _get_output_names(
         Uses InputSpecGenerator patterns (universal approach).
         """
         try:
-            from ...inference.onnx_config.patterns import TASK_TO_OUTPUTS
+            # Tracked: inference.onnx_config.* doesn't exist; #859 decides delete-vs-restore.
+            from ...inference.onnx_config.patterns import (  # type: ignore[import-not-found]
+                TASK_TO_OUTPUTS,
+            )
 
             if task and task in TASK_TO_OUTPUTS:
-                return TASK_TO_OUTPUTS[task]
+                return list(TASK_TO_OUTPUTS[task])
 
             # Default outputs
             if task and "classification" in task:
@@ -300,7 +304,7 @@ def generate_for_cuda(model_name_or_path: str, **kwargs: Any) -> ExportConfigTem
 def generate_config_cli(
     model: str,
     output: str = "export_config.json",
-    target: str = "qnn",
+    target: Literal["qnn", "cpu", "cuda", "universal"] = "qnn",
     task: str | None = None,
     batch_size: int = 1,
 ) -> None:
@@ -340,6 +344,18 @@ def generate_config_cli(
 
     model = sys.argv[1]
     output = sys.argv[2] if len(sys.argv) > 2 else "export_config.json"
-    target = sys.argv[3] if len(sys.argv) > 3 else "qnn"
+    target: Literal["qnn", "cpu", "cuda", "universal"]
+    raw_target = sys.argv[3] if len(sys.argv) > 3 else "qnn"
+    if raw_target == "qnn":
+        target = "qnn"
+    elif raw_target == "cpu":
+        target = "cpu"
+    elif raw_target == "cuda":
+        target = "cuda"
+    elif raw_target == "universal":
+        target = "universal"
+    else:
+        print(f"Invalid target {raw_target!r}; expected qnn, cpu, cuda, or universal.")
+        sys.exit(1)
 
     generate_config_cli(model, output, target)
diff --git a/src/winml/modelkit/export/htp/console_writer.py b/src/winml/modelkit/export/htp/console_writer.py
index c59e14558..c132feb4b 100644
--- a/src/winml/modelkit/export/htp/console_writer.py
+++ b/src/winml/modelkit/export/htp/console_writer.py
@@ -15,7 +15,7 @@
 
 import io
 import os
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from rich.console import Console
 from rich.tree import Tree
@@ -201,7 +201,7 @@ def write_onnx_export(self, export_step: ExportStep, data: ExportData) -> int:
 
         self.console.print("🔧 Export configuration:")
         self.console.print(
-            f"   • Opset version: {self._bright_green(data.onnx_export.opset_version)}"
+            f"   • Opset version: {self._bright_green(str(data.onnx_export.opset_version))}"
         )
         self.console.print(
             f"   • Constant folding: {self._format_bool(data.onnx_export.do_constant_folding)}"
@@ -335,7 +335,9 @@ def _build_truncated_tree(self, source_tree: Tree, target_tree: Tree, max_lines:
         line_count = 1  # Start with root
 
         # Helper to add nodes up to limit
-        def add_nodes_to_limit(source_children, target_parent, current_count):
+        def add_nodes_to_limit(
+            source_children: Any, target_parent: Any, current_count: int
+        ) -> int:
             count = current_count
             for child in source_children:
                 if count >= max_lines:
diff --git a/src/winml/modelkit/export/htp/exporter.py b/src/winml/modelkit/export/htp/exporter.py
index 35718392d..91abd5828 100644
--- a/src/winml/modelkit/export/htp/exporter.py
+++ b/src/winml/modelkit/export/htp/exporter.py
@@ -30,7 +30,7 @@
 import torch.nn as nn
 from rich.console import Console
 
-from ...core.onnx_node_tagger import create_node_tagger_from_hierarchy
+from ...core.onnx_node_tagger import ONNXNodeTagger, create_node_tagger_from_hierarchy
 from ...core.onnx_utils import infer_output_names
 from .base_writer import ExportStep
 from .hierarchy import TracingHierarchyBuilder
@@ -140,17 +140,17 @@ def __init__(
         self.strategy = HTPConfig.STRATEGY_NAME
 
         # Core components
-        self._hierarchy_builder = None
-        self._node_tagger = None
-        self._hierarchy_data = {}
-        self._tagged_nodes = {}
-        self._tagging_stats = {}
+        self._hierarchy_builder: TracingHierarchyBuilder | None = None
+        self._node_tagger: ONNXNodeTagger | None = None
+        self._hierarchy_data: dict[str, Any] = {}
+        self._tagged_nodes: dict[str, str] = {}
+        self._tagging_stats: dict[str, Any] = {}
 
         # Export statistics
         self._export_stats = HTPConfig.DEFAULT_EXPORT_STATS.copy()
 
         # Export monitor will be initialized in export()
-        self._monitor = None
+        self._monitor: HTPExportMonitor | None = None
 
         # Rich console for tree rendering
         self.console = Console(width=HTPConfig.CONSOLE_WIDTH)
@@ -447,7 +447,8 @@ def _convert_model_to_onnx(
             # get_export_args(inputs) → tuple of positional args.
             # Default: pass inputs dict as kwargs.
             if hasattr(model, "get_export_args"):
-                export_args = model.get_export_args(inputs)
+                # hasattr-gated optional protocol; not in nn.Module's static type.
+                export_args = model.get_export_args(inputs)  # type: ignore[operator]
                 torch.onnx.export(model, export_args, output_path, **onnx_kwargs)
             else:
                 torch.onnx.export(model, (), output_path, kwargs=inputs, **onnx_kwargs)
@@ -472,6 +473,9 @@ def _get_optimum_patcher(model: nn.Module, task: str | None) -> Any:
         if not model_type:
             logger.debug("Model has no config.model_type; skipping Optimum patcher.")
             return contextlib.nullcontext()
+        if task is None:
+            logger.debug("No task provided; skipping Optimum patcher.")
+            return contextlib.nullcontext()
 
         # TasksManager expects Optimum-canonical task names
         from ...loader import to_optimum_task
@@ -532,6 +536,9 @@ def _initialize_node_tagger(self, enable_operation_fallback: bool) -> None:
 
     def _apply_hierarchy_tags(self, onnx_model: onnx.ModelProto) -> None:
         """Tag nodes internally."""
+        assert self._node_tagger is not None, (
+            "_apply_hierarchy_tags called before _initialize_node_tagger"
+        )
         # Store ONNX model for later use in displaying operations
         self._onnx_model = onnx_model
         self._tagged_nodes = self._node_tagger.tag_all_nodes(onnx_model)
diff --git a/src/winml/modelkit/export/htp/hierarchy.py b/src/winml/modelkit/export/htp/hierarchy.py
index c4fb385cd..7a3fa2f91 100644
--- a/src/winml/modelkit/export/htp/hierarchy.py
+++ b/src/winml/modelkit/export/htp/hierarchy.py
@@ -39,14 +39,14 @@ def __init__(self, exceptions: list[str] | None = None) -> None:
                        torch.nn modules. Passed to should_include_in_hierarchy.
                        Example: ["Conv2d", "BatchNorm2d"] to include these in hierarchy.
         """
-        self.tag_stack = []
-        self.execution_trace = []
-        self.operation_context = {}
-        self.hooks = []
-        self.module_hierarchy = {}  # Only populated for executed modules
-        self.traced_modules = set()  # Track which modules were traced
+        self.tag_stack: list[str] = []
+        self.execution_trace: list[dict[str, Any]] = []
+        self.operation_context: dict[str, dict[str, Any]] = {}
+        self.hooks: list[Any] = []
+        self.module_hierarchy: dict[str, dict[str, Any]] = {}  # Only populated for executed modules
+        self.traced_modules: set[str] = set()  # Track which modules were traced
         self.exceptions = exceptions  # torch.nn exceptions to include
-        self.model_outputs = None  # Store model outputs from execution
+        self.model_outputs: Any = None  # Store model outputs from execution
 
     def is_hf_class(self, module: nn.Module) -> bool:
         """Check if a module is a HuggingFace class - UNIVERSAL."""
@@ -183,7 +183,9 @@ def remove_hooks(self) -> None:
         self.hooks.clear()
 
     def trace_model_execution(
-        self, model: nn.Module, example_inputs: tuple[torch.Tensor, ...]
+        self,
+        model: nn.Module,
+        example_inputs: tuple[torch.Tensor, ...] | dict[str, Any],
     ) -> None:
         """Trace model execution to build hierarchy mapping - UNIVERSAL."""
         self.register_hooks(model)
diff --git a/src/winml/modelkit/export/htp/markdown_report_writer.py b/src/winml/modelkit/export/htp/markdown_report_writer.py
index 98e53e4f7..35a42b687 100644
--- a/src/winml/modelkit/export/htp/markdown_report_writer.py
+++ b/src/winml/modelkit/export/htp/markdown_report_writer.py
@@ -12,6 +12,7 @@
 
 import time
 from pathlib import Path
+from typing import Any
 
 import snakemd
 
@@ -53,9 +54,9 @@ def __init__(self, output_path: str) -> None:
         self.doc = snakemd.new_doc()
 
         # Store step data for final report generation
-        self._step_results = {}
+        self._step_results: dict[ExportStep, dict[str, Any]] = {}
         self._start_time = time.time()
-        self._export_data = None  # Will be set on first write
+        self._export_data: ExportData | None = None  # Will be set on first write
         self._report_generated = False  # Track if report was generated
 
     def _write_default(self, export_step: ExportStep, data: ExportData) -> int:
@@ -421,8 +422,8 @@ def _write_module_hierarchy_section(self, data: ExportData) -> None:
         self.doc.add_heading("Module List (Sorted by Execution Order)", level=3)
 
         # Count direct and total nodes for each module if available
-        direct_counts = {}
-        total_counts = {}
+        direct_counts: dict[str, int] = {}
+        total_counts: dict[str, int] = {}
         if data.node_tagging and data.node_tagging.tagged_nodes:
             direct_counts, total_counts = count_direct_and_total_nodes(
                 data.node_tagging.tagged_nodes
diff --git a/src/winml/modelkit/export/htp/metadata_builder.py b/src/winml/modelkit/export/htp/metadata_builder.py
index b7728ee8c..fd0698faf 100644
--- a/src/winml/modelkit/export/htp/metadata_builder.py
+++ b/src/winml/modelkit/export/htp/metadata_builder.py
@@ -371,7 +371,7 @@ def build_minimal(self, error: str | None = None) -> dict[str, Any]:
             # version will use the default htp_version from the dataclass
         )
 
-        result = {"export_context": asdict(minimal_context)}
+        result: dict[str, Any] = {"export_context": asdict(minimal_context)}
 
         if error:
             result["error"] = error
diff --git a/src/winml/modelkit/export/htp/metadata_writer.py b/src/winml/modelkit/export/htp/metadata_writer.py
index 7f7a2d648..88953322a 100644
--- a/src/winml/modelkit/export/htp/metadata_writer.py
+++ b/src/winml/modelkit/export/htp/metadata_writer.py
@@ -44,8 +44,8 @@ def __init__(self, output_path: str) -> None:
         # Store data for final building
         self._model_info_set = False
         self._export_time = 0.0
-        self._steps_data = {}
-        self._export_data = None  # Will be set on first write
+        self._steps_data: dict[str, dict[str, Any]] = {}
+        self._export_data: ExportData | None = None  # Will be set on first write
 
     def _write_default(self, export_step: ExportStep, data: ExportData) -> int:
         """Default handler - record step completion."""
@@ -360,7 +360,7 @@ def _build_hierarchical_modules(self, flat_hierarchy: dict[str, ModuleInfo]) ->
             return {}
 
         # Build root structure
-        root = {
+        root: dict[str, Any] = {
             "class_name": root_info.class_name,
             "traced_tag": root_info.traced_tag,
             "scope": "",
@@ -446,7 +446,7 @@ def _build_children_for_parent(
                 key = module_info.class_name
 
             # Build child structure
-            child = {
+            child: dict[str, Any] = {
                 "class_name": module_info.class_name,
                 "traced_tag": module_info.traced_tag,
                 "scope": path,  # Full path from root
diff --git a/src/winml/modelkit/export/htp/monitor.py b/src/winml/modelkit/export/htp/monitor.py
index 27f148985..2cf94f549 100644
--- a/src/winml/modelkit/export/htp/monitor.py
+++ b/src/winml/modelkit/export/htp/monitor.py
@@ -83,6 +83,7 @@ def __init__(
         self.writers.append(self.metadata_writer)
 
         # Report writer (optional) - now using MarkdownReportWriter
+        self.report_writer: MarkdownReportWriter | None
         if enable_report:
             self.report_writer = MarkdownReportWriter(output_path)
             self.writers.append(self.report_writer)
diff --git a/src/winml/modelkit/export/io.py b/src/winml/modelkit/export/io.py
index a2111a86b..139200802 100644
--- a/src/winml/modelkit/export/io.py
+++ b/src/winml/modelkit/export/io.py
@@ -32,7 +32,7 @@
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from optimum.exporters.tasks import TasksManager
 from optimum.utils.input_generators import (
@@ -92,7 +92,7 @@ def ensure_hf_models_registered() -> None:
 # =============================================================================
 # Custom Input Generators
 # =============================================================================
-class MaxLengthTextInputGenerator(DummyTextInputGenerator):
+class MaxLengthTextInputGenerator(DummyTextInputGenerator):  # type: ignore[misc]
     """Text input generator that uses max_position_embeddings as sequence_length.
 
     Optimum's DummyTextInputGenerator uses a hardcoded default of 16 for
@@ -116,8 +116,8 @@ def __init__(
         task: str,
         normalized_config: NormalizedTextConfig,
         sequence_length: int | None = None,
-        **kwargs,
-    ):
+        **kwargs: Any,
+    ) -> None:
         """Initialize with sequence_length from normalized_config.
 
         Args:
@@ -271,7 +271,11 @@ def _get_preprocessor_dict(
     from. Returns an empty dict when neither source yields a usable size.
     """
     try:
-        from transformers.image_processing_utils import ImageProcessingMixin
+        if model_id is None:
+            raise OSError("No model_id provided")
+        from transformers.image_processing_utils import (  # type: ignore[attr-defined]
+            ImageProcessingMixin,
+        )
 
         config, _ = ImageProcessingMixin.get_image_processor_dict(model_id)
         if "size" in config:
@@ -279,7 +283,6 @@ def _get_preprocessor_dict(
         # Partial preprocessor_config.json without a "size" key: fall through
         # to synthesis so we don't silently use Optimum's 64x64 default.
     except (OSError, ValueError, KeyError) as e:
-        # if model_id is None, OSError is raised
         logger.debug("Could not load preprocessor_config.json for %s: %s", model_id, e)
 
     if hf_config is not None:
@@ -411,7 +414,11 @@ def generate_dummy_inputs(
         shape_kwargs,
     )
 
-    return onnx_config.generate_dummy_inputs(framework="pt", **shape_kwargs)
+    # Optimum's OnnxConfig is untyped; the dummy-inputs dict matches our return type.
+    return cast(
+        "dict[str, torch.Tensor]",
+        onnx_config.generate_dummy_inputs(framework="pt", **shape_kwargs),
+    )
 
 
 def resolve_io_specs(
@@ -477,7 +484,9 @@ def resolve_io_specs(
     input_dtypes = [str(t.dtype).replace("torch.", "") for t in dummy_inputs.values()]
 
     # Build value_range dict: {name: (min, max)} from intercepted data
-    value_ranges = {name: (info["min"], info["max"]) for name, info in value_ranges.items()}
+    value_range_tuples = {
+        name: (info["min"], info["max"]) for name, info in value_ranges.items()
+    }
 
     return {
         "inputs": onnx_config.inputs,
@@ -487,5 +496,5 @@ def resolve_io_specs(
         "dynamic_axes": {**onnx_config.inputs, **onnx_config.outputs},
         "input_shapes": input_shapes,
         "input_dtypes": input_dtypes,
-        "value_ranges": value_ranges,
+        "value_ranges": value_range_tuples,
     }
diff --git a/src/winml/modelkit/export/value_range.py b/src/winml/modelkit/export/value_range.py
index 2946a594f..b2d990596 100644
--- a/src/winml/modelkit/export/value_range.py
+++ b/src/winml/modelkit/export/value_range.py
@@ -23,10 +23,15 @@
 import threading
 from contextlib import contextmanager
 from functools import wraps
+from typing import TYPE_CHECKING, Any
 
 from optimum.utils.input_generators import DummyInputGenerator
 
 
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterator
+
+
 # Thread-local to correlate static method calls with the current input_name.
 # generate() wrapper sets .name before calling the original, static method
 # wrappers read it to associate captured ranges with the correct input.
@@ -41,11 +46,13 @@
 )
 
 
-def _make_static_wrapper(original, method_name, captured):
+def _make_static_wrapper(
+    original: Callable[..., Any], method_name: str, captured: dict[str, dict[str, Any]]
+) -> Callable[..., Any]:
     """Wrap a DummyInputGenerator static method to capture value range args."""
 
     @wraps(original)
-    def wrapper(*args, **kwargs):
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
         result = original(*args, **kwargs)
         input_name = getattr(_current_input, "name", None)
         if input_name is None:
@@ -81,11 +88,11 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def _make_generate_wrapper(original):
+def _make_generate_wrapper(original: Callable[..., Any]) -> Callable[..., Any]:
     """Wrap a generator's generate() to track which input_name is active."""
 
     @wraps(original)
-    def wrapper(self, input_name, *args, **kwargs):
+    def wrapper(self: Any, input_name: str, *args: Any, **kwargs: Any) -> Any:
         _current_input.name = input_name
         try:
             return original(self, input_name, *args, **kwargs)
@@ -96,7 +103,7 @@ def wrapper(self, input_name, *args, **kwargs):
 
 
 @contextmanager
-def intercept_value_ranges():
+def intercept_value_ranges() -> Iterator[dict[str, dict[str, Any]]]:
     """Context manager that captures value ranges from Optimum's dummy input generation.
 
     Monkey-patches DummyInputGenerator's static tensor methods and all
@@ -133,11 +140,12 @@ def intercept_value_ranges():
     # Patch generate() on all subclasses that override it
     patched_classes = []
 
-    def _patch_subclasses(base):
+    def _patch_subclasses(base: type) -> None:
         for cls in base.__subclasses__():
             if "generate" in cls.__dict__:
                 originals[(cls, "generate")] = cls.__dict__["generate"]
-                cls.generate = _make_generate_wrapper(cls.__dict__["generate"])
+                # Monkey-patch: `cls` is a bare `type`, so mypy can't see `generate`.
+                cls.generate = _make_generate_wrapper(cls.__dict__["generate"])  # type: ignore[attr-defined]
                 patched_classes.append(cls)
             _patch_subclasses(cls)
 
@@ -154,4 +162,4 @@ def _patch_subclasses(base):
                 staticmethod(originals[method_name]),
             )
         for cls in patched_classes:
-            cls.generate = originals[(cls, "generate")]
+            cls.generate = originals[(cls, "generate")]  # type: ignore[attr-defined]
diff --git a/src/winml/modelkit/inference/engine.py b/src/winml/modelkit/inference/engine.py
index 9f4c28f72..e3e4bbf38 100644
--- a/src/winml/modelkit/inference/engine.py
+++ b/src/winml/modelkit/inference/engine.py
@@ -34,7 +34,7 @@
 from datetime import datetime, timezone
 from io import BytesIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from .tasks import BINARY_TYPES, TASK_REGISTRY, InputField, PipelineMapping
 from .types import Prediction, PredictionResult
@@ -234,7 +234,7 @@ def _discover_pipeline_params_from_task(task: str | None) -> list[dict]:
     try:
         from transformers.pipelines import SUPPORTED_TASKS
 
-        task_info = SUPPORTED_TASKS.get(task)
+        task_info = cast("dict[str, Any] | None", SUPPORTED_TASKS.get(task))
         if not task_info:
             return []
         pipeline_class = task_info.get("impl")
@@ -870,7 +870,10 @@ def _normalize_pipeline_output(
         # output transformation without any if/else branching here.
         spec = TASK_REGISTRY.get(task or "")
         if spec and spec.postprocess is not None:
-            return spec.postprocess(raw, pipeline=self._pipeline, inputs=inputs)
+            return cast(
+                "list[Prediction] | dict[str, Any]",
+                spec.postprocess(raw, pipeline=self._pipeline, inputs=inputs),
+            )
 
         if isinstance(raw, list) and raw and isinstance(raw[0], dict):
             # Classification / detection: list of {"label": ..., "score": ...}
@@ -886,10 +889,10 @@ def _normalize_pipeline_output(
             # Sanitize numpy scalars so pydantic/JSON serialization works
             # (NER pipelines return np.float32 scores).
             result = raw[0] if len(raw) == 1 else {"results": raw}
-            return _sanitize_numpy(result)
+            return cast("dict[str, Any]", _sanitize_numpy(result))
         # Other tasks: return as-is dict
         if isinstance(raw, dict):
-            return _sanitize_numpy(raw)
+            return cast("dict[str, Any]", _sanitize_numpy(raw))
         # Fallback
         return {"raw": str(raw)}
 
@@ -902,6 +905,7 @@ def _predict_raw_tensors(self, tensor_inputs: dict[str, Any]) -> dict[str, Any]:
         import numpy as np
         import torch
 
+        assert self._model is not None, "_predict_raw_tensors called before model loaded"
         inputs_torch = {
             k: torch.from_numpy(np.array(v)) if not isinstance(v, torch.Tensor) else v
             for k, v in tensor_inputs.items()
@@ -961,10 +965,10 @@ def _load_from_build_dir(
     def _resolve_model_id_from_dir(build_dir: Path) -> str | None:
         """Extract model_id from any manifest in the directory (task-agnostic)."""
         for manifest_path in build_dir.glob("*build_manifest.json"):
-            manifest = json.loads(manifest_path.read_text())
+            manifest: dict[str, Any] = json.loads(manifest_path.read_text())
             model_id = manifest.get("model_id")
             if model_id:
-                return model_id
+                return str(model_id)
         return None
 
     def _load_from_onnx(
diff --git a/src/winml/modelkit/inference/pipeline.py b/src/winml/modelkit/inference/pipeline.py
index c478f724a..33e243cfe 100644
--- a/src/winml/modelkit/inference/pipeline.py
+++ b/src/winml/modelkit/inference/pipeline.py
@@ -24,6 +24,8 @@
 
 
 if TYPE_CHECKING:
+    from collections.abc import Mapping
+
     from ..models.winml.base import WinMLPreTrainedModel
 
 logger = logging.getLogger(__name__)
@@ -69,7 +71,9 @@ def create_pipeline(
         kwargs["processor"] = model_id
 
     hf_task = _HF_PIPELINE_TASK_MAP.get(task, task)
-    pipe = pipeline(hf_task, model=model, **kwargs)
+    # transformers.pipeline has 60+ Literal overloads — runtime task strings can't
+    # be statically matched. The string-task fallback handles unknown tasks safely.
+    pipe = pipeline(hf_task, model=model, **kwargs)  # type: ignore[call-overload]
 
     # Adapt pipeline to fixed ONNX input shapes
     _adapt_tokenizer_padding(pipe, task, model)
@@ -160,7 +164,9 @@ def _adapt_tokenizer_padding(pipe: Any, task: str, model: Any) -> None:
     pipe.tokenizer.model_max_length = max_length
 
 
-def _detect_tokenizer_dict_param(pipe: Any, sig_params: dict) -> str | None:
+def _detect_tokenizer_dict_param(
+    pipe: Any, sig_params: Mapping[str, inspect.Parameter]
+) -> str | None:
     """Detect if preprocess() consumes tokenizer settings via a nested dict.
 
     Returns the dict key name (e.g. "tokenizer_kwargs", "tokenizer_params"),
diff --git a/src/winml/modelkit/inference/tasks.py b/src/winml/modelkit/inference/tasks.py
index 2a291bb87..e0602ff65 100644
--- a/src/winml/modelkit/inference/tasks.py
+++ b/src/winml/modelkit/inference/tasks.py
@@ -16,7 +16,7 @@
 import re
 from collections.abc import Callable
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 
 if TYPE_CHECKING:
@@ -124,9 +124,9 @@ def _masked_mean_pool(
         mask = attention_mask.astype(float)
         denom = mask.sum()
         if denom > 0:
-            return (token_embeddings * mask[:, None]).sum(0) / denom
+            return cast("np.ndarray", (token_embeddings * mask[:, None]).sum(0) / denom)
     if token_embeddings.ndim > 1:
-        return token_embeddings.mean(axis=0)
+        return cast("np.ndarray", token_embeddings.mean(axis=0))
     return token_embeddings
 
 
diff --git a/src/winml/modelkit/models/auto.py b/src/winml/modelkit/models/auto.py
index a14d5b162..f0ed7456c 100644
--- a/src/winml/modelkit/models/auto.py
+++ b/src/winml/modelkit/models/auto.py
@@ -35,6 +35,8 @@
 
 
 if TYPE_CHECKING:
+    from collections.abc import Mapping
+
     from transformers import PretrainedConfig
 
     from ..config import WinMLBuildConfig
@@ -98,7 +100,9 @@ def __init__(self) -> None:
     @classmethod
     def from_onnx(
         cls,
-        onnx_path: str | Path | dict[str, str | Path],
+        # Mapping (not dict) so dict[str, str] from configs is accepted
+        # without a cast — dict is invariant on value type, Mapping is covariant.
+        onnx_path: str | Path | Mapping[str, str | Path],
         *,
         task: str | None = None,
         config: WinMLBuildConfig | None = None,
diff --git a/src/winml/modelkit/models/winml/composite_model.py b/src/winml/modelkit/models/winml/composite_model.py
index 9b7aa20a6..353cf2ec8 100644
--- a/src/winml/modelkit/models/winml/composite_model.py
+++ b/src/winml/modelkit/models/winml/composite_model.py
@@ -49,6 +49,7 @@
 
 
 if TYPE_CHECKING:
+    from collections.abc import Mapping
     from pathlib import Path
 
     from transformers import PretrainedConfig
@@ -198,7 +199,9 @@ def from_pretrained(
     @classmethod
     def from_onnx(
         cls,
-        onnx_path: dict[str, str | Path],
+        # Mapping (not dict) so dict[str, str] from configs is accepted
+        # without a cast — dict is invariant on value type, Mapping is covariant.
+        onnx_path: Mapping[str, str | Path],
         *,
         task: str | None = None,
         hf_config: PretrainedConfig | None = None,