microsoft · xieofxie · Jun 8, 2026 · Jun 10, 2026 · Jun 11, 2026 · Jun 11, 2026
@@ -13,7 +13,9 @@ concurrency:
 jobs:
   lint:
     runs-on: windows-latest
-    timeout-minutes: 5
+    # Bumped from 5: combined mypy on 12 packages cold-starts at ~3-4 min on
+    # Windows runners; the original 5-min ceiling cancelled mid-run.
+    timeout-minutes: 10
 
     steps:
       - uses: actions/checkout@v4
@@ -34,9 +36,15 @@ jobs:
       - name: Lint
         run: uv run ruff check src/ tests/
 
-      # Required type check: these folders are clean against the strict
+      # Required type check: these packages are clean against the strict
       # config in pyproject.toml. Any new mypy error here blocks the PR.
       # Expand the list as more packages are cleaned up.
+      #
+      # Single mypy invocation across all packages — a per-package loop pays
+      # cold typeshed/plugin startup per package and tipped the job past the
+      # 5-minute timeout once the list grew to 12. The combined summary still
+      # reports total error/file counts; error lines include file paths so
+      # the failing package is identifiable without per-package groups.
       - name: Type check (required)
         run: >-
           uv run mypy
@@ -48,9 +56,7 @@ jobs:
           -p winml.modelkit.config
           -p winml.modelkit.core
           -p winml.modelkit.data
-
-      # Advisory type check for the rest of the tree: surfaces type issues
-      # in CI logs without blocking PRs while the backlog is worked down.
-      - name: Type check (advisory, full package)
-        continue-on-error: true
-        run: uv run mypy -p winml.modelkit
+          -p winml.modelkit.datasets
+          -p winml.modelkit.eval
+          -p winml.modelkit.export
+          -p winml.modelkit.inference
@@ -111,8 +111,10 @@ dev = [
   "pre-commit>=4.5.1",
   "pytest-cov>=7",
   "pytest-timeout>=2.4.0",
+  "scipy-stubs>=1.17.1.5",
   "types-jsonschema>=4.26.0.20260518",
   "types-protobuf>=7.34.1.20260518",
+  "types-psutil>=7.2.2.20260518",
   "types-pyyaml>=6.0.12.20260518",
   "types-tqdm>=4.67.3.20260518",
 ]
@@ -478,6 +480,10 @@ module = [
   "openvino",
   "openvino.*",
   "plotext",
+  "soundfile",       # audio I/O in inference/engine.py; no community stubs
+  "sklearn.*",       # used in eval/metrics; no community stubs
+  "evaluate",        # HF evaluate, used in eval/; no community stubs
+  "evaluate.*",
 ]
 ignore_missing_imports = true
 

@@ -218,6 +218,9 @@ def _name(base: str) -> str:
     # =========================================================================
     logger.info("Exporting to ONNX...")
     t0 = time.monotonic()
+    # config.export is None only for the ONNX build path (build_onnx_model);
+    # this is the HF path so the field must be populated.
+    assert config.export is not None, "build_hf_model requires config.export"
     export_onnx(
         model=pytorch_model,
         output_path=export_path,

@@ -1356,6 +1356,8 @@ def _name(base: str) -> str:
             config, model_id, trust_remote_code=False, hf_config=preloaded_hf_config
         )
         t0 = time.monotonic()
+        # config.export is None only for the ONNX build path; this is the HF path.
+        assert config.export is not None, "HF build path requires config.export"
         export_onnx(
             model=pytorch_model,
             output_path=export_path,

@@ -327,34 +327,33 @@ def config(
                 )
                 return
 
-            # Generate config(s) - module parameter selects overload:
-            # module=str → list[WinMLBuildConfig], module=None → WinMLBuildConfig.
-            # ``module`` is the only differing kwarg, so build a shared dict
-            # once and add it only on the list-returning branch. This keeps
-            # the overload dispatch but avoids repeating the other 10 kwargs.
-            _shared_kwargs: dict[str, Any] = {
-                "model_id": hf_model,
-                "task": task,
-                "model_class": model_class,
-                "model_type": model_type,
-                "override": override,
-                "shape_config": shape_config,
-                "library_name": library_name,
-                "device": device,
-                "precision": precision,
-                "trust_remote_code": trust_remote_code,
-                "ep": ep,
-            }
-            if module:
-                configs = generate_hf_build_config(module=module, **_shared_kwargs)
+            # Generate config(s). The ``module: str | None`` overload of
+            # generate_hf_build_config returns WinMLBuildConfig | list[...],
+            # which isinstance(result, list) narrows for the branches below.
+            result = generate_hf_build_config(
+                model_id=hf_model,
+                task=task,
+                model_class=model_class,
+                model_type=model_type,
+                module=module,
+                override=override,
+                shape_config=shape_config,
+                library_name=library_name,
+                device=device,
+                precision=precision,
+                trust_remote_code=trust_remote_code,
+                ep=ep,
+            )
+            if isinstance(result, list):
+                configs = result
                 for cfg in configs:
                     _apply_stage_overrides(cfg, no_quant=not quant, no_compile=no_compile)
                 output_data = [cfg.to_dict() for cfg in configs]
                 _n_modules = len(configs)
                 # Use first config for display metadata
                 config_obj = configs[0] if configs else None
             else:
-                config_obj = generate_hf_build_config(**_shared_kwargs)
+                config_obj = result
                 configs = []
                 _apply_stage_overrides(config_obj, no_quant=not quant, no_compile=no_compile)
                 output_data = config_obj.to_dict()

@@ -489,6 +489,29 @@ def generate_hf_build_config(
 ) -> list[WinMLBuildConfig]: ...
 
 
+@overload
+def generate_hf_build_config(
+    model_id: str | None = None,
+    *,
+    task: str | None = None,
+    model_class: str | None = None,
+    model_type: str | None = None,
+    # Catch-all for callers that hold ``module`` as ``str | None`` (e.g. the
+    # ``generate_build_config`` dispatcher). Without this overload, mypy can't
+    # resolve the call against the two narrower overloads above and fails with
+    # "too many union combinations".
+    module: str | None,
+    override: WinMLBuildConfig | None = None,
+    shape_config: dict | None = None,
+    library_name: str = "transformers",
+    device: str = "auto",
+    precision: str = "auto",
+    trust_remote_code: bool = False,
+    ep: EPNameOrAlias | None = None,
+    no_compile: bool = False,
+) -> WinMLBuildConfig | list[WinMLBuildConfig]: ...
+
+
 def generate_hf_build_config(
     model_id: str | None = None,
     *,
@@ -804,24 +827,24 @@ class name (HF path only).
             ep=ep,
             override=override,
         )
-    # Split branches so mypy can pick the matching overload of generate_hf_build_config.
-    # Typed as dict[str, Any] so per-kwarg type checks happen at the callee, not on the
-    # widened Union mypy would otherwise infer from this heterogeneous literal.
-    common_kwargs: dict[str, Any] = {
-        "task": task,
-        "model_class": model_class,
-        "model_type": model_type,
-        "override": override,
-        "shape_config": shape_config,
-        "library_name": library_name,
-        "device": device,
-        "precision": precision,
-        "trust_remote_code": trust_remote_code,
-        "ep": ep,
-    }
-    if module is None:
-        return generate_hf_build_config(model_id, module=None, **common_kwargs)
-    return generate_hf_build_config(model_id, module=module, **common_kwargs)
+    # Single call resolves against generate_hf_build_config's `module: str | None`
+    # overload, which returns WinMLBuildConfig | list[WinMLBuildConfig] — matching
+    # this dispatcher's implementation return type. The dispatcher's own
+    # narrowing overloads above still tighten the return type for its callers.
+    return generate_hf_build_config(
+        model_id,
+        task=task,
+        model_class=model_class,
+        model_type=model_type,
+        module=module,
+        override=override,
+        shape_config=shape_config,
+        library_name=library_name,
+        device=device,
+        precision=precision,
+        trust_remote_code=trust_remote_code,
+        ep=ep,
+    )
 
 
 # =============================================================================

@@ -5,8 +5,15 @@
 """Simple timestamp formatting utility."""
 
 from datetime import datetime, timezone
+from typing import overload
 
 
+@overload
+def format_timestamp_iso(epoch_time: float) -> str: ...
+@overload
+def format_timestamp_iso(epoch_time: None) -> None: ...
+@overload
+def format_timestamp_iso(epoch_time: float | None) -> str | None: ...
 def format_timestamp_iso(epoch_time: float | None) -> str | None:
     """Format Unix epoch timestamp to ISO 8601 with Z suffix.
 

@@ -136,7 +136,7 @@ def universal_calib_dataset(
         raise RuntimeError(f"Failed to create {task} dataset: {e}") from e
 
 
-class DatasetCalibrationReader(CalibrationDataReader):
+class DatasetCalibrationReader(CalibrationDataReader):  # type: ignore[misc]
     """Calibration data reader that wraps universal_calib_dataset.
 
     Bridges HuggingFace-style datasets to ORT's calibration API by:

@@ -39,7 +39,7 @@ def __init__(
         dataset_name: str | None = None,
         max_samples: int | None = None,
         data_split: str | None = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         """Initialize dataset with readonly properties.
 
@@ -59,8 +59,10 @@ def __init__(
         # Store additional kwargs for subclass use
         self._config = kwargs
 
-        # Subclasses should populate these during initialization
-        self._dataset = None  # The actual dataset object
+        # Subclasses should populate these during initialization.
+        # Typed as Any because each subclass uses a different dataset library
+        # (HF datasets.Dataset, torch DataLoader, plain list[dict], ...).
+        self._dataset: Any = None
         self._metadata: dict[str, Any] = {}  # Dataset metadata
 
         # Initialize subclass-specific data

@@ -123,7 +123,7 @@ def _initialize(self) -> None:
 
         # Convert raw images into model-ready tensors.
         def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]:
-            return processor(example[self._image_col].convert("RGB"), return_tensors="pt")
+            return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt"))
 
         self._dataset = dataset.map(
             preprocess_single_sample, remove_columns=[self._image_col]
@@ -142,7 +142,7 @@ def _detect_image_column(self, dataset: Any) -> None:
 
         features = dataset.features
 
-        self._image_col = None
+        self._image_col = ""
         for col_name, feature in features.items():
             if isinstance(feature, Image):
                 self._image_col = col_name

@@ -12,7 +12,7 @@
 
 import logging
 from random import Random
-from typing import Any
+from typing import Any, cast
 
 from datasets import load_dataset
 from datasets.features import ClassLabel, Image
@@ -35,6 +35,10 @@ class ImageDataset(BaseTaskDataset):
     - HuggingFace Features API for metadata discovery
     """
 
+    # Populated by _detect_columns(); "" (None for _label_feature) until then.
+    _image_col: str = ""
+    _label_col: str = ""
+    _label_feature: ClassLabel | None = None
 
     def _get_default_dataset(self) -> None:
         """Set default dataset configuration if none specified.
@@ -130,13 +134,13 @@ def _initialize(self) -> None:
         processor = AutoImageProcessor.from_pretrained(self._model_name, use_fast=True)
 
         # 5. Conditional label alignment using should_align_labels()
-        if should_align_labels(self._dataset_name):
+        if self._dataset_name and should_align_labels(self._dataset_name):
             dataset = dataset.align_labels_with_mapping(get_imagenet_label_map(), self._label_col)
 
         # 6. Apply image processing with proper batch dimension
-        def preprocess_single_sample(example):
+        def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]:
             # Process single image and add batch dimension
-            return processor(example[self._image_col].convert("RGB"), return_tensors="pt")
+            return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt"))
 
         self._dataset = (
             dataset
@@ -146,7 +150,7 @@ def preprocess_single_sample(example):
 
         logger.info(f"Dataset initialized with {len(self._dataset)} samples")
 
-    def _detect_columns(self, dataset) -> None:
+    def _detect_columns(self, dataset: Any) -> None:
         """Detect image and label columns using HuggingFace Features API.
 
         Uses proper type checking with HuggingFace Features API to reliably
@@ -158,8 +162,8 @@ def _detect_columns(self, dataset) -> None:
         features = dataset.features
 
         # Detect columns using proper type checking
-        self._image_col = None
-        self._label_col = None
+        self._image_col = ""
+        self._label_col = ""
         self._label_feature = None  # Store ClassLabel feature for mapping
 
         for col_name, feature in features.items():
@@ -210,7 +214,7 @@ def __getitem__(self, idx: int) -> dict[str, Any]:
         Returns:
             Dictionary containing preprocessed tensors
         """
-        return self._dataset[idx]
+        return cast("dict[str, Any]", self._dataset[idx])
 
     @property
     def label_names(self) -> list[str]: