diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0512786b8..f41e857e7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -13,7 +13,9 @@ concurrency: jobs: lint: runs-on: windows-latest - timeout-minutes: 5 + # Bumped from 5: combined mypy on 12 packages cold-starts at ~3-4 min on + # Windows runners; the original 5-min ceiling cancelled mid-run. + timeout-minutes: 10 steps: - uses: actions/checkout@v4 @@ -34,9 +36,15 @@ jobs: - name: Lint run: uv run ruff check src/ tests/ - # Required type check: these folders are clean against the strict + # Required type check: these packages are clean against the strict # config in pyproject.toml. Any new mypy error here blocks the PR. # Expand the package list as more folders are cleaned up. + # + # Single mypy invocation across all packages — a per-package loop pays + # cold typeshed/plugin startup per package and tipped the job past the + # 5-minute timeout once the list grew to 12. The combined summary still + # reports total error/file counts; error lines include file paths so + # the failing package is identifiable without per-package groups. - name: Type check (required) run: >- uv run mypy @@ -48,9 +56,7 @@ jobs: -p winml.modelkit.config -p winml.modelkit.core -p winml.modelkit.data - - # Advisory type check for the rest of the tree: surfaces type issues - # in CI logs without blocking PRs while the backlog is worked down. - - name: Type check (advisory, full package) - continue-on-error: true - run: uv run mypy -p winml.modelkit + -p winml.modelkit.datasets + -p winml.modelkit.eval + -p winml.modelkit.export + -p winml.modelkit.inference diff --git a/pyproject.toml b/pyproject.toml index 64a80427b..72f36f10f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,8 +111,10 @@ dev = [ "pre-commit>=4.5.1", "pytest-cov>=7", "pytest-timeout>=2.4.0", + "scipy-stubs>=1.17.1.5", "types-jsonschema>=4.26.0.20260518", "types-protobuf>=7.34.1.20260518", + "types-psutil>=7.2.2.20260518", "types-pyyaml>=6.0.12.20260518", "types-tqdm>=4.67.3.20260518", ] @@ -478,6 +480,10 @@ module = [ "openvino", "openvino.*", "plotext", + "soundfile", # audio I/O in inference/engine.py; no community stubs + "sklearn.*", # used in eval/metrics; no community stubs + "evaluate", # HF evaluate, used in eval/; no community stubs + "evaluate.*", ] ignore_missing_imports = true diff --git a/src/winml/modelkit/build/hf.py b/src/winml/modelkit/build/hf.py index eaae70ef4..99851462c 100644 --- a/src/winml/modelkit/build/hf.py +++ b/src/winml/modelkit/build/hf.py @@ -218,6 +218,9 @@ def _name(base: str) -> str: # ========================================================================= logger.info("Exporting to ONNX...") t0 = time.monotonic() + # config.export is None only for the ONNX build path (build_onnx_model); + # this is the HF path so the field must be populated. + assert config.export is not None, "build_hf_model requires config.export" export_onnx( model=pytorch_model, output_path=export_path, diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index 7266620cb..e519892aa 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -1356,6 +1356,8 @@ def _name(base: str) -> str: config, model_id, trust_remote_code=False, hf_config=preloaded_hf_config ) t0 = time.monotonic() + # config.export is None only for the ONNX build path; this is the HF path. + assert config.export is not None, "HF build path requires config.export" export_onnx( model=pytorch_model, output_path=export_path, diff --git a/src/winml/modelkit/commands/config.py b/src/winml/modelkit/commands/config.py index 373db06f5..a3c3a5cdd 100644 --- a/src/winml/modelkit/commands/config.py +++ b/src/winml/modelkit/commands/config.py @@ -327,26 +327,25 @@ def config( ) return - # Generate config(s) - module parameter selects overload: - # module=str → list[WinMLBuildConfig], module=None → WinMLBuildConfig. - # ``module`` is the only differing kwarg, so build a shared dict - # once and add it only on the list-returning branch. This keeps - # the overload dispatch but avoids repeating the other 10 kwargs. - _shared_kwargs: dict[str, Any] = { - "model_id": hf_model, - "task": task, - "model_class": model_class, - "model_type": model_type, - "override": override, - "shape_config": shape_config, - "library_name": library_name, - "device": device, - "precision": precision, - "trust_remote_code": trust_remote_code, - "ep": ep, - } - if module: - configs = generate_hf_build_config(module=module, **_shared_kwargs) + # Generate config(s). The ``module: str | None`` overload of + # generate_hf_build_config returns WinMLBuildConfig | list[...], + # which isinstance(result, list) narrows for the branches below. + result = generate_hf_build_config( + model_id=hf_model, + task=task, + model_class=model_class, + model_type=model_type, + module=module, + override=override, + shape_config=shape_config, + library_name=library_name, + device=device, + precision=precision, + trust_remote_code=trust_remote_code, + ep=ep, + ) + if isinstance(result, list): + configs = result for cfg in configs: _apply_stage_overrides(cfg, no_quant=not quant, no_compile=no_compile) output_data = [cfg.to_dict() for cfg in configs] @@ -354,7 +353,7 @@ def config( # Use first config for display metadata config_obj = configs[0] if configs else None else: - config_obj = generate_hf_build_config(**_shared_kwargs) + config_obj = result configs = [] _apply_stage_overrides(config_obj, no_quant=not quant, no_compile=no_compile) output_data = config_obj.to_dict() diff --git a/src/winml/modelkit/config/build.py b/src/winml/modelkit/config/build.py index 16cde11f2..9bdbb06e0 100644 --- a/src/winml/modelkit/config/build.py +++ b/src/winml/modelkit/config/build.py @@ -489,6 +489,29 @@ def generate_hf_build_config( ) -> list[WinMLBuildConfig]: ... +@overload +def generate_hf_build_config( + model_id: str | None = None, + *, + task: str | None = None, + model_class: str | None = None, + model_type: str | None = None, + # Catch-all for callers that hold ``module`` as ``str | None`` (e.g. the + # ``generate_build_config`` dispatcher). Without this overload, mypy can't + # resolve the call against the two narrower overloads above and fails with + # "too many union combinations". + module: str | None, + override: WinMLBuildConfig | None = None, + shape_config: dict | None = None, + library_name: str = "transformers", + device: str = "auto", + precision: str = "auto", + trust_remote_code: bool = False, + ep: EPNameOrAlias | None = None, + no_compile: bool = False, +) -> WinMLBuildConfig | list[WinMLBuildConfig]: ... + + def generate_hf_build_config( model_id: str | None = None, *, @@ -804,24 +827,24 @@ class name (HF path only). ep=ep, override=override, ) - # Split branches so mypy can pick the matching overload of generate_hf_build_config. - # Typed as dict[str, Any] so per-kwarg type checks happen at the callee, not on the - # widened Union mypy would otherwise infer from this heterogeneous literal. - common_kwargs: dict[str, Any] = { - "task": task, - "model_class": model_class, - "model_type": model_type, - "override": override, - "shape_config": shape_config, - "library_name": library_name, - "device": device, - "precision": precision, - "trust_remote_code": trust_remote_code, - "ep": ep, - } - if module is None: - return generate_hf_build_config(model_id, module=None, **common_kwargs) - return generate_hf_build_config(model_id, module=module, **common_kwargs) + # Single call resolves against generate_hf_build_config's `module: str | None` + # overload, which returns WinMLBuildConfig | list[WinMLBuildConfig] — matching + # this dispatcher's implementation return type. The dispatcher's own + # narrowing overloads above still tighten the return type for its callers. + return generate_hf_build_config( + model_id, + task=task, + model_class=model_class, + model_type=model_type, + module=module, + override=override, + shape_config=shape_config, + library_name=library_name, + device=device, + precision=precision, + trust_remote_code=trust_remote_code, + ep=ep, + ) # ============================================================================= diff --git a/src/winml/modelkit/core/time_utils.py b/src/winml/modelkit/core/time_utils.py index 9fc8b8b4c..77b1baf28 100644 --- a/src/winml/modelkit/core/time_utils.py +++ b/src/winml/modelkit/core/time_utils.py @@ -5,8 +5,15 @@ """Simple timestamp formatting utility.""" from datetime import datetime, timezone +from typing import overload +@overload +def format_timestamp_iso(epoch_time: float) -> str: ... +@overload +def format_timestamp_iso(epoch_time: None) -> None: ... +@overload +def format_timestamp_iso(epoch_time: float | None) -> str | None: ... def format_timestamp_iso(epoch_time: float | None) -> str | None: """Format Unix epoch timestamp to ISO 8601 with Z suffix. diff --git a/src/winml/modelkit/datasets/__init__.py b/src/winml/modelkit/datasets/__init__.py index 54ac73239..ebbde0cfb 100644 --- a/src/winml/modelkit/datasets/__init__.py +++ b/src/winml/modelkit/datasets/__init__.py @@ -136,7 +136,7 @@ def universal_calib_dataset( raise RuntimeError(f"Failed to create {task} dataset: {e}") from e -class DatasetCalibrationReader(CalibrationDataReader): +class DatasetCalibrationReader(CalibrationDataReader): # type: ignore[misc] """Calibration data reader that wraps universal_calib_dataset. Bridges HuggingFace-style datasets to ORT's calibration API by: diff --git a/src/winml/modelkit/datasets/base.py b/src/winml/modelkit/datasets/base.py index 8d86af57e..ea73e7bf6 100644 --- a/src/winml/modelkit/datasets/base.py +++ b/src/winml/modelkit/datasets/base.py @@ -39,7 +39,7 @@ def __init__( dataset_name: str | None = None, max_samples: int | None = None, data_split: str | None = None, - **kwargs, + **kwargs: Any, ) -> None: """Initialize dataset with readonly properties. @@ -59,8 +59,10 @@ def __init__( # Store additional kwargs for subclass use self._config = kwargs - # Subclasses should populate these during initialization - self._dataset = None # The actual dataset object + # Subclasses should populate these during initialization. + # Typed as Any because each subclass uses a different dataset library + # (HF datasets.Dataset, torch DataLoader, plain list[dict], ...). + self._dataset: Any = None self._metadata: dict[str, Any] = {} # Dataset metadata # Initialize subclass-specific data diff --git a/src/winml/modelkit/datasets/depth_estimation.py b/src/winml/modelkit/datasets/depth_estimation.py index 6aa3f7d62..2b268a6ec 100644 --- a/src/winml/modelkit/datasets/depth_estimation.py +++ b/src/winml/modelkit/datasets/depth_estimation.py @@ -123,7 +123,7 @@ def _initialize(self) -> None: # Convert raw images into model-ready tensors. def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]: - return processor(example[self._image_col].convert("RGB"), return_tensors="pt") + return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt")) self._dataset = dataset.map( preprocess_single_sample, remove_columns=[self._image_col] @@ -142,7 +142,7 @@ def _detect_image_column(self, dataset: Any) -> None: features = dataset.features - self._image_col = None + self._image_col = "" for col_name, feature in features.items(): if isinstance(feature, Image): self._image_col = col_name diff --git a/src/winml/modelkit/datasets/image.py b/src/winml/modelkit/datasets/image.py index 251eaec9f..20b05de20 100644 --- a/src/winml/modelkit/datasets/image.py +++ b/src/winml/modelkit/datasets/image.py @@ -12,7 +12,7 @@ import logging from random import Random -from typing import Any +from typing import Any, cast from datasets import load_dataset from datasets.features import ClassLabel, Image @@ -35,6 +35,10 @@ class ImageDataset(BaseTaskDataset): - HuggingFace Features API for metadata discovery """ + # Populated by _detect_columns(); empty string until then. + _image_col: str = "" + _label_col: str = "" + _label_feature: ClassLabel | None = None def _get_default_dataset(self) -> None: """Set default dataset configuration if none specified. @@ -130,13 +134,13 @@ def _initialize(self) -> None: processor = AutoImageProcessor.from_pretrained(self._model_name, use_fast=True) # 5. Conditional label alignment using should_align_labels() - if should_align_labels(self._dataset_name): + if self._dataset_name and should_align_labels(self._dataset_name): dataset = dataset.align_labels_with_mapping(get_imagenet_label_map(), self._label_col) # 6. Apply image processing with proper batch dimension - def preprocess_single_sample(example): + def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]: # Process single image and add batch dimension - return processor(example[self._image_col].convert("RGB"), return_tensors="pt") + return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt")) self._dataset = ( dataset @@ -146,7 +150,7 @@ def preprocess_single_sample(example): logger.info(f"Dataset initialized with {len(self._dataset)} samples") - def _detect_columns(self, dataset) -> None: + def _detect_columns(self, dataset: Any) -> None: """Detect image and label columns using HuggingFace Features API. Uses proper type checking with HuggingFace Features API to reliably @@ -158,8 +162,8 @@ def _detect_columns(self, dataset) -> None: features = dataset.features # Detect columns using proper type checking - self._image_col = None - self._label_col = None + self._image_col = "" + self._label_col = "" self._label_feature = None # Store ClassLabel feature for mapping for col_name, feature in features.items(): @@ -210,7 +214,7 @@ def __getitem__(self, idx: int) -> dict[str, Any]: Returns: Dictionary containing preprocessed tensors """ - return self._dataset[idx] + return cast("dict[str, Any]", self._dataset[idx]) @property def label_names(self) -> list[str]: diff --git a/src/winml/modelkit/datasets/image_segmentation.py b/src/winml/modelkit/datasets/image_segmentation.py index ad4a1d174..84d81aa61 100644 --- a/src/winml/modelkit/datasets/image_segmentation.py +++ b/src/winml/modelkit/datasets/image_segmentation.py @@ -13,7 +13,7 @@ import logging from random import Random -from typing import Any +from typing import Any, cast from datasets import load_dataset from datasets.features import Image @@ -39,6 +39,11 @@ class ImageSegmentationDataset(BaseTaskDataset): DEFAULT_DATASET = "nielsr/ade20k-demo" DEFAULT_SPLIT = "train" + # Populated by _detect_columns(); empty string until then. + _image_col: str = "" + _label_col: str = "" + _mask_col: str = "" + def __init__( self, model_name: str, @@ -46,7 +51,7 @@ def __init__( max_samples: int | None = None, data_split: str | None = None, do_reduce_labels: bool = True, - **kwargs, + **kwargs: Any, ) -> None: """Initialize image segmentation dataset. @@ -119,7 +124,7 @@ def _initialize(self) -> None: processor = AutoImageProcessor.from_pretrained(self._model_name, use_fast=False) # 6. Apply image + mask processing - def preprocess_single_sample(example): + def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]: """Preprocess a single image + mask sample for segmentation models.""" # Get image and mask image = example[self._image_col].convert("RGB") @@ -146,7 +151,7 @@ def preprocess_single_sample(example): logger.info(f"Image column: {self._image_col}") logger.info(f"Mask column: {self._mask_col}") - def _detect_columns(self, dataset) -> None: + def _detect_columns(self, dataset: Any) -> None: """Detect image and mask columns using HuggingFace Features API. Uses proper type checking to identify Image features and applies @@ -158,8 +163,8 @@ def _detect_columns(self, dataset) -> None: features = dataset.features # Initialize column detection - self._image_col = None - self._mask_col = None + self._image_col = "" + self._mask_col = "" # Detect columns using proper type checking and naming patterns image_candidates = [] @@ -256,7 +261,7 @@ def __getitem__(self, idx: int) -> dict[str, Any]: Returns: Dictionary containing preprocessed tensors for segmentation models """ - return self._dataset[idx] + return cast("dict[str, Any]", self._dataset[idx]) @property def mask_col(self) -> str: diff --git a/src/winml/modelkit/datasets/object_detection.py b/src/winml/modelkit/datasets/object_detection.py index 628204f35..94530a19c 100644 --- a/src/winml/modelkit/datasets/object_detection.py +++ b/src/winml/modelkit/datasets/object_detection.py @@ -123,7 +123,7 @@ def _initialize(self) -> None: # Apply image processing def preprocess_single_sample(example: dict[str, Any]) -> dict[str, Any]: - return processor(example[self._image_col].convert("RGB"), return_tensors="pt") + return dict(processor(example[self._image_col].convert("RGB"), return_tensors="pt")) self._dataset = ( dataset @@ -151,8 +151,8 @@ def _detect_image_column(self, dataset: Any) -> None: features = dataset.features # Find image column - self._image_col = None - self._label_col = None # May not have simple label column + self._image_col = "" + self._label_col = "" # May not have simple label column self._label_feature = None for col_name, feature in features.items(): @@ -174,7 +174,7 @@ def _detect_image_column(self, dataset: Any) -> None: self._label_col = col_name break - if self._label_col is None: + if not self._label_col: # Use first non-image column as fallback for col_name in features: if col_name != self._image_col: @@ -200,5 +200,5 @@ def label_names(self) -> list[str]: so this returns an empty list unless explicitly set. """ if self._label_feature is not None and hasattr(self._label_feature, "names"): - return self._label_feature.names + return list(self._label_feature.names) return [] diff --git a/src/winml/modelkit/datasets/processor_utils.py b/src/winml/modelkit/datasets/processor_utils.py index 136a9c106..cfecb2c30 100644 --- a/src/winml/modelkit/datasets/processor_utils.py +++ b/src/winml/modelkit/datasets/processor_utils.py @@ -13,7 +13,9 @@ import logging from typing import Any -from transformers.image_processing_utils import ImageProcessingMixin +from transformers.image_processing_utils import ( # type: ignore[attr-defined] + ImageProcessingMixin, +) logger = logging.getLogger(__name__) diff --git a/src/winml/modelkit/datasets/random_dataset.py b/src/winml/modelkit/datasets/random_dataset.py index 6cb0e9411..a8da3d029 100644 --- a/src/winml/modelkit/datasets/random_dataset.py +++ b/src/winml/modelkit/datasets/random_dataset.py @@ -12,7 +12,7 @@ import logging import random -from typing import Any, ClassVar +from typing import Any, ClassVar, cast import numpy as np import torch @@ -44,7 +44,7 @@ def __init__( model_path: str, max_samples: int = 100, seed: int = 42, - **kwargs, + **kwargs: Any, ) -> None: self.model_path = model_path self.max_samples = max_samples @@ -91,7 +91,7 @@ def __len__(self) -> int: def __getitem__(self, idx: int) -> dict[str, Any]: """Get a single preprocessed sample.""" - return self.dataset[idx] + return cast("dict[str, Any]", self.dataset[idx]) @property def label_col(self) -> str: @@ -104,7 +104,7 @@ def _generate_random_sample(self) -> dict[str, Any]: Uses cached InputTensorSpec list built from ONNX model I/O config. Each spec's to_tensor() handles value_range, dtype, and shape correctly. """ - return {spec.name: spec.to_tensor() for spec in self._input_specs} + return {spec.name: spec.to_tensor() for spec in self._input_specs if spec.name} def _load_dataset(self) -> Dataset: """Generate synthetic dataset with random samples as tensors.""" diff --git a/src/winml/modelkit/datasets/text.py b/src/winml/modelkit/datasets/text.py index 5c5c9cb1b..e44f115e2 100644 --- a/src/winml/modelkit/datasets/text.py +++ b/src/winml/modelkit/datasets/text.py @@ -16,7 +16,7 @@ import logging from random import Random -from typing import Any +from typing import Any, cast from datasets import load_dataset from datasets.features import ClassLabel, Value @@ -50,7 +50,7 @@ def __init__( max_length: int | None = None, io_config: dict | None = None, io_mapping: dict | None = None, - **kwargs, + **kwargs: Any, ) -> None: """Initialize text classification dataset. @@ -159,13 +159,13 @@ def _initialize(self) -> None: def tokenize(example: dict) -> dict: texts = [example[col] for col in self._text_cols] - return tokenizer( + return dict(tokenizer( *texts, padding="max_length", truncation=True, max_length=self._max_length, return_tensors="pt", - ) + )) # 7. Apply tokenization, remove text columns self._dataset = ( @@ -180,7 +180,7 @@ def tokenize(example: dict) -> dict: logger.info("Initialized: %d samples, max_length=%d, text=%s, label=%s", len(self._dataset), self._max_length, self._text_cols, self._label_col) - def _detect_columns(self, dataset) -> None: + def _detect_columns(self, dataset: Any) -> None: """Detect text and label columns by feature type.""" if not hasattr(dataset, "features"): raise ValueError(f"Dataset {self._dataset_name} has no features") @@ -210,12 +210,13 @@ def _detect_columns(self, dataset) -> None: def __getitem__(self, idx: int) -> dict[str, Any]: """Get tokenized sample.""" - return self._dataset[idx] + return cast("dict[str, Any]", self._dataset[idx]) # Readonly properties @property def max_length(self) -> int: """Sequence length.""" + assert self._max_length is not None, "max_length not resolved" return self._max_length @property diff --git a/src/winml/modelkit/eval/base_evaluator.py b/src/winml/modelkit/eval/base_evaluator.py index 6eec3f595..30eba265d 100644 --- a/src/winml/modelkit/eval/base_evaluator.py +++ b/src/winml/modelkit/eval/base_evaluator.py @@ -8,7 +8,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from ..utils.eval_utils import DatasetValidationError, validate_dataset_columns @@ -72,7 +72,7 @@ def compute(self) -> dict[str, Any]: ) kwargs.pop(key) - return task_evaluator.compute(**kwargs) + return cast("dict[str, Any]", task_evaluator.compute(**kwargs)) def prepare_data(self) -> Dataset: """Load dataset, shuffle, sample, and align labels.""" @@ -122,6 +122,7 @@ def prepare_data(self) -> Dataset: ) dataset = dataset.select(range(actual_samples)) + assert self.config.task is not None, "config.task is required for evaluation" validate_dataset_columns( dataset, self.config.task, self.config.dataset.columns_mapping, ) @@ -131,18 +132,24 @@ def prepare_pipeline(self) -> Pipeline: """Create HF pipeline for inference. Subclasses override to configure.""" from transformers import pipeline + assert self.config.task is not None, "config.task is required to build pipeline" pipeline_task = _PIPELINE_TASK_MAP.get(self.config.task, self.config.task) - return pipeline( - pipeline_task, - model=self.model, - framework="pt", - tokenizer=self.config.model_id, - feature_extractor=self.config.model_id, - image_processor=self.config.model_id, - processor=self.config.model_id, - # "device" is for HF pipeline pytorch tensors, not ORT EP. - # WinMLSession handles device delegation for ORT. - device="cpu", + # transformers.pipeline has 60+ Literal overloads — runtime task strings + # can't be statically matched. The string-task fallback handles unknown tasks. + return cast( + "Pipeline", + pipeline( # type: ignore[call-overload] + pipeline_task, + model=self.model, + framework="pt", + tokenizer=self.config.model_id, + feature_extractor=self.config.model_id, + image_processor=self.config.model_id, + processor=self.config.model_id, + # "device" is for HF pipeline pytorch tensors, not ORT EP. + # WinMLSession handles device delegation for ORT. + device="cpu", + ), ) def _fixed_seq_length(self) -> int | None: diff --git a/src/winml/modelkit/eval/depth_estimation_evaluator.py b/src/winml/modelkit/eval/depth_estimation_evaluator.py index 7b0bd0333..40b903ad9 100644 --- a/src/winml/modelkit/eval/depth_estimation_evaluator.py +++ b/src/winml/modelkit/eval/depth_estimation_evaluator.py @@ -41,12 +41,20 @@ def __init__( task = "depth-estimation" self._input_col = mapping.get("input_column", get_default(task, "input_column")) self._depth_col = mapping.get("depth_column", get_default(task, "depth_column")) - self._align = mapping.get("align", get_default(task, "align")) - self._depth_kind = mapping.get("depth_kind", get_default(task, "depth_kind")) - self._min_depth = float(mapping.get("min_depth", get_default(task, "min_depth"))) + align_raw = mapping.get("align", get_default(task, "align")) + depth_kind_raw = mapping.get("depth_kind", get_default(task, "depth_kind")) + assert align_raw is not None, "align has no default for depth-estimation" + assert depth_kind_raw is not None, "depth_kind has no default for depth-estimation" + self._align: str = align_raw + self._depth_kind: str = depth_kind_raw + min_depth_raw = mapping.get("min_depth", get_default(task, "min_depth")) + assert min_depth_raw is not None, "min_depth has no default for depth-estimation" + self._min_depth = float(min_depth_raw) max_depth_raw = mapping.get("max_depth", get_default(task, "max_depth")) self._max_depth: float | None - if isinstance(max_depth_raw, str) and max_depth_raw.lower() == "none": + if max_depth_raw is None or ( + isinstance(max_depth_raw, str) and max_depth_raw.lower() == "none" + ): self._max_depth = None else: self._max_depth = float(max_depth_raw) @@ -68,13 +76,15 @@ def prepare_pipeline(self) -> Pipeline: io_config = getattr(self.model, "io_config", None) or {} input_shapes = io_config.get("input_shapes", []) - if input_shapes and len(input_shapes[0]) == 4: + image_processor = pipe.image_processor + if image_processor is not None and input_shapes and len(input_shapes[0]) == 4: _, _, h, w = input_shapes[0] - pipe.image_processor.size = {"height": h, "width": w} - if hasattr(pipe.image_processor, "keep_aspect_ratio"): - pipe.image_processor.keep_aspect_ratio = False - if hasattr(pipe.image_processor, "do_pad"): - pipe.image_processor.do_pad = False + # Runtime-settable processor attribute; not on the base class. + image_processor.size = {"height": h, "width": w} # type: ignore[attr-defined] + if hasattr(image_processor, "keep_aspect_ratio"): + image_processor.keep_aspect_ratio = False + if hasattr(image_processor, "do_pad"): + image_processor.do_pad = False return pipe diff --git a/src/winml/modelkit/eval/evaluate.py b/src/winml/modelkit/eval/evaluate.py index 69fda09c8..cab10d26e 100644 --- a/src/winml/modelkit/eval/evaluate.py +++ b/src/winml/modelkit/eval/evaluate.py @@ -11,7 +11,7 @@ import logging from copy import deepcopy from dataclasses import dataclass, field, replace -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from rich.console import Console @@ -71,7 +71,7 @@ def get_evaluator_class(config: WinMLEvaluationConfig) -> type[WinMLEvaluator]: """Return the evaluator class for *task*, or raise ValueError if unsupported.""" key = "compare-tensor" if config.mode == "compare" else config.task - spec = _EVALUATOR_REGISTRY.get(key) + spec = _EVALUATOR_REGISTRY.get(key) if key is not None else None if spec is None: supported = ", ".join(sorted(_EVALUATOR_REGISTRY)) raise ValueError( @@ -79,7 +79,7 @@ def get_evaluator_class(config: WinMLEvaluationConfig) -> type[WinMLEvaluator]: ) module_path, class_name = spec.rsplit(":", 1) module = importlib.import_module(module_path) - return getattr(module, class_name) + return cast("type[WinMLEvaluator]", getattr(module, class_name)) _FE_DEFAULT = { @@ -273,7 +273,7 @@ def evaluate(config: WinMLEvaluationConfig) -> EvalResult: config, mode=mode, task=_resolve_task(config), dataset=deepcopy(config.dataset) ) if config.mode != "compare" and config.dataset.path is None: - default = _DEFAULT_DATASETS.get(config.task) + default = _DEFAULT_DATASETS.get(config.task) if config.task is not None else None if default is None: raise ValueError( f"No dataset provided and no default for task '{config.task}'. Use --dataset." diff --git a/src/winml/modelkit/eval/fill_mask_evaluator.py b/src/winml/modelkit/eval/fill_mask_evaluator.py index f1c5c4496..c771f2a6a 100644 --- a/src/winml/modelkit/eval/fill_mask_evaluator.py +++ b/src/winml/modelkit/eval/fill_mask_evaluator.py @@ -13,7 +13,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from tqdm import tqdm @@ -70,10 +70,10 @@ def _max_length(self) -> int | None: def _logits(self, outputs: Any) -> torch.Tensor: if not isinstance(outputs, dict): - return outputs.logits + return cast("torch.Tensor", outputs.logits) if "logits" not in outputs: raise KeyError(f"Model output dict has no 'logits' key; got keys {list(outputs)}.") - return outputs["logits"] + return cast("torch.Tensor", outputs["logits"]) def _score( self, diff --git a/src/winml/modelkit/eval/image_feature_extraction_evaluator.py b/src/winml/modelkit/eval/image_feature_extraction_evaluator.py index aad6a553c..fe250c8c6 100644 --- a/src/winml/modelkit/eval/image_feature_extraction_evaluator.py +++ b/src/winml/modelkit/eval/image_feature_extraction_evaluator.py @@ -20,7 +20,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast import numpy as np from tqdm import tqdm @@ -58,9 +58,10 @@ def prepare_pipeline(self) -> Pipeline: io_config = getattr(self.model, "io_config", None) or {} input_shapes = io_config.get("input_shapes", []) - if input_shapes and len(input_shapes[0]) == 4: + if pipe.image_processor is not None and input_shapes and len(input_shapes[0]) == 4: _, _, h, w = input_shapes[0] - pipe.image_processor.size = {"height": h, "width": w} + # Runtime-settable processor attribute; not on the base class. + pipe.image_processor.size = {"height": h, "width": w} # type: ignore[attr-defined] return pipe @@ -113,7 +114,7 @@ def _extract_image_embedding(raw: Any) -> np.ndarray: return tokens if tokens.ndim == 2: # CLS token (index 0) — standard image-level embedding for ViT/DINOv2. - return tokens[0] + return cast("np.ndarray", tokens[0]) raise ValueError( f"Unsupported image-feature-extraction output shape: {np.asarray(raw).shape}. " "Expected [1, hidden] (pooled) or [1, num_tokens, hidden] (token sequence)." diff --git a/src/winml/modelkit/eval/image_segmentation_evaluator.py b/src/winml/modelkit/eval/image_segmentation_evaluator.py index 19d5b3452..c9a429e13 100644 --- a/src/winml/modelkit/eval/image_segmentation_evaluator.py +++ b/src/winml/modelkit/eval/image_segmentation_evaluator.py @@ -63,9 +63,10 @@ def prepare_pipeline(self) -> Pipeline: io_config = getattr(self.model, "io_config", None) or {} input_shapes = io_config.get("input_shapes", []) - if input_shapes and len(input_shapes[0]) == 4: + if pipe.image_processor is not None and input_shapes and len(input_shapes[0]) == 4: _, _, h, w = input_shapes[0] - pipe.image_processor.size = {"height": h, "width": w} + # Runtime-settable processor attribute; not on the base class. + pipe.image_processor.size = {"height": h, "width": w} # type: ignore[attr-defined] return pipe diff --git a/src/winml/modelkit/eval/image_to_text_evaluator.py b/src/winml/modelkit/eval/image_to_text_evaluator.py index 828d5b0c8..9f541f173 100644 --- a/src/winml/modelkit/eval/image_to_text_evaluator.py +++ b/src/winml/modelkit/eval/image_to_text_evaluator.py @@ -28,8 +28,10 @@ if TYPE_CHECKING: + from datasets import Dataset + from ..models.winml.base import WinMLPreTrainedModel - from .config import WinMLEvaluationConfig + from .config import DatasetConfig, WinMLEvaluationConfig logger = logging.getLogger(__name__) @@ -50,7 +52,7 @@ def __init__( self._label_col = cm.get("label_column", get_default("image-to-text", "label_column")) super().__init__(config, model) - def align_labels(self, dataset, ds_config): # type: ignore[override] + def align_labels(self, dataset: Dataset, ds_config: DatasetConfig) -> Dataset: """No-op: free-text labels need no ClassLabel alignment.""" return dataset diff --git a/src/winml/modelkit/eval/metrics/depth.py b/src/winml/modelkit/eval/metrics/depth.py index d89474440..c366c2015 100644 --- a/src/winml/modelkit/eval/metrics/depth.py +++ b/src/winml/modelkit/eval/metrics/depth.py @@ -12,7 +12,7 @@ from __future__ import annotations -from typing import Any +from typing import Any, cast import numpy as np import torch @@ -170,7 +170,7 @@ def _valid_mask(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray: if self._max_depth is not None: mask &= gt <= self._max_depth mask &= np.isfinite(pred) & (pred > 0) - return mask + return cast("np.ndarray", mask) @staticmethod def _to_numpy(arr: Any) -> np.ndarray: diff --git a/src/winml/modelkit/eval/metrics/spearman_correlation.py b/src/winml/modelkit/eval/metrics/spearman_correlation.py index 3d93825ab..9b643ba42 100644 --- a/src/winml/modelkit/eval/metrics/spearman_correlation.py +++ b/src/winml/modelkit/eval/metrics/spearman_correlation.py @@ -65,6 +65,6 @@ def compute( "Spearman correlation is NaN. This typically means the model " "produced constant outputs (zero variance). Returning 0.0.", ) - corr = 0.0 + return {"cosine_spearman": 0.0} return {"cosine_spearman": round(float(corr) * 100, 4)} diff --git a/src/winml/modelkit/eval/metrics/text_similarity.py b/src/winml/modelkit/eval/metrics/text_similarity.py index 37af65947..add146542 100644 --- a/src/winml/modelkit/eval/metrics/text_similarity.py +++ b/src/winml/modelkit/eval/metrics/text_similarity.py @@ -19,7 +19,11 @@ from __future__ import annotations import logging -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Hashable logger = logging.getLogger(__name__) @@ -91,8 +95,12 @@ def _cider(self) -> float | None: from .cider import Cider try: - refs_dict = {str(i): refs for i, refs in enumerate(self._references)} - preds_dict = {str(i): [pred] for i, pred in enumerate(self._predictions)} + refs_dict: dict[Hashable, list[str]] = { + str(i): refs for i, refs in enumerate(self._references) + } + preds_dict: dict[Hashable, list[str]] = { + str(i): [pred] for i, pred in enumerate(self._predictions) + } score, _ = Cider().compute_score(refs_dict, preds_dict) return float(score) except Exception as e: diff --git a/src/winml/modelkit/eval/object_detection_evaluator.py b/src/winml/modelkit/eval/object_detection_evaluator.py index 6d6223fc5..baa578664 100644 --- a/src/winml/modelkit/eval/object_detection_evaluator.py +++ b/src/winml/modelkit/eval/object_detection_evaluator.py @@ -46,13 +46,21 @@ def __init__( mapping = config.dataset.columns_mapping task = "object-detection" self._image_col = mapping.get("input_column", get_default(task, "input_column")) - self._annotation_col = mapping.get( - "annotation_column", get_default(task, "annotation_column"), - ) - self._bbox_key = mapping.get("bbox_key", get_default(task, "bbox_key")) - self._category_key = mapping.get("category_key", get_default(task, "category_key")) - self._box_format = mapping.get("box_format", get_default(task, "box_format")) - self._box_coords = mapping.get("box_coords", get_default(task, "box_coords")) + ann_col_raw = mapping.get("annotation_column", get_default(task, "annotation_column")) + bbox_key_raw = mapping.get("bbox_key", get_default(task, "bbox_key")) + category_key_raw = mapping.get("category_key", get_default(task, "category_key")) + assert ann_col_raw is not None, "annotation_column has no default for object-detection" + assert bbox_key_raw is not None, "bbox_key has no default for object-detection" + assert category_key_raw is not None, "category_key has no default for object-detection" + self._annotation_col: str = ann_col_raw + self._bbox_key: str = bbox_key_raw + self._category_key: str = category_key_raw + box_format_raw = mapping.get("box_format", get_default(task, "box_format")) + box_coords_raw = mapping.get("box_coords", get_default(task, "box_coords")) + assert box_format_raw is not None, "box_format has no default for object-detection" + assert box_coords_raw is not None, "box_coords has no default for object-detection" + self._box_format: str = box_format_raw + self._box_coords: str = box_coords_raw super().__init__(config, model) @@ -63,21 +71,23 @@ def prepare_pipeline(self) -> Pipeline: io_config = getattr(self.model, "io_config", None) or {} input_shapes = io_config.get("input_shapes", [[]]) input_names = io_config.get("input_names", []) - if input_shapes and len(input_shapes[0]) == 4: + image_processor = pipe.image_processor + if image_processor is not None and input_shapes and len(input_shapes[0]) == 4: _, _, h, w = input_shapes[0] + # Runtime-settable processor attributes; not on the base class. if "pixel_mask" in input_names: - pipe.image_processor.size = { + image_processor.size = { # type: ignore[attr-defined] "shortest_edge": min(h, w), "longest_edge": max(h, w), } - if hasattr(pipe.image_processor, "pad_size"): - pipe.image_processor.pad_size = {"height": h, "width": w} - if hasattr(pipe.image_processor, "do_pad"): - pipe.image_processor.do_pad = True + if hasattr(image_processor, "pad_size"): + image_processor.pad_size = {"height": h, "width": w} + if hasattr(image_processor, "do_pad"): + image_processor.do_pad = True else: - pipe.image_processor.size = {"height": h, "width": w} - if hasattr(pipe.image_processor, "do_pad"): - pipe.image_processor.do_pad = False + image_processor.size = {"height": h, "width": w} # type: ignore[attr-defined] + if hasattr(image_processor, "do_pad"): + image_processor.do_pad = False return pipe @@ -132,7 +142,7 @@ def align_labels( ann_feat[cat_key] = Sequence(Value("int64")) new_features[ann_col] = ann_feat - def remap(sample): + def remap(sample: dict[str, Any]) -> dict[str, Any]: ann = sample[ann_col] ann[cat_key] = [id_map[lbl] for lbl in ann[cat_key]] return sample @@ -151,8 +161,8 @@ def compute(self) -> dict[str, Any]: label2id = getattr(self.model.config, "label2id", {}) - predictions = [] - references = [] + predictions: list[dict[str, Any]] = [] + references: list[dict[str, Any]] = [] for i, sample in enumerate(self.data): # --- Ground truth --- diff --git a/src/winml/modelkit/eval/question_answering_evaluator.py b/src/winml/modelkit/eval/question_answering_evaluator.py index e024638dc..362325536 100644 --- a/src/winml/modelkit/eval/question_answering_evaluator.py +++ b/src/winml/modelkit/eval/question_answering_evaluator.py @@ -8,7 +8,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from .base_evaluator import WinMLEvaluator @@ -102,4 +102,4 @@ def compute(self) -> dict[str, Any]: **self.config.dataset.columns_mapping, } - return task_evaluator.compute(**kwargs) + return cast("dict[str, Any]", task_evaluator.compute(**kwargs)) diff --git a/src/winml/modelkit/eval/tensor_similarity_evaluator.py b/src/winml/modelkit/eval/tensor_similarity_evaluator.py index 7b483e8f1..d1321c3ea 100644 --- a/src/winml/modelkit/eval/tensor_similarity_evaluator.py +++ b/src/winml/modelkit/eval/tensor_similarity_evaluator.py @@ -22,6 +22,7 @@ if TYPE_CHECKING: from ..models.winml.base import WinMLPreTrainedModel + from ..models.winml.composite_model import WinMLCompositeModel from .config import WinMLEvaluationConfig @@ -34,10 +35,13 @@ class TensorSimilarityEvaluator: def __init__( self, config: WinMLEvaluationConfig, - model: WinMLPreTrainedModel, + model: WinMLPreTrainedModel | WinMLCompositeModel, ) -> None: from ..models.winml.composite_model import WinMLCompositeModel + # Composite models must be split into their sub-components before + # tensor-similarity comparison — the union param keeps this runtime + # guard live for type checkers. if isinstance(model, WinMLCompositeModel): sub_tasks = list(getattr(type(model), "_SUB_MODEL_CONFIG", {}).values()) raise TypeError( @@ -71,7 +75,8 @@ def _load_reference_model(self) -> Any: hf_config = AutoConfig.from_pretrained(self.config.model_id) _, cls = resolve_task_and_model_class(hf_config, task=self.config.task) logger.info("Loading HF reference %s on CPU/fp32", cls.__name__) - return cls.from_pretrained( + # cls is a HF model class which exposes from_pretrained; not in `type`. + return cls.from_pretrained( # type: ignore[attr-defined] self.config.model_id, dtype=torch.float32 ).eval() diff --git a/src/winml/modelkit/eval/zero_shot_classification_evaluator.py b/src/winml/modelkit/eval/zero_shot_classification_evaluator.py index 262a168f9..f9e0ca509 100644 --- a/src/winml/modelkit/eval/zero_shot_classification_evaluator.py +++ b/src/winml/modelkit/eval/zero_shot_classification_evaluator.py @@ -20,7 +20,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from tqdm import tqdm from transformers.pipelines.zero_shot_classification import ZeroShotClassificationPipeline @@ -46,10 +46,10 @@ class _FixedShapeZeroShotPipeline(ZeroShotClassificationPipeline): _winml_evaluator: WinMLEvaluator | None = None - def _parse_and_tokenize(self, sequence_pairs: Any, **kwargs: Any) -> Any: + def _parse_and_tokenize(self, *args: Any, **kwargs: Any) -> Any: kwargs.setdefault("padding", True) kwargs.setdefault("truncation", True) - encoding = super()._parse_and_tokenize(sequence_pairs, **kwargs) + encoding = super()._parse_and_tokenize(*args, **kwargs) if self._winml_evaluator is None or self.tokenizer is None: return encoding return self._winml_evaluator._pad_or_truncate(encoding, self.tokenizer) @@ -79,7 +79,9 @@ def prepare_pipeline(self) -> Pipeline: max_length = self._fixed_seq_length() - pipe = pipeline( + # WinMLPreTrainedModel isn't in transformers' Pipeline model union; + # the pipeline_class override is also outside the Literal overloads. + pipe = pipeline( # type: ignore[call-overload] "zero-shot-classification", model=self.model, framework="pt", @@ -101,7 +103,7 @@ def prepare_pipeline(self) -> Pipeline: if filtered: pipe.tokenizer.model_input_names = filtered - return pipe + return cast("Pipeline", pipe) def align_labels( self, diff --git a/src/winml/modelkit/export/__init__.py b/src/winml/modelkit/export/__init__.py index 868c8a925..f935f24d8 100644 --- a/src/winml/modelkit/export/__init__.py +++ b/src/winml/modelkit/export/__init__.py @@ -11,6 +11,8 @@ - export_pytorch / export_onnx for ONNX export """ +from typing import TYPE_CHECKING, Any + from .config import ( InputTensorSpec, OutputTensorSpec, @@ -19,6 +21,22 @@ ) +# Static type re-exports for the names exposed by ``__getattr__`` below. +# At runtime these are loaded lazily (see _LAZY_IMPORTS); at type-check time +# we want mypy to see real types so callers like ``build.hf.export_onnx(...)`` +# get checked instead of resolving to ``Any``. +if TYPE_CHECKING: + from .io import ( + MaxLengthTextInputGenerator, + ONNXConfigNotFoundError, + generate_dummy_inputs, + register_onnx_overwrite, + resolve_io_specs, + ) + from .pytorch import export_pytorch + from .pytorch import export_pytorch as export_onnx + + __version__ = "2.1.0" __all__ = [ @@ -47,7 +65,7 @@ } -def __getattr__(name: str): +def __getattr__(name: str) -> Any: """Lazy-load heavy exports to avoid importing optimum at package init.""" if name in _LAZY_IMPORTS: module_path, attr_name = _LAZY_IMPORTS[name] diff --git a/src/winml/modelkit/export/config.py b/src/winml/modelkit/export/config.py index a37b99b9c..4bee0907f 100644 --- a/src/winml/modelkit/export/config.py +++ b/src/winml/modelkit/export/config.py @@ -435,6 +435,9 @@ def resolve_export_config( trust_remote_code=trust_remote_code, library_name=library_name, ) + # resolve_loader_config guarantees both fields are populated (it raises otherwise). + assert loader_config.model_type is not None + assert loader_config.task is not None export_config = _resolve_export_config_from_specs( model_type=loader_config.model_type, diff --git a/src/winml/modelkit/export/htp/__init__.py b/src/winml/modelkit/export/htp/__init__.py index 97bdcb33d..2ea63c8cb 100644 --- a/src/winml/modelkit/export/htp/__init__.py +++ b/src/winml/modelkit/export/htp/__init__.py @@ -26,8 +26,8 @@ """ # HTP strategy version (defined before imports to avoid circular dependencies) -__version__ = "1.0.0" # HTP strategy version -__spec_version__ = ".".join(__version__.split(".")[:2]) # "1.0" +__version__: str = "1.0.0" # HTP strategy version +__spec_version__: str = ".".join(__version__.split(".")[:2]) # "1.0" from .base_writer import ExportStep from .exporter import HTPExporter diff --git a/src/winml/modelkit/export/htp/base_writer.py b/src/winml/modelkit/export/htp/base_writer.py index 3bbb9ef15..0e9295caa 100644 --- a/src/winml/modelkit/export/htp/base_writer.py +++ b/src/winml/modelkit/export/htp/base_writer.py @@ -19,8 +19,7 @@ # datetime imports removed - following ADR-006 to use float timestamps only from enum import Enum -from functools import wraps -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, TypeVar from .step_data import ( HierarchyData, @@ -92,17 +91,25 @@ def elapsed_time(self) -> float: return time.time() - self.start_time -def step(export_step: ExportStep) -> Any: - """Decorator to mark step-specific handler methods.""" +# Bound to `Callable[..., int]` to match StepAwareWriter.write()'s IOBase +# contract (returns "bytes written"). All @step handlers must return int — +# a handler typed `-> None` will fail mypy here rather than silently +# breaking the writer's return value. +F = TypeVar("F", bound="Callable[..., int]") - def decorator(func: Callable) -> Callable: - func._handles_step = export_step - @wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> int: - return func(*args, **kwargs) +def step(export_step: ExportStep) -> Callable[[F], F]: + """Decorator to mark step-specific handler methods. - return wrapper + Attaches ``_handles_step`` on the function so ``StepAwareWriter``'s + discovery loop can map each handler to its declared step. The function is + returned unchanged, so the original signature is preserved for callers and + type checkers. + """ + + def decorator(func: F) -> F: + func._handles_step = export_step # type: ignore[attr-defined] + return func return decorator @@ -113,7 +120,7 @@ class StepAwareWriter(io.IOBase, ABC): def __init__(self) -> None: """Initialize the writer and discover step handlers.""" super().__init__() - self._step_handlers: dict[ExportStep, Callable] = {} + self._step_handlers: dict[ExportStep, Callable[..., int]] = {} self._discover_handlers() def _discover_handlers(self) -> None: diff --git a/src/winml/modelkit/export/htp/config_generator.py b/src/winml/modelkit/export/htp/config_generator.py index 7dbfa0e3a..060a12499 100644 --- a/src/winml/modelkit/export/htp/config_generator.py +++ b/src/winml/modelkit/export/htp/config_generator.py @@ -203,7 +203,8 @@ def _generate_input_specs( Uses InputSpecGenerator patterns (universal approach). """ try: - from ...inference.onnx_config.input_generator import ( + # Tracked: inference.onnx_config.* doesn't exist; #859 decides delete-vs-restore. + from ...inference.onnx_config.input_generator import ( # type: ignore[import-not-found] InputSpecGenerator, ) @@ -265,10 +266,13 @@ def _get_output_names( Uses InputSpecGenerator patterns (universal approach). """ try: - from ...inference.onnx_config.patterns import TASK_TO_OUTPUTS + # Tracked: inference.onnx_config.* doesn't exist; #859 decides delete-vs-restore. + from ...inference.onnx_config.patterns import ( # type: ignore[import-not-found] + TASK_TO_OUTPUTS, + ) if task and task in TASK_TO_OUTPUTS: - return TASK_TO_OUTPUTS[task] + return list(TASK_TO_OUTPUTS[task]) # Default outputs if task and "classification" in task: @@ -300,7 +304,7 @@ def generate_for_cuda(model_name_or_path: str, **kwargs: Any) -> ExportConfigTem def generate_config_cli( model: str, output: str = "export_config.json", - target: str = "qnn", + target: Literal["qnn", "cpu", "cuda", "universal"] = "qnn", task: str | None = None, batch_size: int = 1, ) -> None: @@ -340,6 +344,18 @@ def generate_config_cli( model = sys.argv[1] output = sys.argv[2] if len(sys.argv) > 2 else "export_config.json" - target = sys.argv[3] if len(sys.argv) > 3 else "qnn" + target: Literal["qnn", "cpu", "cuda", "universal"] + raw_target = sys.argv[3] if len(sys.argv) > 3 else "qnn" + if raw_target == "qnn": + target = "qnn" + elif raw_target == "cpu": + target = "cpu" + elif raw_target == "cuda": + target = "cuda" + elif raw_target == "universal": + target = "universal" + else: + print(f"Invalid target {raw_target!r}; expected qnn, cpu, cuda, or universal.") + sys.exit(1) generate_config_cli(model, output, target) diff --git a/src/winml/modelkit/export/htp/console_writer.py b/src/winml/modelkit/export/htp/console_writer.py index c59e14558..c132feb4b 100644 --- a/src/winml/modelkit/export/htp/console_writer.py +++ b/src/winml/modelkit/export/htp/console_writer.py @@ -15,7 +15,7 @@ import io import os -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from rich.console import Console from rich.tree import Tree @@ -201,7 +201,7 @@ def write_onnx_export(self, export_step: ExportStep, data: ExportData) -> int: self.console.print("🔧 Export configuration:") self.console.print( - f" • Opset version: {self._bright_green(data.onnx_export.opset_version)}" + f" • Opset version: {self._bright_green(str(data.onnx_export.opset_version))}" ) self.console.print( f" • Constant folding: {self._format_bool(data.onnx_export.do_constant_folding)}" @@ -335,7 +335,9 @@ def _build_truncated_tree(self, source_tree: Tree, target_tree: Tree, max_lines: line_count = 1 # Start with root # Helper to add nodes up to limit - def add_nodes_to_limit(source_children, target_parent, current_count): + def add_nodes_to_limit( + source_children: Any, target_parent: Any, current_count: int + ) -> int: count = current_count for child in source_children: if count >= max_lines: diff --git a/src/winml/modelkit/export/htp/exporter.py b/src/winml/modelkit/export/htp/exporter.py index 35718392d..91abd5828 100644 --- a/src/winml/modelkit/export/htp/exporter.py +++ b/src/winml/modelkit/export/htp/exporter.py @@ -30,7 +30,7 @@ import torch.nn as nn from rich.console import Console -from ...core.onnx_node_tagger import create_node_tagger_from_hierarchy +from ...core.onnx_node_tagger import ONNXNodeTagger, create_node_tagger_from_hierarchy from ...core.onnx_utils import infer_output_names from .base_writer import ExportStep from .hierarchy import TracingHierarchyBuilder @@ -140,17 +140,17 @@ def __init__( self.strategy = HTPConfig.STRATEGY_NAME # Core components - self._hierarchy_builder = None - self._node_tagger = None - self._hierarchy_data = {} - self._tagged_nodes = {} - self._tagging_stats = {} + self._hierarchy_builder: TracingHierarchyBuilder | None = None + self._node_tagger: ONNXNodeTagger | None = None + self._hierarchy_data: dict[str, Any] = {} + self._tagged_nodes: dict[str, str] = {} + self._tagging_stats: dict[str, Any] = {} # Export statistics self._export_stats = HTPConfig.DEFAULT_EXPORT_STATS.copy() # Export monitor will be initialized in export() - self._monitor = None + self._monitor: HTPExportMonitor | None = None # Rich console for tree rendering self.console = Console(width=HTPConfig.CONSOLE_WIDTH) @@ -447,7 +447,8 @@ def _convert_model_to_onnx( # get_export_args(inputs) → tuple of positional args. # Default: pass inputs dict as kwargs. if hasattr(model, "get_export_args"): - export_args = model.get_export_args(inputs) + # hasattr-gated optional protocol; not in nn.Module's static type. + export_args = model.get_export_args(inputs) # type: ignore[operator] torch.onnx.export(model, export_args, output_path, **onnx_kwargs) else: torch.onnx.export(model, (), output_path, kwargs=inputs, **onnx_kwargs) @@ -472,6 +473,9 @@ def _get_optimum_patcher(model: nn.Module, task: str | None) -> Any: if not model_type: logger.debug("Model has no config.model_type; skipping Optimum patcher.") return contextlib.nullcontext() + if task is None: + logger.debug("No task provided; skipping Optimum patcher.") + return contextlib.nullcontext() # TasksManager expects Optimum-canonical task names from ...loader import to_optimum_task @@ -532,6 +536,9 @@ def _initialize_node_tagger(self, enable_operation_fallback: bool) -> None: def _apply_hierarchy_tags(self, onnx_model: onnx.ModelProto) -> None: """Tag nodes internally.""" + assert self._node_tagger is not None, ( + "_apply_hierarchy_tags called before _initialize_node_tagger" + ) # Store ONNX model for later use in displaying operations self._onnx_model = onnx_model self._tagged_nodes = self._node_tagger.tag_all_nodes(onnx_model) diff --git a/src/winml/modelkit/export/htp/hierarchy.py b/src/winml/modelkit/export/htp/hierarchy.py index c4fb385cd..7a3fa2f91 100644 --- a/src/winml/modelkit/export/htp/hierarchy.py +++ b/src/winml/modelkit/export/htp/hierarchy.py @@ -39,14 +39,14 @@ def __init__(self, exceptions: list[str] | None = None) -> None: torch.nn modules. Passed to should_include_in_hierarchy. Example: ["Conv2d", "BatchNorm2d"] to include these in hierarchy. """ - self.tag_stack = [] - self.execution_trace = [] - self.operation_context = {} - self.hooks = [] - self.module_hierarchy = {} # Only populated for executed modules - self.traced_modules = set() # Track which modules were traced + self.tag_stack: list[str] = [] + self.execution_trace: list[dict[str, Any]] = [] + self.operation_context: dict[str, dict[str, Any]] = {} + self.hooks: list[Any] = [] + self.module_hierarchy: dict[str, dict[str, Any]] = {} # Only populated for executed modules + self.traced_modules: set[str] = set() # Track which modules were traced self.exceptions = exceptions # torch.nn exceptions to include - self.model_outputs = None # Store model outputs from execution + self.model_outputs: Any = None # Store model outputs from execution def is_hf_class(self, module: nn.Module) -> bool: """Check if a module is a HuggingFace class - UNIVERSAL.""" @@ -183,7 +183,9 @@ def remove_hooks(self) -> None: self.hooks.clear() def trace_model_execution( - self, model: nn.Module, example_inputs: tuple[torch.Tensor, ...] + self, + model: nn.Module, + example_inputs: tuple[torch.Tensor, ...] | dict[str, Any], ) -> None: """Trace model execution to build hierarchy mapping - UNIVERSAL.""" self.register_hooks(model) diff --git a/src/winml/modelkit/export/htp/markdown_report_writer.py b/src/winml/modelkit/export/htp/markdown_report_writer.py index 98e53e4f7..35a42b687 100644 --- a/src/winml/modelkit/export/htp/markdown_report_writer.py +++ b/src/winml/modelkit/export/htp/markdown_report_writer.py @@ -12,6 +12,7 @@ import time from pathlib import Path +from typing import Any import snakemd @@ -53,9 +54,9 @@ def __init__(self, output_path: str) -> None: self.doc = snakemd.new_doc() # Store step data for final report generation - self._step_results = {} + self._step_results: dict[ExportStep, dict[str, Any]] = {} self._start_time = time.time() - self._export_data = None # Will be set on first write + self._export_data: ExportData | None = None # Will be set on first write self._report_generated = False # Track if report was generated def _write_default(self, export_step: ExportStep, data: ExportData) -> int: @@ -421,8 +422,8 @@ def _write_module_hierarchy_section(self, data: ExportData) -> None: self.doc.add_heading("Module List (Sorted by Execution Order)", level=3) # Count direct and total nodes for each module if available - direct_counts = {} - total_counts = {} + direct_counts: dict[str, int] = {} + total_counts: dict[str, int] = {} if data.node_tagging and data.node_tagging.tagged_nodes: direct_counts, total_counts = count_direct_and_total_nodes( data.node_tagging.tagged_nodes diff --git a/src/winml/modelkit/export/htp/metadata_builder.py b/src/winml/modelkit/export/htp/metadata_builder.py index b7728ee8c..fd0698faf 100644 --- a/src/winml/modelkit/export/htp/metadata_builder.py +++ b/src/winml/modelkit/export/htp/metadata_builder.py @@ -371,7 +371,7 @@ def build_minimal(self, error: str | None = None) -> dict[str, Any]: # version will use the default htp_version from the dataclass ) - result = {"export_context": asdict(minimal_context)} + result: dict[str, Any] = {"export_context": asdict(minimal_context)} if error: result["error"] = error diff --git a/src/winml/modelkit/export/htp/metadata_writer.py b/src/winml/modelkit/export/htp/metadata_writer.py index 7f7a2d648..88953322a 100644 --- a/src/winml/modelkit/export/htp/metadata_writer.py +++ b/src/winml/modelkit/export/htp/metadata_writer.py @@ -44,8 +44,8 @@ def __init__(self, output_path: str) -> None: # Store data for final building self._model_info_set = False self._export_time = 0.0 - self._steps_data = {} - self._export_data = None # Will be set on first write + self._steps_data: dict[str, dict[str, Any]] = {} + self._export_data: ExportData | None = None # Will be set on first write def _write_default(self, export_step: ExportStep, data: ExportData) -> int: """Default handler - record step completion.""" @@ -360,7 +360,7 @@ def _build_hierarchical_modules(self, flat_hierarchy: dict[str, ModuleInfo]) -> return {} # Build root structure - root = { + root: dict[str, Any] = { "class_name": root_info.class_name, "traced_tag": root_info.traced_tag, "scope": "", @@ -446,7 +446,7 @@ def _build_children_for_parent( key = module_info.class_name # Build child structure - child = { + child: dict[str, Any] = { "class_name": module_info.class_name, "traced_tag": module_info.traced_tag, "scope": path, # Full path from root diff --git a/src/winml/modelkit/export/htp/monitor.py b/src/winml/modelkit/export/htp/monitor.py index 27f148985..2cf94f549 100644 --- a/src/winml/modelkit/export/htp/monitor.py +++ b/src/winml/modelkit/export/htp/monitor.py @@ -83,6 +83,7 @@ def __init__( self.writers.append(self.metadata_writer) # Report writer (optional) - now using MarkdownReportWriter + self.report_writer: MarkdownReportWriter | None if enable_report: self.report_writer = MarkdownReportWriter(output_path) self.writers.append(self.report_writer) diff --git a/src/winml/modelkit/export/io.py b/src/winml/modelkit/export/io.py index a2111a86b..139200802 100644 --- a/src/winml/modelkit/export/io.py +++ b/src/winml/modelkit/export/io.py @@ -32,7 +32,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from optimum.exporters.tasks import TasksManager from optimum.utils.input_generators import ( @@ -92,7 +92,7 @@ def ensure_hf_models_registered() -> None: # ============================================================================= # Custom Input Generators # ============================================================================= -class MaxLengthTextInputGenerator(DummyTextInputGenerator): +class MaxLengthTextInputGenerator(DummyTextInputGenerator): # type: ignore[misc] """Text input generator that uses max_position_embeddings as sequence_length. Optimum's DummyTextInputGenerator uses a hardcoded default of 16 for @@ -116,8 +116,8 @@ def __init__( task: str, normalized_config: NormalizedTextConfig, sequence_length: int | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: """Initialize with sequence_length from normalized_config. Args: @@ -271,7 +271,11 @@ def _get_preprocessor_dict( from. Returns an empty dict when neither source yields a usable size. """ try: - from transformers.image_processing_utils import ImageProcessingMixin + if model_id is None: + raise OSError("No model_id provided") + from transformers.image_processing_utils import ( # type: ignore[attr-defined] + ImageProcessingMixin, + ) config, _ = ImageProcessingMixin.get_image_processor_dict(model_id) if "size" in config: @@ -279,7 +283,6 @@ def _get_preprocessor_dict( # Partial preprocessor_config.json without a "size" key: fall through # to synthesis so we don't silently use Optimum's 64x64 default. except (OSError, ValueError, KeyError) as e: - # if model_id is None, OSError is raised logger.debug("Could not load preprocessor_config.json for %s: %s", model_id, e) if hf_config is not None: @@ -411,7 +414,11 @@ def generate_dummy_inputs( shape_kwargs, ) - return onnx_config.generate_dummy_inputs(framework="pt", **shape_kwargs) + # Optimum's OnnxConfig is untyped; the dummy-inputs dict matches our return type. + return cast( + "dict[str, torch.Tensor]", + onnx_config.generate_dummy_inputs(framework="pt", **shape_kwargs), + ) def resolve_io_specs( @@ -477,7 +484,9 @@ def resolve_io_specs( input_dtypes = [str(t.dtype).replace("torch.", "") for t in dummy_inputs.values()] # Build value_range dict: {name: (min, max)} from intercepted data - value_ranges = {name: (info["min"], info["max"]) for name, info in value_ranges.items()} + value_range_tuples = { + name: (info["min"], info["max"]) for name, info in value_ranges.items() + } return { "inputs": onnx_config.inputs, @@ -487,5 +496,5 @@ def resolve_io_specs( "dynamic_axes": {**onnx_config.inputs, **onnx_config.outputs}, "input_shapes": input_shapes, "input_dtypes": input_dtypes, - "value_ranges": value_ranges, + "value_ranges": value_range_tuples, } diff --git a/src/winml/modelkit/export/value_range.py b/src/winml/modelkit/export/value_range.py index 2946a594f..b2d990596 100644 --- a/src/winml/modelkit/export/value_range.py +++ b/src/winml/modelkit/export/value_range.py @@ -23,10 +23,15 @@ import threading from contextlib import contextmanager from functools import wraps +from typing import TYPE_CHECKING, Any from optimum.utils.input_generators import DummyInputGenerator +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + + # Thread-local to correlate static method calls with the current input_name. # generate() wrapper sets .name before calling the original, static method # wrappers read it to associate captured ranges with the correct input. @@ -41,11 +46,13 @@ ) -def _make_static_wrapper(original, method_name, captured): +def _make_static_wrapper( + original: Callable[..., Any], method_name: str, captured: dict[str, dict[str, Any]] +) -> Callable[..., Any]: """Wrap a DummyInputGenerator static method to capture value range args.""" @wraps(original) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any) -> Any: result = original(*args, **kwargs) input_name = getattr(_current_input, "name", None) if input_name is None: @@ -81,11 +88,11 @@ def wrapper(*args, **kwargs): return wrapper -def _make_generate_wrapper(original): +def _make_generate_wrapper(original: Callable[..., Any]) -> Callable[..., Any]: """Wrap a generator's generate() to track which input_name is active.""" @wraps(original) - def wrapper(self, input_name, *args, **kwargs): + def wrapper(self: Any, input_name: str, *args: Any, **kwargs: Any) -> Any: _current_input.name = input_name try: return original(self, input_name, *args, **kwargs) @@ -96,7 +103,7 @@ def wrapper(self, input_name, *args, **kwargs): @contextmanager -def intercept_value_ranges(): +def intercept_value_ranges() -> Iterator[dict[str, dict[str, Any]]]: """Context manager that captures value ranges from Optimum's dummy input generation. Monkey-patches DummyInputGenerator's static tensor methods and all @@ -133,11 +140,12 @@ def intercept_value_ranges(): # Patch generate() on all subclasses that override it patched_classes = [] - def _patch_subclasses(base): + def _patch_subclasses(base: type) -> None: for cls in base.__subclasses__(): if "generate" in cls.__dict__: originals[(cls, "generate")] = cls.__dict__["generate"] - cls.generate = _make_generate_wrapper(cls.__dict__["generate"]) + # Monkey-patch optimum's untyped generator hierarchy. + cls.generate = _make_generate_wrapper(cls.__dict__["generate"]) # type: ignore[attr-defined] patched_classes.append(cls) _patch_subclasses(cls) @@ -154,4 +162,4 @@ def _patch_subclasses(base): staticmethod(originals[method_name]), ) for cls in patched_classes: - cls.generate = originals[(cls, "generate")] + cls.generate = originals[(cls, "generate")] # type: ignore[attr-defined] diff --git a/src/winml/modelkit/inference/engine.py b/src/winml/modelkit/inference/engine.py index 9f4c28f72..e3e4bbf38 100644 --- a/src/winml/modelkit/inference/engine.py +++ b/src/winml/modelkit/inference/engine.py @@ -34,7 +34,7 @@ from datetime import datetime, timezone from io import BytesIO from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from .tasks import BINARY_TYPES, TASK_REGISTRY, InputField, PipelineMapping from .types import Prediction, PredictionResult @@ -234,7 +234,7 @@ def _discover_pipeline_params_from_task(task: str | None) -> list[dict]: try: from transformers.pipelines import SUPPORTED_TASKS - task_info = SUPPORTED_TASKS.get(task) + task_info = cast("dict[str, Any] | None", SUPPORTED_TASKS.get(task)) if not task_info: return [] pipeline_class = task_info.get("impl") @@ -870,7 +870,10 @@ def _normalize_pipeline_output( # output transformation without any if/else branching here. spec = TASK_REGISTRY.get(task or "") if spec and spec.postprocess is not None: - return spec.postprocess(raw, pipeline=self._pipeline, inputs=inputs) + return cast( + "list[Prediction] | dict[str, Any]", + spec.postprocess(raw, pipeline=self._pipeline, inputs=inputs), + ) if isinstance(raw, list) and raw and isinstance(raw[0], dict): # Classification / detection: list of {"label": ..., "score": ...} @@ -886,10 +889,10 @@ def _normalize_pipeline_output( # Sanitize numpy scalars so pydantic/JSON serialization works # (NER pipelines return np.float32 scores). result = raw[0] if len(raw) == 1 else {"results": raw} - return _sanitize_numpy(result) + return cast("dict[str, Any]", _sanitize_numpy(result)) # Other tasks: return as-is dict if isinstance(raw, dict): - return _sanitize_numpy(raw) + return cast("dict[str, Any]", _sanitize_numpy(raw)) # Fallback return {"raw": str(raw)} @@ -902,6 +905,7 @@ def _predict_raw_tensors(self, tensor_inputs: dict[str, Any]) -> dict[str, Any]: import numpy as np import torch + assert self._model is not None, "_predict_raw_tensors called before model loaded" inputs_torch = { k: torch.from_numpy(np.array(v)) if not isinstance(v, torch.Tensor) else v for k, v in tensor_inputs.items() @@ -961,10 +965,10 @@ def _load_from_build_dir( def _resolve_model_id_from_dir(build_dir: Path) -> str | None: """Extract model_id from any manifest in the directory (task-agnostic).""" for manifest_path in build_dir.glob("*build_manifest.json"): - manifest = json.loads(manifest_path.read_text()) + manifest: dict[str, Any] = json.loads(manifest_path.read_text()) model_id = manifest.get("model_id") if model_id: - return model_id + return str(model_id) return None def _load_from_onnx( diff --git a/src/winml/modelkit/inference/pipeline.py b/src/winml/modelkit/inference/pipeline.py index c478f724a..33e243cfe 100644 --- a/src/winml/modelkit/inference/pipeline.py +++ b/src/winml/modelkit/inference/pipeline.py @@ -24,6 +24,8 @@ if TYPE_CHECKING: + from collections.abc import Mapping + from ..models.winml.base import WinMLPreTrainedModel logger = logging.getLogger(__name__) @@ -69,7 +71,9 @@ def create_pipeline( kwargs["processor"] = model_id hf_task = _HF_PIPELINE_TASK_MAP.get(task, task) - pipe = pipeline(hf_task, model=model, **kwargs) + # transformers.pipeline has 60+ Literal overloads — runtime task strings can't + # be statically matched. The string-task fallback handles unknown tasks safely. + pipe = pipeline(hf_task, model=model, **kwargs) # type: ignore[call-overload] # Adapt pipeline to fixed ONNX input shapes _adapt_tokenizer_padding(pipe, task, model) @@ -160,7 +164,9 @@ def _adapt_tokenizer_padding(pipe: Any, task: str, model: Any) -> None: pipe.tokenizer.model_max_length = max_length -def _detect_tokenizer_dict_param(pipe: Any, sig_params: dict) -> str | None: +def _detect_tokenizer_dict_param( + pipe: Any, sig_params: Mapping[str, inspect.Parameter] +) -> str | None: """Detect if preprocess() consumes tokenizer settings via a nested dict. Returns the dict key name (e.g. "tokenizer_kwargs", "tokenizer_params"), diff --git a/src/winml/modelkit/inference/tasks.py b/src/winml/modelkit/inference/tasks.py index 2a291bb87..e0602ff65 100644 --- a/src/winml/modelkit/inference/tasks.py +++ b/src/winml/modelkit/inference/tasks.py @@ -16,7 +16,7 @@ import re from collections.abc import Callable from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast if TYPE_CHECKING: @@ -124,9 +124,9 @@ def _masked_mean_pool( mask = attention_mask.astype(float) denom = mask.sum() if denom > 0: - return (token_embeddings * mask[:, None]).sum(0) / denom + return cast("np.ndarray", (token_embeddings * mask[:, None]).sum(0) / denom) if token_embeddings.ndim > 1: - return token_embeddings.mean(axis=0) + return cast("np.ndarray", token_embeddings.mean(axis=0)) return token_embeddings diff --git a/src/winml/modelkit/models/auto.py b/src/winml/modelkit/models/auto.py index a14d5b162..f0ed7456c 100644 --- a/src/winml/modelkit/models/auto.py +++ b/src/winml/modelkit/models/auto.py @@ -35,6 +35,8 @@ if TYPE_CHECKING: + from collections.abc import Mapping + from transformers import PretrainedConfig from ..config import WinMLBuildConfig @@ -98,7 +100,9 @@ def __init__(self) -> None: @classmethod def from_onnx( cls, - onnx_path: str | Path | dict[str, str | Path], + # Mapping (not dict) so dict[str, str] from configs is accepted + # without a cast — dict is invariant on value type, Mapping is covariant. + onnx_path: str | Path | Mapping[str, str | Path], *, task: str | None = None, config: WinMLBuildConfig | None = None, diff --git a/src/winml/modelkit/models/winml/composite_model.py b/src/winml/modelkit/models/winml/composite_model.py index 9b7aa20a6..353cf2ec8 100644 --- a/src/winml/modelkit/models/winml/composite_model.py +++ b/src/winml/modelkit/models/winml/composite_model.py @@ -49,6 +49,7 @@ if TYPE_CHECKING: + from collections.abc import Mapping from pathlib import Path from transformers import PretrainedConfig @@ -198,7 +199,9 @@ def from_pretrained( @classmethod def from_onnx( cls, - onnx_path: dict[str, str | Path], + # Mapping (not dict) so dict[str, str] from configs is accepted + # without a cast — dict is invariant on value type, Mapping is covariant. + onnx_path: Mapping[str, str | Path], *, task: str | None = None, hf_config: PretrainedConfig | None = None,