hampsterx · hampsterx · Jun 19, 2026 · Jun 18, 2026 · coderabbitai · Jun 19, 2026
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Report the cold-import time and source size of ``snowflake_sql_api``.
+
+Manual / CI-informational tool, not a gate (the hard regression guard lives in
+``tests/test_import_footprint.py``). The whole point of this client is a small
+install and a fast cold start; this prints both so a regression is visible.
+
+Run from the repo (the script locates the package relative to itself, so no
+install is required)::
+
+    python scripts/benchmark.py            # default 5 import runs
+    python scripts/benchmark.py --runs 10
+"""
+
+from __future__ import annotations
+
+import argparse
+import pathlib
+import statistics
+import subprocess
+import sys
+
+_REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
+_PACKAGE_DIR = _REPO_ROOT / "snowflake_sql_api"
+
+_TIMING_SCRIPT = """
+import time
+start = time.perf_counter()
+import snowflake_sql_api  # noqa: F401
+print(time.perf_counter() - start)
+"""
+
+
+def cold_import_seconds(runs: int) -> list:
+    """Time ``import snowflake_sql_api`` in a fresh subprocess, ``runs`` times.
+
+    Runs with cwd at the repo root so the package imports from the source tree
+    even without an editable install.
+    """
+    timings = []
+    for _ in range(runs):
+        result = subprocess.run(
+            [sys.executable, "-c", _TIMING_SCRIPT],
+            capture_output=True,
+            text=True,
+            check=True,
+            cwd=_REPO_ROOT,
+        )
+        timings.append(float(result.stdout.strip()))
+    return timings
+
+
+def source_size_bytes() -> int:
+    """Sum the on-disk size of the package's source files (excludes bytecode)."""
+    return sum(
+        path.stat().st_size
+        for path in _PACKAGE_DIR.rglob("*")
+        if path.is_file() and "__pycache__" not in path.parts and path.suffix != ".pyc"
+    )
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--runs", type=int, default=5, help="import timing samples (default: 5)"
+    )
+    args = parser.parse_args()
+    if args.runs < 1:
+        parser.error("--runs must be >= 1")
+
+    timings = cold_import_seconds(args.runs)
+    size_kb = source_size_bytes() / 1024
+
+    print(f"cold import (n={args.runs}):")
+    print(f"  median: {statistics.median(timings) * 1000:.1f} ms")
+    print(f"  min:    {min(timings) * 1000:.1f} ms")
+    print(f"  max:    {max(timings) * 1000:.1f} ms")
+    print(f"source size: {size_kb:.1f} KiB (package only, excludes deps)")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_import_footprint.py b/tests/test_import_footprint.py
@@ -0,0 +1,108 @@
+"""Guard the install footprint: a bare ``import snowflake_sql_api`` stays light.
+
+The package's whole value proposition is a small install and a fast cold start
+(serverless / Lambda). Two regressions would silently erode that:
+
+1. A heavy optional dependency (pandas / pyarrow) getting imported at module top
+   instead of behind its extra. The ``[pandas]`` feature must stay opt-in.
+2. Import time ballooning for any reason.
+
+Both checks run in a **fresh subprocess** so the parent pytest process's
+already-imported modules can't mask a regression (pandas may well be imported by
+the time these tests run). The leak check installs a recording meta-path finder
+that stubs the forbidden names, so it fires deterministically whether or not the
+real packages are installed: a module-top ``import pandas`` is caught even when
+it is guarded by ``try/except ImportError`` and even in an env without pandas.
+"""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+
+# Heavy optional deps that a bare import must never pull in. pandas is the
+# ``[pandas]`` extra; pyarrow/numpy ride in with it. If any is imported at module
+# top, something bypassed the optional-import guard.
+FORBIDDEN_ON_BARE_IMPORT = ("pandas", "pyarrow", "numpy")
+
+# Generous ceiling: the real cold import (httpx + PyJWT + cryptography, all pure
+# Python or fast C imports) is well under a second. This is a coarse backstop
+# against an order-of-magnitude regression, not a precise benchmark, so the bound
+# is deliberately loose and we assert on the best of several samples to stay
+# non-flaky on shared CI runners.
+IMPORT_BUDGET_SECONDS = 2.5
+TIMING_SAMPLES = 3
+
+# Subprocess: record any *attempt* to import a forbidden module during a bare
+# `import snowflake_sql_api`. The recorder returns a stub module for forbidden
+# names, so the import succeeds (and is recorded) regardless of whether the real
+# dependency is installed -- making the guard deterministic across environments.
+_LEAK_SCRIPT = """
+import importlib.abc
+import importlib.machinery
+import sys
+import types
+
+FORBIDDEN = {forbidden!r}
+attempted = []
+
+
+class _Recorder(importlib.abc.MetaPathFinder, importlib.abc.Loader):
+    def find_spec(self, name, path, target=None):
+        if name.split(".")[0] in FORBIDDEN:
+            attempted.append(name.split(".")[0])
+            return importlib.machinery.ModuleSpec(name, self)
+        return None
+
+    def create_module(self, spec):
+        return types.ModuleType(spec.name)
+
+    def exec_module(self, module):
+        pass
+
+
+sys.meta_path.insert(0, _Recorder())
+import snowflake_sql_api  # noqa: F401
+print(",".join(sorted(set(attempted))))
+""".format(
+    forbidden=FORBIDDEN_ON_BARE_IMPORT
+)
+
+_TIMING_SCRIPT = """
+import time
+start = time.perf_counter()
+import snowflake_sql_api  # noqa: F401
+print(time.perf_counter() - start)
+"""
+
+
+def _run(script: str) -> subprocess.CompletedProcess:
+    return subprocess.run(
+        [sys.executable, "-c", script], capture_output=True, text=True
+    )
-def _run(script: str) -> subprocess.CompletedProcess:
-    return subprocess.run(
-        [sys.executable, "-c", script], capture_output=True, text=True
-    )
+import subprocess
+import sys
+from pathlib import Path
+
+# ... existing code ...
+
+TIMING_SAMPLES = 3
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+
+# ... existing code ...
+
+def _run(script: str) -> subprocess.CompletedProcess:
+    return subprocess.run(
+        [sys.executable, "-c", script],
+        capture_output=True,
+        text=True,
+        cwd=_REPO_ROOT,
+        timeout=30,
+    )
-def _run(script: str) -> subprocess.CompletedProcess:
-    return subprocess.run(
-        [sys.executable, "-c", script], capture_output=True, text=True
-    )
+import subprocess
+import sys
+from pathlib import Path
+
+# ... existing code ...
+
+TIMING_SAMPLES = 3
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+
+# ... existing code ...
+
+def _run(script: str) -> subprocess.CompletedProcess:
+    return subprocess.run(
+        [sys.executable, "-c", script],
+        capture_output=True,
+        text=True,
+        cwd=_REPO_ROOT,
+        timeout=30,
+    )
+
+
+def test_bare_import_pulls_no_heavy_deps() -> None:
+    result = _run(_LEAK_SCRIPT)
+    assert (
+        result.returncode == 0
+    ), f"`import snowflake_sql_api` failed in a clean subprocess:\n{result.stderr}"
+    attempted = [name for name in result.stdout.strip().split(",") if name]
+    assert not attempted, (
+        f"bare `import snowflake_sql_api` tried to import heavy deps: {attempted}. "
+        "Keep optional dependencies behind their extra (lazy import)."
+    )
+
+
+def test_bare_import_is_fast() -> None:
+    samples = []
+    for _ in range(TIMING_SAMPLES):
+        result = _run(_TIMING_SCRIPT)
+        assert result.returncode == 0, result.stderr
+        samples.append(float(result.stdout.strip()))
+    best = min(samples)
+    assert best < IMPORT_BUDGET_SECONDS, (
+        f"fastest of {TIMING_SAMPLES} imports was {best:.3f}s, over the "
+        f"{IMPORT_BUDGET_SECONDS}s budget. A heavy import likely crept into the "
+        "module-load path."
+    )