Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions scripts/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""Report the cold-import time and source size of ``snowflake_sql_api``.

Manual / CI-informational tool, not a gate (the hard regression guard lives in
``tests/test_import_footprint.py``). The whole point of this client is a small
install and a fast cold start; this prints both so a regression is visible.

Run from the repo (the script locates the package relative to itself, so no
install is required)::

python scripts/benchmark.py # default 5 import runs
python scripts/benchmark.py --runs 10
"""

from __future__ import annotations

import argparse
import pathlib
import statistics
import subprocess
import sys

_REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
_PACKAGE_DIR = _REPO_ROOT / "snowflake_sql_api"

_TIMING_SCRIPT = """
import time
start = time.perf_counter()
import snowflake_sql_api # noqa: F401
print(time.perf_counter() - start)
"""


def cold_import_seconds(runs: int) -> list:
"""Time ``import snowflake_sql_api`` in a fresh subprocess, ``runs`` times.

Runs with cwd at the repo root so the package imports from the source tree
even without an editable install.
"""
timings = []
for _ in range(runs):
result = subprocess.run(
[sys.executable, "-c", _TIMING_SCRIPT],
capture_output=True,
text=True,
check=True,
cwd=_REPO_ROOT,
)
timings.append(float(result.stdout.strip()))
return timings


def source_size_bytes() -> int:
"""Sum the on-disk size of the package's source files (excludes bytecode)."""
return sum(
path.stat().st_size
for path in _PACKAGE_DIR.rglob("*")
if path.is_file() and "__pycache__" not in path.parts and path.suffix != ".pyc"
)


def main() -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--runs", type=int, default=5, help="import timing samples (default: 5)"
)
args = parser.parse_args()
if args.runs < 1:
parser.error("--runs must be >= 1")

timings = cold_import_seconds(args.runs)
size_kb = source_size_bytes() / 1024

print(f"cold import (n={args.runs}):")
print(f" median: {statistics.median(timings) * 1000:.1f} ms")
print(f" min: {min(timings) * 1000:.1f} ms")
print(f" max: {max(timings) * 1000:.1f} ms")
print(f"source size: {size_kb:.1f} KiB (package only, excludes deps)")
return 0


if __name__ == "__main__":
raise SystemExit(main())
108 changes: 108 additions & 0 deletions tests/test_import_footprint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""Guard the install footprint: a bare ``import snowflake_sql_api`` stays light.

The package's whole value proposition is a small install and a fast cold start
(serverless / Lambda). Two regressions would silently erode that:

1. A heavy optional dependency (pandas / pyarrow) getting imported at module top
instead of behind its extra. The ``[pandas]`` feature must stay opt-in.
2. Import time ballooning for any reason.

Both checks run in a **fresh subprocess** so the parent pytest process's
already-imported modules can't mask a regression (pandas may well be imported by
the time these tests run). The leak check installs a recording meta-path finder
that stubs the forbidden names, so it fires deterministically whether or not the
real packages are installed: a module-top ``import pandas`` is caught even when
it is guarded by ``try/except ImportError`` and even in an env without pandas.
"""

from __future__ import annotations

import subprocess
import sys

# Heavy optional deps that a bare import must never pull in. pandas is the
# ``[pandas]`` extra; pyarrow/numpy ride in with it. If any is imported at module
# top, something bypassed the optional-import guard.
FORBIDDEN_ON_BARE_IMPORT = ("pandas", "pyarrow", "numpy")

# Generous ceiling: the real cold import (httpx + PyJWT + cryptography, all pure
# Python or fast C imports) is well under a second. This is a coarse backstop
# against an order-of-magnitude regression, not a precise benchmark, so the bound
# is deliberately loose and we assert on the best of several samples to stay
# non-flaky on shared CI runners.
IMPORT_BUDGET_SECONDS = 2.5
TIMING_SAMPLES = 3

# Subprocess: record any *attempt* to import a forbidden module during a bare
# `import snowflake_sql_api`. The recorder returns a stub module for forbidden
# names, so the import succeeds (and is recorded) regardless of whether the real
# dependency is installed -- making the guard deterministic across environments.
_LEAK_SCRIPT = """
import importlib.abc
import importlib.machinery
import sys
import types

FORBIDDEN = {forbidden!r}
attempted = []


class _Recorder(importlib.abc.MetaPathFinder, importlib.abc.Loader):
def find_spec(self, name, path, target=None):
if name.split(".")[0] in FORBIDDEN:
attempted.append(name.split(".")[0])
return importlib.machinery.ModuleSpec(name, self)
return None

def create_module(self, spec):
return types.ModuleType(spec.name)

def exec_module(self, module):
pass


sys.meta_path.insert(0, _Recorder())
import snowflake_sql_api # noqa: F401
print(",".join(sorted(set(attempted))))
""".format(
forbidden=FORBIDDEN_ON_BARE_IMPORT
)

_TIMING_SCRIPT = """
import time
start = time.perf_counter()
import snowflake_sql_api # noqa: F401
print(time.perf_counter() - start)
"""


def _run(script: str) -> subprocess.CompletedProcess:
return subprocess.run(
[sys.executable, "-c", script], capture_output=True, text=True
)
Comment on lines +79 to +82

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Make subprocess execution deterministic to the checked-out repo and time-bounded.

_run() inherits caller cwd, so these tests can accidentally validate an installed snowflake_sql_api instead of this checkout. It also has no timeout, so CI can hang on a stuck child import.

Proposed patch
 import subprocess
 import sys
+from pathlib import Path
@@
 TIMING_SAMPLES = 3
+_REPO_ROOT = Path(__file__).resolve().parent.parent
@@
 def _run(script: str) -> subprocess.CompletedProcess:
     return subprocess.run(
-        [sys.executable, "-c", script], capture_output=True, text=True
+        [sys.executable, "-c", script],
+        capture_output=True,
+        text=True,
+        cwd=_REPO_ROOT,
+        timeout=30,
     )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def _run(script: str) -> subprocess.CompletedProcess:
return subprocess.run(
[sys.executable, "-c", script], capture_output=True, text=True
)
import subprocess
import sys
from pathlib import Path
# ... existing code ...
TIMING_SAMPLES = 3
_REPO_ROOT = Path(__file__).resolve().parent.parent
# ... existing code ...
def _run(script: str) -> subprocess.CompletedProcess:
return subprocess.run(
[sys.executable, "-c", script],
capture_output=True,
text=True,
cwd=_REPO_ROOT,
timeout=30,
)
🧰 Tools
🪛 ast-grep (0.43.0)

[error] 79-81: Command coming from incoming request
Context: subprocess.run(
[sys.executable, "-c", script], capture_output=True, text=True
)
Note: [CWE-20].

(subprocess-from-request)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/test_import_footprint.py` around lines 79 - 82, The _run() function
needs to be modified to ensure subprocess execution is deterministic and
time-bounded. Add a cwd parameter to the subprocess.run() call to explicitly set
the working directory to the repository root (ensuring the checked-out
snowflake_sql_api is used instead of any installed version), and add a timeout
parameter to set a time limit on subprocess execution to prevent CI hangs. Both
parameters should be passed directly to the subprocess.run() call along with the
existing arguments.



def test_bare_import_pulls_no_heavy_deps() -> None:
result = _run(_LEAK_SCRIPT)
assert (
result.returncode == 0
), f"`import snowflake_sql_api` failed in a clean subprocess:\n{result.stderr}"
attempted = [name for name in result.stdout.strip().split(",") if name]
assert not attempted, (
f"bare `import snowflake_sql_api` tried to import heavy deps: {attempted}. "
"Keep optional dependencies behind their extra (lazy import)."
)


def test_bare_import_is_fast() -> None:
samples = []
for _ in range(TIMING_SAMPLES):
result = _run(_TIMING_SCRIPT)
assert result.returncode == 0, result.stderr
samples.append(float(result.stdout.strip()))
best = min(samples)
assert best < IMPORT_BUDGET_SECONDS, (
f"fastest of {TIMING_SAMPLES} imports was {best:.3f}s, over the "
f"{IMPORT_BUDGET_SECONDS}s budget. A heavy import likely crept into the "
"module-load path."
)