diff --git a/.gitignore b/.gitignore index e09293e..a08610a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ uv.lock # Tests and coverage /data/ +/tests/data/scanpy_cache/ /node_modules/ /.coverage* diff --git a/CHANGELOG.md b/CHANGELOG.md index 31b4c11..495a982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning][]. ### Changed - Dropped Python 3.11 support; the minimum supported version is now Python 3.12, aligning with `cellrank` (≥2.1 requires Python ≥3.12) and unpinning the `tutorials` extra from the old `cellrank` 2.0.7 {pr}`81` +### Fixed +- Fixed intermittent `OSError: Can't synchronously read data (filter returned failure during read)` when running the test suite under `pytest -n auto`, caused by xdist workers racing on scanpy's shared `pbmc3k_raw.h5ad` dataset cache. Each worker now uses its own cache directory. + ## [v0.2.5] ### Added diff --git a/tests/conftest.py b/tests/conftest.py index 63281d1..7168a3c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import numpy as np @@ -11,6 +12,24 @@ TESTS_DIR = Path(__file__).parent +def pytest_configure(config): + """Give each pytest-xdist worker its own scanpy dataset cache directory. + + ``sc.datasets.pbmc3k()`` (used by the ``adata_pbmc3k`` fixture) downloads and + reads a single shared cache file (``<datasetdir>/pbmc3k_raw.h5ad``). Under + ``pytest -n auto`` multiple worker processes race on that file -- one worker + reading it while another is still downloading/writing -- which intermittently + surfaces as an HDF5 ``OSError: Can't synchronously read data (filter returned + failure during read)``. Pointing each worker at its own cache directory + removes the shared-file contention entirely. 
+ """ + worker_id = os.environ.get("PYTEST_XDIST_WORKER") + if worker_id is not None: + cache_dir = TESTS_DIR / "data" / "scanpy_cache" / worker_id + cache_dir.mkdir(parents=True, exist_ok=True) + sc.settings.datasetdir = cache_dir + + @pytest.fixture def sample_distances(): # 3 samples, 2 neighbors each