diff --git a/src/spatialdata_io/__main__.py b/src/spatialdata_io/__main__.py index 293260a8..f0a5f9f7 100644 --- a/src/spatialdata_io/__main__.py +++ b/src/spatialdata_io/__main__.py @@ -794,9 +794,9 @@ def xenium_wrapper( @_input_output_click_options @click.option( "--parsing-style", - type=click.Choice(["auto", "processed_single_folder", "processed_multiple_folders", "raw"]), - default="auto", - help="Parsing style for MACSima data. [default: auto]", + type=click.Choice(["processed_single_folder", "processed_multiple_folders", "raw"]), + default="processed_single_folder", + help="Parsing style for MACSima data. [default: processed_single_folder]", ) @click.option( "--filter-folder-names", @@ -870,7 +870,7 @@ def macsima_wrapper( input: str, output: str, *, - parsing_style: str = "auto", + parsing_style: str = "processed_single_folder", filter_folder_names: list[str] | None = None, subset: int | None = None, c_subset: int | None = None, diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py index 40333960..1d0650bf 100644 --- a/src/spatialdata_io/readers/macsima.py +++ b/src/spatialdata_io/readers/macsima.py @@ -48,7 +48,6 @@ class MACSimaParsingStyle(ModeEnum): PROCESSED_SINGLE_FOLDER = "processed_single_folder" PROCESSED_MULTIPLE_FOLDERS = "processed_multiple_folders" RAW = "raw" - AUTO = "auto" @dataclass @@ -224,7 +223,7 @@ def get_stack(self) -> da.Array: def macsima( path: str | Path, - parsing_style: MACSimaParsingStyle | str = MACSimaParsingStyle.AUTO, + parsing_style: MACSimaParsingStyle | str = MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER, filter_folder_names: list[str] | None = None, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), subset: int | None = None, @@ -255,7 +254,8 @@ def macsima( path Path to the directory containing the data. parsing_style - Parsing style to use. If ``auto``, the parsing style is determined based on the contents of the path. + Parsing style to use. If ``processed_single_folder``, all subfolders of ``path`` are combined into a stack. + If ``processed_multiple_folders``, a stack is created for each folder directly beneath ``path``. filter_folder_names List of folder names to filter out when parsing multiple folders. imread_kwargs @@ -295,19 +295,13 @@ def macsima( if not isinstance(parsing_style, MACSimaParsingStyle): parsing_style = MACSimaParsingStyle(parsing_style) - if parsing_style == MACSimaParsingStyle.AUTO: - assert path.is_dir(), f"Path {path} is not a directory." - - if any(p.suffix in [".tif", ".tiff"] for p in path.iterdir()): - # if path contains tifs, do parse_processed_folder on path - parsing_style = MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER - elif all(p.is_dir() for p in path.iterdir() if not p.name.startswith(".")): - # if path contains only folders or hidden files, do parse_processed_folder on each folder - parsing_style = MACSimaParsingStyle.PROCESSED_MULTIPLE_FOLDERS - else: - raise ValueError(f"Cannot determine parsing style for path {path}. Please specify the parsing style.") - if parsing_style == MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER: + if filter_folder_names: + warnings.warn( + "single_processed_folder was requested but filter_folder_names was specified. Note that it is ignored here, filtering only happens for processed_multi_folders!", + UserWarning, + stacklevel=2, + ) return parse_processed_folder( path=path, imread_kwargs=imread_kwargs, @@ -332,6 +326,9 @@ def macsima( for p in path.iterdir() if p.is_dir() and (not filter_folder_names or not any(f in p.name for f in filter_folder_names)) ]: + if not len(list(p.glob("*.tif*"))): + warnings.warn(f"No tif files found in {p}, skipping it!", UserWarning, stacklevel=2) + continue sdatas[p.stem] = parse_processed_folder( path=p, imread_kwargs=imread_kwargs, @@ -625,7 +622,7 @@ def parse_processed_folder( nuclei_channel_name: str = "DAPI", split_threshold_nuclei_channel: int | None = 2, skip_rounds: list[int] | None = None, - file_pattern: str = "*.tif*", + file_pattern: str = "**/*.tif*", include_cycle_in_channel_name: bool = False, ) -> SpatialData: """Parse a single folder containing images from a cyclical imaging platform.""" diff --git a/tests/test_macsima.py b/tests/test_macsima.py index 72b33fbe..49eae625 100644 --- a/tests/test_macsima.py +++ b/tests/test_macsima.py @@ -35,7 +35,6 @@ _parse_v1_ome_metadata, macsima, ) -from tests._utils import skip_if_below_python_version RNG = da.random.default_rng(seed=0) @@ -101,7 +100,6 @@ def test_exception_on_no_valid_files(tmp_path: Path) -> None: macsima(tmp_path) -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [ @@ -123,7 +121,6 @@ def test_image_size(dataset: str, expected: dict[str, Any]) -> None: assert extent == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [("OMAP10_small", 4), ("OMAP23_small", 5)], @@ -139,7 +136,6 @@ def test_total_channels(dataset: str, expected: int) -> None: assert channels == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [ @@ -161,7 +157,6 @@ def test_channel_names_with_cycle_in_name(dataset: str, expected: list[str]) -> assert list(channels) == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [ @@ -178,7 +173,6 @@ def test_total_rounds(dataset: str, expected: list[int]) -> None: assert max_cycle == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,skip_rounds,expected", [ @@ -201,6 +195,57 @@ def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str]) assert list(channels) == expected, f"Expected {expected}, got {list(channels)}" +def test_unsupported_parsing_styles() -> None: + with pytest.raises(ValueError, match="Invalid option `not_a_parsing_style` for `MACSimaParsingStyle`."): + macsima(Path(), parsing_style="not_a_parsing_style") + + +def test_processed_single_folder_parsing_returns_a_single_image_stack(tmp_path: Path) -> None: + omap10_path = Path("./data/OMAP10_small") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_1") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_2") + + sdata = macsima(tmp_path, parsing_style="processed_single_folder") + + assert len(sdata.images) == 1 + # omap10_small has 4 channels, so we expect 8 here + el = sdata[list(sdata.images.keys())[0]] + assert len(get_channel_names(el)) == 8 + assert len(sdata.tables) == 1 + + +def test_processed_single_folder_parsing_warns_when_specifying_filtered_folders(tmp_path: Path) -> None: + omap10_path = Path("./data/OMAP10_small") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_1") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_2") + with pytest.warns(UserWarning, match="filtering only happens for processed_multi_folders"): + macsima(tmp_path, parsing_style="processed_single_folder", filter_folder_names=["OMAP10_small_2"]) + + +def test_processed_multiple_folders_returns_an_image_stack_per_subfolder(tmp_path: Path) -> None: + omap10_path = Path("./data/OMAP10_small") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_1") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_2") + + sdata = macsima(tmp_path, parsing_style="processed_multiple_folders") + + assert len(sdata.images) == 2 + for el in sdata.images.keys(): + assert len(get_channel_names(sdata[el])) == 4 + assert len(sdata.tables) == 2 + + +def test_processed_multiple_folders_skips_filtered_folder_names(tmp_path: Path) -> None: + shutil.copytree(Path("./data/OMAP10_small"), tmp_path / "OMAP10_small") + shutil.copytree(Path("./data/OMAP23_small"), tmp_path / "OMAP23_small") + + sdata = macsima(tmp_path, parsing_style="processed_multiple_folders", filter_folder_names=["OMAP10_small"]) + assert len(sdata.images) == 1 + assert list(sdata.images.keys()) == ["OMAP23_small_image"] + assert len(sdata.tables) == 1 + assert list(sdata.tables.keys()) == ["OMAP23_small_table"] + + METADATA_COLUMN_ORDER = [ "cycle", "imagetype", @@ -242,7 +287,6 @@ def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str]) ) -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected_df", [ @@ -262,11 +306,6 @@ def test_metadata_table(dataset: str, expected_df: pd.DataFrame) -> None: pd.testing.assert_frame_equal(actual, expected_df) -def test_parsing_style() -> None: - with pytest.raises(ValueError): - macsima(Path(), parsing_style="not_a_parsing_style") - - def test_mci_sort_by_channel() -> None: sizes = [100, 200, 300] c_names = ["test11", "test3", "test2"] @@ -315,7 +354,6 @@ def test_mci_array_reference() -> None: assert da.all(mci.data[0] == orig_arr1) -@skip_if_below_python_version() @pytest.mark.parametrize("dataset", ["OMAP10_small", "OMAP23_small"]) def test_cli_macsima(runner: CliRunner, dataset: str) -> None: f = Path("./data") / dataset