From 5321d7373d3e905e703b25725b01ee13aac661c7 Mon Sep 17 00:00:00 2001
From: Minh Vu
Date: Sat, 27 Jun 2026 22:56:29 +0200
Subject: [PATCH] Fix local dataframe extension detection

---
Reviewer notes (placed after the three-dash line, so they are not part of
the commit message):

* io_helpers.py: the local-path branch of smart_load_dataframe now strips
  the leading dot from Path.suffix before lower-casing it. The URL branch,
  visible as the first context line of that hunk, already applies
  .suffix.lstrip(".").lower(), so both branches now produce the same form.
* test_io_helpers.py: the mocked Path exposes suffix as a plain string
  (".csv" / ".json") instead of stubbing suffix.lower(), so the mock-based
  test runs through the real lstrip/lower chain.
* test_smart_load_dataframe_from_real_local_files writes csv, json
  (orient="records", lines=True) and parquet files under tmp_path and
  loads each one back, passing the location both as Path and as str.
* NOTE(review): this copy was rebuilt from a whitespace-collapsed paste.
  Indentation and the position of blank context lines are inferred from
  the hunk line counts and usual Python layout; confirm against the tree
  before applying.
* NOTE(review): the From: header carries no address, presumably lost in
  transit; restore it before feeding this file to git am.
* NOTE(review): path_type is the only unannotated parameter of the new
  test; consider annotating it in a follow-up.

 .../data_designer/config/utils/io_helpers.py  |  2 +-
 .../tests/config/utils/test_io_helpers.py     | 22 +++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/packages/data-designer-config/src/data_designer/config/utils/io_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/io_helpers.py
index 8247a36b7..7bceeff05 100644
--- a/packages/data-designer-config/src/data_designer/config/utils/io_helpers.py
+++ b/packages/data-designer-config/src/data_designer/config/utils/io_helpers.py
@@ -196,7 +196,7 @@ def smart_load_dataframe(dataframe: str | Path | pd.DataFrame) -> pd.DataFrame:
         ext = PurePosixPath(urlparse(dataframe).path).suffix.lstrip(".").lower()
     else:
         dataframe = Path(dataframe)
-        ext = dataframe.suffix.lower()
+        ext = dataframe.suffix.lstrip(".").lower()
         if not dataframe.exists():
             raise FileNotFoundError(f"File not found: {dataframe}")
 
diff --git a/packages/data-designer-config/tests/config/utils/test_io_helpers.py b/packages/data-designer-config/tests/config/utils/test_io_helpers.py
index 5b2b8b98a..b70d2e684 100644
--- a/packages/data-designer-config/tests/config/utils/test_io_helpers.py
+++ b/packages/data-designer-config/tests/config/utils/test_io_helpers.py
@@ -64,7 +64,7 @@ def test_smart_load_dataframe(mock_read_parquet, mock_read_json, mock_read_csv,
 
     mock_path = MagicMock(autospec=Path)
     mock_path.exists.return_value = True
-    mock_path.suffix.lower.return_value = "csv"
+    mock_path.suffix = ".csv"
     mock_path_cls.return_value = mock_path
 
     stub_base_path_str = "/some/path/to/data.{extension}"
@@ -73,13 +73,31 @@ def test_smart_load_dataframe(mock_read_parquet, mock_read_json, mock_read_csv,
     mock_read_csv.assert_called_once_with(mock_path)
 
     mock_path.reset_mock()
-    mock_path.suffix.lower.return_value = "json"
+    mock_path.suffix = ".json"
     mock_path.exists.return_value = False
     path_json = stub_base_path_str.format(extension="json")
     with pytest.raises(FileNotFoundError):
         _ = smart_load_dataframe(Path(path_json))
 
 
+@pytest.mark.parametrize("extension", ["csv", "json", "parquet"])
+@pytest.mark.parametrize("path_type", [Path, str], ids=["path", "string"])
+def test_smart_load_dataframe_from_real_local_files(tmp_path: Path, extension: str, path_type) -> None:
+    expected = lazy.pd.DataFrame({"id": [1, 2], "value": ["alpha", "beta"]})
+    file_path = tmp_path / f"data.{extension}"
+
+    if extension == "csv":
+        expected.to_csv(file_path, index=False)
+    elif extension == "json":
+        expected.to_json(file_path, orient="records", lines=True)
+    else:
+        expected.to_parquet(file_path, index=False)
+
+    loaded = smart_load_dataframe(path_type(file_path))
+
+    lazy.pd.testing.assert_frame_equal(loaded, expected)
+
+
 def test_smart_load_yaml():
     stub_dict = {
         "hello": "world",