From 99004efb0ba3e13159dea791132156667488070a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 9 Apr 2026 14:36:16 -0400 Subject: [PATCH] Remove unused Config class that could not be connected to a SessionContext Closes #322. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/core/src/config.rs | 104 ---------------------- crates/core/src/lib.rs | 3 - docs/source/contributor-guide/ffi.rst | 15 +--- docs/source/user-guide/upgrade-guides.rst | 27 ++++++ python/datafusion/__init__.py | 2 - python/tests/test_concurrency.py | 35 +------- python/tests/test_config.py | 42 --------- 7 files changed, 30 insertions(+), 198 deletions(-) delete mode 100644 crates/core/src/config.rs delete mode 100644 python/tests/test_config.py diff --git a/crates/core/src/config.rs b/crates/core/src/config.rs deleted file mode 100644 index fdb693a12..000000000 --- a/crates/core/src/config.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use datafusion::config::ConfigOptions; -use parking_lot::RwLock; -use pyo3::prelude::*; -use pyo3::types::*; - -use crate::common::data_type::PyScalarValue; -use crate::errors::PyDataFusionResult; -#[pyclass( - from_py_object, - name = "Config", - module = "datafusion", - subclass, - frozen -)] -#[derive(Clone)] -pub(crate) struct PyConfig { - config: Arc>, -} - -#[pymethods] -impl PyConfig { - #[new] - fn py_new() -> Self { - Self { - config: Arc::new(RwLock::new(ConfigOptions::new())), - } - } - - /// Get configurations from environment variables - #[staticmethod] - pub fn from_env() -> PyDataFusionResult { - Ok(Self { - config: Arc::new(RwLock::new(ConfigOptions::from_env()?)), - }) - } - - /// Get a configuration option - pub fn get<'py>(&self, key: &str, py: Python<'py>) -> PyResult> { - let value: Option> = { - let options = self.config.read(); - options - .entries() - .into_iter() - .find_map(|entry| (entry.key == key).then_some(entry.value.clone())) - }; - - match value { - Some(value) => Ok(value.into_pyobject(py)?), - None => Ok(None::.into_pyobject(py)?), - } - } - - /// Set a configuration option - pub fn set(&self, key: &str, value: Py, py: Python) -> PyDataFusionResult<()> { - let scalar_value: PyScalarValue = value.extract(py)?; - let mut options = self.config.write(); - options.set(key, scalar_value.0.to_string().as_str())?; - Ok(()) - } - - /// Get all configuration options - pub fn get_all(&self, py: Python) -> PyResult> { - let entries: Vec<(String, Option)> = { - let options = self.config.read(); - options - .entries() - .into_iter() - .map(|entry| (entry.key.clone(), entry.value.clone())) - .collect() - }; - - let dict = PyDict::new(py); - for (key, value) in entries { - dict.set_item(key, value.into_pyobject(py)?)?; - } - Ok(dict.into()) - } - - fn __repr__(&self, py: Python) -> PyResult { - match self.get_all(py) { - Ok(result) => Ok(format!("Config({result})")), - Err(err) => Ok(format!("Error: {:?}", err.to_string())), - } - } -} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index fc2d006d3..3af1afac9 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -30,8 +30,6 @@ use pyo3::prelude::*; pub mod catalog; pub mod common; -#[allow(clippy::borrow_deref_ref)] -mod config; #[allow(clippy::borrow_deref_ref)] pub mod context; #[allow(clippy::borrow_deref_ref)] @@ -90,7 +88,6 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/docs/source/contributor-guide/ffi.rst b/docs/source/contributor-guide/ffi.rst index e0158e0a2..c89b99849 100644 --- a/docs/source/contributor-guide/ffi.rst +++ b/docs/source/contributor-guide/ffi.rst @@ -149,20 +149,7 @@ interior-mutable state. In practice this means that any ``#[pyclass]`` containin ``Arc>`` or similar synchronized primitive must opt into ``#[pyclass(frozen)]`` unless there is a compelling reason not to. -The :mod:`datafusion` configuration helpers illustrate the preferred pattern. The -``PyConfig`` class in :file:`src/config.rs` stores an ``Arc>`` and is -explicitly frozen so callers interact with configuration state through provided methods -instead of mutating the container directly: - -.. code-block:: rust - - #[pyclass(from_py_object, name = "Config", module = "datafusion", subclass, frozen)] - #[derive(Clone)] - pub(crate) struct PyConfig { - config: Arc>, - } - -The same approach applies to execution contexts. ``PySessionContext`` in +The execution context illustrates the preferred pattern. ``PySessionContext`` in :file:`src/context.rs` stays frozen even though it shares mutable state internally via ``SessionContext``. This ensures PyO3 tracks borrows correctly while Python-facing APIs clone the inner ``SessionContext`` or return new wrappers instead of mutating the diff --git a/docs/source/user-guide/upgrade-guides.rst b/docs/source/user-guide/upgrade-guides.rst index 2ac7f7703..12d0f1bb3 100644 --- a/docs/source/user-guide/upgrade-guides.rst +++ b/docs/source/user-guide/upgrade-guides.rst @@ -18,6 +18,33 @@ Upgrade Guides ============== +DataFusion 54.0.0 +----------------- + +The ``Config`` class has been removed. It was a standalone wrapper around +``ConfigOptions`` that could not be connected to a ``SessionContext``, making it +effectively unusable. Use :py:class:`~datafusion.context.SessionConfig` instead, +which is passed directly to ``SessionContext``. + +Before: + +.. code-block:: python + + from datafusion import Config + + config = Config() + config.set("datafusion.execution.batch_size", "4096") + # config could not be passed to SessionContext + +After: + +.. code-block:: python + + from datafusion import SessionConfig, SessionContext + + config = SessionConfig().set("datafusion.execution.batch_size", "4096") + ctx = SessionContext(config) + DataFusion 53.0.0 ----------------- diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index ee02c921d..94a33d5c4 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -34,7 +34,6 @@ from . import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. -from ._internal import Config from .catalog import Catalog, Table from .col import col, column from .common import DFSchema @@ -76,7 +75,6 @@ "Accumulator", "AggregateUDF", "Catalog", - "Config", "CsvReadOptions", "DFSchema", "DataFrame", diff --git a/python/tests/test_concurrency.py b/python/tests/test_concurrency.py index f790f9473..e7fe44f01 100644 --- a/python/tests/test_concurrency.py +++ b/python/tests/test_concurrency.py @@ -20,7 +20,7 @@ from concurrent.futures import ThreadPoolExecutor import pyarrow as pa -from datafusion import Config, SessionContext, col, lit +from datafusion import SessionContext, col, lit from datafusion import functions as f from datafusion.common import SqlSchema @@ -34,16 +34,14 @@ def _run_in_threads(fn, count: int = 8) -> None: def test_concurrent_access_to_shared_structures() -> None: - """Exercise SqlSchema, Config, and DataFrame concurrently.""" + """Exercise SqlSchema and DataFrame concurrently.""" schema = SqlSchema("concurrency") - config = Config() ctx = SessionContext() batch = pa.record_batch([pa.array([1, 2, 3], type=pa.int32())], names=["value"]) df = ctx.create_dataframe([[batch]]) - config_key = "datafusion.execution.batch_size" expected_rows = batch.num_rows def worker(index: int) -> None: @@ -54,41 +52,12 @@ def worker(index: int) -> None: assert isinstance(schema.views, list) assert isinstance(schema.functions, list) - config.set(config_key, str(1024 + index)) - assert config.get(config_key) is not None - # Access the full config map to stress lock usage. - assert config_key in config.get_all() - batches = df.collect() assert sum(batch.num_rows for batch in batches) == expected_rows _run_in_threads(worker, count=12) -def test_config_set_during_get_all() -> None: - """Ensure config writes proceed while another thread reads all entries.""" - - config = Config() - key = "datafusion.execution.batch_size" - - def reader() -> None: - for _ in range(200): - # get_all should not hold the lock while converting to Python objects - config.get_all() - - def writer() -> None: - for index in range(200): - config.set(key, str(1024 + index)) - - with ThreadPoolExecutor(max_workers=2) as executor: - reader_future = executor.submit(reader) - writer_future = executor.submit(writer) - reader_future.result(timeout=10) - writer_future.result(timeout=10) - - assert config.get(key) is not None - - def test_case_builder_reuse_from_multiple_threads() -> None: """Ensure the case builder can be safely reused across threads.""" diff --git a/python/tests/test_config.py b/python/tests/test_config.py deleted file mode 100644 index c1d7f97e1..000000000 --- a/python/tests/test_config.py +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pytest -from datafusion import Config - - -@pytest.fixture -def config(): - return Config() - - -def test_get_then_set(config): - config_key = "datafusion.optimizer.filter_null_join_keys" - - assert config.get(config_key) == "false" - - config.set(config_key, "true") - assert config.get(config_key) == "true" - - -def test_get_all(config): - config_dict = config.get_all() - assert config_dict["datafusion.catalog.create_default_catalog_and_schema"] == "true" - - -def test_get_invalid_config(config): - assert config.get("not.valid.key") is None