From 0e4c366199e484780ec3706191d9def17f5f28f2 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 15:35:38 +0200 Subject: [PATCH 1/6] :sparkles: add support for custom httpx client for V1 and V2 --- mindee/input/url_input_source.py | 17 +++++++-- mindee/v1/client.py | 22 +++++++++--- mindee/v1/mindee_http/base_endpoint.py | 11 +++++- mindee/v1/mindee_http/endpoint.py | 41 +++++++++++++--------- mindee/v1/mindee_http/workflow_endpoint.py | 11 +++--- mindee/v2/client.py | 8 +++-- mindee/v2/mindee_http/mindee_api_v2.py | 22 +++++++----- tests/v2/test_client_integration.py | 33 +++++++++++++++++ 8 files changed, 126 insertions(+), 39 deletions(-) diff --git a/mindee/input/url_input_source.py b/mindee/input/url_input_source.py index 1f32baea..b3e33d14 100644 --- a/mindee/input/url_input_source.py +++ b/mindee/input/url_input_source.py @@ -173,7 +173,14 @@ def __fill_filename(self, filename=None) -> str: return filename @staticmethod - def __make_request(url, auth, headers, redirects, max_redirects) -> bytes: + def __make_request( + url, + auth, + headers, + redirects, + max_redirects, + http_client: httpx.Client | None = None, + ) -> bytes: """ Makes an HTTP request to the given URL, while following redirections. @@ -185,11 +192,15 @@ def __make_request(url, auth, headers, redirects, max_redirects) -> bytes: :return: The content of the response. :raises MindeeSourceError: If max redirects are exceeded or the request fails. """ - result = httpx.get(url, headers=headers, timeout=120, auth=auth) + http_client = http_client or httpx.Client() + result = http_client.get( + url, headers=headers, timeout=120, auth=auth, follow_redirects=True + ) if 299 < result.status_code < 400: if redirects == max_redirects: raise MindeeSourceError( - f"Can't reach URL after {redirects} out of {max_redirects} redirects, " + f"Can't reach URL after {redirects} out of {max_redirects} " + f"redirects, " f"aborting operation." ) return URLInputSource.__make_request( diff --git a/mindee/v1/client.py b/mindee/v1/client.py index cc500a85..d0cf3128 100644 --- a/mindee/v1/client.py +++ b/mindee/v1/client.py @@ -1,5 +1,7 @@ from time import sleep +import httpx + from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeClientError, MindeeError from mindee.error.mindee_http_error import handle_error @@ -59,14 +61,21 @@ class Client(ClientMixin): """ api_key: str + """API key for all endpoints.""" + http_client: httpx.Client + """HTTP client for making requests.""" - def __init__(self, api_key: str = "") -> None: + def __init__( + self, api_key: str = "", http_client: httpx.Client | None = None + ) -> None: """ Mindee API Client. :param api_key: Your API key for all endpoints + :param http_client: HTTP client for making requests. """ self.api_key = api_key + self.http_client = http_client or httpx.Client() def parse( self, @@ -522,7 +531,8 @@ def _send_to_workflow( raise MindeeClientError("No input document provided") workflow_endpoint = WorkflowEndpoint( - WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id) + WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id), + self.http_client, ) response = workflow_endpoint.workflow_execution_post(input_source, options) @@ -555,8 +565,12 @@ def _build_endpoint( version=version, ) if account_name and len(account_name) > 0 and account_name != "mindee": - return CustomEndpoint(endpoint_name, account_name, version, api_settings) - return Endpoint(endpoint_name, account_name, version, api_settings) + return CustomEndpoint( + endpoint_name, account_name, version, api_settings, self.http_client + ) + return Endpoint( + endpoint_name, account_name, version, api_settings, self.http_client + ) def create_endpoint( self, diff --git a/mindee/v1/mindee_http/base_endpoint.py b/mindee/v1/mindee_http/base_endpoint.py index 6b252d8a..0f6b4418 100644 --- a/mindee/v1/mindee_http/base_endpoint.py +++ b/mindee/v1/mindee_http/base_endpoint.py @@ -1,13 +1,22 @@ +import httpx + from mindee.v1.mindee_http.base_settings import BaseSettings class BaseEndpoint: """Base endpoint class for the Mindee API.""" - def __init__(self, settings: BaseSettings) -> None: + settings: BaseSettings + http_client: httpx.Client + + def __init__( + self, settings: BaseSettings, http_client: httpx.Client | None = None + ) -> None: """ Base API endpoint class for all endpoints. :param settings: Settings relating to all endpoints. + :param http_client: HTTP client for making requests. """ self.settings = settings + self.http_client = http_client or httpx.Client() diff --git a/mindee/v1/mindee_http/endpoint.py b/mindee/v1/mindee_http/endpoint.py index 4d6e0a5d..f6dbd379 100644 --- a/mindee/v1/mindee_http/endpoint.py +++ b/mindee/v1/mindee_http/endpoint.py @@ -13,7 +13,12 @@ class Endpoint(BaseEndpoint): settings: MindeeAPI def __init__( - self, url_name: str, owner: str, version: str, settings: MindeeAPI + self, + url_name: str, + owner: str, + version: str, + settings: MindeeAPI, + http_client: httpx.Client | None = None, ) -> None: """ Generic API endpoint for a product. @@ -21,8 +26,10 @@ def __init__( :param owner: owner of the product :param url_name: name of the product as it appears in the URL :param version: interface version + :param settings: settings for the API + :param http_client: HTTP client for making requests. """ - super().__init__(settings) + super().__init__(settings, http_client) self.owner = owner self.url_name = url_name self.version = version @@ -42,7 +49,8 @@ def predict_req_post( :param include_words: Include raw OCR words in the response :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param full_text: Whether to include the full OCR text response in compatible + APIs. :return: httpx response """ return self._custom_request( @@ -66,7 +74,8 @@ def predict_async_req_post( :param include_words: Include raw OCR words in the response :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param full_text: Whether to include the full OCR text response in compatible + APIs. :param workflow_id: Workflow ID. :param rag: If set, will enable Retrieval-Augmented Generation. :return: httpx response @@ -112,7 +121,7 @@ def _custom_request( if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = httpx.post( + response = self.http_client.post( url=url, headers=self.settings.base_headers, data=data, @@ -121,7 +130,7 @@ def _custom_request( ) else: files = {"document": input_source.read_contents(close_file)} - response = httpx.post( + response = self.http_client.post( url=url, files=files, headers=self.settings.base_headers, @@ -138,7 +147,7 @@ def document_queue_req_get(self, queue_id: str) -> httpx.Response: :param queue_id: queue_id received from the API """ - return httpx.get( + return self.http_client.get( f"{self.settings.url_root}/documents/queue/{queue_id}", headers=self.settings.base_headers, timeout=self.settings.request_timeout, @@ -147,7 +156,7 @@ def document_queue_req_get(self, queue_id: str) -> httpx.Response: def openapi_get_req(self) -> httpx.Response: """Get the OpenAPI specification of the product.""" - return httpx.get( + return self.http_client.get( f"{self.settings.url_root}/openapi.json", headers=self.settings.base_headers, timeout=self.settings.request_timeout, @@ -163,7 +172,7 @@ def document_feedback_req_put( :param document_id: ID of the document to send feedback to. :param feedback: Feedback object to send. """ - return httpx.put( + return self.http_client.put( f"{self.settings.base_url}/v1/documents/{document_id}/feedback", headers=self.settings.base_headers, data=feedback, @@ -187,7 +196,7 @@ def training_req_post( files = {"document": input_source.read_contents(close_file)} params = {"training": True, "with_candidates": True} - response = httpx.post( + response = self.http_client.post( f"{self.settings.url_root}/predict", files=files, headers=self.settings.base_headers, @@ -209,7 +218,7 @@ def training_async_req_post( files = {"document": input_source.read_contents(close_file)} params = {"training": True, "async": True} - response = httpx.post( + response = self.http_client.post( f"{self.settings.url_root}/predict", files=files, headers=self.settings.base_headers, @@ -240,7 +249,7 @@ def documents_req_get(self, page_id: int = 1) -> httpx.Response: params = { "page": page_id, } - response = httpx.get( + response = self.http_client.get( f"{self.settings.url_root}/documents", headers=self.settings.base_headers, params=params, @@ -260,7 +269,7 @@ def document_req_get(self, document_id: str) -> httpx.Response: "include_candidates": True, "global_orientation": True, } - response = httpx.get( + response = self.http_client.get( f"{self.settings.url_root}/documents/{document_id}", headers=self.settings.base_headers, params=params, @@ -279,7 +288,7 @@ def annotations_req_post( :param annotations: Annotations object :return: httpx response """ - response = httpx.post( + response = self.http_client.post( f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, json=annotations, @@ -297,7 +306,7 @@ def annotations_req_put( :param annotations: Annotations object :return: httpx response """ - response = httpx.put( + response = self.http_client.put( f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, json=annotations, @@ -312,7 +321,7 @@ def annotations_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document to annotate :return: httpx response """ - response = httpx.delete( + response = self.http_client.delete( f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, timeout=self.settings.request_timeout, diff --git a/mindee/v1/mindee_http/workflow_endpoint.py b/mindee/v1/mindee_http/workflow_endpoint.py index 7217013c..da774e18 100644 --- a/mindee/v1/mindee_http/workflow_endpoint.py +++ b/mindee/v1/mindee_http/workflow_endpoint.py @@ -11,14 +11,17 @@ class WorkflowEndpoint(BaseEndpoint): """Workflow endpoint.""" settings: WorkflowSettings + """Settings object.""" - def __init__(self, settings: WorkflowSettings) -> None: + def __init__( + self, settings: WorkflowSettings, http_client: httpx.Client | None = None + ) -> None: """ Workflow Endpoint. :param settings: Settings object. """ - super().__init__(settings) + super().__init__(settings, http_client) def workflow_execution_post( self, @@ -50,7 +53,7 @@ def workflow_execution_post( if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = httpx.post( + response = self.http_client.post( self.settings.url_root, headers=self.settings.base_headers, data=data, @@ -59,7 +62,7 @@ def workflow_execution_post( ) else: files = {"document": input_source.read_contents(True)} - response = httpx.post( + response = self.http_client.post( self.settings.url_root, files=files, headers=self.settings.base_headers, diff --git a/mindee/v2/client.py b/mindee/v2/client.py index 22d9c5ab..183e76c8 100644 --- a/mindee/v2/client.py +++ b/mindee/v2/client.py @@ -1,6 +1,8 @@ from time import sleep from typing import TypeVar +import httpx + from mindee.client_mixin import ClientMixin from mindee.client_options.polling_options import PollingOptions from mindee.error.mindee_error import MindeeError @@ -27,14 +29,16 @@ class Client(ClientMixin): api_key: str | None mindee_api: MindeeAPIV2 - def __init__(self, api_key: str | None = None) -> None: + def __init__( + self, api_key: str | None = None, http_client: httpx.Client | None = None + ) -> None: """ Mindee API Client. :param api_key: Your API key for all endpoints """ self.api_key = api_key - self.mindee_api = MindeeAPIV2(api_key) + self.mindee_api = MindeeAPIV2(api_key, http_client) def enqueue( self, diff --git a/mindee/v2/mindee_http/mindee_api_v2.py b/mindee/v2/mindee_http/mindee_api_v2.py index d2e384b0..050b4013 100644 --- a/mindee/v2/mindee_http/mindee_api_v2.py +++ b/mindee/v2/mindee_http/mindee_api_v2.py @@ -38,8 +38,9 @@ class MindeeAPIV2(SettingsMixin): """Root of the URL to use for polling.""" api_key: str | None """API Key for the client.""" + http_client: httpx.Client - def __init__(self, api_key: str | None): + def __init__(self, api_key: str | None, http_client: httpx.Client | None = None): self.api_key = ( api_key if api_key @@ -56,6 +57,7 @@ def __init__(self, api_key: str | None): f"'{API_KEY_V2_ENV_NAME}' environment variable." ) self.url_root = f"{self.base_url.rstrip('/')}" + self.http_client = http_client or httpx.Client() @property def base_headers(self) -> dict[str, str]: @@ -96,7 +98,7 @@ def req_post_inference_enqueue( if isinstance(input_source, LocalInputSource): files = {"file": input_source.read_contents(params.close_file)} - response = httpx.post( + response = self.http_client.post( url=url, files=files, headers=self.base_headers, @@ -105,7 +107,7 @@ def req_post_inference_enqueue( ) elif isinstance(input_source, URLInputSource): data["url"] = input_source.url - response = httpx.post( + response = self.http_client.post( url=url, headers=self.base_headers, data=data, @@ -121,7 +123,7 @@ def req_get_job(self, job_id: str) -> httpx.Response: :param job_id: Job ID, returned by the enqueue request. """ - return httpx.get( + return self.http_client.get( f"{self.url_root}/v2/jobs/{job_id}", headers=self.base_headers, timeout=self.request_timeout, @@ -130,12 +132,13 @@ def req_get_job(self, job_id: str) -> httpx.Response: def req_get_inference_by_url(self, url) -> httpx.Response: """ - Sends a request matching a given inference_id. Returns either a Job or a Document. + Sends a request matching a given inference_id. Returns either a Job or a + Document. :param url: URL to use for the request. :return: Response object from the request. """ - return httpx.get( + return self.http_client.get( url, headers=self.base_headers, timeout=self.request_timeout, @@ -151,7 +154,7 @@ def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: """ url = f"{self.url_root}/v2/{slug}/{inference_id}" - return httpx.get( + return self.http_client.get( url, headers=self.base_headers, timeout=self.request_timeout, @@ -168,7 +171,7 @@ def req_get_search_models( :return: Response object containing search results. """ url = f"{self.url_root}/v2/search/models" - return httpx.get( + return self.http_client.get( url, headers=self.base_headers, params={"name": model_name, "model_type": model_type}, @@ -258,5 +261,6 @@ def _response_json(response: httpx.Response) -> StringDict: return response.json() except httpx.DecodingError as e: raise MindeeHTTPUnknownErrorV2( - f"HTTP {response.status_code} response is not valid JSON: {response.text}" + f"HTTP {response.status_code} response is not valid JSON: " + f"{response.text}" ) from e diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index ebf88ff0..20df7d2b 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,6 +1,7 @@ import os from pathlib import Path +import httpx import pytest from mindee import ExtractionParameters @@ -306,3 +307,35 @@ def test_data_schema_must_succeed( assert response.inference.active_options.data_schema.replace is True assert response.inference.result.fields["test_replace"] is not None assert response.inference.result.fields["test_replace"].value == "a test value" + + +@pytest.mark.integration +@pytest.mark.v2 +def test_custom_httpx_client_event_hook( + findoc_model_id: str, +) -> None: + request_urls = [] + + def log_request(request: httpx.Request): + request_urls.append(str(request.url)) + + httpx_client = httpx.Client(event_hooks={"request": [log_request]}) + client = Client(http_client=httpx_client) + + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + input_source = PathInput(input_path) + + params = ExtractionParameters( + model_id=findoc_model_id, + rag=False, + raw_text=False, + polygon=False, + confidence=False, + webhook_ids=[], + alias="py_integration_custom_httpx_client", + ) + + client.enqueue(input_source, params) + + assert len(request_urls) > 0 + assert any("enqueue" in url for url in request_urls) From 393df368370b2e0f11d2b0d6ffa104d34d17b195 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 16:24:27 +0200 Subject: [PATCH 2/6] fix pagination metadata class name --- .../parsing/search/{pagination.py => paginationmetadata.py} | 2 +- mindee/v2/parsing/search/search_response.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename mindee/v2/parsing/search/{pagination.py => paginationmetadata.py} (97%) diff --git a/mindee/v2/parsing/search/pagination.py b/mindee/v2/parsing/search/paginationmetadata.py similarity index 97% rename from mindee/v2/parsing/search/pagination.py rename to mindee/v2/parsing/search/paginationmetadata.py index 34b63a3d..14e628e3 100644 --- a/mindee/v2/parsing/search/pagination.py +++ b/mindee/v2/parsing/search/paginationmetadata.py @@ -1,4 +1,4 @@ -class Pagination: +class PaginationMetadata: """Pagination metadata.""" per_page: int diff --git a/mindee/v2/parsing/search/search_response.py b/mindee/v2/parsing/search/search_response.py index 233be58d..5dea6b22 100644 --- a/mindee/v2/parsing/search/search_response.py +++ b/mindee/v2/parsing/search/search_response.py @@ -1,5 +1,5 @@ from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.search.pagination import Pagination +from mindee.v2.parsing.search.paginationmetadata import PaginationMetadata from mindee.v2.parsing.search.search_models import SearchModels @@ -8,12 +8,12 @@ class SearchResponse: models: SearchModels """Parsed search payload.""" - pagination: Pagination + pagination: PaginationMetadata """Pagination metadata for the search results.""" def __init__(self, raw_response: StringDict) -> None: self.models = SearchModels(raw_response["models"]) - self.pagination = Pagination(raw_response["pagination"]) + self.pagination = PaginationMetadata(raw_response["pagination"]) def __str__(self) -> str: """ From d788adbffdab2c4473fcd61e740b4d8301879ed7 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:22:08 +0200 Subject: [PATCH 3/6] add better tests --- mindee/input/url_input_source.py | 1 + mindee/v1/mindee_http/base_endpoint.py | 10 +++++++ mindee/v2/client.py | 10 +++++++ pyproject.toml | 1 + tests/fixtures.py | 14 +++++++++ tests/v2/test_client.py | 5 +--- tests/v2/test_client_integration.py | 40 ++++++++++++++++++++++---- 7 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 tests/fixtures.py diff --git a/mindee/input/url_input_source.py b/mindee/input/url_input_source.py index b3e33d14..69ccffb1 100644 --- a/mindee/input/url_input_source.py +++ b/mindee/input/url_input_source.py @@ -212,4 +212,5 @@ def __make_request( f"Couldn't retrieve file from server, error code {result.status_code}." ) + http_client.close() return result.content diff --git a/mindee/v1/mindee_http/base_endpoint.py b/mindee/v1/mindee_http/base_endpoint.py index 0f6b4418..d599c686 100644 --- a/mindee/v1/mindee_http/base_endpoint.py +++ b/mindee/v1/mindee_http/base_endpoint.py @@ -20,3 +20,13 @@ def __init__( """ self.settings = settings self.http_client = http_client or httpx.Client() + + def close(self) -> None: + """Closes the underlying HTTP client.""" + self.http_client.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() diff --git a/mindee/v2/client.py b/mindee/v2/client.py index 183e76c8..87efb314 100644 --- a/mindee/v2/client.py +++ b/mindee/v2/client.py @@ -170,3 +170,13 @@ def search_models( :return: A list of models matching the provided criteria. """ return self.mindee_api.get_models(name, model_type) + + def close(self) -> None: + """Closes the underlying HTTP client.""" + self.mindee_api.http_client.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() diff --git a/pyproject.toml b/pyproject.toml index 795a1390..db5b3da7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ test = [ "toml~=0.10.2", "pytest~=9.0.3", "pytest-cov~=7.1.0", + "respx~=0.23.1" ] docs = [ "sphinx~=9.1.0", diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 00000000..835cc64a --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,14 @@ +import os + +import pytest + + +@pytest.fixture(scope="session") +def findoc_model_id() -> str: + """Identifier of the Financial Document model, supplied through an env var.""" + findoc_model_id = os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID") + if not findoc_model_id: + raise ValueError( + "MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID environment variable is not set" + ) + return findoc_model_id diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index dc4ef3c2..23cf2c1b 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -30,11 +30,10 @@ def env_client(monkeypatch) -> Client: @pytest.fixture def custom_base_url_client(monkeypatch) -> Client: class _FakePostRespError: - status_code = 400 # any non-2xx will do + status_code = 400 is_error = True def json(self): - # Shape must match what handle_error_v2 expects return { "status": 0, "code": "000-000", @@ -157,8 +156,6 @@ def test_enqueue_and_parse_path_with_env_token(custom_base_url_client): def _assert_findoc_inference(response: ExtractionResponse): - # There are already detailed tests of the inference object. - # Here we are just testing whether the client can load OK. assert isinstance(response, ExtractionResponse) assert isinstance(response.inference, ExtractionInference) assert response.inference.id diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index 20df7d2b..e8e8009b 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -3,23 +3,19 @@ import httpx import pytest +import respx from mindee import ExtractionParameters from mindee.input.path_input import PathInput from mindee.input.url_input_source import URLInputSource from mindee.v2.client import Client +from mindee.v2.error import MindeeAPIV2Error from mindee.v2.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.v2.parsing import InferenceActiveOptions from mindee.v2.product.extraction.extraction_response import ExtractionResponse from tests.utils import FILE_TYPES_DIR, V2_PRODUCT_DATA_DIR -@pytest.fixture(scope="session") -def findoc_model_id() -> str: - """Identifier of the Financial Document model, supplied through an env var.""" - return os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID") - - @pytest.fixture(scope="session") def v2_client() -> Client: return Client() @@ -339,3 +335,35 @@ def log_request(request: httpx.Request): assert len(request_urls) > 0 assert any("enqueue" in url for url in request_urls) + + +@pytest.mark.v2 +@respx.mock +def test_explicit_timeout_failure(findoc_model_id) -> None: + respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( + side_effect=httpx.ReadTimeout("Simulated Read Timeout") + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + + with pytest.raises(httpx.ReadTimeout): + client.enqueue(input_source, params) + + +@pytest.mark.v2 +@respx.mock +def test_explicit_500_server_error(findoc_model_id) -> None: + respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( + return_value=httpx.Response(500, json={"message": "Internal Server Error"}) + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + + with pytest.raises(MindeeAPIV2Error) as exc_info: + client.enqueue(input_source, params) + + assert exc_info.value.status_code == 500 From 02c3d207893edefca68ecd294d0ba8221dd50d57 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:36:45 +0200 Subject: [PATCH 4/6] add tests --- tests/v2/test_client.py | 10 ++++++++++ tests/v2/test_client_integration.py | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index 23cf2c1b..00a256c4 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -258,3 +258,13 @@ def test_queue_get(custom_base_url_client): assert not response.job.result_url assert len(response.job.webhooks) == 0 assert not response.job.error + + +@pytest.mark.v2 +def test_client_closes_httpx_connections() -> None: + client = Client(api_key="dummy_key") + client.close() + with pytest.raises( + RuntimeError, match=r"Cannot send a request, as the client has been closed\." + ): + client.mindee_api.http_client.get("https://google.com") diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index e8e8009b..cded0c78 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,3 +1,4 @@ +import concurrent.futures import os from pathlib import Path @@ -367,3 +368,28 @@ def test_explicit_500_server_error(findoc_model_id) -> None: client.enqueue(input_source, params) assert exc_info.value.status_code == 500 + + +@pytest.mark.integration +@pytest.mark.v2 +def test_httpx_multiple_calls_thread_safety(findoc_model_id) -> None: + client = Client() + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + + def make_request(): + input_source = PathInput(input_path) + params = ExtractionParameters(model_id=findoc_model_id) + return client.enqueue(input_source, params) + + thread_count = 20 + successful_responses = 0 + + with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor: + futures = [executor.submit(make_request) for _ in range(thread_count)] + + for future in concurrent.futures.as_completed(futures): + response = future.result() + if response.job and response.job.id: + successful_responses += 1 + + assert successful_responses == thread_count From 6f8c8f0ebc6b83e311ab6c33e25e07f04555b457 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:57:14 +0200 Subject: [PATCH 5/6] fix tests --- mindee/v2/parsing/job/job.py | 2 +- tests/conftest.py | 7 ++++++ tests/fixtures.py | 14 ----------- tests/v2/test_client.py | 39 +++++++++++++++++++++++++++++ tests/v2/test_client_integration.py | 27 +------------------- 5 files changed, 48 insertions(+), 41 deletions(-) delete mode 100644 tests/fixtures.py diff --git a/mindee/v2/parsing/job/job.py b/mindee/v2/parsing/job/job.py index 9871b0b8..492c2fa5 100644 --- a/mindee/v2/parsing/job/job.py +++ b/mindee/v2/parsing/job/job.py @@ -35,7 +35,7 @@ def __init__(self, raw_response: StringDict) -> None: self.id = raw_response["id"] self.status = raw_response["status"] self.error = ( - ErrorResponse(raw_response["error"]) if raw_response["error"] else None + ErrorResponse(raw_response["error"]) if raw_response.get("error") else None ) self.created_at = datetime.fromisoformat( raw_response["created_at"].replace("Z", "+00:00") diff --git a/tests/conftest.py b/tests/conftest.py index c130f51b..b5cc6b4a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import gc +import os import pytest @@ -7,3 +8,9 @@ def force_gc(): yield gc.collect() + + +@pytest.fixture(scope="session") +def findoc_model_id() -> str: + """Identifier of the Financial Document model, supplied through an env var.""" + return os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID", "") diff --git a/tests/fixtures.py b/tests/fixtures.py deleted file mode 100644 index 835cc64a..00000000 --- a/tests/fixtures.py +++ /dev/null @@ -1,14 +0,0 @@ -import os - -import pytest - - -@pytest.fixture(scope="session") -def findoc_model_id() -> str: - """Identifier of the Financial Document model, supplied through an env var.""" - findoc_model_id = os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID") - if not findoc_model_id: - raise ValueError( - "MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID environment variable is not set" - ) - return findoc_model_id diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index 00a256c4..4dfc58ca 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -1,8 +1,12 @@ +import concurrent.futures import json import os +import re +import time import httpx import pytest +import respx from mindee import ExtractionParameters, ExtractionResponse, LocalResponse from mindee.error.mindee_error import MindeeError @@ -268,3 +272,38 @@ def test_client_closes_httpx_connections() -> None: RuntimeError, match=r"Cannot send a request, as the client has been closed\." ): client.mindee_api.http_client.get("https://google.com") + + +@pytest.mark.v2 +@respx.mock +def test_httpx_multiple_calls_thread_safety() -> None: + client = Client(api_key="dummy_key") + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + + def delayed_response(request: httpx.Request) -> httpx.Response: + job_json = json.loads((V2_DATA_DIR / "job" / "ok_processing.json").read_text()) + time.sleep(0.1) + return httpx.Response(201, json=job_json) + + url_pattern = re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue") + respx.post(url_pattern).mock(side_effect=delayed_response) + + def make_request(): + input_source = PathInput(input_path) + params = ExtractionParameters(model_id="dummy-model-id") + return client.enqueue(input_source, params) + + thread_count = 20 + successful_responses = 0 + with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor: + futures = [executor.submit(make_request) for _ in range(thread_count)] + + for future in concurrent.futures.as_completed(futures): + response = future.result() + if ( + response.job + and response.job.id == "12345678-1234-1234-1234-123456789ABC" + ): + successful_responses += 1 + + assert successful_responses == thread_count diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index cded0c78..f0329bc1 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,4 +1,3 @@ -import concurrent.futures import os from pathlib import Path @@ -354,6 +353,7 @@ def test_explicit_timeout_failure(findoc_model_id) -> None: @pytest.mark.v2 +@pytest.mark.integration @respx.mock def test_explicit_500_server_error(findoc_model_id) -> None: respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( @@ -368,28 +368,3 @@ def test_explicit_500_server_error(findoc_model_id) -> None: client.enqueue(input_source, params) assert exc_info.value.status_code == 500 - - -@pytest.mark.integration -@pytest.mark.v2 -def test_httpx_multiple_calls_thread_safety(findoc_model_id) -> None: - client = Client() - input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" - - def make_request(): - input_source = PathInput(input_path) - params = ExtractionParameters(model_id=findoc_model_id) - return client.enqueue(input_source, params) - - thread_count = 20 - successful_responses = 0 - - with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor: - futures = [executor.submit(make_request) for _ in range(thread_count)] - - for future in concurrent.futures.as_completed(futures): - response = future.result() - if response.job and response.job.id: - successful_responses += 1 - - assert successful_responses == thread_count From 3857258bed8d314500d145bae0251b16aeed175c Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 18:12:41 +0200 Subject: [PATCH 6/6] fix tests, again --- tests/v2/test_client_integration.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index f0329bc1..a82e7451 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,4 +1,5 @@ import os +import re from pathlib import Path import httpx @@ -9,8 +10,10 @@ from mindee.input.path_input import PathInput from mindee.input.url_input_source import URLInputSource from mindee.v2.client import Client -from mindee.v2.error import MindeeAPIV2Error -from mindee.v2.error.mindee_http_error_v2 import MindeeHTTPErrorV2 +from mindee.v2.error.mindee_http_error_v2 import ( + MindeeHTTPErrorV2, + MindeeHTTPUnknownErrorV2, +) from mindee.v2.parsing import InferenceActiveOptions from mindee.v2.product.extraction.extraction_response import ExtractionResponse from tests.utils import FILE_TYPES_DIR, V2_PRODUCT_DATA_DIR @@ -355,16 +358,15 @@ def test_explicit_timeout_failure(findoc_model_id) -> None: @pytest.mark.v2 @pytest.mark.integration @respx.mock -def test_explicit_500_server_error(findoc_model_id) -> None: - respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( +def test_explicit_500_server_error(findoc_model_id: str) -> None: + respx.post(re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue")).mock( return_value=httpx.Response(500, json={"message": "Internal Server Error"}) ) client = Client(api_key="dummy") input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") params = ExtractionParameters(model_id=findoc_model_id) - - with pytest.raises(MindeeAPIV2Error) as exc_info: + with pytest.raises(MindeeHTTPUnknownErrorV2) as exc_info: client.enqueue(input_source, params) - assert exc_info.value.status_code == 500 + assert "Couldn't deserialize server error" in str(exc_info.value)