-
Notifications
You must be signed in to change notification settings - Fork 1
Stabilize the stubbed workspace APIs #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a6cef2a
50abeca
4ca072d
42e7345
81a4bc6
bb77461
67adbff
ea1645c
d836079
febc60a
894aa17
84d0f65
279ab67
9769c56
4919eaf
f692702
443b697
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| import asyncio | ||
| import json | ||
| from typing import Any | ||
|
|
||
| import httpx | ||
| import jsonschema | ||
| from fastapi import HTTPException, status | ||
|
|
||
| from api.core.config import settings | ||
|
|
||
| # Shared HTTP client. Initialized by main.py lifespan. | ||
| _http_client: httpx.AsyncClient | None = None | ||
|
|
||
| _longform_schema: dict | None = None | ||
| _longform_schema_lock = asyncio.Lock() | ||
|
|
||
| _imagery_schema: dict | None = None | ||
| _imagery_schema_lock = asyncio.Lock() | ||
|
|
||
|
|
||
| def init_json_schema_client() -> None: | ||
| global _http_client | ||
| _http_client = httpx.AsyncClient( | ||
| timeout=httpx.Timeout(connect=10, read=30, write=30, pool=10), | ||
| ) | ||
|
|
||
|
|
||
| def get_http_client() -> httpx.AsyncClient | None: | ||
| return _http_client | ||
|
|
||
|
|
||
| async def close_json_schema_client() -> None: | ||
| global _http_client | ||
| if _http_client is not None: | ||
| await _http_client.aclose() | ||
| _http_client = None | ||
|
|
||
|
|
||
| async def _fetch_longform_schema() -> dict: | ||
| global _longform_schema | ||
| if _longform_schema is None: | ||
| async with _longform_schema_lock: | ||
| if _longform_schema is None: | ||
| response = await _http_client.get(settings.LONGFORM_SCHEMA_URL) | ||
| response.raise_for_status() | ||
| _longform_schema = response.json() | ||
| return _longform_schema | ||
|
|
||
|
|
||
| async def _fetch_imagery_schema() -> dict: | ||
| global _imagery_schema | ||
| if _imagery_schema is None: | ||
| async with _imagery_schema_lock: | ||
| if _imagery_schema is None: | ||
| response = await _http_client.get(settings.IMAGERY_SCHEMA_URL) | ||
| response.raise_for_status() | ||
| _imagery_schema = response.json() | ||
| return _imagery_schema | ||
|
|
||
|
|
||
| def _raise_for_fetch_error(e: Exception, label: str) -> None: | ||
| if isinstance(e, httpx.TimeoutException): | ||
| raise HTTPException( | ||
| status_code=status.HTTP_504_GATEWAY_TIMEOUT, | ||
| detail=f"Timed out fetching {label} schema", | ||
| ) | ||
| raise HTTPException( | ||
| status_code=status.HTTP_502_BAD_GATEWAY, | ||
| detail=f"Failed to fetch {label} schema: {e}", | ||
| ) | ||
|
|
||
|
|
||
| async def validate_quest_definition_schema(definition: str) -> None: | ||
| """ | ||
| Parse, type-check, and validate a quest definition string against the long- | ||
| form quest JSON schema. | ||
| """ | ||
| try: | ||
| parsed = json.loads(definition) | ||
| except json.JSONDecodeError as e: | ||
| raise HTTPException( | ||
| status_code=status.HTTP_400_BAD_REQUEST, | ||
| detail=f"'definition' must be valid JSON: {e}", | ||
| ) | ||
| if not parsed or not isinstance(parsed, dict): | ||
| raise HTTPException( | ||
| status_code=status.HTTP_400_BAD_REQUEST, | ||
| detail="'definition' must be a JSON object.", | ||
| ) | ||
|
|
||
| try: | ||
| schema = await _fetch_longform_schema() | ||
| except HTTPException: | ||
| raise | ||
| except Exception as e: | ||
| _raise_for_fetch_error(e, "quest") | ||
|
Comment on lines
+95
to
+96
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Verify current broad exception handling and where it appears.
rg -n -C2 'except Exception as e:' api/core/json_schema.py
# Confirm narrowed fetch-only exception handling after applying the fix.
rg -n -C2 'except \(httpx\.TimeoutException, httpx\.HTTPError\) as e:' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 341 🏁 Script executed: # Read the file to see the full context around the problematic lines
head -120 api/core/json_schema.py | tail -30Repository: TaskarCenterAtUW/workspaces-backend Length of output: 956 🏁 Script executed: # Check the imports at the top of the file to see what's imported
head -20 api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 497 🏁 Script executed: # Look for _raise_for_fetch_error function definition
rg -n -A 5 'def _raise_for_fetch_error' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 358 🏁 Script executed: # Search for httpx imports to verify the exception types
rg -n 'import.*httpx|from.*httpx' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 94 🏁 Script executed: # Get the complete _raise_for_fetch_error function
sed -n '61,80p' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 757 🏁 Script executed: # Look for the _fetch_longform_schema and _fetch_imagery_schema functions to see what they do
rg -n -A 10 'async def _fetch_longform_schema|async def _fetch_imagery_schema' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 923 🏁 Script executed: # Check what httpx exception types are available and used
rg -n 'httpx\.' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 346 🌐 Web query:
💡 Result: In the httpx library, all exceptions inherit from a common base class, HTTPError [1]. The exception hierarchy is structured to allow for granular error handling, distinguishing between general request issues, transport-level problems, and HTTP status errors [1][2]. The relationship between the requested exceptions is as follows: 1. HTTPError: This is the base class for all exceptions raised by the httpx library [1][2]. 2. TimeoutException: This class inherits from TransportError, which in turn inherits from RequestError, and finally from HTTPError [1]. It serves as the base class for all timeout-related errors, including ConnectTimeout, ReadTimeout, WriteTimeout, and PoolTimeout [1][2]. 3. HTTPStatusError: This class inherits directly from HTTPError [1]. It is raised when a response has an error status code (e.g., 4xx or 5xx) and raise_for_status is called [2]. The full hierarchy is organized as follows [1]: - HTTPError - RequestError - TransportError - TimeoutException (base for ConnectTimeout, ReadTimeout, WriteTimeout, PoolTimeout) - NetworkError (base for ConnectError, ReadError, WriteError, CloseError) - ProtocolError (base for LocalProtocolError, RemoteProtocolError) - ProxyError - UnsupportedProtocol - DecodingError - TooManyRedirects - HTTPStatusError Other notable exceptions, such as InvalidURL, CookieConflict, and various StreamError subclasses, also exist outside of the RequestError branch but remain under the HTTPError umbrella [1][2]. Citations:
🏁 Script executed: # Check if there are other exception handlers in the codebase for httpx
rg -n 'except.*httpx\.' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 61 🏁 Script executed: # Check if json.JSONDecodeError is handled elsewhere or if there are similar patterns in the codebase
rg -n 'json\.JSONDecodeError' api/core/json_schema.pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 120 🏁 Script executed: # Look at the usage context - what calls validate_quest_definition_schema
rg -rn 'validate_quest_definition_schema|validate_imagery_definition_schema' --type pyRepository: TaskarCenterAtUW/workspaces-backend Length of output: 483 Catch specific exceptions instead of all exceptions around schema fetch. Lines 95-96 and 115-116 currently catch all exceptions and convert them to 502 errors, which can hide real app bugs like malformed JSON responses. The fetch operations call - except Exception as e:
+ except (httpx.HTTPError, json.JSONDecodeError) as e:
_raise_for_fetch_error(e, "quest")Apply the same fix at line 115-116 for the imagery schema fetch. Also applies to: 115-116 🧰 Tools🪛 Ruff (0.15.13)[warning] 95-95: Do not catch blind exception: (BLE001) 🤖 Prompt for AI Agents |
||
|
|
||
| try: | ||
| jsonschema.validate(instance=parsed, schema=schema) | ||
| except jsonschema.ValidationError as e: | ||
| raise HTTPException( | ||
| status_code=status.HTTP_400_BAD_REQUEST, | ||
| detail=f"{e.message} at {list(e.path)}", | ||
| ) | ||
|
|
||
|
|
||
| async def validate_imagery_definition_schema(definition: list[Any]) -> None: | ||
| """ | ||
| Validate the provided definition against the imagery list schema. | ||
| """ | ||
| try: | ||
| schema = await _fetch_imagery_schema() | ||
| except HTTPException: | ||
| raise | ||
| except Exception as e: | ||
| _raise_for_fetch_error(e, "imagery") | ||
|
|
||
| try: | ||
| jsonschema.validate(instance=definition, schema=schema) | ||
| except jsonschema.ValidationError as e: | ||
| raise HTTPException( | ||
| status_code=status.HTTP_400_BAD_REQUEST, | ||
| detail=f"{e.message} at {list(e.path)}", | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| from api.core import config | ||
| from api.core.config import settings | ||
| from api.core.database import get_task_session | ||
| from api.core.json_schema import close_json_schema_client, init_json_schema_client | ||
| from api.core.logging import get_logger, setup_logging | ||
| from api.core.security import ( | ||
| UserInfo, | ||
|
|
@@ -59,13 +60,15 @@ async def lifespan(_app: FastAPI): | |
| timeout=httpx.Timeout(connect=10, read=7200, write=7200, pool=10), | ||
| ) | ||
| init_tdei_client() | ||
| init_json_schema_client() | ||
|
|
||
| yield # App runs | ||
|
|
||
| # Run after app cleanup: | ||
| await _osm_client.aclose() | ||
| _osm_client = None | ||
| await close_tdei_client() | ||
| await close_json_schema_client() | ||
|
|
||
|
|
||
| app = FastAPI( | ||
|
|
@@ -141,8 +144,10 @@ def get_workspace_repository( | |
| AUTH_WHITELIST_PATTERNS = [ | ||
| re.compile(p) | ||
| for p in [ | ||
| r"^/api/0\.6/user/.*$", # used during authentication | ||
| r"^/api/0\.6/workspaces/[0-9]+/bbox\.json$", # used to get workspace bbox without workspace header, to be removed | ||
| # Creating/deleting workspaces and JOSM path rewriting: | ||
| r"^/api/0\.6/workspaces.*$", | ||
| # Provisioning users during authentication: | ||
| r"^/api/0\.6/user/.*$", | ||
|
Comment on lines
+147
to
+150
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whitelist regex is overly broad and weakens access control. Line 148 ( Suggested change-AUTH_WHITELIST_PATTERNS = [
- re.compile(p)
- for p in [
- r"^/api/0\.6/workspaces.*$",
- r"^/api/0\.6/user/.*$",
- ]
-]
+AUTH_WHITELIST = [
+ (re.compile(r"^/api/0\.6/workspaces/create\.json$"), {"POST"}),
+ (re.compile(r"^/api/0\.6/workspaces/[0-9]+/delete\.json$"), {"POST", "DELETE"}),
+ (re.compile(r"^/api/0\.6/user/.*$"), {"GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"}),
+]
...
- if not any(p.fullmatch(request.url.path) for p in AUTH_WHITELIST_PATTERNS):
+ if not any(
+ p.fullmatch(request.url.path) and request.method in methods
+ for p, methods in AUTH_WHITELIST
+ ):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="No X-Workspace header supplied",
)Also applies to: 228-232 🤖 Prompt for AI Agents |
||
| ] | ||
| ] | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You saw the things here can be overridden by env vars? Just checking the var is named this and not what was there before? :-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When you see this and respond, you can either merge or fix + merge. ty!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, this one didn't match the existing variable name, so I dropped the
WS_prefix for consistency with the rest of these vars. It gets mapped here:https://github.com/TaskarCenterAtUW/workspaces-stack/blob/0f66fc91815b4965f817cc4c72b1122ed228081f/docker-compose.deploy.yml#L94
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perfect, copy that. Merging then.