Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion hatch/installers/dependency_installation_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,12 @@ def _resolve_and_load_package(
self._resolved_package_location = str(path.resolve())

else:
# Remote package
# Remote package — requires a live or cached registry
if self.registry_data.get("status") == "unavailable":
raise DependencyInstallationError(
f"Cannot install '{package_path_or_name}': registry is unavailable. "
"Connect to the internet and retry, or use a local package path."
)
if not self.registry_service.package_exists(package_path_or_name):
raise DependencyInstallationError(
f"Package {package_path_or_name} does not exist in registry"
Expand Down
46 changes: 39 additions & 7 deletions hatch/registry_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ def _clear_registry_status() -> None:
print(" " * 60, end="\r", file=sys.stderr, flush=True)


# Sentinel returned when the registry is unreachable and no local cache exists.
# IMPORTANT: this object must NEVER be written to disk — callers that receive it
# must check ``registry_data.get("status") == "unavailable"`` before persisting.
# NOTE(review): get_registry() hands back this module-level dict itself rather
# than a copy, so a caller that mutates the result alters the sentinel for every
# later caller — treat the returned object as read-only.
REGISTRY_UNAVAILABLE: Dict[str, Any] = {"status": "unavailable", "repositories": []}


class RegistryRetriever:
"""Manages the retrieval and caching of the Hatch package registry.

Expand Down Expand Up @@ -211,8 +217,9 @@ def _fetch_remote_registry(self) -> Dict[str, Any]:
date = yesterday.strftime("%Y-%m-%d")

if not self._registry_exists(date):
self.logger.error(
f"Yesterday's registry ({date}) also not found, cannot proceed"
self.logger.warning(
f"Registry unavailable for {self.today_str} and {date} "
"(network may be offline or releases not yet published)."
)
raise Exception("No valid registry found for today or yesterday")

Expand Down Expand Up @@ -253,24 +260,29 @@ def _registry_exists(self, date_str: str) -> bool:
except Exception:
return False

except Exception as e:
self.logger.error(f"Failed to fetch registry: {e}")
raise e

def get_registry(self, force_refresh: bool = False) -> Dict[str, Any]:
"""Fetch the registry file.

This method implements a multi-level caching strategy:
1. First checks the in-memory cache
2. Then checks the local file cache
3. Finally fetches from the source (local file or remote URL)
4. Falls back to local cache if remote fetch fails

The fetched data is stored in both the in-memory and file caches.

Args:
force_refresh (bool, optional): Force refresh the registry even if cache is valid. Defaults to False.

Returns:
Dict[str, Any]: Registry data.
Dict[str, Any]: Registry data or an unavailable status object if all sources fail.

Raises:
Exception: If fetching the registry fails.
Exception: If fetching from both remote and local cache fails.
"""
current_time = datetime.datetime.now(datetime.timezone.utc).timestamp()

Expand Down Expand Up @@ -308,9 +320,29 @@ def get_registry(self, force_refresh: bool = False) -> Dict[str, Any]:
# In simulation mode, we must have a local registry file
registry_data = self._read_local_cache()
else:
_print_registry_status(" Refreshing registry cache...")
registry_data = self._fetch_remote_registry()
_clear_registry_status()
try:
_print_registry_status(" Refreshing registry cache...")
registry_data = self._fetch_remote_registry()
_clear_registry_status()
except Exception:
_clear_registry_status()
if self.registry_cache_path.exists():
self.logger.warning(
"Registry unreachable (offline?) — falling back to "
"cached registry (may be stale)."
)
registry_data = self._read_local_cache()
else:
self.logger.warning(
"Registry unavailable and no local cache found. "
"Hatch is running offline with an empty registry. "
"Commands that install or search packages will not work. "
"Run any hatch command while online to populate the local cache."
)
# Early return — bypasses _write_local_cache, _registry_cache
# update, and _save_last_fetch_time so the sentinel is never
# persisted anywhere.
return REGISTRY_UNAVAILABLE

# Update local cache
# Note that in case of simulation mode AND default cache path,
Expand Down
55 changes: 55 additions & 0 deletions tests/test_registry_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,61 @@ def test_persistent_timestamp_edge_cases(self):
"Missing timestamp file should be treated as no timestamp",
)

@regression_test
def test_offline_no_cache_returns_unavailable_sentinel(self):
    """Check that an offline lookup with no cache degrades to the sentinel.

    With every remote existence probe failing and no cache file on disk,
    ``get_registry`` is expected to hand back ``REGISTRY_UNAVAILABLE``
    instead of raising, and to leave the cache location untouched.
    """
    from unittest.mock import patch
    from hatch.registry_retriever import REGISTRY_UNAVAILABLE

    offline_retriever = RegistryRetriever(
        simulation_mode=False, local_cache_dir=self.cache_dir
    )

    # A dead network is modelled by making every registry probe report "absent".
    network_down = patch.object(
        offline_retriever, "_registry_exists", return_value=False
    )
    with network_down:
        registry = offline_retriever.get_registry()

    self.assertEqual(registry, REGISTRY_UNAVAILABLE)
    self.assertEqual(registry.get("status"), "unavailable")
    self.assertEqual(registry.get("repositories"), [])

    # The sentinel is meant to live in memory only, so no cache file may appear.
    cache_was_written = offline_retriever.registry_cache_path.exists()
    self.assertFalse(
        cache_was_written,
        "REGISTRY_UNAVAILABLE sentinel must not be written to the local cache file",
    )

@regression_test
def test_offline_stale_cache_returns_stale_data_not_sentinel(self):
    """Check that an offline lookup prefers a stale cache over the sentinel.

    A cache file is written before the retriever is built; with the remote
    probes failing and the cache reported as outdated, ``get_registry`` is
    expected to return the cached repositories rather than the
    "unavailable" placeholder.
    """
    import json
    from unittest.mock import patch

    stale_data = {
        "repositories": [{"name": "test-repo", "packages": []}],
        "last_updated": "2020-01-01",
    }

    # Seed the on-disk cache with the old payload before creating the retriever.
    registry_dir = self.cache_dir / "registry"
    registry_dir.mkdir(parents=True, exist_ok=True)
    seeded_cache = registry_dir / "hatch_packages_registry.json"
    seeded_cache.write_text(json.dumps(stale_data))

    retriever = RegistryRetriever(
        simulation_mode=False, local_cache_dir=self.cache_dir
    )

    # Network down (no registry found remotely) and cache flagged as outdated,
    # which forces the refresh path that then has to fall back to the cache.
    with patch.object(retriever, "_registry_exists", return_value=False):
        with patch.object(retriever, "is_cache_outdated", return_value=True):
            fetched = retriever.get_registry()

    # Real (if old) data must win over the placeholder.
    self.assertNotEqual(
        fetched.get("status"),
        "unavailable",
        "Stale cache should be returned, not the unavailable sentinel",
    )
    self.assertEqual(fetched.get("repositories"), stale_data["repositories"])


if __name__ == "__main__":
unittest.main()
Loading