Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion extensions/EXTENSION-USER-GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ In addition to extension-specific environment variables (`SPECKIT_{EXT_ID}_*`),
| Variable | Description | Default |
|----------|-------------|---------|
| `SPECKIT_CATALOG_URL` | Override the full catalog stack with a single URL (backward compat) | Built-in default stack |
| `GH_TOKEN` / `GITHUB_TOKEN` | GitHub API token for downloads | None |
| `GH_TOKEN` / `GITHUB_TOKEN` | GitHub token for authenticated requests to GitHub-hosted URLs (`raw.githubusercontent.com`, `github.com`, `api.github.com`). Required when your catalog JSON or extension ZIPs are hosted in a private GitHub repository. | None |

#### Example: Using a custom catalog for testing

Expand All @@ -433,6 +433,21 @@ export SPECKIT_CATALOG_URL="http://localhost:8000/catalog.json"
export SPECKIT_CATALOG_URL="https://example.com/staging/catalog.json"
```

#### Example: Using a private GitHub-hosted catalog

```bash
# Authenticate with a token (gh CLI, PAT, or GITHUB_TOKEN in CI)
export GITHUB_TOKEN=$(gh auth token)

# Search a private catalog added via `specify extension catalog add`
specify extension search jira

# Install from a private catalog
specify extension add jira-sync
```

The token is attached automatically to requests targeting GitHub domains. Non-GitHub catalog URLs are always fetched without credentials.

---

## Extension Catalogs
Expand Down
57 changes: 54 additions & 3 deletions src/specify_cli/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,57 @@ def _validate_catalog_url(self, url: str) -> None:
if not parsed.netloc:
raise ValidationError("Catalog URL must be a valid URL with a host.")

def _make_request(self, url: str) -> "urllib.request.Request":
    """Build a urllib Request, adding a GitHub auth header when it is safe.

    Reads ``GITHUB_TOKEN`` (preferred) or ``GH_TOKEN`` from the environment
    and attaches an ``Authorization: token <value>`` header only when BOTH
    of the following hold:

    * the URL scheme is ``https`` (the token is never sent in cleartext), and
    * the hostname is exactly one of ``raw.githubusercontent.com``,
      ``github.com`` or ``api.github.com``.

    Every other URL is returned as a plain request so credentials are never
    leaked to third-party hosts or look-alike domains.

    Args:
        url: Absolute URL to build the request for.

    Returns:
        A ``urllib.request.Request`` for ``url``, with or without the
        ``Authorization`` header as described above.
    """
    import os
    import urllib.request
    from urllib.parse import urlparse

    github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"}
    headers: Dict[str, str] = {}

    # Strip surrounding whitespace: a stray newline (e.g. from a secrets
    # file) would otherwise produce an invalid HTTP header value. A
    # blank GITHUB_TOKEN falls through to GH_TOKEN.
    token = (
        (os.environ.get("GITHUB_TOKEN") or "").strip()
        or (os.environ.get("GH_TOKEN") or "").strip()
    )

    parsed = urlparse(url)
    # Exact hostname match (not substring) so "github.com.evil.com" or a
    # path/query containing "github.com" never qualifies.
    hostname = (parsed.hostname or "").lower()
    if token and parsed.scheme == "https" and hostname in github_hosts:
        headers["Authorization"] = f"token {token}"
    return urllib.request.Request(url, headers=headers)

def _open_url(self, url: str, timeout: int = 10):
    """Open ``url`` via ``_make_request`` without leaking the token on redirects.

    If the request built by ``_make_request`` carries no ``Authorization``
    header, it is opened with plain ``urllib.request.urlopen``. Otherwise a
    custom redirect handler is installed that drops the header unless the
    redirect target is an HTTPS URL on a GitHub-hosted domain
    (``raw.githubusercontent.com``, ``github.com``, ``api.github.com``).
    This keeps the token from being forwarded to CDNs or other third-party
    hosts, and from being sent in cleartext after an HTTPS -> HTTP
    downgrade redirect.

    Args:
        url: URL to open.
        timeout: Timeout in seconds handed to urllib.

    Returns:
        The response object produced by urllib; callers use it as a
        context manager and call ``read()`` on it.
    """
    import urllib.request
    from urllib.parse import urlparse

    req = self._make_request(url)

    # No credential on the request, so there is nothing to protect.
    if not req.get_header("Authorization"):
        return urllib.request.urlopen(req, timeout=timeout)

    _github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"}

    class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler):
        """Forward the Authorization header only to HTTPS GitHub hosts."""

        # First parameter is named ``_self`` to avoid shadowing the
        # enclosing method's ``self``.
        def redirect_request(_self, req, fp, code, msg, headers, newurl):
            new_req = super().redirect_request(req, fp, code, msg, headers, newurl)
            if new_req is None:
                return None
            target = urlparse(newurl)
            hostname = (target.hostname or "").lower()
            # The base handler copies request headers onto the redirected
            # request, so the token must be removed explicitly.
            if target.scheme != "https" or hostname not in _github_hosts:
                new_req.headers.pop("Authorization", None)
            return new_req

    opener = urllib.request.build_opener(_StripAuthOnRedirect)
    return opener.open(req, timeout=timeout)

def _load_catalog_config(self, config_path: Path) -> Optional[List[CatalogEntry]]:
"""Load catalog stack configuration from a YAML file.

Expand Down Expand Up @@ -1601,7 +1652,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False

# Fetch from network
try:
with urllib.request.urlopen(entry.url, timeout=10) as response:
with self._open_url(entry.url, timeout=10) as response:
catalog_data = json.loads(response.read())

if "schema_version" not in catalog_data or "extensions" not in catalog_data:
Expand Down Expand Up @@ -1718,7 +1769,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]:
import urllib.request
import urllib.error

with urllib.request.urlopen(catalog_url, timeout=10) as response:
with self._open_url(catalog_url, timeout=10) as response:
catalog_data = json.loads(response.read())

# Validate catalog structure
Expand Down Expand Up @@ -1861,7 +1912,7 @@ def download_extension(self, extension_id: str, target_dir: Optional[Path] = Non

# Download the ZIP file
try:
with urllib.request.urlopen(download_url, timeout=60) as response:
with self._open_url(download_url, timeout=60) as response:
zip_data = response.read()

zip_path.write_bytes(zip_data)
Expand Down
152 changes: 152 additions & 0 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2142,6 +2142,158 @@ def test_clear_cache(self, temp_dir):
assert not catalog.cache_file.exists()
assert not catalog.cache_metadata_file.exists()

# --- _make_request / GitHub auth ---

def _make_catalog(self, temp_dir):
    """Return an ExtensionCatalog rooted at a fresh ``<temp_dir>/project``.

    The project directory and an empty ``.specify`` directory inside it are
    created before the catalog is constructed.
    """
    project_root = temp_dir / "project"
    project_root.mkdir()
    specify_dir = project_root / ".specify"
    specify_dir.mkdir()
    return ExtensionCatalog(project_root)

def test_make_request_no_token_no_auth_header(self, temp_dir, monkeypatch):
    """With neither token variable set, no Authorization header is added."""
    for var_name in ("GITHUB_TOKEN", "GH_TOKEN"):
        monkeypatch.delenv(var_name, raising=False)
    target_url = "https://raw.githubusercontent.com/org/repo/main/catalog.json"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert "Authorization" not in request.headers

def test_make_request_github_token_added_for_raw_githubusercontent(self, temp_dir, monkeypatch):
    """A GITHUB_TOKEN value is sent as the auth header to raw.githubusercontent.com."""
    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    monkeypatch.delenv("GH_TOKEN", raising=False)
    target_url = "https://raw.githubusercontent.com/org/repo/main/catalog.json"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert request.get_header("Authorization") == "token ghp_testtoken"

def test_make_request_gh_token_fallback(self, temp_dir, monkeypatch):
    """When GITHUB_TOKEN is unset, the GH_TOKEN value is used instead."""
    monkeypatch.delenv("GITHUB_TOKEN", raising=False)
    monkeypatch.setenv("GH_TOKEN", "ghp_ghtoken")
    target_url = "https://github.com/org/repo/releases/download/v1/ext.zip"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert request.get_header("Authorization") == "token ghp_ghtoken"

def test_make_request_github_token_takes_precedence_over_gh_token(self, temp_dir, monkeypatch):
    """If both variables are set, the GITHUB_TOKEN value wins over GH_TOKEN."""
    monkeypatch.setenv("GITHUB_TOKEN", "ghp_primary")
    monkeypatch.setenv("GH_TOKEN", "ghp_secondary")
    target_url = "https://api.github.com/repos/org/repo"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert request.get_header("Authorization") == "token ghp_primary"

def test_make_request_token_not_added_for_non_github_url(self, temp_dir, monkeypatch):
    """A non-GitHub host never receives the token, so it cannot leak there."""
    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    target_url = "https://internal.example.com/catalog.json"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert "Authorization" not in request.headers

Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Current tests cover a generic non-GitHub domain, but they don't cover common spoofing cases that would slip through the current substring-based domain check (e.g., https://github.com.evil.com/... or a non-GitHub host whose path/query contains github.com). Add negative tests for these URL shapes to ensure the auth header is never attached outside the intended allowlist.

Suggested change
def test_make_request_token_not_added_for_github_lookalike_host(self, temp_dir, monkeypatch):
"""Auth header is not attached to non-GitHub hosts that only contain github.com in the hostname."""
monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
catalog = self._make_catalog(temp_dir)
req = catalog._make_request("https://github.com.evil.com/org/repo/releases/download/v1/ext.zip")
assert "Authorization" not in req.headers
def test_make_request_token_not_added_for_non_github_host_with_github_in_path(self, temp_dir, monkeypatch):
"""Auth header is not attached when a non-GitHub host includes github.com only in the URL path."""
monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
catalog = self._make_catalog(temp_dir)
req = catalog._make_request("https://evil.example.com/github.com/org/repo/releases/download/v1/ext.zip")
assert "Authorization" not in req.headers
def test_make_request_token_not_added_for_non_github_host_with_github_in_query(self, temp_dir, monkeypatch):
"""Auth header is not attached when a non-GitHub host includes github.com only in the query string."""
monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
catalog = self._make_catalog(temp_dir)
req = catalog._make_request("https://evil.example.com/download?source=https://github.com/org/repo/releases/download/v1/ext.zip")
assert "Authorization" not in req.headers

Copilot uses AI. Check for mistakes.
def test_make_request_token_not_added_for_github_lookalike_host(self, temp_dir, monkeypatch):
    """Auth header is not attached to a look-alike host that merely starts with github.com.

    ``github.com.evil.com`` has ``github.com`` as a hostname *prefix*; it is
    a different domain and must not receive the token.
    """
    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    catalog = self._make_catalog(temp_dir)
    req = catalog._make_request("https://github.com.evil.com/org/repo/releases/download/v1/ext.zip")
    assert "Authorization" not in req.headers

def test_make_request_token_not_added_for_github_in_path(self, temp_dir, monkeypatch):
    """A foreign host with ``github.com`` only in its URL path gets no auth header."""
    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    target_url = "https://evil.example.com/github.com/org/repo/releases/download/v1/ext.zip"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert "Authorization" not in request.headers

def test_make_request_token_not_added_for_github_in_query(self, temp_dir, monkeypatch):
    """A foreign host with ``github.com`` only in its query string gets no auth header."""
    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    target_url = "https://evil.example.com/download?source=https://github.com/org/repo/v1/ext.zip"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert "Authorization" not in request.headers

def test_make_request_token_added_for_api_github_com(self, temp_dir, monkeypatch):
    """Requests to api.github.com carry the GITHUB_TOKEN value as the auth header."""
    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    target_url = "https://api.github.com/repos/org/repo/releases/assets/1"
    request = self._make_catalog(temp_dir)._make_request(target_url)
    assert request.get_header("Authorization") == "token ghp_testtoken"

def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch):
    """_fetch_single_catalog hands the opener a request carrying the auth header.

    ``urllib.request.build_opener`` is patched so no network access happens;
    the request object given to ``opener.open`` is captured and inspected.
    """
    from unittest.mock import patch, MagicMock

    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    catalog = self._make_catalog(temp_dir)

    # Minimal payload containing the keys the catalog loader checks for.
    payload = json.dumps({"schema_version": "1.0", "extensions": {}}).encode()

    # Fake response that works as a context manager and yields the payload.
    fake_response = MagicMock()
    fake_response.read.return_value = payload
    fake_response.__enter__.return_value = fake_response
    fake_response.__exit__.return_value = False

    seen = {}

    def record_and_respond(req, timeout=None):
        seen["req"] = req
        return fake_response

    fake_opener = MagicMock()
    fake_opener.open.side_effect = record_and_respond

    private_entry = CatalogEntry(
        url="https://raw.githubusercontent.com/org/repo/main/catalog.json",
        name="private",
        priority=1,
        install_allowed=True,
    )

    with patch("urllib.request.build_opener", return_value=fake_opener):
        catalog._fetch_single_catalog(private_entry, force_refresh=True)

    assert seen["req"].get_header("Authorization") == "token ghp_testtoken"

def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch):
    """download_extension hands the opener a request carrying the auth header.

    ``urllib.request.build_opener`` is patched so no network access happens;
    the request object given to ``opener.open`` is captured and inspected.
    ``get_extension_info`` is patched to return a GitHub-hosted download URL.
    """
    import io
    import zipfile
    from unittest.mock import patch, MagicMock

    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    catalog = self._make_catalog(temp_dir)

    # Build a minimal valid ZIP in memory
    zip_buf = io.BytesIO()
    with zipfile.ZipFile(zip_buf, "w") as zf:
        zf.writestr("extension.yml", "id: test-ext\nname: Test\nversion: 1.0.0\n")
    zip_bytes = zip_buf.getvalue()

    # Fake response that works as a context manager and yields the ZIP bytes.
    mock_response = MagicMock()
    mock_response.read.return_value = zip_bytes
    mock_response.__enter__ = lambda s: s
    mock_response.__exit__ = MagicMock(return_value=False)

    captured = {}

    mock_opener = MagicMock()

    def fake_open(req, timeout=None):
        # Remember the request so its headers can be asserted on afterwards.
        captured["req"] = req
        return mock_response

    mock_opener.open.side_effect = fake_open

    ext_info = {
        "id": "test-ext",
        "name": "Test Extension",
        "version": "1.0.0",
        "download_url": "https://github.com/org/repo/releases/download/v1/test-ext.zip",
    }

    with patch.object(catalog, "get_extension_info", return_value=ext_info), \
            patch("urllib.request.build_opener", return_value=mock_opener):
        catalog.download_extension("test-ext", target_dir=temp_dir)

    assert captured["req"].get_header("Authorization") == "token ghp_testtoken"



# ===== CatalogEntry Tests =====

Expand Down