From 5cbd5552cdc7a4fd63b31d04887da520e64c583b Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 27 Apr 2026 12:00:22 -0400 Subject: [PATCH 1/2] Wire reference generator into docs checks --- .github/workflows/pr_docs_changes.yaml | 10 +++++- Makefile | 12 ++++++- docs/.gitignore | 3 ++ docs/_quarto.yml | 1 + docs/reference/index.md | 48 ++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 docs/reference/index.md diff --git a/.github/workflows/pr_docs_changes.yaml b/.github/workflows/pr_docs_changes.yaml index c51dc153..9c0f059f 100644 --- a/.github/workflows/pr_docs_changes.yaml +++ b/.github/workflows/pr_docs_changes.yaml @@ -18,7 +18,15 @@ jobs: steps: - name: Checkout repo uses: actions/checkout@v4 + - name: Install uv + uses: astral-sh/setup-uv@v5 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.13' - name: Set up Quarto uses: quarto-dev/quarto-actions/setup@v2 + - name: Test reference generator smoke build + run: make docs-reference-smoke - name: Test documentation builds - run: quarto render docs + run: make docs diff --git a/Makefile b/Makefile index 4dc38a33..fac724f4 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,20 @@ -.PHONY: docs docs-serve +.PHONY: docs docs-serve docs-generate-reference docs-reference-smoke all: build-package docs: quarto render docs +docs-generate-reference: + uv run --extra us python docs/_generator/build_reference.py --country us --out docs/_generated/reference/us + +docs-reference-smoke: + rm -rf /tmp/policyengine-reference-smoke + uv run --extra us python docs/_generator/build_reference.py --country us --filter chip --out /tmp/policyengine-reference-smoke/us + quarto render /tmp/policyengine-reference-smoke/us/index.qmd --output-dir /tmp/policyengine-reference-smoke/rendered + quarto render /tmp/policyengine-reference-smoke/us/programs.qmd --output-dir /tmp/policyengine-reference-smoke/rendered + quarto render $$(find /tmp/policyengine-reference-smoke/us -type f -name "*.qmd" ! -name "index.qmd" ! -name "programs.qmd" | head -n 1) --output-dir /tmp/policyengine-reference-smoke/rendered + docs-serve: quarto preview docs diff --git a/docs/.gitignore b/docs/.gitignore index d05d3238..2ee4c0b1 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -3,3 +3,6 @@ _site _freeze /.quarto/ **/*.quarto_ipynb + +# Generated reference output can be rebuilt from installed country models. +_generated/ diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 090d8ecc..179f8834 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -51,6 +51,7 @@ website: - programs/us-chip.md - section: "Reference" contents: + - reference/index.md - countries.md - release-bundles.md - data-publishing-design.md diff --git a/docs/reference/index.md b/docs/reference/index.md new file mode 100644 index 00000000..35b5e67b --- /dev/null +++ b/docs/reference/index.md @@ -0,0 +1,48 @@ +--- +title: "Reference" +--- + +Reference pages are generated from the installed country-model packages. Authored methodology pages explain why the model is structured the way it is; generated reference pages expose the exact release contents. + +## What generated reference should include + +The variable reference generator already reads the installed country model and can emit: + +- one page per variable +- entity, period, unit, value type, and `defined_for` +- variable documentation +- `adds` and `subtracts` relationships +- statutory references where the country model provides them +- source file path and line number +- a program coverage page from `programs.yaml` + +## Generate locally + +Generate the full US variable reference: + +```bash +make docs-generate-reference +``` + +This writes generated pages under `docs/_generated/reference/us`, which is ignored by Git. + +For a fast smoke test: + +```bash +make docs-reference-smoke +``` + +The smoke test generates a CHIP-filtered US reference into `/tmp` and renders it with Quarto. CI runs this target so changes to the generator fail early without checking thousands of generated pages into the repository. + +## Next generator layers + +The current generator is only the first layer. The same pattern should extend to: + +| Layer | Source | +|---|---| +| Parameters | country model parameter YAML | +| Program metadata | `programs.yaml` | +| Data lineage | country data package build metadata | +| Calibration targets | country data package target files and validation artifacts | + +Once those layers are generated, authored program pages can stay short and structural, while exact values, citations, source paths, and calibration details remain release-synchronized. From 54fa514521cbc92599fe25ba20b0dea275af7913 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 27 Apr 2026 12:15:50 -0400 Subject: [PATCH 2/2] Generate program reference pages --- Makefile | 7 +- docs/_generator/README.md | 6 +- docs/_generator/build_reference.py | 247 +++++++++++++++++++++++++++-- docs/reference/index.md | 7 +- 4 files changed, 245 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index fac724f4..6e0c7d9f 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,10 @@ docs-generate-reference: docs-reference-smoke: rm -rf /tmp/policyengine-reference-smoke uv run --extra us python docs/_generator/build_reference.py --country us --filter chip --out /tmp/policyengine-reference-smoke/us - quarto render /tmp/policyengine-reference-smoke/us/index.qmd --output-dir /tmp/policyengine-reference-smoke/rendered - quarto render /tmp/policyengine-reference-smoke/us/programs.qmd --output-dir /tmp/policyengine-reference-smoke/rendered - quarto render $$(find /tmp/policyengine-reference-smoke/us -type f -name "*.qmd" ! -name "index.qmd" ! -name "programs.qmd" | head -n 1) --output-dir /tmp/policyengine-reference-smoke/rendered + quarto render /tmp/policyengine-reference-smoke/us/index.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/root + quarto render /tmp/policyengine-reference-smoke/us/programs.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/program-index + quarto render /tmp/policyengine-reference-smoke/us/programs/chip.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/program + quarto render /tmp/policyengine-reference-smoke/us/gov/hhs/chip/chip.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/variable docs-serve: quarto preview docs diff --git a/docs/_generator/README.md b/docs/_generator/README.md index ef5c7268..02578a15 100644 --- a/docs/_generator/README.md +++ b/docs/_generator/README.md @@ -1,6 +1,6 @@ # Reference generator prototype -Auto-generates one Quarto page per variable in a country model, plus a program-coverage page, purely from metadata on the `Variable` classes and `programs.yaml`. +Auto-generates one Quarto page per variable in a country model, plus a program coverage index and one page per program, purely from metadata on the `Variable` classes and `programs.yaml`. ## Run @@ -29,7 +29,7 @@ Per variable: - Statutory references (from `reference = ...`) - Source file path and line number -Per program: a row in the generated program-coverage page pulled from `programs.yaml` (id, name, category, agency, status, coverage). +Per program: a row in the generated program coverage index pulled from `programs.yaml` (name, category, agency, status, coverage, root variable), plus a generated program page with metadata, notes, and links to implementation variables. Per directory (`gov/hhs/chip/`, `gov/usda/snap/`, etc.): a listing page using Quarto's built-in directory listing so the nav auto-organizes. @@ -49,4 +49,4 @@ Extensions worth considering: 1. Walk `parameters/` YAML tree and emit a page per parameter with its time series, breakdowns, and references. 2. For each variable with a formula, surface the dependency graph (other variables / parameters it reads). `policyengine_core`'s `Variable.exhaustive_parameter_dependencies` gets partway there. 3. For each calibration target (in `policyengine-us-data/storage/calibration_targets/*.csv`), emit a page describing source, aggregation level, freshness. -4. Cross-link variables to the programs they contribute to via `programs.yaml`'s `variable:` field. +4. Add reverse links from variable pages back to the programs that use them. diff --git a/docs/_generator/build_reference.py b/docs/_generator/build_reference.py index 490420cd..687aebe0 100644 --- a/docs/_generator/build_reference.py +++ b/docs/_generator/build_reference.py @@ -47,6 +47,7 @@ import argparse import importlib import logging +import os import re import textwrap from dataclasses import dataclass @@ -85,6 +86,10 @@ class VariableRecord: tree_path: tuple[str, ...] +def _variable_page_path(record: VariableRecord, out_root: Path) -> Path: + return out_root.joinpath(*record.tree_path) / f"{_slug(record.name)}.qmd" + + def _tree_path_from_source( source_file: Path | None, package_root: Path ) -> tuple[str, ...]: @@ -255,6 +260,16 @@ def _slug(value: str) -> str: return re.sub(r"[^A-Za-z0-9_-]+", "-", value).strip("-") +def _relative_link(source: Path, target: Path) -> str: + return os.path.relpath(target, start=source.parent).replace(os.sep, "/") + + +def _table_cell(value: object) -> str: + if value is None: + return "" + return str(value).replace("\n", " ").replace("|", "\\|") + + def _write_variables( records: list[VariableRecord], out_root: Path, @@ -262,9 +277,9 @@ def _write_variables( ) -> int: written = 0 for record in records: - tree_dir = out_root.joinpath(*record.tree_path) + page_path = _variable_page_path(record, out_root) + tree_dir = page_path.parent tree_dir.mkdir(parents=True, exist_ok=True) - page_path = tree_dir / f"{_slug(record.name)}.qmd" page_path.write_text(_render_variable_page(record, country)) written += 1 return written @@ -295,36 +310,240 @@ def _write_tree_indices(out_root: Path) -> int: return written -def _write_programs_index(country: str, out_root: Path) -> int: +def _load_programs(country: str) -> list[dict]: module_name = COUNTRY_MODULES[country] country_module = importlib.import_module(module_name) package_root = Path(country_module.__file__).parent programs_path = package_root / "programs.yaml" if not programs_path.exists(): - return 0 + return [] with programs_path.open() as f: registry = yaml.safe_load(f) - programs = registry.get("programs", []) + return registry.get("programs", []) + + +def _program_page_path(program: dict, out_root: Path) -> Path: + identifier = program.get("id") or program.get("name") or "program" + return out_root / "programs" / f"{_slug(str(identifier))}.qmd" + + +def _program_title(program: dict) -> str: + return str(program.get("full_name") or program.get("name") or program.get("id")) + + +def _program_variable_records( + program: dict, + records: list[VariableRecord], +) -> list[VariableRecord]: + root_variable = program.get("variable") + parameter_prefix = program.get("parameter_prefix") + prefix_parts = ( + tuple(str(parameter_prefix).replace("/", ".").split(".")) + if parameter_prefix + else () + ) + selected: list[VariableRecord] = [] + for record in records: + if root_variable and record.name == root_variable: + selected.append(record) + continue + if prefix_parts and record.tree_path[: len(prefix_parts)] == prefix_parts: + selected.append(record) + + return sorted( + selected, + key=lambda record: ( + 0 if root_variable and record.name == root_variable else 1, + "/".join(record.tree_path), + record.name, + ), + ) + + +def _render_program_variable_link( + record: VariableRecord, + record_pages: dict[str, Path], + page_path: Path, +) -> str: + target = record_pages.get(record.name) + if target is None: + return f"`{record.name}`" + return f"[`{record.name}`]({_relative_link(page_path, target)})" + + +def _render_program_page( + program: dict, + records: list[VariableRecord], + record_pages: dict[str, Path], + out_root: Path, +) -> str: + page_path = _program_page_path(program, out_root) + title = _program_title(program) + identifier = str(program.get("id") or "") + lines: list[str] = [ + "---", + f'title: "{_escape_yaml_scalar(title)}"', + ] + if identifier: + lines.append(f'subtitle: "`{_escape_yaml_scalar(identifier)}`"') + lines.extend(["---", ""]) + + root_variable = program.get("variable") + if root_variable and root_variable in record_pages: + root_value = ( + f"[`{root_variable}`]" + f"({_relative_link(page_path, record_pages[str(root_variable)])})" + ) + elif root_variable: + root_value = f"`{root_variable}`" + else: + root_value = "" + + verified_start_year = program.get("verified_start_year") + verified_end_year = program.get("verified_end_year") + if verified_start_year and verified_end_year: + verified = f"{verified_start_year}-{verified_end_year}" + elif verified_start_year: + verified = f"{verified_start_year}+" + elif verified_end_year: + verified = f"through {verified_end_year}" + else: + verified = "" + + metadata = [ + ("Program ID", f"`{identifier}`" if identifier else ""), + ("Category", program.get("category")), + ("Agency", program.get("agency")), + ("Status", program.get("status")), + ("Coverage", program.get("coverage")), + ( + "State variation", + "Yes" if program.get("has_state_variation") else "No", + ), + ("Verification years", verified), + ( + "Parameter prefix", + f"`{program.get('parameter_prefix')}`" + if program.get("parameter_prefix") + else "", + ), + ("Root variable", root_value), + ] + lines.append("| Field | Value |") + lines.append("|---|---|") + for key, value in metadata: + lines.append(f"| {key} | {_table_cell(value)} |") + lines.append("") + + if program.get("notes"): + lines.append("## Notes") + lines.append("") + lines.append(str(program["notes"])) + lines.append("") + + program_records = _program_variable_records(program, records) + lines.append("## Implementation variables") + lines.append("") + if program_records: + lines.append("| Variable | Label | Entity | Period |") + lines.append("|---|---|---|---|") + for record in program_records: + lines.append( + "| " + + " | ".join( + [ + _render_program_variable_link(record, record_pages, page_path), + _table_cell(record.label), + f"`{record.entity}`" if record.entity else "", + f"`{record.definition_period}`" + if record.definition_period + else "", + ] + ) + + " |" + ) + lines.append("") + else: + lines.append( + "No implementation variables were emitted for this program in this " + "reference run." + ) + lines.append("") + + return "\n".join(lines) + + +def _write_program_pages( + programs: list[dict], + records: list[VariableRecord], + out_root: Path, +) -> int: + if not programs: + return 0 + record_pages = { + record.name: _variable_page_path(record, out_root) for record in records + } + program_dir = out_root / "programs" + program_dir.mkdir(parents=True, exist_ok=True) + for program in programs: + page_path = _program_page_path(program, out_root) + page_path.write_text( + _render_program_page(program, records, record_pages, out_root) + ) + return len(programs) + + +def _write_programs_index( + programs: list[dict], + records: list[VariableRecord], + out_root: Path, +) -> int: + if not programs: + return 0 + record_pages = { + record.name: _variable_page_path(record, out_root) for record in records + } + programs_index_path = out_root / "programs.qmd" lines: list[str] = [ "---", 'title: "Program coverage"', 'description: "Programs modeled in the country model, generated from programs.yaml."', "---", "", - "| ID | Name | Category | Agency | Status | Coverage |", + "| Program | Category | Agency | Status | Coverage | Root variable |", "|---|---|---|---|---|---|", ] for program in programs: + page_path = _program_page_path(program, out_root) + program_link = ( + f"[{_program_title(program)}]" + f"({_relative_link(programs_index_path, page_path)})" + ) + root_variable = program.get("variable") + if root_variable and root_variable in record_pages: + root_value = ( + f"[`{root_variable}`]" + f"({_relative_link(programs_index_path, record_pages[str(root_variable)])})" + ) + elif root_variable: + root_value = f"`{root_variable}`" + else: + root_value = "" lines.append( "| " + " | ".join( - str(program.get(field, "")).replace("\n", " ") - for field in ("id", "name", "category", "agency", "status", "coverage") + [ + _table_cell(program_link), + _table_cell(program.get("category")), + _table_cell(program.get("agency")), + _table_cell(program.get("status")), + _table_cell(program.get("coverage")), + _table_cell(root_value), + ] ) + " |" ) - target = out_root / "programs.qmd" - target.write_text("\n".join(lines) + "\n") + programs_index_path.write_text("\n".join(lines) + "\n") return 1 @@ -344,11 +563,13 @@ def build_reference( or needle in " ".join(str(p).lower() for p in r.tree_path) ] variables_written = _write_variables(records, out_root, country) - programs_written = _write_programs_index(country, out_root) + programs = _load_programs(country) + program_pages_written = _write_program_pages(programs, records, out_root) + programs_index_written = _write_programs_index(programs, records, out_root) indices_written = _write_tree_indices(out_root) return { "variables": variables_written, - "programs": programs_written, + "programs": program_pages_written + programs_index_written, "indices": indices_written, } @@ -380,7 +601,7 @@ def main() -> None: args = _parse_args() stats = build_reference(args.country, args.out, args.filter) logger.info( - "Wrote %d variable pages, %d programs page, %d directory indices to %s", + "Wrote %d variable pages, %d program pages, %d directory indices to %s", stats["variables"], stats["programs"], stats["indices"], diff --git a/docs/reference/index.md b/docs/reference/index.md index 35b5e67b..7699176d 100644 --- a/docs/reference/index.md +++ b/docs/reference/index.md @@ -14,7 +14,8 @@ The variable reference generator already reads the installed country model and c - `adds` and `subtracts` relationships - statutory references where the country model provides them - source file path and line number -- a program coverage page from `programs.yaml` +- a program coverage index from `programs.yaml` +- one page per program with links to implementation variables ## Generate locally @@ -32,7 +33,7 @@ For a fast smoke test: make docs-reference-smoke ``` -The smoke test generates a CHIP-filtered US reference into `/tmp` and renders it with Quarto. CI runs this target so changes to the generator fail early without checking thousands of generated pages into the repository. +The smoke test generates a CHIP-filtered US reference into `/tmp` and renders the root index, program index, one generated program page, and one generated variable page with Quarto. CI runs this target so changes to the generator fail early without checking thousands of generated pages into the repository. ## Next generator layers @@ -41,7 +42,7 @@ The current generator is only the first layer. The same pattern should extend to | Layer | Source | |---|---| | Parameters | country model parameter YAML | -| Program metadata | `programs.yaml` | +| Program metadata | `programs.yaml`, linked to generated variable pages | | Data lineage | country data package build metadata | | Calibration targets | country data package target files and validation artifacts |