Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/pr_docs_changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,15 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v5
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Set up Quarto
uses: quarto-dev/quarto-actions/setup@v2
- name: Test reference generator smoke build
run: make docs-reference-smoke
- name: Test documentation builds
run: quarto render docs
run: make docs
13 changes: 12 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
.PHONY: docs docs-serve
.PHONY: docs docs-serve docs-generate-reference docs-reference-smoke

all: build-package

docs:
quarto render docs

# Generate the US reference pages with the reference generator script,
# writing them under docs/_generated/reference/us.
docs-generate-reference:
uv run --extra us python docs/_generator/build_reference.py --country us --out docs/_generated/reference/us

# Smoke test for the reference generator: start from a clean scratch directory,
# generate only the pages matching the "chip" filter, then render one page of
# each kind (root index, program index, program page, variable page) with Quarto.
docs-reference-smoke:
rm -rf /tmp/policyengine-reference-smoke
uv run --extra us python docs/_generator/build_reference.py --country us --filter chip --out /tmp/policyengine-reference-smoke/us
quarto render /tmp/policyengine-reference-smoke/us/index.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/root
quarto render /tmp/policyengine-reference-smoke/us/programs.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/program-index
quarto render /tmp/policyengine-reference-smoke/us/programs/chip.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/program
quarto render /tmp/policyengine-reference-smoke/us/gov/hhs/chip/chip.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/variable

docs-serve:
quarto preview docs

Expand Down
3 changes: 3 additions & 0 deletions docs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ _site
_freeze
/.quarto/
**/*.quarto_ipynb

# Generated reference output can be rebuilt from installed country models.
_generated/
6 changes: 3 additions & 3 deletions docs/_generator/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Reference generator prototype

Auto-generates one Quarto page per variable in a country model, plus a program-coverage page, purely from metadata on the `Variable` classes and `programs.yaml`.
Auto-generates one Quarto page per variable in a country model, plus a program coverage index and one page per program, purely from metadata on the `Variable` classes and `programs.yaml`.

## Run

Expand Down Expand Up @@ -29,7 +29,7 @@ Per variable:
- Statutory references (from `reference = ...`)
- Source file path and line number

Per program: a row in the generated program-coverage page pulled from `programs.yaml` (id, name, category, agency, status, coverage).
Per program: a row in the generated program coverage index pulled from `programs.yaml` (name, category, agency, status, coverage, root variable), plus a generated program page with metadata, notes, and links to implementation variables.

Per directory (`gov/hhs/chip/`, `gov/usda/snap/`, etc.): a listing page using Quarto's built-in directory listing so the nav auto-organizes.

Expand All @@ -49,4 +49,4 @@ Extensions worth considering:
1. Walk `parameters/` YAML tree and emit a page per parameter with its time series, breakdowns, and references.
2. For each variable with a formula, surface the dependency graph (other variables / parameters it reads). `policyengine_core`'s `Variable.exhaustive_parameter_dependencies` gets partway there.
3. For each calibration target (in `policyengine-us-data/storage/calibration_targets/*.csv`), emit a page describing source, aggregation level, freshness.
4. Cross-link variables to the programs they contribute to via `programs.yaml`'s `variable:` field.
4. Add reverse links from variable pages back to the programs that use them.
247 changes: 234 additions & 13 deletions docs/_generator/build_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import argparse
import importlib
import logging
import os
import re
import textwrap
from dataclasses import dataclass
Expand Down Expand Up @@ -85,6 +86,10 @@ class VariableRecord:
tree_path: tuple[str, ...]


def _variable_page_path(record: VariableRecord, out_root: Path) -> Path:
    """Return the ``.qmd`` page path for ``record`` beneath ``out_root``.

    The page sits in the directory obtained by joining ``record.tree_path``
    onto ``out_root`` and is named after the slugified variable name.
    """
    page_dir = out_root.joinpath(*record.tree_path)
    file_name = f"{_slug(record.name)}.qmd"
    return page_dir / file_name


def _tree_path_from_source(
source_file: Path | None, package_root: Path
) -> tuple[str, ...]:
Expand Down Expand Up @@ -255,16 +260,26 @@ def _slug(value: str) -> str:
return re.sub(r"[^A-Za-z0-9_-]+", "-", value).strip("-")


def _relative_link(source: Path, target: Path) -> str:
return os.path.relpath(target, start=source.parent).replace(os.sep, "/")


def _table_cell(value: object) -> str:
if value is None:
return ""
return str(value).replace("\n", " ").replace("|", "\\|")


def _write_variables(
records: list[VariableRecord],
out_root: Path,
country: str,
) -> int:
written = 0
for record in records:
tree_dir = out_root.joinpath(*record.tree_path)
page_path = _variable_page_path(record, out_root)
tree_dir = page_path.parent
tree_dir.mkdir(parents=True, exist_ok=True)
page_path = tree_dir / f"{_slug(record.name)}.qmd"
page_path.write_text(_render_variable_page(record, country))
written += 1
return written
Expand Down Expand Up @@ -295,36 +310,240 @@ def _write_tree_indices(out_root: Path) -> int:
return written


def _write_programs_index(country: str, out_root: Path) -> int:
def _load_programs(country: str) -> list[dict]:
    """Load the program entries from the country package's ``programs.yaml``.

    Args:
        country: Key into ``COUNTRY_MODULES`` naming the country model.

    Returns:
        The value of the top-level ``programs`` key, or an empty list when
        the file is missing, empty, not a mapping, or has no programs.

    Raises:
        KeyError: If ``country`` is not a key of ``COUNTRY_MODULES``.
    """
    module_name = COUNTRY_MODULES[country]
    country_module = importlib.import_module(module_name)
    package_root = Path(country_module.__file__).parent
    programs_path = package_root / "programs.yaml"
    if not programs_path.exists():
        return []
    with programs_path.open(encoding="utf-8") as f:
        registry = yaml.safe_load(f)
    # ``safe_load`` returns None for an empty document, and a bare
    # ``programs:`` key also loads as None; always hand callers a list.
    if not isinstance(registry, dict):
        return []
    return registry.get("programs") or []


def _program_page_path(program: dict, out_root: Path) -> Path:
    """Return the ``.qmd`` path of ``program``'s page under ``out_root/programs``.

    The file name is the slug of the program's ``id``, falling back to its
    ``name`` and finally to the literal ``"program"``.
    """
    candidates = (program.get("id"), program.get("name"))
    identifier = next((value for value in candidates if value), "program")
    file_name = f"{_slug(str(identifier))}.qmd"
    return out_root.joinpath("programs", file_name)


def _program_title(program: dict) -> str:
return str(program.get("full_name") or program.get("name") or program.get("id"))


def _program_variable_records(
program: dict,
records: list[VariableRecord],
) -> list[VariableRecord]:
root_variable = program.get("variable")
parameter_prefix = program.get("parameter_prefix")
prefix_parts = (
tuple(str(parameter_prefix).replace("/", ".").split("."))
if parameter_prefix
else ()
)
selected: list[VariableRecord] = []
for record in records:
if root_variable and record.name == root_variable:
selected.append(record)
continue
if prefix_parts and record.tree_path[: len(prefix_parts)] == prefix_parts:
selected.append(record)

return sorted(
selected,
key=lambda record: (
0 if root_variable and record.name == root_variable else 1,
"/".join(record.tree_path),
record.name,
),
)


def _render_program_variable_link(
record: VariableRecord,
record_pages: dict[str, Path],
page_path: Path,
) -> str:
target = record_pages.get(record.name)
if target is None:
return f"`{record.name}`"
return f"[`{record.name}`]({_relative_link(page_path, target)})"


def _render_program_page(
    program: dict,
    records: list[VariableRecord],
    record_pages: dict[str, Path],
    out_root: Path,
) -> str:
    """Render the Quarto source for a single program page.

    The page consists of YAML front matter (title, plus a subtitle when the
    program has an ``id``), a two-column metadata table, an optional "Notes"
    section, and an "Implementation variables" section listing the records
    selected by ``_program_variable_records``.

    Args:
        program: One entry of the programs registry.
        records: All variable records emitted in this run.
        record_pages: Mapping from variable name to its page path.
        out_root: Root directory of the generated reference tree.

    Returns:
        The page text, with lines joined by ``"\\n"``.
    """
    page_path = _program_page_path(program, out_root)
    identifier = str(program.get("id") or "")

    # --- front matter -----------------------------------------------------
    out: list[str] = [
        "---",
        f'title: "{_escape_yaml_scalar(_program_title(program))}"',
    ]
    if identifier:
        out.append(f'subtitle: "`{_escape_yaml_scalar(identifier)}`"')
    out += ["---", ""]

    # --- root variable cell: linked when the variable has a page ----------
    root_variable = program.get("variable")
    if not root_variable:
        root_cell = ""
    elif root_variable in record_pages:
        href = _relative_link(page_path, record_pages[str(root_variable)])
        root_cell = f"[`{root_variable}`]({href})"
    else:
        root_cell = f"`{root_variable}`"

    # --- verification year range -------------------------------------------
    first_year = program.get("verified_start_year")
    last_year = program.get("verified_end_year")
    if first_year:
        verified = f"{first_year}-{last_year}" if last_year else f"{first_year}+"
    else:
        verified = f"through {last_year}" if last_year else ""

    # --- metadata table ----------------------------------------------------
    prefix = program.get("parameter_prefix")
    metadata = [
        ("Program ID", f"`{identifier}`" if identifier else ""),
        ("Category", program.get("category")),
        ("Agency", program.get("agency")),
        ("Status", program.get("status")),
        ("Coverage", program.get("coverage")),
        ("State variation", "Yes" if program.get("has_state_variation") else "No"),
        ("Verification years", verified),
        ("Parameter prefix", f"`{prefix}`" if prefix else ""),
        ("Root variable", root_cell),
    ]
    out += ["| Field | Value |", "|---|---|"]
    out.extend(f"| {field} | {_table_cell(value)} |" for field, value in metadata)
    out.append("")

    # --- optional notes ----------------------------------------------------
    notes = program.get("notes")
    if notes:
        out += ["## Notes", "", str(notes), ""]

    # --- implementation variables -------------------------------------------
    program_records = _program_variable_records(program, records)
    out += ["## Implementation variables", ""]
    if not program_records:
        out.append(
            "No implementation variables were emitted for this program in this "
            "reference run."
        )
        out.append("")
        return "\n".join(out)

    out += ["| Variable | Label | Entity | Period |", "|---|---|---|---|"]
    for record in program_records:
        cells = [
            _render_program_variable_link(record, record_pages, page_path),
            _table_cell(record.label),
            f"`{record.entity}`" if record.entity else "",
            f"`{record.definition_period}`" if record.definition_period else "",
        ]
        out.append("| " + " | ".join(cells) + " |")
    out.append("")
    return "\n".join(out)


def _write_program_pages(
programs: list[dict],
records: list[VariableRecord],
out_root: Path,
) -> int:
if not programs:
return 0
record_pages = {
record.name: _variable_page_path(record, out_root) for record in records
}
program_dir = out_root / "programs"
program_dir.mkdir(parents=True, exist_ok=True)
for program in programs:
page_path = _program_page_path(program, out_root)
page_path.write_text(
_render_program_page(program, records, record_pages, out_root)
)
return len(programs)


def _write_programs_index(
programs: list[dict],
records: list[VariableRecord],
out_root: Path,
) -> int:
if not programs:
return 0
record_pages = {
record.name: _variable_page_path(record, out_root) for record in records
}
programs_index_path = out_root / "programs.qmd"
lines: list[str] = [
"---",
'title: "Program coverage"',
'description: "Programs modeled in the country model, generated from programs.yaml."',
"---",
"",
"| ID | Name | Category | Agency | Status | Coverage |",
"| Program | Category | Agency | Status | Coverage | Root variable |",
"|---|---|---|---|---|---|",
]
for program in programs:
page_path = _program_page_path(program, out_root)
program_link = (
f"[{_program_title(program)}]"
f"({_relative_link(programs_index_path, page_path)})"
)
root_variable = program.get("variable")
if root_variable and root_variable in record_pages:
root_value = (
f"[`{root_variable}`]"
f"({_relative_link(programs_index_path, record_pages[str(root_variable)])})"
)
elif root_variable:
root_value = f"`{root_variable}`"
else:
root_value = ""
lines.append(
"| "
+ " | ".join(
str(program.get(field, "")).replace("\n", " ")
for field in ("id", "name", "category", "agency", "status", "coverage")
[
_table_cell(program_link),
_table_cell(program.get("category")),
_table_cell(program.get("agency")),
_table_cell(program.get("status")),
_table_cell(program.get("coverage")),
_table_cell(root_value),
]
)
+ " |"
)
target = out_root / "programs.qmd"
target.write_text("\n".join(lines) + "\n")
programs_index_path.write_text("\n".join(lines) + "\n")
return 1


Expand All @@ -344,11 +563,13 @@ def build_reference(
or needle in " ".join(str(p).lower() for p in r.tree_path)
]
variables_written = _write_variables(records, out_root, country)
programs_written = _write_programs_index(country, out_root)
programs = _load_programs(country)
program_pages_written = _write_program_pages(programs, records, out_root)
programs_index_written = _write_programs_index(programs, records, out_root)
indices_written = _write_tree_indices(out_root)
return {
"variables": variables_written,
"programs": programs_written,
"programs": program_pages_written + programs_index_written,
"indices": indices_written,
}

Expand Down Expand Up @@ -380,7 +601,7 @@ def main() -> None:
args = _parse_args()
stats = build_reference(args.country, args.out, args.filter)
logger.info(
"Wrote %d variable pages, %d programs page, %d directory indices to %s",
"Wrote %d variable pages, %d program pages, %d directory indices to %s",
stats["variables"],
stats["programs"],
stats["indices"],
Expand Down
1 change: 1 addition & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ website:
- programs/us-chip.md
- section: "Reference"
contents:
- reference/index.md
- countries.md
- release-bundles.md
- data-publishing-design.md
Expand Down
Loading
Loading