diff --git a/README.md b/README.md
index 02d8143..bd03881 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,25 @@ built on top of the generic `microplex` engine.
- [PE construction parity](./docs/pe-construction-parity.md)
- [Superseding `policyengine-us-data`](./docs/superseding-policyengine-us-data.md)
+## Diagnostics dashboard
+
+The static dashboard in `dashboard/` loads the full PE-native per-target
+diagnostic JSON written by:
+
+```bash
+microplex-us-pe-native-target-diagnostics \
+ --from-dataset /path/to/enhanced_cps_2024.h5 \
+ --to-dataset /path/to/policyengine_us.h5 \
+ --policyengine-targets-db /path/to/policy_data.db \
+ --output-path artifacts/pe_native_target_diagnostics_current.json
+```
+
+The dashboard uses the exported Cosilico design tokens from
+`@cosilico/config/theme.css`; run `python scripts/sync_cosilico_theme.py --check`
+to verify the local browser-readable token copy is still synced.
+When a PolicyEngine target DB is available, the JSON annotates PE-native legacy
+labels with structured target IDs and flags legacy-only gaps.
+
## Current focus
`microplex-us` is being built as a library-first US runtime with
diff --git a/dashboard/app.js b/dashboard/app.js
new file mode 100644
index 0000000..668fb22
--- /dev/null
+++ b/dashboard/app.js
@@ -0,0 +1,506 @@
"use strict";

// Relative URL of the diagnostics artifact the dashboard auto-loads on startup.
const DEFAULT_DATA_URL = "../artifacts/pe_native_target_diagnostics_current.json";
// Maximum number of rows rendered in the full diagnostic table.
const TABLE_LIMIT = 500;

// Mutable UI state: the loaded payload plus the active filter/sort selections.
const state = {
  data: null,
  search: "",
  family: "all",
  scope: "all",
  winner: "all",
  dbMatch: "all",
  sort: "weighted_term_delta:asc",
};

// Cached references to every DOM node the renderers touch.
const el = {
  dashboard: document.getElementById("dashboard"),
  emptyState: document.getElementById("emptyState"),
  fileInput: document.getElementById("fileInput"),
  loadStatus: document.getElementById("loadStatus"),
  kpiTargets: document.getElementById("kpiTargets"),
  kpiToWinLabel: document.getElementById("kpiToWinLabel"),
  kpiWinRate: document.getElementById("kpiWinRate"),
  kpiLossDelta: document.getElementById("kpiLossDelta"),
  kpiLossPair: document.getElementById("kpiLossPair"),
  kpiDbMatch: document.getElementById("kpiDbMatch"),
  kpiDbDetail: document.getElementById("kpiDbDetail"),
  scopeSummary: document.getElementById("scopeSummary"),
  familySummary: document.getElementById("familySummary"),
  topImprovements: document.getElementById("topImprovements"),
  topRegressions: document.getElementById("topRegressions"),
  tableCount: document.getElementById("tableCount"),
  searchInput: document.getElementById("searchInput"),
  familyFilter: document.getElementById("familyFilter"),
  scopeFilter: document.getElementById("scopeFilter"),
  winnerFilter: document.getElementById("winnerFilter"),
  dbFilter: document.getElementById("dbFilter"),
  sortSelect: document.getElementById("sortSelect"),
  targetTable: document.getElementById("targetTable"),
};
+
/**
 * Resolve display labels for the two compared datasets, falling back to
 * "baseline"/"candidate" when the payload carries none.
 * @returns {{from: string, to: string}}
 */
function labels() {
  const datasetLabels = state.data?.dataset_labels || {};
  const from = datasetLabels.from || "baseline";
  const to = datasetLabels.to || "candidate";
  return { from, to };
}
+
/**
 * Coerce a value to a finite number.
 * @param {*} value - anything `Number()` accepts
 * @returns {number|null} the finite number, or null when not finite
 */
function numberOrNull(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  return parsed;
}
+
/**
 * Format a value for display: exponential notation for very large
 * (>= 1e6) or very small nonzero (< 0.001) magnitudes, en-US grouping
 * otherwise, "-" when not finite.
 * @param {*} value
 * @param {{maximumFractionDigits?: number, minimumFractionDigits?: number}} [options]
 * @returns {string}
 */
function formatNumber(value, options = {}) {
  const numeric = numberOrNull(value);
  if (numeric === null) {
    return "-";
  }
  const magnitude = Math.abs(numeric);
  const useExponential =
    magnitude >= 1_000_000 || (magnitude > 0 && magnitude < 0.001);
  if (useExponential) {
    return numeric.toExponential(2);
  }
  const formatter = new Intl.NumberFormat("en-US", {
    maximumFractionDigits: options.maximumFractionDigits ?? 3,
    minimumFractionDigits: options.minimumFractionDigits ?? 0,
  });
  return formatter.format(numeric);
}
+
/**
 * Format a value using compact en-US notation (e.g. "1.2M"), or "-"
 * when not finite.
 * @param {*} value
 * @returns {string}
 */
function formatCompact(value) {
  const numeric = numberOrNull(value);
  if (numeric === null) {
    return "-";
  }
  const formatter = new Intl.NumberFormat("en-US", {
    notation: "compact",
    maximumFractionDigits: 2,
  });
  return formatter.format(numeric);
}
+
/**
 * Format a fraction as an en-US percentage (0.25 -> "25%"), or "-" when
 * not finite.
 * @param {*} value
 * @returns {string}
 */
function formatPercent(value) {
  const numeric = numberOrNull(value);
  if (numeric === null) {
    return "-";
  }
  const formatter = new Intl.NumberFormat("en-US", {
    style: "percent",
    maximumFractionDigits: 1,
  });
  return formatter.format(numeric);
}
+
/**
 * Format a number with an explicit leading "+" for positive values,
 * or "-" when not finite.
 * @param {*} value
 * @returns {string}
 */
function formatSigned(value) {
  const numeric = numberOrNull(value);
  if (numeric === null) {
    return "-";
  }
  const formatted = formatNumber(numeric, { maximumFractionDigits: 4 });
  return numeric > 0 ? `+${formatted}` : formatted;
}
+
/**
 * Format an already-percentage error value with a "%" suffix, or "-"
 * when not finite.
 * @param {*} value
 * @returns {string}
 */
function formatError(value) {
  const numeric = numberOrNull(value);
  if (numeric === null) {
    return "-";
  }
  return `${formatNumber(numeric, { maximumFractionDigits: 2 })}%`;
}
+
/**
 * Map a loss delta to a CSS class: negative deltas (improvements) are
 * "good", positive ones "bad", zero/unparseable values get no class.
 * @param {*} value
 * @returns {string}
 */
function classForDelta(value) {
  const numeric = numberOrNull(value) || 0;
  if (numeric === 0) {
    return "";
  }
  return numeric < 0 ? "good" : "bad";
}
+
/**
 * Translate a winner code ("to"/"from"/anything else) into the
 * corresponding dataset display label, or "tie".
 * @param {string} winner
 * @returns {string}
 */
function winnerLabel(winner) {
  const currentLabels = labels();
  switch (winner) {
    case "to":
      return currentLabels.to;
    case "from":
      return currentLabels.from;
    default:
      return "tie";
  }
}
+
/**
 * Human-readable label for a row's PolicyEngine target DB match status.
 * Matched rows show the structured target ID when present.
 * @param {Object} row - a per-target diagnostic row
 * @returns {string}
 */
function dbMatchLabel(row) {
  const status = row.policyengine_target_match || "unparsed";
  switch (status) {
    case "matched":
      return row.policyengine_target_id
        ? `#${row.policyengine_target_id}`
        : "matched";
    case "legacy_only":
      return "legacy only";
    case "db_unavailable":
      return "db unavailable";
    default:
      return status.replaceAll("_", " ");
  }
}
+
/**
 * Aggregate per-target diagnostic rows into summary statistics:
 * win counts/rates per dataset, mean weighted-loss terms, and the
 * candidate-minus-baseline loss delta.
 * @param {Array<Object>} rows - per-target diagnostic rows
 * @returns {Object}
 */
function summarizeRows(rows) {
  const nTargets = rows.length;
  let fromWins = 0;
  let toWins = 0;
  for (const row of rows) {
    if (row.winner === "from") {
      fromWins += 1;
    } else if (row.winner === "to") {
      toWins += 1;
    }
  }
  const ties = nTargets - fromWins - toWins;
  const fromLoss = mean(rows.map((row) => row.from_weighted_term));
  const toLoss = mean(rows.map((row) => row.to_weighted_term));
  // Bug fix: `toLoss - fromLoss` coerced a null mean to 0, reporting a
  // spurious delta whenever either side had no finite loss terms.
  const lossDelta =
    fromLoss === null || toLoss === null ? null : toLoss - fromLoss;
  return {
    n_targets: nTargets,
    from_wins: fromWins,
    to_wins: toWins,
    ties,
    from_win_rate: nTargets ? fromWins / nTargets : null,
    to_win_rate: nTargets ? toWins / nTargets : null,
    from_loss: fromLoss,
    to_loss: toLoss,
    loss_delta: lossDelta,
    mean_weighted_term_delta: mean(rows.map((row) => row.weighted_term_delta)),
  };
}
+
/**
 * Arithmetic mean of the finite numeric values in `values`.
 * Non-finite entries are skipped entirely.
 * @param {Array<*>} values
 * @returns {number|null} the mean, or null when no finite values exist
 */
function mean(values) {
  let total = 0;
  let count = 0;
  for (const value of values) {
    const numeric = Number(value);
    if (Number.isFinite(numeric)) {
      total += numeric;
      count += 1;
    }
  }
  return count === 0 ? null : total / count;
}
+
/**
 * Group rows by `field` (missing values bucket under "other") and
 * summarize each group via summarizeRows().
 * @param {Array<Object>} rows
 * @param {string} field - grouping key, e.g. "target_family"
 * @returns {Array<Object>} one summary object per group
 */
function groupSummary(rows, field) {
  const grouped = new Map();
  for (const row of rows) {
    const key = row[field] || "other";
    const bucket = grouped.get(key);
    if (bucket) {
      bucket.push(row);
    } else {
      grouped.set(key, [row]);
    }
  }
  const summaries = [];
  for (const [key, groupRows] of grouped) {
    summaries.push({ [field]: key, ...summarizeRows(groupRows) });
  }
  return summaries;
}
+
/**
 * Fill in any summary fields the artifact did not precompute so the
 * renderers can assume a complete payload shape.
 * @param {Object} payload - raw diagnostic JSON
 * @returns {Object} payload with summary/group/top-list fields guaranteed
 */
function normalizePayload(payload) {
  const rows = Array.isArray(payload.targets) ? payload.targets : [];
  const normalized = { ...payload };
  normalized.summary = payload.summary || summarizeRows(rows);
  normalized.family_summaries = Array.isArray(payload.family_summaries)
    ? payload.family_summaries
    : groupSummary(rows, "target_family");
  normalized.scope_summaries = Array.isArray(payload.scope_summaries)
    ? payload.scope_summaries
    : groupSummary(rows, "target_scope");
  // Most-negative deltas are the biggest improvements; most-positive the
  // biggest regressions.
  normalized.top_improvements = Array.isArray(payload.top_improvements)
    ? payload.top_improvements
    : [...rows]
        .sort((a, b) => Number(a.weighted_term_delta) - Number(b.weighted_term_delta))
        .slice(0, 25);
  normalized.top_regressions = Array.isArray(payload.top_regressions)
    ? payload.top_regressions
    : [...rows]
        .sort((a, b) => Number(b.weighted_term_delta) - Number(a.weighted_term_delta))
        .slice(0, 25);
  return normalized;
}
+
/**
 * Install a new diagnostic payload, reveal the dashboard, and re-render.
 * @param {Object} payload - raw diagnostic JSON
 * @param {string} sourceLabel - status text describing where it came from
 */
function setData(payload, sourceLabel) {
  state.data = normalizePayload(payload);
  el.emptyState.hidden = true;
  el.dashboard.hidden = false;
  el.loadStatus.textContent = sourceLabel;
  populateFilters();
  render();
}
+
/**
 * Clear any loaded data and show the empty-state panel with a message.
 * @param {string} message - status text explaining why nothing is shown
 */
function showEmpty(message) {
  state.data = null;
  el.emptyState.hidden = false;
  el.dashboard.hidden = true;
  el.loadStatus.textContent = message;
}
+
/**
 * Try to fetch the default diagnostics artifact; on any failure fall
 * back to the empty state rather than surfacing the error.
 */
async function loadDefault() {
  // Cache-bust so a regenerated artifact is always re-fetched.
  const url = `${DEFAULT_DATA_URL}?v=${Date.now()}`;
  try {
    const response = await fetch(url, { cache: "no-store" });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const payload = await response.json();
    setData(payload, "Default artifact loaded");
  } catch (_error) {
    showEmpty("Default artifact unavailable");
  }
}
+
/**
 * Read a user-selected file as text and load it as diagnostic JSON,
 * showing the empty state on parse failure.
 * @param {File} file
 */
function loadFile(file) {
  const reader = new FileReader();
  reader.addEventListener("load", () => {
    const text = String(reader.result);
    try {
      setData(JSON.parse(text), file.name);
    } catch (error) {
      showEmpty(`Invalid JSON: ${error.message}`);
    }
  });
  reader.readAsText(file);
}
+
/**
 * Rebuild a <select> with an "all" option plus one option per value,
 * restoring the previous selection when it still exists.
 * @param {HTMLSelectElement} select
 * @param {string} label - display text for the "all" option
 * @param {Array<string>} values
 */
function populateSelect(select, label, values) {
  const previous = select.value || "all";
  const entries = [["all", label], ...values.map((value) => [value, value])];
  select.replaceChildren();
  for (const [value, text] of entries) {
    const option = document.createElement("option");
    option.value = value;
    option.textContent = text;
    select.append(option);
  }
  select.value = values.includes(previous) ? previous : "all";
}
+
/**
 * Rebuild all filter selects from the loaded dataset.
 *
 * Bug fix: populateSelect() resets a select to "all" when its previous
 * value no longer exists in the new dataset, and the winner select was
 * rebuilt from scratch, but `state` was never resynced. Loading a second
 * artifact could therefore leave `state` pointing at a filter value the
 * UI no longer shows, silently filtering out every row. We now restore
 * the winner selection (its values are fixed) and copy every select's
 * effective value back into `state`.
 */
function populateFilters() {
  const rows = state.data.targets || [];
  const families = [...new Set(rows.map((row) => row.target_family || "other"))].sort();
  const scopes = [...new Set(rows.map((row) => row.target_scope || "other"))].sort();
  const dbStatuses = [...new Set(rows.map((row) => row.policyengine_target_match || "unparsed"))].sort();
  populateSelect(el.familyFilter, "All families", families);
  populateSelect(el.scopeFilter, "All scopes", scopes);
  populateSelect(el.dbFilter, "All DB statuses", dbStatuses);

  const currentLabels = labels();
  const previousWinner = el.winnerFilter.value || "all";
  el.winnerFilter.replaceChildren();
  for (const [value, label] of [
    ["all", "All winners"],
    ["to", currentLabels.to],
    ["from", currentLabels.from],
    ["tie", "Ties"],
  ]) {
    const option = document.createElement("option");
    option.value = value;
    option.textContent = label;
    el.winnerFilter.append(option);
  }
  el.winnerFilter.value = previousWinner;

  // Keep state in sync with the (possibly reset) select values.
  state.family = el.familyFilter.value;
  state.scope = el.scopeFilter.value;
  state.dbMatch = el.dbFilter.value;
  state.winner = el.winnerFilter.value;
}
+
/**
 * Apply the active search/filter state to the loaded target rows and
 * sort the result per `state.sort` ("field:asc|desc").
 * Numeric fields sort numerically; otherwise falls back to a locale
 * string comparison.
 * @returns {Array<Object>} filtered, sorted copy of the rows
 */
function filteredRows() {
  const query = state.search.trim().toLowerCase();
  const rows = state.data?.targets || [];
  // Perf: parse the sort spec once instead of on every comparator call.
  const [field, direction] = state.sort.split(":");

  const matchesFilters = (row) => {
    if (state.family !== "all" && row.target_family !== state.family) {
      return false;
    }
    if (state.scope !== "all" && row.target_scope !== state.scope) {
      return false;
    }
    if (state.winner !== "all" && row.winner !== state.winner) {
      return false;
    }
    if (
      state.dbMatch !== "all" &&
      (row.policyengine_target_match || "unparsed") !== state.dbMatch
    ) {
      return false;
    }
    if (!query) {
      return true;
    }
    // Free-text search across the row's identifying fields.
    return [
      row.target_name,
      row.target_family,
      row.target_scope,
      row.policyengine_target_match,
      row.policyengine_target_id,
      row.policyengine_target_source,
      row.policyengine_target_domain_variable,
    ]
      .join(" ")
      .toLowerCase()
      .includes(query);
  };

  const compare = (a, b) => {
    const av = Number(a[field]);
    const bv = Number(b[field]);
    const result = Number.isFinite(av) && Number.isFinite(bv)
      ? av - bv
      : String(a[field] || "").localeCompare(String(b[field] || ""));
    return direction === "desc" ? -result : result;
  };

  return rows.filter(matchesFilters).sort(compare);
}
+
/**
 * Re-render every dashboard section from the current state.
 * No-op when no payload is loaded.
 */
function render() {
  const { data } = state;
  if (!data) {
    return;
  }
  renderKpis();
  renderSummaries();
  renderTargetList(el.topImprovements, data.top_improvements || [], true);
  renderTargetList(el.topRegressions, data.top_regressions || [], false);
  renderTable(filteredRows());
}
+
/**
 * Render the top KPI strip: target count, candidate win rate, loss
 * delta, loss pair, and target-DB match stats.
 */
function renderKpis() {
  const currentLabels = labels();
  const summary = state.data.summary || {};
  const dbSummary = state.data.target_db_summary || {};
  el.kpiTargets.textContent = formatNumber(summary.n_targets);
  el.kpiToWinLabel.textContent = `${currentLabels.to} Wins`;
  el.kpiWinRate.textContent = formatPercent(summary.to_win_rate);
  el.kpiLossDelta.textContent = formatSigned(summary.loss_delta);
  el.kpiLossDelta.className = classForDelta(summary.loss_delta);
  el.kpiLossPair.textContent = `${formatNumber(summary.from_loss)} -> ${formatNumber(summary.to_loss)}`;
  // Fall back to the raw matched count when no match rate is present.
  el.kpiDbMatch.textContent = dbSummary.match_rate == null
    ? formatNumber(dbSummary.matched)
    : formatPercent(dbSummary.match_rate);
  el.kpiDbDetail.textContent = `${formatNumber(dbSummary.matched)} matched / ${formatNumber(dbSummary.legacy_only)} legacy`;
}
+
/**
 * Render the scope (alphabetical) and family (best loss delta first)
 * summary panels.
 */
function renderSummaries() {
  const familyRows = [...(state.data.family_summaries || [])];
  familyRows.sort((a, b) => Number(a.loss_delta) - Number(b.loss_delta));
  const scopeRows = [...(state.data.scope_summaries || [])];
  scopeRows.sort((a, b) =>
    String(a.target_scope).localeCompare(String(b.target_scope)),
  );
  renderSummaryList(el.scopeSummary, scopeRows, "target_scope");
  renderSummaryList(el.familySummary, familyRows, "target_family");
}
+
/**
 * Render one summary panel: a row per group showing its name, target
 * count, win rate, and colored loss delta.
 * @param {HTMLElement} container
 * @param {Array<Object>} rows - summary objects
 * @param {string} field - key holding the group name
 */
function renderSummaryList(container, rows, field) {
  container.replaceChildren();
  for (const row of rows) {
    const name = document.createElement("div");
    name.className = "summary-name";
    name.textContent = row[field] || "other";

    const meta = document.createElement("div");
    meta.className = "summary-meta";
    meta.textContent = `${formatNumber(row.n_targets)} targets - ${formatPercent(row.to_win_rate)} wins`;

    const left = document.createElement("div");
    left.append(name, meta);

    const value = document.createElement("div");
    value.className = `summary-value ${classForDelta(row.loss_delta)}`;
    value.textContent = formatSigned(row.loss_delta);

    const wrapper = document.createElement("div");
    wrapper.className = "summary-row";
    wrapper.append(left, value);
    container.append(wrapper);
  }
}
+
/**
 * Render a top-improvements/regressions panel showing up to 12 rows.
 * @param {HTMLElement} container
 * @param {Array<Object>} rows - per-target diagnostic rows
 * @param {boolean} improvementList - true for the improvements panel
 */
function renderTargetList(container, rows, improvementList) {
  container.replaceChildren();
  for (const row of rows.slice(0, 12)) {
    const name = document.createElement("div");
    name.className = "target-name";
    name.textContent = row.target_name || "-";

    const meta = document.createElement("div");
    meta.className = "target-meta";
    meta.textContent = `${row.target_family || "other"} - ${winnerLabel(row.winner)} - ${dbMatchLabel(row)}`;

    const left = document.createElement("div");
    left.append(name, meta);

    const delta = document.createElement("div");
    delta.className = `delta ${classForDelta(row.weighted_term_delta)}`;
    delta.textContent = formatSigned(row.weighted_term_delta);
    // A positive delta in the improvements list is still a regression;
    // force "bad" styling even when classForDelta left it unclassified.
    if (improvementList && Number(row.weighted_term_delta) > 0) {
      delta.classList.add("bad");
    }

    const wrapper = document.createElement("div");
    wrapper.className = "target-row";
    wrapper.title = row.target_name || "";
    wrapper.append(left, delta);
    container.append(wrapper);
  }
}
+
/**
 * Render the full diagnostic table, capped at TABLE_LIMIT rows, with a
 * row-count caption reflecting any truncation.
 * @param {Array<Object>} rows - filtered, sorted diagnostic rows
 */
function renderTable(rows) {
  el.targetTable.replaceChildren();
  const visibleRows = rows.slice(0, TABLE_LIMIT);
  el.tableCount.textContent = rows.length > TABLE_LIMIT
    ? `${formatNumber(TABLE_LIMIT)} of ${formatNumber(rows.length)} rows`
    : `${formatNumber(rows.length)} rows`;

  // Build rows off-DOM and append once to avoid repeated reflows.
  const fragment = document.createDocumentFragment();
  for (const row of visibleRows) {
    const tr = document.createElement("tr");
    tr.title = row.target_name || "";
    const cells = [
      [row.target_name || "-", ""],
      [row.target_family || "other", ""],
      [row.target_scope || "other", ""],
      [winnerLabel(row.winner), `winner ${row.winner || "tie"}`],
      [formatSigned(row.weighted_term_delta), `mono ${classForDelta(row.weighted_term_delta)}`],
      [formatError(row.from_abs_pct_error), "mono"],
      [formatError(row.to_abs_pct_error), "mono"],
      [formatCompact(row.target_value), "mono"],
      [dbMatchLabel(row), `db-status ${row.policyengine_target_match || "unparsed"}`],
    ];
    for (const [text, className] of cells) {
      appendCell(tr, text, className);
    }
    fragment.append(tr);
  }
  el.targetTable.append(fragment);
}
+
/**
 * Append a <td> with the given text (and optional class) to a table row.
 * @param {HTMLTableRowElement} row
 * @param {string} text
 * @param {string} [className]
 */
function appendCell(row, text, className = "") {
  const cell = document.createElement("td");
  if (className) {
    cell.className = className;
  }
  cell.textContent = text;
  row.append(cell);
}
+
// Wire up the file picker: load the first selected file as diagnostic JSON.
el.fileInput.addEventListener("change", (event) => {
  const [file] = event.target.files || [];
  if (file) {
    loadFile(file);
  }
});

// Each control below mirrors its value into `state` and re-renders.
el.searchInput.addEventListener("input", (event) => {
  state.search = event.target.value;
  render();
});

el.familyFilter.addEventListener("change", (event) => {
  state.family = event.target.value;
  render();
});

el.scopeFilter.addEventListener("change", (event) => {
  state.scope = event.target.value;
  render();
});

el.winnerFilter.addEventListener("change", (event) => {
  state.winner = event.target.value;
  render();
});

el.dbFilter.addEventListener("change", (event) => {
  state.dbMatch = event.target.value;
  render();
});

el.sortSelect.addEventListener("change", (event) => {
  state.sort = event.target.value;
  render();
});

// Kick off the initial load of the default artifact.
loadDefault();
diff --git a/dashboard/cosilico-theme.css b/dashboard/cosilico-theme.css
new file mode 100644
index 0000000..5c501ce
--- /dev/null
+++ b/dashboard/cosilico-theme.css
@@ -0,0 +1,28 @@
+/* Generated from the exported Cosilico design tokens.
+ Source: cosilico.ai/packages/config/theme.css
+ Re-run: python scripts/sync_cosilico_theme.py
+*/
+:root {
+--color-void: #06070a;
+ --color-bg: #090b10;
+ --color-elevated: rgba(18, 19, 26, 0.9);
+ --color-card: rgba(20, 21, 30, 0.78);
+ --color-surface: rgba(27, 29, 40, 0.96);
+ --color-border: rgba(244, 239, 230, 0.13);
+ --color-border-subtle: rgba(244, 239, 230, 0.08);
+
+ --color-text: #f4efe6;
+ --color-text-secondary: #cbc3b8;
+ --color-text-muted: #928a7f;
+
+ --color-cyan: #7ce2cf;
+ --color-cyan-bright: #b9fff0;
+ --color-cyan-dim: #4aa391;
+ --color-cyan-ghost: rgba(124, 226, 207, 0.08);
+ --color-amber: #d5a565;
+ --color-green: #b0ef9f;
+ --color-coral: #ff8f6b;
+
+ --ease-out: cubic-bezier(0.16, 1, 0.3, 1);
+ --ease-spring: cubic-bezier(0.34, 1.56, 0.64, 1);
+}
diff --git a/dashboard/index.html b/dashboard/index.html
new file mode 100644
index 0000000..1a0dd00
--- /dev/null
+++ b/dashboard/index.html
@@ -0,0 +1,156 @@
+
+
+
+
+
+ Microplex US Diagnostics
+
+
+
+
+
+
+
+
+
+
+
+
Microplex US
+
Diagnostics
+
+
+
+
Loading default artifact
+
+
+
+
+ No diagnostic dataset loaded
+ Generate or load a PE-native target diagnostic JSON.
+ microplex-us-pe-native-target-diagnostics --output-path artifacts/pe_native_target_diagnostics_current.json
+
+
+
+
+
+ Targets
+ -
+
+
+ Candidate Wins
+ -
+
+
+ Mean Loss Delta
+ -
+
+
+ Target Loss
+ -
+
+
+ Target DB
+ -
+ -
+
+
+
+
+
+
+
+
+
+
+
+
Better
+
Top Improvements
+
+
+
+
+
+
+
+
+
Worse
+
Top Regressions
+
+
+
+
+
+
+
+
+
+
Targets
+
Full Diagnostic Dataset
+
+
0 rows
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ | Target |
+ Family |
+ Scope |
+ Winner |
+ Delta |
+ Baseline Error |
+ Candidate Error |
+ Target |
+ Target DB |
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dashboard/styles.css b/dashboard/styles.css
new file mode 100644
index 0000000..bdafc4f
--- /dev/null
+++ b/dashboard/styles.css
@@ -0,0 +1,525 @@
+* {
+ box-sizing: border-box;
+}
+
+:root {
+ --f-display: var(--font-display), Georgia, serif;
+ --f-body: var(--font-body), "Helvetica Neue", Arial, sans-serif;
+ --f-mono: var(--font-mono), "Fira Code", ui-monospace, SFMono-Regular, Menlo,
+ Consolas, monospace;
+}
+
+html {
+ min-height: 100%;
+ background: var(--color-void);
+ color: var(--color-text);
+ -webkit-font-smoothing: antialiased;
+ text-rendering: optimizeLegibility;
+}
+
+body {
+ min-height: 100vh;
+ margin: 0;
+ font-family: var(--f-body);
+ background:
+ radial-gradient(circle at top left, rgba(124, 226, 207, 0.1), transparent 28%),
+ radial-gradient(circle at 88% 8%, rgba(213, 165, 101, 0.1), transparent 24%),
+ linear-gradient(180deg, #07080d 0%, #090b10 48%, #06070a 100%);
+}
+
+button,
+input,
+select {
+ font: inherit;
+}
+
+code {
+ display: block;
+ width: 100%;
+ overflow-x: auto;
+ padding: 14px 16px;
+ border: 1px solid var(--color-border-subtle);
+ border-radius: 8px;
+ color: var(--color-cyan-bright);
+ background: rgba(6, 7, 10, 0.72);
+ font-family: var(--f-mono);
+ font-size: 12px;
+}
+
+.grid-bg,
+.noise {
+ position: fixed;
+ inset: 0;
+ pointer-events: none;
+}
+
+.grid-bg {
+ z-index: 0;
+ background-image:
+ linear-gradient(rgba(124, 226, 207, 0.03) 1px, transparent 1px),
+ linear-gradient(90deg, rgba(124, 226, 207, 0.03) 1px, transparent 1px);
+ background-size: 78px 78px;
+ mask-image: radial-gradient(
+ ellipse 74% 62% at 50% 0%,
+ black 0%,
+ transparent 100%
+ );
+}
+
+.noise {
+ z-index: 1;
+ opacity: 0.018;
+ background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E");
+}
+
+.shell {
+ position: relative;
+ z-index: 2;
+ width: min(1480px, calc(100% - 32px));
+ margin: 0 auto;
+ padding: 32px 0 48px;
+}
+
+.topbar {
+ display: flex;
+ align-items: end;
+ justify-content: space-between;
+ gap: 24px;
+ padding: 0 0 24px;
+ border-bottom: 1px solid var(--color-border-subtle);
+}
+
+.eyebrow {
+ margin: 0 0 8px;
+ color: var(--color-cyan);
+ font-family: var(--f-mono);
+ font-size: 11px;
+ line-height: 1;
+ text-transform: uppercase;
+}
+
+h1,
+h2 {
+ margin: 0;
+ font-family: var(--f-display);
+ font-weight: 400;
+ letter-spacing: 0;
+}
+
+h1 {
+ font-size: clamp(38px, 6vw, 82px);
+ line-height: 0.9;
+}
+
+h2 {
+ color: var(--color-text);
+ font-size: 18px;
+ line-height: 1.2;
+}
+
+.load-control {
+ display: flex;
+ align-items: center;
+ gap: 14px;
+ min-width: min(100%, 420px);
+ justify-content: flex-end;
+}
+
+.file-button {
+ position: relative;
+ display: inline-flex;
+ min-height: 40px;
+ align-items: center;
+ border: 1px solid color-mix(in srgb, var(--color-cyan) 42%, transparent);
+ border-radius: 8px;
+ padding: 0 16px;
+ color: var(--color-cyan-bright);
+ background: var(--color-cyan-ghost);
+ cursor: pointer;
+ transition:
+ border-color 160ms var(--ease-out),
+ background 160ms var(--ease-out);
+}
+
+.file-button:hover {
+ border-color: var(--color-cyan);
+ background: rgba(124, 226, 207, 0.13);
+}
+
+.file-button input {
+ position: absolute;
+ inset: 0;
+ opacity: 0;
+ cursor: pointer;
+}
+
+.status-text {
+ margin: 0;
+ color: var(--color-text-muted);
+ font-family: var(--f-mono);
+ font-size: 11px;
+ line-height: 1.4;
+}
+
+.empty-state {
+ margin: 64px 0 0;
+ padding: 32px;
+ border: 1px solid var(--color-border);
+ border-radius: 8px;
+ background: rgba(18, 19, 26, 0.58);
+}
+
+.empty-state h2 {
+ max-width: 760px;
+ margin-bottom: 24px;
+ font-size: clamp(24px, 4vw, 44px);
+}
+
+.kpi-strip {
+ display: grid;
+ grid-template-columns: repeat(5, minmax(0, 1fr));
+ margin: 24px 0;
+ border-block: 1px solid var(--color-border-subtle);
+}
+
+.kpi-strip div {
+ min-width: 0;
+ padding: 18px 22px;
+ border-right: 1px solid var(--color-border-subtle);
+}
+
+.kpi-strip div:last-child {
+ border-right: 0;
+}
+
+.kpi-strip span {
+ display: block;
+ margin-bottom: 8px;
+ color: var(--color-text-muted);
+ font-family: var(--f-mono);
+ font-size: 11px;
+ text-transform: uppercase;
+}
+
+.kpi-strip strong {
+ display: block;
+ overflow-wrap: anywhere;
+ color: var(--color-text);
+ font-family: var(--f-mono);
+ font-size: clamp(20px, 2.7vw, 34px);
+ font-weight: 500;
+ line-height: 1.05;
+}
+
+.kpi-strip small {
+ display: block;
+ margin-top: 7px;
+ overflow-wrap: anywhere;
+ color: var(--color-text-muted);
+ font-family: var(--f-mono);
+ font-size: 10px;
+ line-height: 1.3;
+}
+
+.workspace {
+ display: grid;
+ grid-template-columns: minmax(280px, 330px) minmax(0, 1fr);
+ gap: 24px;
+}
+
+.rail,
+.main-pane {
+ display: flex;
+ min-width: 0;
+ flex-direction: column;
+ gap: 24px;
+}
+
+.panel {
+ min-width: 0;
+ border: 1px solid var(--color-border-subtle);
+ border-radius: 8px;
+ background: rgba(18, 19, 26, 0.58);
+ backdrop-filter: blur(14px);
+}
+
+.section-head {
+ padding: 18px 20px 16px;
+ border-bottom: 1px solid var(--color-border-subtle);
+}
+
+.section-head.row {
+ display: flex;
+ align-items: end;
+ justify-content: space-between;
+ gap: 16px;
+}
+
+.summary-list {
+ display: flex;
+ flex-direction: column;
+}
+
+.summary-row {
+ display: grid;
+ grid-template-columns: minmax(0, 1fr) auto;
+ gap: 14px;
+ padding: 14px 18px;
+ border-bottom: 1px solid var(--color-border-subtle);
+}
+
+.summary-row:last-child {
+ border-bottom: 0;
+}
+
+.summary-row:hover,
+tbody tr:hover {
+ background: rgba(124, 226, 207, 0.055);
+}
+
+.summary-name,
+.target-name {
+ min-width: 0;
+ overflow: hidden;
+ color: var(--color-text);
+ text-overflow: ellipsis;
+ white-space: nowrap;
+}
+
+.summary-meta,
+.target-meta {
+ color: var(--color-text-muted);
+ font-family: var(--f-mono);
+ font-size: 11px;
+}
+
+.summary-value {
+ color: var(--color-cyan-bright);
+ font-family: var(--f-mono);
+ font-size: 13px;
+ text-align: right;
+ white-space: nowrap;
+}
+
+.dense .summary-row {
+ padding-block: 11px;
+}
+
+.split {
+ display: grid;
+ grid-template-columns: repeat(2, minmax(0, 1fr));
+ gap: 24px;
+}
+
+.target-list {
+ display: flex;
+ max-height: 484px;
+ overflow: auto;
+ flex-direction: column;
+}
+
+.target-row {
+ display: grid;
+ grid-template-columns: minmax(0, 1fr) auto;
+ gap: 16px;
+ padding: 14px 18px;
+ border-bottom: 1px solid var(--color-border-subtle);
+}
+
+.target-row:last-child {
+ border-bottom: 0;
+}
+
+.target-row:hover {
+ background: rgba(124, 226, 207, 0.055);
+}
+
+.delta {
+ font-family: var(--f-mono);
+ font-size: 13px;
+ text-align: right;
+ white-space: nowrap;
+}
+
+.delta.good,
+.winner.to {
+ color: var(--color-green);
+}
+
+.delta.bad,
+.winner.from {
+ color: var(--color-coral);
+}
+
+.winner.tie {
+ color: var(--color-amber);
+}
+
+.table-panel {
+ overflow: hidden;
+}
+
+.filters {
+ display: grid;
+ grid-template-columns: minmax(180px, 1.4fr) repeat(5, minmax(120px, 1fr));
+ gap: 10px;
+ padding: 16px 20px;
+ border-bottom: 1px solid var(--color-border-subtle);
+}
+
+input,
+select {
+ min-width: 0;
+ min-height: 38px;
+ border: 1px solid var(--color-border);
+ border-radius: 6px;
+ padding: 0 11px;
+ color: var(--color-text);
+ background: rgba(6, 7, 10, 0.58);
+ outline: none;
+}
+
+input:focus,
+select:focus {
+ border-color: color-mix(in srgb, var(--color-cyan) 58%, var(--color-border));
+ box-shadow: 0 0 0 3px rgba(124, 226, 207, 0.08);
+}
+
+.table-wrap {
+ max-height: 620px;
+ overflow: auto;
+}
+
+table {
+ width: 100%;
+ border-collapse: collapse;
+ table-layout: fixed;
+}
+
+th,
+td {
+ overflow: hidden;
+ border-bottom: 1px solid var(--color-border-subtle);
+ padding: 11px 12px;
+ text-align: left;
+ text-overflow: ellipsis;
+ white-space: nowrap;
+}
+
+th {
+ position: sticky;
+ top: 0;
+ z-index: 1;
+ color: var(--color-text-muted);
+ background: rgba(9, 11, 16, 0.98);
+ font-family: var(--f-mono);
+ font-size: 10px;
+ font-weight: 500;
+ text-transform: uppercase;
+}
+
+td {
+ color: var(--color-text-secondary);
+ font-size: 13px;
+}
+
+td.mono {
+ font-family: var(--f-mono);
+ font-size: 12px;
+}
+
+td:first-child,
+th:first-child {
+ width: 30%;
+}
+
+td:nth-child(2),
+th:nth-child(2) {
+ width: 14%;
+}
+
+td:nth-child(3),
+th:nth-child(3),
+td:nth-child(4),
+th:nth-child(4) {
+ width: 8%;
+}
+
+td:nth-child(5),
+th:nth-child(5),
+td:nth-child(6),
+th:nth-child(6),
+td:nth-child(7),
+th:nth-child(7),
+td:nth-child(8),
+th:nth-child(8),
+td:nth-child(9),
+th:nth-child(9) {
+ width: 8%;
+}
+
+.db-status.matched {
+ color: var(--color-cyan-bright);
+}
+
+.db-status.legacy_only,
+.db-status.ambiguous {
+ color: var(--color-amber);
+}
+
+.db-status.db_unavailable,
+.db-status.unparsed {
+ color: var(--color-text-muted);
+}
+
+@media (max-width: 1080px) {
+ .workspace,
+ .split,
+ .kpi-strip {
+ grid-template-columns: 1fr;
+ }
+
+ .kpi-strip div {
+ border-right: 0;
+ border-bottom: 1px solid var(--color-border-subtle);
+ }
+
+ .kpi-strip div:last-child {
+ border-bottom: 0;
+ }
+
+ .filters {
+ grid-template-columns: 1fr 1fr;
+ }
+}
+
+@media (max-width: 680px) {
+ .shell {
+ width: min(100% - 20px, 1480px);
+ padding-top: 20px;
+ }
+
+ .topbar,
+ .load-control {
+ align-items: stretch;
+ flex-direction: column;
+ }
+
+ .load-control {
+ justify-content: flex-start;
+ }
+
+ .filters {
+ grid-template-columns: 1fr;
+ }
+
+ .section-head.row {
+ align-items: start;
+ flex-direction: column;
+ }
+
+ th,
+ td {
+ padding-inline: 10px;
+ }
+}
diff --git a/docs/benchmarking.md b/docs/benchmarking.md
index 3f2f68a..52da096 100644
--- a/docs/benchmarking.md
+++ b/docs/benchmarking.md
@@ -68,9 +68,29 @@ Every serious saved run can write:
- `policyengine_harness.json`
- `run_registry.jsonl`
- `run_index.duckdb`
+- `pe_native_target_diagnostics_current.json`
These live under the selected artifact root.
+## Diagnostics dashboard
+
+The repo includes a static dashboard at `dashboard/` for inspecting the full
+PE-native target diagnostic dataset. It expects the JSON payload written by:
+
+```bash
+microplex-us-pe-native-target-diagnostics \
+ --from-dataset /path/to/enhanced_cps_2024.h5 \
+ --to-dataset /path/to/policyengine_us.h5 \
+ --policyengine-targets-db /path/to/policy_data.db \
+ --output-path artifacts/pe_native_target_diagnostics_current.json
+```
+
+The JSON includes full per-target rows, family summaries, scope summaries, top
+improvements, top regressions, and target DB match metadata when a structured
+PolicyEngine target DB is available. The dashboard loads that default artifact
+when served from the repo root, and can also load an arbitrary diagnostic JSON
+from disk.
+
## Inspecting runs
Useful Python APIs:
diff --git a/pyproject.toml b/pyproject.toml
index 99ea739..542981a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ Repository = "https://github.com/CosilicoAI/microplex-us"
microplex-us-backfill-pe-native-audit = "microplex_us.pipelines.backfill_pe_native_audit:main"
microplex-us-backfill-pe-native-scores = "microplex_us.pipelines.backfill_pe_native_scores:main"
microplex-us-check-site-snapshot = "microplex_us.pipelines.check_site_snapshot:main"
+microplex-us-pe-native-target-diagnostics = "microplex_us.pipelines.pe_native_scores:main_target_diagnostics"
microplex-us-score-pe-native-loss = "microplex_us.pipelines.pe_native_scores:main"
microplex-us-version-bump-benchmark = "microplex_us.pipelines.version_benchmark:main"
diff --git a/scripts/sync_cosilico_theme.py b/scripts/sync_cosilico_theme.py
new file mode 100644
index 0000000..be97f50
--- /dev/null
+++ b/scripts/sync_cosilico_theme.py
@@ -0,0 +1,88 @@
+"""Sync exported Cosilico design tokens into browser-readable CSS variables."""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+
def default_source(repo_root: Path) -> Path:
    """Return the first local Cosilico token export next to this repo.

    Probes sibling checkouts of the Cosilico repo in a fixed priority
    order and returns the first theme file that exists.

    Raises:
        FileNotFoundError: when none of the candidate paths exist.
    """

    sibling = repo_root.parent
    candidates = (
        sibling / "cosilico.ai" / "packages" / "config" / "theme.css",
        sibling / "cosilico" / "packages" / "config" / "theme.css",
        sibling / "cosilico" / "apps" / "web" / "src" / "app" / "globals.css",
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    searched = ", ".join(str(path) for path in candidates)
    raise FileNotFoundError(f"Could not find exported Cosilico theme. Searched: {searched}")
+
+
def render_browser_tokens(
    source_text: str,
    *,
    source_path: Path,
    repo_root: Path,
) -> str:
    """Convert a Tailwind v4 @theme block into CSS custom properties.

    Extracts the body of the first ``@theme { ... }`` block and wraps it
    in a ``:root { ... }`` rule with a provenance header comment.

    Args:
        source_text: Full text of the exported theme CSS.
        source_path: Path the text was read from (recorded in the header).
        repo_root: This repo's root; the header shows ``source_path``
            relative to its parent when possible.

    Raises:
        ValueError: if no ``@theme`` block is present.
    """

    # Bug fix: the named group was missing its name ("(?P.*?)"), which is an
    # invalid pattern; match.group("body") below requires "(?P<body>...)".
    # Note: ".*?" up to the first "}" assumes no nested braces in the block.
    match = re.search(r"@theme\s*\{(?P<body>.*?)\}", source_text, flags=re.DOTALL)
    if not match:
        raise ValueError(f"No @theme block found in {source_path}")
    body = match.group("body").strip()
    try:
        display_source = source_path.relative_to(repo_root.parent)
    except ValueError:
        # Source lives outside the workspace; fall back to the absolute path.
        display_source = source_path
    return (
        "/* Generated from the exported Cosilico design tokens.\n"
        f" Source: {display_source}\n"
        " Re-run: python scripts/sync_cosilico_theme.py\n"
        "*/\n"
        ":root {\n"
        f"{body}\n"
        "}\n"
    )
+
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: render and write (or verify) the dashboard theme CSS.

    Returns a process exit code: 0 on success, 1 when --check finds drift.
    """
    repo_root = Path(__file__).resolve().parents[1]
    parser = argparse.ArgumentParser(
        description="Sync @cosilico/config theme tokens into dashboard CSS."
    )
    # Explicit token export to read; defaults to the first sibling checkout.
    parser.add_argument("--source", type=Path)
    parser.add_argument(
        "--output",
        type=Path,
        default=repo_root / "dashboard" / "cosilico-theme.css",
    )
    # Verify-only mode: exit non-zero instead of writing when out of sync.
    parser.add_argument("--check", action="store_true")
    args = parser.parse_args(argv)

    source = (args.source or default_source(repo_root)).expanduser().resolve()
    rendered = render_browser_tokens(
        source.read_text(),
        source_path=source,
        repo_root=repo_root,
    )

    if args.check:
        # Compare against the committed copy without touching it.
        current = args.output.read_text() if args.output.exists() else ""
        if current != rendered:
            print(f"{args.output} is not synced with {source}", file=sys.stderr)
            return 1
        return 0

    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(rendered)
    print(args.output)
    return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/src/microplex_us/pipelines/pe_native_scores.py b/src/microplex_us/pipelines/pe_native_scores.py
index c0b9924..416ea29 100644
--- a/src/microplex_us/pipelines/pe_native_scores.py
+++ b/src/microplex_us/pipelines/pe_native_scores.py
@@ -5,6 +5,7 @@
import argparse
import json
import os
+import re
import subprocess
import sys
from dataclasses import dataclass, field
@@ -23,6 +24,11 @@
"LC_ALL",
"TZ",
)
+_EITC_AGI_CHILD_DOMAIN_VARIABLE = "adjusted_gross_income,eitc,eitc_child_count"
+_EITC_AGI_CHILD_LABEL = re.compile(
+    r"^nation/irs/eitc/(?P<metric>returns|amount)/"
+    r"c(?P<count_children>\d+)_(?P<agi_lower>[^_]+)_(?P<agi_upper>[^/]+)$"
+)
_ENHANCED_CPS_BAD_TARGETS: tuple[str, ...] = (
"nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Head of Household",
@@ -536,36 +542,150 @@ def compute(dataset_path: str):
}
+def classify_target_family(target_name: str) -> str:
+ parts = target_name.split("/")
+ if target_name.startswith("state/census/age/"):
+ return "state_age_distribution"
+ if target_name.startswith("state/census/population_by_state/"):
+ return "state_population"
+ if target_name.startswith("state/census/population_under_5_by_state/"):
+ return "state_population_under_5"
+ if target_name.startswith("nation/irs/aca_spending/"):
+ return "state_aca_spending"
+ if target_name.startswith("state/irs/aca_enrollment/"):
+ return "state_aca_enrollment"
+ if target_name.startswith("irs/medicaid_enrollment/"):
+ return "state_medicaid_enrollment"
+ if target_name.endswith("/snap-cost"):
+ return "state_snap_cost"
+ if target_name.endswith("/snap-hhs"):
+ return "state_snap_households"
+ if target_name.startswith("state/real_estate_taxes/"):
+ return "state_real_estate_taxes"
+ if len(parts) >= 3 and parts[0] == "state" and parts[2] == "adjusted_gross_income":
+ return "state_agi_distribution"
+ if target_name.startswith("nation/jct/"):
+ return "national_tax_expenditures"
+ if target_name.startswith("nation/net_worth/"):
+ return "national_net_worth"
+ if target_name.startswith("nation/ssa/"):
+ return "national_ssa"
+ if target_name.startswith("nation/census/population_by_age/"):
+ return "national_population_by_age"
+ if target_name == "nation/census/infants":
+ return "national_infants"
+ if target_name.startswith("nation/census/agi_in_spm_threshold_decile_"):
+ return "national_spm_threshold_agi"
+ if target_name.startswith("nation/census/count_in_spm_threshold_decile_"):
+ return "national_spm_threshold_count"
+ if target_name.startswith("nation/census/"):
+ return "national_census_other"
+ if target_name.startswith("nation/irs/"):
+ return "national_irs_other"
+ return "other"
+
+
+def target_scope(target_name: str) -> str:
+ if target_name.startswith("nation/"):
+ return "national"
+ if target_name.startswith("state/") or target_name.endswith("/snap-cost") or target_name.endswith("/snap-hhs"):
+ return "state"
+ return "other"
+
+
+def abs_pct_error(estimate: float, target: float) -> float:
+ return abs(estimate - target) / max(abs(target), 1.0) * 100.0
+
+
+def build_target_rows(from_payload, to_payload):
+ rows = []
+ for idx, name in enumerate(from_payload["target_names"]):
+ from_term = float(from_payload["weighted_terms"][idx])
+ to_term = float(to_payload["weighted_terms"][idx])
+ from_error = float(from_payload["rel_error"][idx])
+ to_error = float(to_payload["rel_error"][idx])
+ target_value = float(from_payload["targets"][idx])
+ from_estimate = float(from_payload["estimate"][idx])
+ to_estimate = float(to_payload["estimate"][idx])
+ if to_error < from_error:
+ winner = "to"
+ elif from_error < to_error:
+ winner = "from"
+ else:
+ winner = "tie"
+ rows.append(
+ {
+ "target_name": name,
+ "target_family": classify_target_family(name),
+ "target_scope": target_scope(name),
+ "winner": winner,
+ "weighted_term_delta": to_term - from_term,
+ "from_weighted_term": from_term,
+ "to_weighted_term": to_term,
+ "target_value": target_value,
+ "from_estimate": from_estimate,
+ "to_estimate": to_estimate,
+ "from_rel_error": from_error,
+ "to_rel_error": to_error,
+ "from_abs_pct_error": abs_pct_error(from_estimate, target_value),
+ "to_abs_pct_error": abs_pct_error(to_estimate, target_value),
+ }
+ )
+ return rows
+
+
+def summarize_target_rows(rows, *, group_field=None):
+ if group_field is None:
+ grouped = [("all", rows)]
+ else:
+ values = sorted({row[group_field] for row in rows})
+ grouped = [(value, [row for row in rows if row[group_field] == value]) for value in values]
+
+ summaries = []
+ for value, group_rows in grouped:
+ n_targets = len(group_rows)
+ from_wins = sum(1 for row in group_rows if row["winner"] == "from")
+ to_wins = sum(1 for row in group_rows if row["winner"] == "to")
+ ties = n_targets - from_wins - to_wins
+ from_loss = float(np.mean([row["from_weighted_term"] for row in group_rows]))
+ to_loss = float(np.mean([row["to_weighted_term"] for row in group_rows]))
+ summary = {
+ "n_targets": n_targets,
+ "from_wins": from_wins,
+ "to_wins": to_wins,
+ "ties": ties,
+ "from_win_rate": from_wins / n_targets if n_targets else None,
+ "to_win_rate": to_wins / n_targets if n_targets else None,
+ "from_loss": from_loss,
+ "to_loss": to_loss,
+ "loss_delta": to_loss - from_loss,
+ "mean_weighted_term_delta": float(
+ np.mean([row["weighted_term_delta"] for row in group_rows])
+ ),
+ }
+ if group_field is not None:
+ summary[group_field] = value
+ summaries.append(summary)
+ return summaries[0] if group_field is None else summaries
+
+
from_payload = compute(FROM_DATASET)
to_payload = compute(TO_DATASET)
if from_payload["target_names"] != to_payload["target_names"]:
raise ValueError("Datasets produced different target names after filtering")
-rows = []
-for idx, name in enumerate(from_payload["target_names"]):
- from_term = float(from_payload["weighted_terms"][idx])
- to_term = float(to_payload["weighted_terms"][idx])
- rows.append(
- {
- "target_name": name,
- "weighted_term_delta": to_term - from_term,
- "from_weighted_term": from_term,
- "to_weighted_term": to_term,
- "target_value": float(from_payload["targets"][idx]),
- "from_estimate": float(from_payload["estimate"][idx]),
- "to_estimate": float(to_payload["estimate"][idx]),
- "from_rel_error": float(from_payload["rel_error"][idx]),
- "to_rel_error": float(to_payload["rel_error"][idx]),
- }
- )
-
+rows = build_target_rows(from_payload, to_payload)
rows.sort(key=lambda row: row["weighted_term_delta"], reverse=True)
payload = {
"metric": "enhanced_cps_native_loss_target_delta",
"period": PERIOD,
"from_dataset": FROM_DATASET,
"to_dataset": TO_DATASET,
+ "summary": summarize_target_rows(rows),
+ "family_summaries": summarize_target_rows(rows, group_field="target_family"),
+ "scope_summaries": summarize_target_rows(rows, group_field="target_scope"),
+ "targets": rows,
"top_regressions": rows[:TOP_K],
"top_improvements": list(reversed(rows[-TOP_K:])),
}
@@ -653,31 +773,141 @@ def compute(dataset_path: str):
}
-baseline_payload = compute(BASELINE_DATASET)
-results = []
-for candidate_dataset in CANDIDATE_DATASETS:
- candidate_payload = compute(candidate_dataset)
- if baseline_payload["target_names"] != candidate_payload["target_names"]:
- raise ValueError("Datasets produced different target names after filtering")
+def classify_target_family(target_name: str) -> str:
+ parts = target_name.split("/")
+ if target_name.startswith("state/census/age/"):
+ return "state_age_distribution"
+ if target_name.startswith("state/census/population_by_state/"):
+ return "state_population"
+ if target_name.startswith("state/census/population_under_5_by_state/"):
+ return "state_population_under_5"
+ if target_name.startswith("nation/irs/aca_spending/"):
+ return "state_aca_spending"
+ if target_name.startswith("state/irs/aca_enrollment/"):
+ return "state_aca_enrollment"
+ if target_name.startswith("irs/medicaid_enrollment/"):
+ return "state_medicaid_enrollment"
+ if target_name.endswith("/snap-cost"):
+ return "state_snap_cost"
+ if target_name.endswith("/snap-hhs"):
+ return "state_snap_households"
+ if target_name.startswith("state/real_estate_taxes/"):
+ return "state_real_estate_taxes"
+ if len(parts) >= 3 and parts[0] == "state" and parts[2] == "adjusted_gross_income":
+ return "state_agi_distribution"
+ if target_name.startswith("nation/jct/"):
+ return "national_tax_expenditures"
+ if target_name.startswith("nation/net_worth/"):
+ return "national_net_worth"
+ if target_name.startswith("nation/ssa/"):
+ return "national_ssa"
+ if target_name.startswith("nation/census/population_by_age/"):
+ return "national_population_by_age"
+ if target_name == "nation/census/infants":
+ return "national_infants"
+ if target_name.startswith("nation/census/agi_in_spm_threshold_decile_"):
+ return "national_spm_threshold_agi"
+ if target_name.startswith("nation/census/count_in_spm_threshold_decile_"):
+ return "national_spm_threshold_count"
+ if target_name.startswith("nation/census/"):
+ return "national_census_other"
+ if target_name.startswith("nation/irs/"):
+ return "national_irs_other"
+ return "other"
+
+
+def target_scope(target_name: str) -> str:
+ if target_name.startswith("nation/"):
+ return "national"
+ if target_name.startswith("state/") or target_name.endswith("/snap-cost") or target_name.endswith("/snap-hhs"):
+ return "state"
+ return "other"
+
+
+def abs_pct_error(estimate: float, target: float) -> float:
+ return abs(estimate - target) / max(abs(target), 1.0) * 100.0
+
+def build_target_rows(from_payload, to_payload):
rows = []
- for idx, name in enumerate(baseline_payload["target_names"]):
- from_term = float(baseline_payload["weighted_terms"][idx])
- to_term = float(candidate_payload["weighted_terms"][idx])
+ for idx, name in enumerate(from_payload["target_names"]):
+ from_term = float(from_payload["weighted_terms"][idx])
+ to_term = float(to_payload["weighted_terms"][idx])
+ from_error = float(from_payload["rel_error"][idx])
+ to_error = float(to_payload["rel_error"][idx])
+ target_value = float(from_payload["targets"][idx])
+ from_estimate = float(from_payload["estimate"][idx])
+ to_estimate = float(to_payload["estimate"][idx])
+ if to_error < from_error:
+ winner = "to"
+ elif from_error < to_error:
+ winner = "from"
+ else:
+ winner = "tie"
rows.append(
{
"target_name": name,
+ "target_family": classify_target_family(name),
+ "target_scope": target_scope(name),
+ "winner": winner,
"weighted_term_delta": to_term - from_term,
"from_weighted_term": from_term,
"to_weighted_term": to_term,
- "target_value": float(baseline_payload["targets"][idx]),
- "from_estimate": float(baseline_payload["estimate"][idx]),
- "to_estimate": float(candidate_payload["estimate"][idx]),
- "from_rel_error": float(baseline_payload["rel_error"][idx]),
- "to_rel_error": float(candidate_payload["rel_error"][idx]),
+ "target_value": target_value,
+ "from_estimate": from_estimate,
+ "to_estimate": to_estimate,
+ "from_rel_error": from_error,
+ "to_rel_error": to_error,
+ "from_abs_pct_error": abs_pct_error(from_estimate, target_value),
+ "to_abs_pct_error": abs_pct_error(to_estimate, target_value),
}
)
+ return rows
+
+
+def summarize_target_rows(rows, *, group_field=None):
+ if group_field is None:
+ grouped = [("all", rows)]
+ else:
+ values = sorted({row[group_field] for row in rows})
+ grouped = [(value, [row for row in rows if row[group_field] == value]) for value in values]
+
+ summaries = []
+ for value, group_rows in grouped:
+ n_targets = len(group_rows)
+ from_wins = sum(1 for row in group_rows if row["winner"] == "from")
+ to_wins = sum(1 for row in group_rows if row["winner"] == "to")
+ ties = n_targets - from_wins - to_wins
+ from_loss = float(np.mean([row["from_weighted_term"] for row in group_rows]))
+ to_loss = float(np.mean([row["to_weighted_term"] for row in group_rows]))
+ summary = {
+ "n_targets": n_targets,
+ "from_wins": from_wins,
+ "to_wins": to_wins,
+ "ties": ties,
+ "from_win_rate": from_wins / n_targets if n_targets else None,
+ "to_win_rate": to_wins / n_targets if n_targets else None,
+ "from_loss": from_loss,
+ "to_loss": to_loss,
+ "loss_delta": to_loss - from_loss,
+ "mean_weighted_term_delta": float(
+ np.mean([row["weighted_term_delta"] for row in group_rows])
+ ),
+ }
+ if group_field is not None:
+ summary[group_field] = value
+ summaries.append(summary)
+ return summaries[0] if group_field is None else summaries
+
+
+baseline_payload = compute(BASELINE_DATASET)
+results = []
+for candidate_dataset in CANDIDATE_DATASETS:
+ candidate_payload = compute(candidate_dataset)
+ if baseline_payload["target_names"] != candidate_payload["target_names"]:
+ raise ValueError("Datasets produced different target names after filtering")
+ rows = build_target_rows(baseline_payload, candidate_payload)
rows.sort(key=lambda row: row["weighted_term_delta"], reverse=True)
results.append(
{
@@ -685,6 +915,10 @@ def compute(dataset_path: str):
"period": PERIOD,
"from_dataset": BASELINE_DATASET,
"to_dataset": candidate_dataset,
+ "summary": summarize_target_rows(rows),
+ "family_summaries": summarize_target_rows(rows, group_field="target_family"),
+ "scope_summaries": summarize_target_rows(rows, group_field="target_scope"),
+ "targets": rows,
"top_regressions": rows[:TOP_K],
"top_improvements": list(reversed(rows[-TOP_K:])),
}
@@ -1796,6 +2030,298 @@ def compute_batch_us_pe_native_scores(
return results
+@dataclass(frozen=True)
+class PENativeTargetLookupKey:
+ """Structured lookup key for a legacy PE-native target label."""
+
+ variable: str
+ count_children: int
+ agi_lower: float
+ agi_upper: float
+
+ def as_tuple(self) -> tuple[str, int, float, float]:
+ return (self.variable, self.count_children, self.agi_lower, self.agi_upper)
+
+ @staticmethod
+ def _json_safe_bound(value: float) -> float | str:
+ if value == float("inf"):
+ return "inf"
+ if value == float("-inf"):
+ return "-inf"
+ return value
+
+ def expected_constraints(self) -> list[dict[str, str | float | int]]:
+ if self.count_children < 3:
+ child_constraint: dict[str, str | float | int] = {
+ "variable": "eitc_child_count",
+ "operation": "==",
+ "value": self.count_children,
+ }
+ else:
+ child_constraint = {
+ "variable": "eitc_child_count",
+ "operation": ">",
+ "value": 2,
+ }
+ return [
+ {"variable": "tax_unit_is_filer", "operation": "==", "value": 1},
+ {"variable": "eitc", "operation": ">", "value": 0},
+ child_constraint,
+ {
+ "variable": "adjusted_gross_income",
+ "operation": ">=",
+ "value": self._json_safe_bound(self.agi_lower),
+ },
+ {
+ "variable": "adjusted_gross_income",
+ "operation": "<",
+ "value": self._json_safe_bound(self.agi_upper),
+ },
+ ]
+
+ def expected_target(self) -> dict[str, Any]:
+ return {
+ "variable": self.variable,
+ "geo_level": "national",
+ "geographic_id": "US",
+ "domain_variable": _EITC_AGI_CHILD_DOMAIN_VARIABLE,
+ "constraints": self.expected_constraints(),
+ }
+
+
+def _parse_pe_native_numeric_token(token: str) -> float:
+ if token == "-inf":
+ return float("-inf")
+ if token == "inf":
+ return float("inf")
+ multipliers = {
+ "bn": 1_000_000_000.0,
+ "m": 1_000_000.0,
+ "k": 1_000.0,
+ }
+ for suffix, multiplier in multipliers.items():
+ if token.endswith(suffix):
+ return float(token[: -len(suffix)]) * multiplier
+ return float(token)
+
+
+def parse_pe_native_target_lookup_key(
+ target_name: str,
+) -> PENativeTargetLookupKey | None:
+ """Parse PE-native labels that now have structured DB equivalents."""
+
+ match = _EITC_AGI_CHILD_LABEL.match(target_name)
+ if match is None:
+ return None
+ metric = match.group("metric")
+ variable = "tax_unit_count" if metric == "returns" else "eitc"
+ return PENativeTargetLookupKey(
+ variable=variable,
+ count_children=int(match.group("count_children")),
+ agi_lower=_parse_pe_native_numeric_token(match.group("agi_lower")),
+ agi_upper=_parse_pe_native_numeric_token(match.group("agi_upper")),
+ )
+
+
+def _constraint_value_as_float(value: str) -> float | None:
+ try:
+ return float(value)
+ except (TypeError, ValueError):
+ return None
+
+
+def _target_lookup_key_from_policyengine_target(
+ target: Any,
+) -> tuple[str, int, float, float] | None:
+ if target.geo_level != "national":
+ return None
+ if target.variable not in {"eitc", "tax_unit_count"}:
+ return None
+ if target.domain_variable != _EITC_AGI_CHILD_DOMAIN_VARIABLE:
+ return None
+
+ agi_lower: float | None = None
+ agi_upper: float | None = None
+ count_children: int | None = None
+ has_eitc_positive_constraint = False
+
+ for constraint in target.constraints:
+ value = str(constraint.value)
+ numeric_value = _constraint_value_as_float(value)
+ if (
+ constraint.variable == "adjusted_gross_income"
+ and constraint.operation == ">="
+ and numeric_value is not None
+ ):
+ agi_lower = numeric_value
+ elif (
+ constraint.variable == "adjusted_gross_income"
+ and constraint.operation == "<"
+ and numeric_value is not None
+ ):
+ agi_upper = numeric_value
+ elif constraint.variable == "eitc" and constraint.operation == ">":
+ has_eitc_positive_constraint = numeric_value == 0
+ elif constraint.variable == "eitc_child_count" and numeric_value is not None:
+ if constraint.operation == "==":
+ count_children = int(numeric_value)
+ elif constraint.operation == ">" and numeric_value == 2:
+ count_children = 3
+ elif constraint.operation == ">=" and numeric_value == 3:
+ count_children = 3
+
+ if (
+ agi_lower is None
+ or agi_upper is None
+ or count_children is None
+ or not has_eitc_positive_constraint
+ ):
+ return None
+ return (target.variable, count_children, agi_lower, agi_upper)
+
+
+def _policyengine_target_payload(target: Any) -> dict[str, Any]:
+ return {
+ "target_id": target.target_id,
+ "variable": target.variable,
+ "period": target.period,
+ "value": target.value,
+ "source": target.source,
+ "notes": target.notes,
+ "geo_level": target.geo_level,
+ "geographic_id": target.geographic_id,
+ "domain_variable": target.domain_variable,
+ "constraints": [
+ {
+ "variable": constraint.variable,
+ "operation": constraint.operation,
+ "value": constraint.value,
+ }
+ for constraint in target.constraints
+ ],
+ }
+
+
+def _load_policyengine_target_match_index(
+ target_db_path: str | Path,
+ *,
+ period: int,
+) -> dict[tuple[str, int, float, float], list[dict[str, Any]]]:
+ from microplex_us.policyengine.us import PolicyEngineUSDBTargetProvider
+
+ provider = PolicyEngineUSDBTargetProvider(target_db_path, validate=False)
+ targets = provider.load_targets(
+ period=period,
+ variables=["eitc", "tax_unit_count"],
+ domain_variable_values=[_EITC_AGI_CHILD_DOMAIN_VARIABLE],
+ geo_levels=["national"],
+ )
+ matches: dict[tuple[str, int, float, float], list[dict[str, Any]]] = {}
+ for target in targets:
+ key = _target_lookup_key_from_policyengine_target(target)
+ if key is None:
+ continue
+ matches.setdefault(key, []).append(_policyengine_target_payload(target))
+ return matches
+
+
+def _default_policyengine_targets_db_path(
+ policyengine_us_data_repo: str | Path | None,
+) -> Path | None:
+ try:
+ repo = resolve_policyengine_us_data_repo_root(policyengine_us_data_repo)
+ except FileNotFoundError:
+ return None
+ path = repo / "policyengine_us_data" / "storage" / "calibration" / "policy_data.db"
+ return path if path.exists() else None
+
+
+def annotate_pe_native_target_db_matches(
+ payload: dict[str, Any],
+ *,
+ target_db_path: str | Path | None,
+ period: int,
+) -> dict[str, Any]:
+ """Attach structured PolicyEngine target DB matches to diagnostic rows."""
+
+ rows = list(payload.get("targets") or [])
+ resolved_db_path = Path(target_db_path).expanduser() if target_db_path else None
+ match_index: dict[tuple[str, int, float, float], list[dict[str, Any]]] = {}
+ target_db_error = None
+ if resolved_db_path is not None and resolved_db_path.exists():
+ try:
+ match_index = _load_policyengine_target_match_index(
+ resolved_db_path,
+ period=period,
+ )
+ except Exception as exc: # pragma: no cover - defensive diagnostic path
+ target_db_error = str(exc)
+
+ counts = {
+ "matched": 0,
+ "legacy_only": 0,
+ "unparsed": 0,
+ "ambiguous": 0,
+ "db_unavailable": 0,
+ }
+ annotations_by_name: dict[str, dict[str, Any]] = {}
+ for row in rows:
+ target_name = str(row.get("target_name", ""))
+ key = parse_pe_native_target_lookup_key(target_name)
+ if key is None:
+ annotation: dict[str, Any] = {"policyengine_target_match": "unparsed"}
+ elif resolved_db_path is None or not resolved_db_path.exists() or target_db_error:
+ annotation = {
+ "policyengine_target_match": "db_unavailable",
+ "policyengine_target_expected": key.expected_target(),
+ }
+ else:
+ matches = match_index.get(key.as_tuple(), [])
+ if len(matches) == 1:
+ match = matches[0]
+ annotation = {
+ "policyengine_target_match": "matched",
+ "policyengine_target_id": match["target_id"],
+ "policyengine_target_variable": match["variable"],
+ "policyengine_target_period": match["period"],
+ "policyengine_target_value": match["value"],
+ "policyengine_target_source": match["source"],
+ "policyengine_target_domain_variable": match["domain_variable"],
+ "policyengine_target_constraints": match["constraints"],
+ }
+ elif len(matches) > 1:
+ annotation = {
+ "policyengine_target_match": "ambiguous",
+ "policyengine_target_match_count": len(matches),
+ "policyengine_target_matches": matches,
+ "policyengine_target_expected": key.expected_target(),
+ }
+ else:
+ annotation = {
+ "policyengine_target_match": "legacy_only",
+ "policyengine_target_expected": key.expected_target(),
+ }
+ counts[annotation["policyengine_target_match"]] += 1
+ row.update(annotation)
+ annotations_by_name[target_name] = annotation
+
+ for list_name in ("top_improvements", "top_regressions"):
+ for row in payload.get(list_name) or []:
+ annotation = annotations_by_name.get(str(row.get("target_name", "")))
+ if annotation:
+ row.update(annotation)
+
+ parsed_total = counts["matched"] + counts["legacy_only"] + counts["ambiguous"]
+ payload["target_db_summary"] = {
+ "target_db_path": str(resolved_db_path) if resolved_db_path else None,
+ "target_db_error": target_db_error,
+ **counts,
+ "parsed_targets": parsed_total,
+ "match_rate": counts["matched"] / parsed_total if parsed_total else None,
+ }
+ return payload
+
+
def compare_us_pe_native_target_deltas(
*,
from_dataset_path: str | Path,
@@ -2012,6 +2538,52 @@ def write_us_pe_native_scores(
)
destination = Path(output_path)
destination.parent.mkdir(parents=True, exist_ok=True)
+ destination.write_text(
+ json.dumps(payload, indent=2, sort_keys=True, allow_nan=False)
+ )
+ return destination
+
+
+def write_us_pe_native_target_diagnostics(
+ output_path: str | Path,
+ *,
+ from_dataset_path: str | Path,
+ to_dataset_path: str | Path,
+ period: int = 2024,
+ top_k: int = 50,
+ from_label: str = "policyengine-us-data",
+ to_label: str = "microplex-us",
+ policyengine_us_data_repo: str | Path | None = None,
+ policyengine_us_data_python: str | Path | None = None,
+ policyengine_targets_db_path: str | Path | None = None,
+) -> Path:
+ """Write the full PE-native per-target diagnostic dataset to disk."""
+
+ payload = compare_us_pe_native_target_deltas(
+ from_dataset_path=from_dataset_path,
+ to_dataset_path=to_dataset_path,
+ period=period,
+ top_k=top_k,
+ policyengine_us_data_repo=policyengine_us_data_repo,
+ policyengine_us_data_python=policyengine_us_data_python,
+ )
+ payload["diagnostic_schema_version"] = 1
+ payload["dataset_labels"] = {
+ "from": from_label,
+ "to": to_label,
+ }
+ target_db_path = (
+ Path(policyengine_targets_db_path).expanduser()
+ if policyengine_targets_db_path is not None
+ else _default_policyengine_targets_db_path(policyengine_us_data_repo)
+ )
+ annotate_pe_native_target_db_matches(
+ payload,
+ target_db_path=target_db_path,
+ period=period,
+ )
+ destination = Path(output_path)
+ destination.parent.mkdir(parents=True, exist_ok=True)
destination.write_text(json.dumps(payload, indent=2, sort_keys=True))
return destination
@@ -2040,5 +2612,42 @@ def main(argv: list[str] | None = None) -> int:
return 0
+def main_target_diagnostics(argv: list[str] | None = None) -> int:
+ """CLI for full PE-native per-target diagnostics."""
+
+ parser = argparse.ArgumentParser(
+ description=(
+ "Write a full per-target PE-native diagnostic JSON comparing a "
+ "baseline dataset to a Microplex candidate."
+ )
+ )
+ parser.add_argument("--from-dataset", required=True)
+ parser.add_argument("--to-dataset", required=True)
+ parser.add_argument("--output-path", required=True)
+ parser.add_argument("--period", type=int, default=2024)
+ parser.add_argument("--top-k", type=int, default=50)
+ parser.add_argument("--from-label", default="policyengine-us-data")
+ parser.add_argument("--to-label", default="microplex-us")
+ parser.add_argument("--policyengine-us-data-python")
+ parser.add_argument("--policyengine-us-data-repo")
+ parser.add_argument("--policyengine-targets-db")
+ args = parser.parse_args(argv)
+
+ path = write_us_pe_native_target_diagnostics(
+ args.output_path,
+ from_dataset_path=args.from_dataset,
+ to_dataset_path=args.to_dataset,
+ period=args.period,
+ top_k=args.top_k,
+ from_label=args.from_label,
+ to_label=args.to_label,
+ policyengine_us_data_python=args.policyengine_us_data_python,
+ policyengine_us_data_repo=args.policyengine_us_data_repo,
+ policyengine_targets_db_path=args.policyengine_targets_db,
+ )
+ print(str(path))
+ return 0
+
+
if __name__ == "__main__":
raise SystemExit(main(sys.argv[1:]))
diff --git a/src/microplex_us/policyengine/target_profiles.py b/src/microplex_us/policyengine/target_profiles.py
index 3e8aa78..0b54941 100644
--- a/src/microplex_us/policyengine/target_profiles.py
+++ b/src/microplex_us/policyengine/target_profiles.py
@@ -34,6 +34,11 @@ def to_provider_filter(self) -> dict[str, str | None]:
PolicyEngineUSTargetCell("dividend_income", geo_level="national", domain_variable="dividend_income"),
PolicyEngineUSTargetCell("eitc", geo_level="national"),
PolicyEngineUSTargetCell("eitc", geo_level="national", domain_variable="eitc_child_count"),
+ PolicyEngineUSTargetCell(
+ "eitc",
+ geo_level="national",
+ domain_variable="adjusted_gross_income,eitc,eitc_child_count",
+ ),
PolicyEngineUSTargetCell(
"health_insurance_premiums_without_medicare_part_b",
geo_level="national",
@@ -116,6 +121,11 @@ def to_provider_filter(self) -> dict[str, str | None]:
geo_level="national",
domain_variable="eitc_child_count",
),
+ PolicyEngineUSTargetCell(
+ "tax_unit_count",
+ geo_level="national",
+ domain_variable="adjusted_gross_income,eitc,eitc_child_count",
+ ),
PolicyEngineUSTargetCell("tax_unit_count", geo_level="national", domain_variable="income_tax"),
PolicyEngineUSTargetCell(
"tax_unit_count",
diff --git a/tests/pipelines/test_pe_native_scores.py b/tests/pipelines/test_pe_native_scores.py
index 4df19ea..5006097 100644
--- a/tests/pipelines/test_pe_native_scores.py
+++ b/tests/pipelines/test_pe_native_scores.py
@@ -8,6 +8,7 @@
from microplex_us.pipelines.pe_native_scores import (
PolicyEngineUSEnhancedCPSNativeScores,
+ annotate_pe_native_target_db_matches,
build_policyengine_us_data_pythonpath,
build_policyengine_us_data_subprocess_env,
compare_us_pe_native_target_deltas,
@@ -16,8 +17,10 @@
compute_batch_us_pe_native_target_deltas,
compute_us_pe_native_scores,
compute_us_pe_native_support_audit,
+ parse_pe_native_target_lookup_key,
resolve_policyengine_us_data_python,
write_us_pe_native_scores,
+ write_us_pe_native_target_diagnostics,
)
@@ -309,9 +312,58 @@ def test_compare_us_pe_native_target_deltas_wraps_subprocess_payload(
"period": 2024,
"from_dataset": str(before),
"to_dataset": str(after),
+ "summary": {
+ "n_targets": 2,
+ "from_wins": 1,
+ "to_wins": 1,
+ "ties": 0,
+ "from_win_rate": 0.5,
+ "to_win_rate": 0.5,
+ "from_loss": 0.5,
+ "to_loss": 1.0,
+ "loss_delta": 0.5,
+ "mean_weighted_term_delta": 0.5,
+ },
+ "family_summaries": [
+ {
+ "target_family": "national_irs_other",
+ "n_targets": 1,
+ "to_win_rate": 0.0,
+ "loss_delta": 1.5,
+ }
+ ],
+ "scope_summaries": [
+ {
+ "target_scope": "national",
+ "n_targets": 1,
+ "to_win_rate": 0.0,
+ "loss_delta": 1.5,
+ }
+ ],
+ "targets": [
+ {
+ "target_name": "nation/irs/example",
+ "target_family": "national_irs_other",
+ "target_scope": "national",
+ "winner": "from",
+ "weighted_term_delta": 1.5,
+ "from_weighted_term": 0.2,
+ "to_weighted_term": 1.7,
+ "target_value": 10.0,
+ "from_estimate": 1.0,
+ "to_estimate": 0.0,
+ "from_rel_error": 0.3,
+ "to_rel_error": 1.0,
+ "from_abs_pct_error": 90.0,
+ "to_abs_pct_error": 100.0,
+ }
+ ],
"top_regressions": [
{
"target_name": "nation/irs/example",
+ "target_family": "national_irs_other",
+ "target_scope": "national",
+ "winner": "from",
"weighted_term_delta": 1.5,
"from_weighted_term": 0.2,
"to_weighted_term": 1.7,
@@ -325,6 +377,9 @@ def test_compare_us_pe_native_target_deltas_wraps_subprocess_payload(
"top_improvements": [
{
"target_name": "state/example",
+ "target_family": "other",
+ "target_scope": "state",
+ "winner": "to",
"weighted_term_delta": -0.5,
"from_weighted_term": 0.8,
"to_weighted_term": 0.3,
@@ -361,6 +416,147 @@ def test_compare_us_pe_native_target_deltas_wraps_subprocess_payload(
assert result["metric"] == "enhanced_cps_native_loss_target_delta"
assert result["top_regressions"][0]["target_name"] == "nation/irs/example"
+ assert result["summary"]["to_win_rate"] == 0.5
+ assert result["targets"][0]["target_family"] == "national_irs_other"
+
+
+def test_parse_pe_native_target_lookup_key_maps_eitc_agi_child_labels() -> None:
+ amount_key = parse_pe_native_target_lookup_key(
+ "nation/irs/eitc/amount/c3_1_1k"
+ )
+ returns_key = parse_pe_native_target_lookup_key(
+ "nation/irs/eitc/returns/c2_1_1k"
+ )
+
+ assert amount_key is not None
+ assert amount_key.variable == "eitc"
+ assert amount_key.count_children == 3
+ assert amount_key.agi_lower == 1.0
+ assert amount_key.agi_upper == 1_000.0
+ assert amount_key.expected_target()["domain_variable"] == (
+ "adjusted_gross_income,eitc,eitc_child_count"
+ )
+ assert returns_key is not None
+ assert returns_key.variable == "tax_unit_count"
+ assert returns_key.count_children == 2
+
+
+def test_annotate_pe_native_target_db_matches_marks_matches_and_gaps(
+ monkeypatch,
+ tmp_path,
+) -> None:
+ db_path = tmp_path / "policy_data.db"
+ db_path.write_text("stub")
+ matched_name = "nation/irs/eitc/amount/c3_1_1k"
+ matched_key = parse_pe_native_target_lookup_key(matched_name)
+ assert matched_key is not None
+
+ monkeypatch.setattr(
+ "microplex_us.pipelines.pe_native_scores._load_policyengine_target_match_index",
+ lambda *_args, **_kwargs: {
+ matched_key.as_tuple(): [
+ {
+ "target_id": 123,
+ "variable": "eitc",
+ "period": 2022,
+ "value": 2_079_000.0,
+ "source": "IRS SOI",
+ "notes": "Table 2.5",
+ "geo_level": "national",
+ "geographic_id": "US",
+ "domain_variable": (
+ "adjusted_gross_income,eitc,eitc_child_count"
+ ),
+ "constraints": [
+ {
+ "variable": "eitc_child_count",
+ "operation": ">",
+ "value": "2",
+ }
+ ],
+ }
+ ]
+ },
+ )
+ payload = {
+ "targets": [
+ {"target_name": matched_name},
+ {"target_name": "nation/irs/eitc/returns/c2_1_1k"},
+ {"target_name": "nation/census/infants"},
+ ],
+ "top_improvements": [{"target_name": matched_name}],
+ "top_regressions": [],
+ }
+
+ annotate_pe_native_target_db_matches(
+ payload,
+ target_db_path=db_path,
+ period=2024,
+ )
+
+ assert payload["targets"][0]["policyengine_target_match"] == "matched"
+ assert payload["targets"][0]["policyengine_target_id"] == 123
+ assert payload["targets"][1]["policyengine_target_match"] == "legacy_only"
+ assert payload["targets"][1]["policyengine_target_expected"]["variable"] == (
+ "tax_unit_count"
+ )
+ assert payload["targets"][2]["policyengine_target_match"] == "unparsed"
+ assert payload["top_improvements"][0]["policyengine_target_match"] == "matched"
+ assert payload["target_db_summary"]["matched"] == 1
+ assert payload["target_db_summary"]["legacy_only"] == 1
+ assert payload["target_db_summary"]["unparsed"] == 1
+ assert payload["target_db_summary"]["match_rate"] == 0.5
+
+
+def test_write_us_pe_native_target_diagnostics_persists_full_payload(
+ monkeypatch,
+ tmp_path,
+) -> None:
+ before = tmp_path / "before.h5"
+ after = tmp_path / "after.h5"
+ output_path = tmp_path / "diagnostics.json"
+ for path in (before, after):
+ path.write_text(path.stem)
+
+ monkeypatch.setattr(
+ "microplex_us.pipelines.pe_native_scores.compare_us_pe_native_target_deltas",
+ lambda **_kwargs: {
+ "metric": "enhanced_cps_native_loss_target_delta",
+ "period": 2024,
+ "from_dataset": str(before),
+ "to_dataset": str(after),
+ "summary": {"n_targets": 1, "to_win_rate": 1.0},
+ "family_summaries": [{"target_family": "national_irs_other"}],
+ "scope_summaries": [{"target_scope": "national"}],
+ "targets": [
+ {
+ "target_name": "nation/irs/example",
+ "target_family": "national_irs_other",
+ "winner": "to",
+ "weighted_term_delta": -1.0,
+ }
+ ],
+ "top_regressions": [],
+ "top_improvements": [],
+ },
+ )
+
+ written = write_us_pe_native_target_diagnostics(
+ output_path,
+ from_dataset_path=before,
+ to_dataset_path=after,
+ from_label="baseline",
+ to_label="candidate",
+ policyengine_targets_db_path=tmp_path / "missing.db",
+ )
+
+ payload = json.loads(written.read_text())
+ assert written == output_path
+ assert payload["diagnostic_schema_version"] == 1
+ assert payload["dataset_labels"] == {"from": "baseline", "to": "candidate"}
+ assert payload["targets"][0]["target_name"] == "nation/irs/example"
+ assert payload["targets"][0]["policyengine_target_match"] == "unparsed"
+ assert payload["target_db_summary"]["unparsed"] == 1
def test_compute_batch_us_pe_native_target_deltas_wraps_multiple_candidates(