diff --git a/README.md b/README.md index 02d8143..bd03881 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,25 @@ built on top of the generic `microplex` engine. - [PE construction parity](./docs/pe-construction-parity.md) - [Superseding `policyengine-us-data`](./docs/superseding-policyengine-us-data.md) +## Diagnostics dashboard + +The static dashboard in `dashboard/` loads the full PE-native per-target +diagnostic JSON written by: + +```bash +microplex-us-pe-native-target-diagnostics \ + --from-dataset /path/to/enhanced_cps_2024.h5 \ + --to-dataset /path/to/policyengine_us.h5 \ + --policyengine-targets-db /path/to/policy_data.db \ + --output-path artifacts/pe_native_target_diagnostics_current.json +``` + +The dashboard uses the exported Cosilico design tokens from +`@cosilico/config/theme.css`; run `python scripts/sync_cosilico_theme.py --check` +to verify the local browser-readable token copy is still synced. +When a PolicyEngine target DB is available, the JSON annotates PE-native legacy +labels with structured target IDs and flags legacy-only gaps. 
+ ## Current focus `microplex-us` is being built as a library-first US runtime with diff --git a/dashboard/app.js b/dashboard/app.js new file mode 100644 index 0000000..668fb22 --- /dev/null +++ b/dashboard/app.js @@ -0,0 +1,506 @@ +"use strict"; + +const DEFAULT_DATA_URL = "../artifacts/pe_native_target_diagnostics_current.json"; +const TABLE_LIMIT = 500; + +const state = { + data: null, + search: "", + family: "all", + scope: "all", + winner: "all", + dbMatch: "all", + sort: "weighted_term_delta:asc", +}; + +const el = { + dashboard: document.getElementById("dashboard"), + emptyState: document.getElementById("emptyState"), + fileInput: document.getElementById("fileInput"), + loadStatus: document.getElementById("loadStatus"), + kpiTargets: document.getElementById("kpiTargets"), + kpiToWinLabel: document.getElementById("kpiToWinLabel"), + kpiWinRate: document.getElementById("kpiWinRate"), + kpiLossDelta: document.getElementById("kpiLossDelta"), + kpiLossPair: document.getElementById("kpiLossPair"), + kpiDbMatch: document.getElementById("kpiDbMatch"), + kpiDbDetail: document.getElementById("kpiDbDetail"), + scopeSummary: document.getElementById("scopeSummary"), + familySummary: document.getElementById("familySummary"), + topImprovements: document.getElementById("topImprovements"), + topRegressions: document.getElementById("topRegressions"), + tableCount: document.getElementById("tableCount"), + searchInput: document.getElementById("searchInput"), + familyFilter: document.getElementById("familyFilter"), + scopeFilter: document.getElementById("scopeFilter"), + winnerFilter: document.getElementById("winnerFilter"), + dbFilter: document.getElementById("dbFilter"), + sortSelect: document.getElementById("sortSelect"), + targetTable: document.getElementById("targetTable"), +}; + +function labels() { + const datasetLabels = state.data?.dataset_labels || {}; + return { + from: datasetLabels.from || "baseline", + to: datasetLabels.to || "candidate", + }; +} + +function 
numberOrNull(value) { + const numeric = Number(value); + return Number.isFinite(numeric) ? numeric : null; +} + +function formatNumber(value, options = {}) { + const numeric = numberOrNull(value); + if (numeric === null) { + return "-"; + } + const abs = Math.abs(numeric); + if (abs >= 1_000_000 || (abs > 0 && abs < 0.001)) { + return numeric.toExponential(2); + } + return new Intl.NumberFormat("en-US", { + maximumFractionDigits: options.maximumFractionDigits ?? 3, + minimumFractionDigits: options.minimumFractionDigits ?? 0, + }).format(numeric); +} + +function formatCompact(value) { + const numeric = numberOrNull(value); + if (numeric === null) { + return "-"; + } + return new Intl.NumberFormat("en-US", { + notation: "compact", + maximumFractionDigits: 2, + }).format(numeric); +} + +function formatPercent(value) { + const numeric = numberOrNull(value); + if (numeric === null) { + return "-"; + } + return new Intl.NumberFormat("en-US", { + style: "percent", + maximumFractionDigits: 1, + }).format(numeric); +} + +function formatSigned(value) { + const numeric = numberOrNull(value); + if (numeric === null) { + return "-"; + } + const sign = numeric > 0 ? "+" : ""; + return `${sign}${formatNumber(numeric, { maximumFractionDigits: 4 })}`; +} + +function formatError(value) { + const numeric = numberOrNull(value); + if (numeric === null) { + return "-"; + } + return `${formatNumber(numeric, { maximumFractionDigits: 2 })}%`; +} + +function classForDelta(value) { + const numeric = numberOrNull(value) || 0; + if (numeric < 0) { + return "good"; + } + if (numeric > 0) { + return "bad"; + } + return ""; +} + +function winnerLabel(winner) { + const currentLabels = labels(); + if (winner === "to") { + return currentLabels.to; + } + if (winner === "from") { + return currentLabels.from; + } + return "tie"; +} + +function dbMatchLabel(row) { + const status = row.policyengine_target_match || "unparsed"; + if (status === "matched") { + return row.policyengine_target_id ? 
`#${row.policyengine_target_id}` : "matched"; + } + if (status === "legacy_only") { + return "legacy only"; + } + if (status === "db_unavailable") { + return "db unavailable"; + } + return status.replaceAll("_", " "); +} + +function summarizeRows(rows) { + const nTargets = rows.length; + const fromWins = rows.filter((row) => row.winner === "from").length; + const toWins = rows.filter((row) => row.winner === "to").length; + const ties = nTargets - fromWins - toWins; + const fromLoss = mean(rows.map((row) => row.from_weighted_term)); + const toLoss = mean(rows.map((row) => row.to_weighted_term)); + return { + n_targets: nTargets, + from_wins: fromWins, + to_wins: toWins, + ties, + from_win_rate: nTargets ? fromWins / nTargets : null, + to_win_rate: nTargets ? toWins / nTargets : null, + from_loss: fromLoss, + to_loss: toLoss, + loss_delta: toLoss - fromLoss, + mean_weighted_term_delta: mean(rows.map((row) => row.weighted_term_delta)), + }; +} + +function mean(values) { + const numbers = values.map(Number).filter(Number.isFinite); + if (!numbers.length) { + return null; + } + return numbers.reduce((sum, value) => sum + value, 0) / numbers.length; +} + +function groupSummary(rows, field) { + const grouped = new Map(); + for (const row of rows) { + const key = row[field] || "other"; + if (!grouped.has(key)) { + grouped.set(key, []); + } + grouped.get(key).push(row); + } + return Array.from(grouped.entries()).map(([key, groupRows]) => ({ + [field]: key, + ...summarizeRows(groupRows), + })); +} + +function normalizePayload(payload) { + const rows = Array.isArray(payload.targets) ? payload.targets : []; + return { + ...payload, + summary: payload.summary || summarizeRows(rows), + family_summaries: Array.isArray(payload.family_summaries) + ? payload.family_summaries + : groupSummary(rows, "target_family"), + scope_summaries: Array.isArray(payload.scope_summaries) + ? 
payload.scope_summaries + : groupSummary(rows, "target_scope"), + top_improvements: Array.isArray(payload.top_improvements) + ? payload.top_improvements + : [...rows] + .sort((a, b) => Number(a.weighted_term_delta) - Number(b.weighted_term_delta)) + .slice(0, 25), + top_regressions: Array.isArray(payload.top_regressions) + ? payload.top_regressions + : [...rows] + .sort((a, b) => Number(b.weighted_term_delta) - Number(a.weighted_term_delta)) + .slice(0, 25), + }; +} + +function setData(payload, sourceLabel) { + state.data = normalizePayload(payload); + el.dashboard.hidden = false; + el.emptyState.hidden = true; + el.loadStatus.textContent = sourceLabel; + populateFilters(); + render(); +} + +function showEmpty(message) { + state.data = null; + el.dashboard.hidden = true; + el.emptyState.hidden = false; + el.loadStatus.textContent = message; +} + +async function loadDefault() { + try { + const response = await fetch(`${DEFAULT_DATA_URL}?v=${Date.now()}`, { + cache: "no-store", + }); + if (!response.ok) { + throw new Error(`HTTP ${response.status}`); + } + setData(await response.json(), "Default artifact loaded"); + } catch (_error) { + showEmpty("Default artifact unavailable"); + } +} + +function loadFile(file) { + const reader = new FileReader(); + reader.addEventListener("load", () => { + try { + setData(JSON.parse(String(reader.result)), file.name); + } catch (error) { + showEmpty(`Invalid JSON: ${error.message}`); + } + }); + reader.readAsText(file); +} + +function populateSelect(select, label, values) { + const current = select.value || "all"; + select.replaceChildren(); + const allOption = document.createElement("option"); + allOption.value = "all"; + allOption.textContent = label; + select.append(allOption); + for (const value of values) { + const option = document.createElement("option"); + option.value = value; + option.textContent = value; + select.append(option); + } + select.value = values.includes(current) ? 
current : "all"; +} + +function populateFilters() { + const rows = state.data.targets || []; + const families = [...new Set(rows.map((row) => row.target_family || "other"))].sort(); + const scopes = [...new Set(rows.map((row) => row.target_scope || "other"))].sort(); + const dbStatuses = [...new Set(rows.map((row) => row.policyengine_target_match || "unparsed"))].sort(); + populateSelect(el.familyFilter, "All families", families); + populateSelect(el.scopeFilter, "All scopes", scopes); + populateSelect(el.dbFilter, "All DB statuses", dbStatuses); + + const currentLabels = labels(); + el.winnerFilter.replaceChildren(); + for (const [value, label] of [ + ["all", "All winners"], + ["to", currentLabels.to], + ["from", currentLabels.from], + ["tie", "Ties"], + ]) { + const option = document.createElement("option"); + option.value = value; + option.textContent = label; + el.winnerFilter.append(option); + } +} + +function filteredRows() { + const query = state.search.trim().toLowerCase(); + const rows = state.data?.targets || []; + return rows + .filter((row) => { + if (state.family !== "all" && row.target_family !== state.family) { + return false; + } + if (state.scope !== "all" && row.target_scope !== state.scope) { + return false; + } + if (state.winner !== "all" && row.winner !== state.winner) { + return false; + } + if ( + state.dbMatch !== "all" && + (row.policyengine_target_match || "unparsed") !== state.dbMatch + ) { + return false; + } + if (!query) { + return true; + } + return [ + row.target_name, + row.target_family, + row.target_scope, + row.policyengine_target_match, + row.policyengine_target_id, + row.policyengine_target_source, + row.policyengine_target_domain_variable, + ] + .join(" ") + .toLowerCase() + .includes(query); + }) + .sort((a, b) => { + const [field, direction] = state.sort.split(":"); + const av = Number(a[field]); + const bv = Number(b[field]); + const result = Number.isFinite(av) && Number.isFinite(bv) + ? 
av - bv + : String(a[field] || "").localeCompare(String(b[field] || "")); + return direction === "desc" ? -result : result; + }); +} + +function render() { + if (!state.data) { + return; + } + renderKpis(); + renderSummaries(); + renderTargetList(el.topImprovements, state.data.top_improvements || [], true); + renderTargetList(el.topRegressions, state.data.top_regressions || [], false); + renderTable(filteredRows()); +} + +function renderKpis() { + const currentLabels = labels(); + const summary = state.data.summary || {}; + el.kpiTargets.textContent = formatNumber(summary.n_targets); + el.kpiToWinLabel.textContent = `${currentLabels.to} Wins`; + el.kpiWinRate.textContent = formatPercent(summary.to_win_rate); + el.kpiLossDelta.textContent = formatSigned(summary.loss_delta); + el.kpiLossDelta.className = classForDelta(summary.loss_delta); + el.kpiLossPair.textContent = `${formatNumber(summary.from_loss)} -> ${formatNumber(summary.to_loss)}`; + const dbSummary = state.data.target_db_summary || {}; + el.kpiDbMatch.textContent = dbSummary.match_rate === null || dbSummary.match_rate === undefined + ? 
formatNumber(dbSummary.matched) + : formatPercent(dbSummary.match_rate); + el.kpiDbDetail.textContent = `${formatNumber(dbSummary.matched)} matched / ${formatNumber(dbSummary.legacy_only)} legacy`; +} + +function renderSummaries() { + const familyRows = [...(state.data.family_summaries || [])].sort( + (a, b) => Number(a.loss_delta) - Number(b.loss_delta), + ); + const scopeRows = [...(state.data.scope_summaries || [])].sort( + (a, b) => String(a.target_scope).localeCompare(String(b.target_scope)), + ); + renderSummaryList(el.scopeSummary, scopeRows, "target_scope"); + renderSummaryList(el.familySummary, familyRows, "target_family"); +} + +function renderSummaryList(container, rows, field) { + container.replaceChildren(); + for (const row of rows) { + const wrapper = document.createElement("div"); + wrapper.className = "summary-row"; + + const left = document.createElement("div"); + const name = document.createElement("div"); + name.className = "summary-name"; + name.textContent = row[field] || "other"; + const meta = document.createElement("div"); + meta.className = "summary-meta"; + meta.textContent = `${formatNumber(row.n_targets)} targets - ${formatPercent(row.to_win_rate)} wins`; + left.append(name, meta); + + const value = document.createElement("div"); + value.className = `summary-value ${classForDelta(row.loss_delta)}`; + value.textContent = formatSigned(row.loss_delta); + wrapper.append(left, value); + container.append(wrapper); + } +} + +function renderTargetList(container, rows, improvementList) { + container.replaceChildren(); + const displayRows = rows.slice(0, 12); + for (const row of displayRows) { + const wrapper = document.createElement("div"); + wrapper.className = "target-row"; + wrapper.title = row.target_name || ""; + + const left = document.createElement("div"); + const name = document.createElement("div"); + name.className = "target-name"; + name.textContent = row.target_name || "-"; + const meta = document.createElement("div"); + 
meta.className = "target-meta"; + meta.textContent = `${row.target_family || "other"} - ${winnerLabel(row.winner)} - ${dbMatchLabel(row)}`; + left.append(name, meta); + + const delta = document.createElement("div"); + delta.className = `delta ${classForDelta(row.weighted_term_delta)}`; + delta.textContent = formatSigned(row.weighted_term_delta); + if (improvementList && Number(row.weighted_term_delta) > 0) { + delta.classList.add("bad"); + } + wrapper.append(left, delta); + container.append(wrapper); + } +} + +function renderTable(rows) { + el.targetTable.replaceChildren(); + const visibleRows = rows.slice(0, TABLE_LIMIT); + el.tableCount.textContent = rows.length > TABLE_LIMIT + ? `${formatNumber(TABLE_LIMIT)} of ${formatNumber(rows.length)} rows` + : `${formatNumber(rows.length)} rows`; + + const fragment = document.createDocumentFragment(); + for (const row of visibleRows) { + const tr = document.createElement("tr"); + tr.title = row.target_name || ""; + appendCell(tr, row.target_name || "-"); + appendCell(tr, row.target_family || "other"); + appendCell(tr, row.target_scope || "other"); + appendCell(tr, winnerLabel(row.winner), `winner ${row.winner || "tie"}`); + appendCell(tr, formatSigned(row.weighted_term_delta), `mono ${classForDelta(row.weighted_term_delta)}`); + appendCell(tr, formatError(row.from_abs_pct_error), "mono"); + appendCell(tr, formatError(row.to_abs_pct_error), "mono"); + appendCell(tr, formatCompact(row.target_value), "mono"); + appendCell( + tr, + dbMatchLabel(row), + `db-status ${row.policyengine_target_match || "unparsed"}`, + ); + fragment.append(tr); + } + el.targetTable.append(fragment); +} + +function appendCell(row, text, className = "") { + const cell = document.createElement("td"); + cell.textContent = text; + if (className) { + cell.className = className; + } + row.append(cell); +} + +el.fileInput.addEventListener("change", (event) => { + const [file] = event.target.files || []; + if (file) { + loadFile(file); + } +}); + 
+el.searchInput.addEventListener("input", (event) => { + state.search = event.target.value; + render(); +}); + +el.familyFilter.addEventListener("change", (event) => { + state.family = event.target.value; + render(); +}); + +el.scopeFilter.addEventListener("change", (event) => { + state.scope = event.target.value; + render(); +}); + +el.winnerFilter.addEventListener("change", (event) => { + state.winner = event.target.value; + render(); +}); + +el.dbFilter.addEventListener("change", (event) => { + state.dbMatch = event.target.value; + render(); +}); + +el.sortSelect.addEventListener("change", (event) => { + state.sort = event.target.value; + render(); +}); + +loadDefault(); diff --git a/dashboard/cosilico-theme.css b/dashboard/cosilico-theme.css new file mode 100644 index 0000000..5c501ce --- /dev/null +++ b/dashboard/cosilico-theme.css @@ -0,0 +1,28 @@ +/* Generated from the exported Cosilico design tokens. + Source: cosilico.ai/packages/config/theme.css + Re-run: python scripts/sync_cosilico_theme.py +*/ +:root { +--color-void: #06070a; + --color-bg: #090b10; + --color-elevated: rgba(18, 19, 26, 0.9); + --color-card: rgba(20, 21, 30, 0.78); + --color-surface: rgba(27, 29, 40, 0.96); + --color-border: rgba(244, 239, 230, 0.13); + --color-border-subtle: rgba(244, 239, 230, 0.08); + + --color-text: #f4efe6; + --color-text-secondary: #cbc3b8; + --color-text-muted: #928a7f; + + --color-cyan: #7ce2cf; + --color-cyan-bright: #b9fff0; + --color-cyan-dim: #4aa391; + --color-cyan-ghost: rgba(124, 226, 207, 0.08); + --color-amber: #d5a565; + --color-green: #b0ef9f; + --color-coral: #ff8f6b; + + --ease-out: cubic-bezier(0.16, 1, 0.3, 1); + --ease-spring: cubic-bezier(0.34, 1.56, 0.64, 1); +} diff --git a/dashboard/index.html b/dashboard/index.html new file mode 100644 index 0000000..1a0dd00 --- /dev/null +++ b/dashboard/index.html @@ -0,0 +1,156 @@ + + + + + + Microplex US Diagnostics + + + + + + + + +
+
+
+

Microplex US

+

Diagnostics

+
+
+ +

Loading default artifact

+
+
+ + + + +
+ + + + diff --git a/dashboard/styles.css b/dashboard/styles.css new file mode 100644 index 0000000..bdafc4f --- /dev/null +++ b/dashboard/styles.css @@ -0,0 +1,525 @@ +* { + box-sizing: border-box; +} + +:root { + --f-display: var(--font-display), Georgia, serif; + --f-body: var(--font-body), "Helvetica Neue", Arial, sans-serif; + --f-mono: var(--font-mono), "Fira Code", ui-monospace, SFMono-Regular, Menlo, + Consolas, monospace; +} + +html { + min-height: 100%; + background: var(--color-void); + color: var(--color-text); + -webkit-font-smoothing: antialiased; + text-rendering: optimizeLegibility; +} + +body { + min-height: 100vh; + margin: 0; + font-family: var(--f-body); + background: + radial-gradient(circle at top left, rgba(124, 226, 207, 0.1), transparent 28%), + radial-gradient(circle at 88% 8%, rgba(213, 165, 101, 0.1), transparent 24%), + linear-gradient(180deg, #07080d 0%, #090b10 48%, #06070a 100%); +} + +button, +input, +select { + font: inherit; +} + +code { + display: block; + width: 100%; + overflow-x: auto; + padding: 14px 16px; + border: 1px solid var(--color-border-subtle); + border-radius: 8px; + color: var(--color-cyan-bright); + background: rgba(6, 7, 10, 0.72); + font-family: var(--f-mono); + font-size: 12px; +} + +.grid-bg, +.noise { + position: fixed; + inset: 0; + pointer-events: none; +} + +.grid-bg { + z-index: 0; + background-image: + linear-gradient(rgba(124, 226, 207, 0.03) 1px, transparent 1px), + linear-gradient(90deg, rgba(124, 226, 207, 0.03) 1px, transparent 1px); + background-size: 78px 78px; + mask-image: radial-gradient( + ellipse 74% 62% at 50% 0%, + black 0%, + transparent 100% + ); +} + +.noise { + z-index: 1; + opacity: 0.018; + background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' 
filter='url(%23n)'/%3E%3C/svg%3E"); +} + +.shell { + position: relative; + z-index: 2; + width: min(1480px, calc(100% - 32px)); + margin: 0 auto; + padding: 32px 0 48px; +} + +.topbar { + display: flex; + align-items: end; + justify-content: space-between; + gap: 24px; + padding: 0 0 24px; + border-bottom: 1px solid var(--color-border-subtle); +} + +.eyebrow { + margin: 0 0 8px; + color: var(--color-cyan); + font-family: var(--f-mono); + font-size: 11px; + line-height: 1; + text-transform: uppercase; +} + +h1, +h2 { + margin: 0; + font-family: var(--f-display); + font-weight: 400; + letter-spacing: 0; +} + +h1 { + font-size: clamp(38px, 6vw, 82px); + line-height: 0.9; +} + +h2 { + color: var(--color-text); + font-size: 18px; + line-height: 1.2; +} + +.load-control { + display: flex; + align-items: center; + gap: 14px; + min-width: min(100%, 420px); + justify-content: flex-end; +} + +.file-button { + position: relative; + display: inline-flex; + min-height: 40px; + align-items: center; + border: 1px solid color-mix(in srgb, var(--color-cyan) 42%, transparent); + border-radius: 8px; + padding: 0 16px; + color: var(--color-cyan-bright); + background: var(--color-cyan-ghost); + cursor: pointer; + transition: + border-color 160ms var(--ease-out), + background 160ms var(--ease-out); +} + +.file-button:hover { + border-color: var(--color-cyan); + background: rgba(124, 226, 207, 0.13); +} + +.file-button input { + position: absolute; + inset: 0; + opacity: 0; + cursor: pointer; +} + +.status-text { + margin: 0; + color: var(--color-text-muted); + font-family: var(--f-mono); + font-size: 11px; + line-height: 1.4; +} + +.empty-state { + margin: 64px 0 0; + padding: 32px; + border: 1px solid var(--color-border); + border-radius: 8px; + background: rgba(18, 19, 26, 0.58); +} + +.empty-state h2 { + max-width: 760px; + margin-bottom: 24px; + font-size: clamp(24px, 4vw, 44px); +} + +.kpi-strip { + display: grid; + grid-template-columns: repeat(5, minmax(0, 1fr)); + margin: 24px 
0; + border-block: 1px solid var(--color-border-subtle); +} + +.kpi-strip div { + min-width: 0; + padding: 18px 22px; + border-right: 1px solid var(--color-border-subtle); +} + +.kpi-strip div:last-child { + border-right: 0; +} + +.kpi-strip span { + display: block; + margin-bottom: 8px; + color: var(--color-text-muted); + font-family: var(--f-mono); + font-size: 11px; + text-transform: uppercase; +} + +.kpi-strip strong { + display: block; + overflow-wrap: anywhere; + color: var(--color-text); + font-family: var(--f-mono); + font-size: clamp(20px, 2.7vw, 34px); + font-weight: 500; + line-height: 1.05; +} + +.kpi-strip small { + display: block; + margin-top: 7px; + overflow-wrap: anywhere; + color: var(--color-text-muted); + font-family: var(--f-mono); + font-size: 10px; + line-height: 1.3; +} + +.workspace { + display: grid; + grid-template-columns: minmax(280px, 330px) minmax(0, 1fr); + gap: 24px; +} + +.rail, +.main-pane { + display: flex; + min-width: 0; + flex-direction: column; + gap: 24px; +} + +.panel { + min-width: 0; + border: 1px solid var(--color-border-subtle); + border-radius: 8px; + background: rgba(18, 19, 26, 0.58); + backdrop-filter: blur(14px); +} + +.section-head { + padding: 18px 20px 16px; + border-bottom: 1px solid var(--color-border-subtle); +} + +.section-head.row { + display: flex; + align-items: end; + justify-content: space-between; + gap: 16px; +} + +.summary-list { + display: flex; + flex-direction: column; +} + +.summary-row { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 14px; + padding: 14px 18px; + border-bottom: 1px solid var(--color-border-subtle); +} + +.summary-row:last-child { + border-bottom: 0; +} + +.summary-row:hover, +tbody tr:hover { + background: rgba(124, 226, 207, 0.055); +} + +.summary-name, +.target-name { + min-width: 0; + overflow: hidden; + color: var(--color-text); + text-overflow: ellipsis; + white-space: nowrap; +} + +.summary-meta, +.target-meta { + color: var(--color-text-muted); + 
font-family: var(--f-mono); + font-size: 11px; +} + +.summary-value { + color: var(--color-cyan-bright); + font-family: var(--f-mono); + font-size: 13px; + text-align: right; + white-space: nowrap; +} + +.dense .summary-row { + padding-block: 11px; +} + +.split { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 24px; +} + +.target-list { + display: flex; + max-height: 484px; + overflow: auto; + flex-direction: column; +} + +.target-row { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 16px; + padding: 14px 18px; + border-bottom: 1px solid var(--color-border-subtle); +} + +.target-row:last-child { + border-bottom: 0; +} + +.target-row:hover { + background: rgba(124, 226, 207, 0.055); +} + +.delta { + font-family: var(--f-mono); + font-size: 13px; + text-align: right; + white-space: nowrap; +} + +.delta.good, +.winner.to { + color: var(--color-green); +} + +.delta.bad, +.winner.from { + color: var(--color-coral); +} + +.winner.tie { + color: var(--color-amber); +} + +.table-panel { + overflow: hidden; +} + +.filters { + display: grid; + grid-template-columns: minmax(180px, 1.4fr) repeat(5, minmax(120px, 1fr)); + gap: 10px; + padding: 16px 20px; + border-bottom: 1px solid var(--color-border-subtle); +} + +input, +select { + min-width: 0; + min-height: 38px; + border: 1px solid var(--color-border); + border-radius: 6px; + padding: 0 11px; + color: var(--color-text); + background: rgba(6, 7, 10, 0.58); + outline: none; +} + +input:focus, +select:focus { + border-color: color-mix(in srgb, var(--color-cyan) 58%, var(--color-border)); + box-shadow: 0 0 0 3px rgba(124, 226, 207, 0.08); +} + +.table-wrap { + max-height: 620px; + overflow: auto; +} + +table { + width: 100%; + border-collapse: collapse; + table-layout: fixed; +} + +th, +td { + overflow: hidden; + border-bottom: 1px solid var(--color-border-subtle); + padding: 11px 12px; + text-align: left; + text-overflow: ellipsis; + white-space: nowrap; +} + +th { + position: 
sticky; + top: 0; + z-index: 1; + color: var(--color-text-muted); + background: rgba(9, 11, 16, 0.98); + font-family: var(--f-mono); + font-size: 10px; + font-weight: 500; + text-transform: uppercase; +} + +td { + color: var(--color-text-secondary); + font-size: 13px; +} + +td.mono { + font-family: var(--f-mono); + font-size: 12px; +} + +td:first-child, +th:first-child { + width: 30%; +} + +td:nth-child(2), +th:nth-child(2) { + width: 14%; +} + +td:nth-child(3), +th:nth-child(3), +td:nth-child(4), +th:nth-child(4) { + width: 8%; +} + +td:nth-child(5), +th:nth-child(5), +td:nth-child(6), +th:nth-child(6), +td:nth-child(7), +th:nth-child(7), +td:nth-child(8), +th:nth-child(8), +td:nth-child(9), +th:nth-child(9) { + width: 8%; +} + +.db-status.matched { + color: var(--color-cyan-bright); +} + +.db-status.legacy_only, +.db-status.ambiguous { + color: var(--color-amber); +} + +.db-status.db_unavailable, +.db-status.unparsed { + color: var(--color-text-muted); +} + +@media (max-width: 1080px) { + .workspace, + .split, + .kpi-strip { + grid-template-columns: 1fr; + } + + .kpi-strip div { + border-right: 0; + border-bottom: 1px solid var(--color-border-subtle); + } + + .kpi-strip div:last-child { + border-bottom: 0; + } + + .filters { + grid-template-columns: 1fr 1fr; + } +} + +@media (max-width: 680px) { + .shell { + width: min(100% - 20px, 1480px); + padding-top: 20px; + } + + .topbar, + .load-control { + align-items: stretch; + flex-direction: column; + } + + .load-control { + justify-content: flex-start; + } + + .filters { + grid-template-columns: 1fr; + } + + .section-head.row { + align-items: start; + flex-direction: column; + } + + th, + td { + padding-inline: 10px; + } +} diff --git a/docs/benchmarking.md b/docs/benchmarking.md index 3f2f68a..52da096 100644 --- a/docs/benchmarking.md +++ b/docs/benchmarking.md @@ -68,9 +68,29 @@ Every serious saved run can write: - `policyengine_harness.json` - `run_registry.jsonl` - `run_index.duckdb` +- 
`pe_native_target_diagnostics_current.json` These live under the selected artifact root. +## Diagnostics dashboard + +The repo includes a static dashboard at `dashboard/` for inspecting the full +PE-native target diagnostic dataset. It expects the JSON payload written by: + +```bash +microplex-us-pe-native-target-diagnostics \ + --from-dataset /path/to/enhanced_cps_2024.h5 \ + --to-dataset /path/to/policyengine_us.h5 \ + --policyengine-targets-db /path/to/policy_data.db \ + --output-path artifacts/pe_native_target_diagnostics_current.json +``` + +The JSON includes full per-target rows, family summaries, scope summaries, top +improvements, top regressions, and target DB match metadata when a structured +PolicyEngine target DB is available. The dashboard loads that default artifact +when served from the repo root, and can also load an arbitrary diagnostic JSON +from disk. + ## Inspecting runs Useful Python APIs: diff --git a/pyproject.toml b/pyproject.toml index 99ea739..542981a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ Repository = "https://github.com/CosilicoAI/microplex-us" microplex-us-backfill-pe-native-audit = "microplex_us.pipelines.backfill_pe_native_audit:main" microplex-us-backfill-pe-native-scores = "microplex_us.pipelines.backfill_pe_native_scores:main" microplex-us-check-site-snapshot = "microplex_us.pipelines.check_site_snapshot:main" +microplex-us-pe-native-target-diagnostics = "microplex_us.pipelines.pe_native_scores:main_target_diagnostics" microplex-us-score-pe-native-loss = "microplex_us.pipelines.pe_native_scores:main" microplex-us-version-bump-benchmark = "microplex_us.pipelines.version_benchmark:main" diff --git a/scripts/sync_cosilico_theme.py b/scripts/sync_cosilico_theme.py new file mode 100644 index 0000000..be97f50 --- /dev/null +++ b/scripts/sync_cosilico_theme.py @@ -0,0 +1,88 @@ +"""Sync exported Cosilico design tokens into browser-readable CSS variables.""" + +from __future__ import annotations + +import 
argparse +import re +import sys +from pathlib import Path + + +def default_source(repo_root: Path) -> Path: + """Return the first local Cosilico token export next to this repo.""" + + candidates = ( + repo_root.parent / "cosilico.ai" / "packages" / "config" / "theme.css", + repo_root.parent / "cosilico" / "packages" / "config" / "theme.css", + repo_root.parent / "cosilico" / "apps" / "web" / "src" / "app" / "globals.css", + ) + for candidate in candidates: + if candidate.exists(): + return candidate + searched = ", ".join(str(path) for path in candidates) + raise FileNotFoundError(f"Could not find exported Cosilico theme. Searched: {searched}") + + +def render_browser_tokens( + source_text: str, + *, + source_path: Path, + repo_root: Path, +) -> str: + """Convert a Tailwind v4 @theme block into CSS custom properties.""" + + match = re.search(r"@theme\s*\{(?P<body>.*?)\}", source_text, flags=re.DOTALL) + if not match: + raise ValueError(f"No @theme block found in {source_path}") + body = match.group("body").strip() + try: + display_source = source_path.relative_to(repo_root.parent) + except ValueError: + display_source = source_path + return ( + "/* Generated from the exported Cosilico design tokens.\n" + f" Source: {display_source}\n" + " Re-run: python scripts/sync_cosilico_theme.py\n" + "*/\n" + ":root {\n" + f"{body}\n" + "}\n" + ) + + +def main(argv: list[str] | None = None) -> int: + repo_root = Path(__file__).resolve().parents[1] + parser = argparse.ArgumentParser( + description="Sync @cosilico/config theme tokens into dashboard CSS." 
+ ) + parser.add_argument("--source", type=Path) + parser.add_argument( + "--output", + type=Path, + default=repo_root / "dashboard" / "cosilico-theme.css", + ) + parser.add_argument("--check", action="store_true") + args = parser.parse_args(argv) + + source = (args.source or default_source(repo_root)).expanduser().resolve() + rendered = render_browser_tokens( + source.read_text(), + source_path=source, + repo_root=repo_root, + ) + + if args.check: + current = args.output.read_text() if args.output.exists() else "" + if current != rendered: + print(f"{args.output} is not synced with {source}", file=sys.stderr) + return 1 + return 0 + + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(rendered) + print(args.output) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/microplex_us/pipelines/pe_native_scores.py b/src/microplex_us/pipelines/pe_native_scores.py index c0b9924..416ea29 100644 --- a/src/microplex_us/pipelines/pe_native_scores.py +++ b/src/microplex_us/pipelines/pe_native_scores.py @@ -5,6 +5,7 @@ import argparse import json import os +import re import subprocess import sys from dataclasses import dataclass, field @@ -23,6 +24,11 @@ "LC_ALL", "TZ", ) +_EITC_AGI_CHILD_DOMAIN_VARIABLE = "adjusted_gross_income,eitc,eitc_child_count" +_EITC_AGI_CHILD_LABEL = re.compile( + r"^nation/irs/eitc/(?Preturns|amount)/" + r"c(?P\d+)_(?P[^_]+)_(?P[^/]+)$" +) _ENHANCED_CPS_BAD_TARGETS: tuple[str, ...] 
= ( "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Head of Household", @@ -536,36 +542,150 @@ def compute(dataset_path: str): } +def classify_target_family(target_name: str) -> str: + parts = target_name.split("/") + if target_name.startswith("state/census/age/"): + return "state_age_distribution" + if target_name.startswith("state/census/population_by_state/"): + return "state_population" + if target_name.startswith("state/census/population_under_5_by_state/"): + return "state_population_under_5" + if target_name.startswith("nation/irs/aca_spending/"): + return "state_aca_spending" + if target_name.startswith("state/irs/aca_enrollment/"): + return "state_aca_enrollment" + if target_name.startswith("irs/medicaid_enrollment/"): + return "state_medicaid_enrollment" + if target_name.endswith("/snap-cost"): + return "state_snap_cost" + if target_name.endswith("/snap-hhs"): + return "state_snap_households" + if target_name.startswith("state/real_estate_taxes/"): + return "state_real_estate_taxes" + if len(parts) >= 3 and parts[0] == "state" and parts[2] == "adjusted_gross_income": + return "state_agi_distribution" + if target_name.startswith("nation/jct/"): + return "national_tax_expenditures" + if target_name.startswith("nation/net_worth/"): + return "national_net_worth" + if target_name.startswith("nation/ssa/"): + return "national_ssa" + if target_name.startswith("nation/census/population_by_age/"): + return "national_population_by_age" + if target_name == "nation/census/infants": + return "national_infants" + if target_name.startswith("nation/census/agi_in_spm_threshold_decile_"): + return "national_spm_threshold_agi" + if target_name.startswith("nation/census/count_in_spm_threshold_decile_"): + return "national_spm_threshold_count" + if target_name.startswith("nation/census/"): + return "national_census_other" + if target_name.startswith("nation/irs/"): + return "national_irs_other" + return "other" + + +def target_scope(target_name: str) -> str: + 
if target_name.startswith("nation/"): + return "national" + if target_name.startswith("state/") or target_name.endswith("/snap-cost") or target_name.endswith("/snap-hhs"): + return "state" + return "other" + + +def abs_pct_error(estimate: float, target: float) -> float: + return abs(estimate - target) / max(abs(target), 1.0) * 100.0 + + +def build_target_rows(from_payload, to_payload): + rows = [] + for idx, name in enumerate(from_payload["target_names"]): + from_term = float(from_payload["weighted_terms"][idx]) + to_term = float(to_payload["weighted_terms"][idx]) + from_error = float(from_payload["rel_error"][idx]) + to_error = float(to_payload["rel_error"][idx]) + target_value = float(from_payload["targets"][idx]) + from_estimate = float(from_payload["estimate"][idx]) + to_estimate = float(to_payload["estimate"][idx]) + if to_error < from_error: + winner = "to" + elif from_error < to_error: + winner = "from" + else: + winner = "tie" + rows.append( + { + "target_name": name, + "target_family": classify_target_family(name), + "target_scope": target_scope(name), + "winner": winner, + "weighted_term_delta": to_term - from_term, + "from_weighted_term": from_term, + "to_weighted_term": to_term, + "target_value": target_value, + "from_estimate": from_estimate, + "to_estimate": to_estimate, + "from_rel_error": from_error, + "to_rel_error": to_error, + "from_abs_pct_error": abs_pct_error(from_estimate, target_value), + "to_abs_pct_error": abs_pct_error(to_estimate, target_value), + } + ) + return rows + + +def summarize_target_rows(rows, *, group_field=None): + if group_field is None: + grouped = [("all", rows)] + else: + values = sorted({row[group_field] for row in rows}) + grouped = [(value, [row for row in rows if row[group_field] == value]) for value in values] + + summaries = [] + for value, group_rows in grouped: + n_targets = len(group_rows) + from_wins = sum(1 for row in group_rows if row["winner"] == "from") + to_wins = sum(1 for row in group_rows if 
row["winner"] == "to") + ties = n_targets - from_wins - to_wins + from_loss = float(np.mean([row["from_weighted_term"] for row in group_rows])) + to_loss = float(np.mean([row["to_weighted_term"] for row in group_rows])) + summary = { + "n_targets": n_targets, + "from_wins": from_wins, + "to_wins": to_wins, + "ties": ties, + "from_win_rate": from_wins / n_targets if n_targets else None, + "to_win_rate": to_wins / n_targets if n_targets else None, + "from_loss": from_loss, + "to_loss": to_loss, + "loss_delta": to_loss - from_loss, + "mean_weighted_term_delta": float( + np.mean([row["weighted_term_delta"] for row in group_rows]) + ), + } + if group_field is not None: + summary[group_field] = value + summaries.append(summary) + return summaries[0] if group_field is None else summaries + + from_payload = compute(FROM_DATASET) to_payload = compute(TO_DATASET) if from_payload["target_names"] != to_payload["target_names"]: raise ValueError("Datasets produced different target names after filtering") -rows = [] -for idx, name in enumerate(from_payload["target_names"]): - from_term = float(from_payload["weighted_terms"][idx]) - to_term = float(to_payload["weighted_terms"][idx]) - rows.append( - { - "target_name": name, - "weighted_term_delta": to_term - from_term, - "from_weighted_term": from_term, - "to_weighted_term": to_term, - "target_value": float(from_payload["targets"][idx]), - "from_estimate": float(from_payload["estimate"][idx]), - "to_estimate": float(to_payload["estimate"][idx]), - "from_rel_error": float(from_payload["rel_error"][idx]), - "to_rel_error": float(to_payload["rel_error"][idx]), - } - ) - +rows = build_target_rows(from_payload, to_payload) rows.sort(key=lambda row: row["weighted_term_delta"], reverse=True) payload = { "metric": "enhanced_cps_native_loss_target_delta", "period": PERIOD, "from_dataset": FROM_DATASET, "to_dataset": TO_DATASET, + "summary": summarize_target_rows(rows), + "family_summaries": summarize_target_rows(rows, 
group_field="target_family"), + "scope_summaries": summarize_target_rows(rows, group_field="target_scope"), + "targets": rows, "top_regressions": rows[:TOP_K], "top_improvements": list(reversed(rows[-TOP_K:])), } @@ -653,31 +773,141 @@ def compute(dataset_path: str): } -baseline_payload = compute(BASELINE_DATASET) -results = [] -for candidate_dataset in CANDIDATE_DATASETS: - candidate_payload = compute(candidate_dataset) - if baseline_payload["target_names"] != candidate_payload["target_names"]: - raise ValueError("Datasets produced different target names after filtering") +def classify_target_family(target_name: str) -> str: + parts = target_name.split("/") + if target_name.startswith("state/census/age/"): + return "state_age_distribution" + if target_name.startswith("state/census/population_by_state/"): + return "state_population" + if target_name.startswith("state/census/population_under_5_by_state/"): + return "state_population_under_5" + if target_name.startswith("nation/irs/aca_spending/"): + return "state_aca_spending" + if target_name.startswith("state/irs/aca_enrollment/"): + return "state_aca_enrollment" + if target_name.startswith("irs/medicaid_enrollment/"): + return "state_medicaid_enrollment" + if target_name.endswith("/snap-cost"): + return "state_snap_cost" + if target_name.endswith("/snap-hhs"): + return "state_snap_households" + if target_name.startswith("state/real_estate_taxes/"): + return "state_real_estate_taxes" + if len(parts) >= 3 and parts[0] == "state" and parts[2] == "adjusted_gross_income": + return "state_agi_distribution" + if target_name.startswith("nation/jct/"): + return "national_tax_expenditures" + if target_name.startswith("nation/net_worth/"): + return "national_net_worth" + if target_name.startswith("nation/ssa/"): + return "national_ssa" + if target_name.startswith("nation/census/population_by_age/"): + return "national_population_by_age" + if target_name == "nation/census/infants": + return "national_infants" + if 
target_name.startswith("nation/census/agi_in_spm_threshold_decile_"): + return "national_spm_threshold_agi" + if target_name.startswith("nation/census/count_in_spm_threshold_decile_"): + return "national_spm_threshold_count" + if target_name.startswith("nation/census/"): + return "national_census_other" + if target_name.startswith("nation/irs/"): + return "national_irs_other" + return "other" + + +def target_scope(target_name: str) -> str: + if target_name.startswith("nation/"): + return "national" + if target_name.startswith("state/") or target_name.endswith("/snap-cost") or target_name.endswith("/snap-hhs"): + return "state" + return "other" + + +def abs_pct_error(estimate: float, target: float) -> float: + return abs(estimate - target) / max(abs(target), 1.0) * 100.0 + +def build_target_rows(from_payload, to_payload): rows = [] - for idx, name in enumerate(baseline_payload["target_names"]): - from_term = float(baseline_payload["weighted_terms"][idx]) - to_term = float(candidate_payload["weighted_terms"][idx]) + for idx, name in enumerate(from_payload["target_names"]): + from_term = float(from_payload["weighted_terms"][idx]) + to_term = float(to_payload["weighted_terms"][idx]) + from_error = float(from_payload["rel_error"][idx]) + to_error = float(to_payload["rel_error"][idx]) + target_value = float(from_payload["targets"][idx]) + from_estimate = float(from_payload["estimate"][idx]) + to_estimate = float(to_payload["estimate"][idx]) + if to_error < from_error: + winner = "to" + elif from_error < to_error: + winner = "from" + else: + winner = "tie" rows.append( { "target_name": name, + "target_family": classify_target_family(name), + "target_scope": target_scope(name), + "winner": winner, "weighted_term_delta": to_term - from_term, "from_weighted_term": from_term, "to_weighted_term": to_term, - "target_value": float(baseline_payload["targets"][idx]), - "from_estimate": float(baseline_payload["estimate"][idx]), - "to_estimate": 
float(candidate_payload["estimate"][idx]), - "from_rel_error": float(baseline_payload["rel_error"][idx]), - "to_rel_error": float(candidate_payload["rel_error"][idx]), + "target_value": target_value, + "from_estimate": from_estimate, + "to_estimate": to_estimate, + "from_rel_error": from_error, + "to_rel_error": to_error, + "from_abs_pct_error": abs_pct_error(from_estimate, target_value), + "to_abs_pct_error": abs_pct_error(to_estimate, target_value), } ) + return rows + + +def summarize_target_rows(rows, *, group_field=None): + if group_field is None: + grouped = [("all", rows)] + else: + values = sorted({row[group_field] for row in rows}) + grouped = [(value, [row for row in rows if row[group_field] == value]) for value in values] + + summaries = [] + for value, group_rows in grouped: + n_targets = len(group_rows) + from_wins = sum(1 for row in group_rows if row["winner"] == "from") + to_wins = sum(1 for row in group_rows if row["winner"] == "to") + ties = n_targets - from_wins - to_wins + from_loss = float(np.mean([row["from_weighted_term"] for row in group_rows])) + to_loss = float(np.mean([row["to_weighted_term"] for row in group_rows])) + summary = { + "n_targets": n_targets, + "from_wins": from_wins, + "to_wins": to_wins, + "ties": ties, + "from_win_rate": from_wins / n_targets if n_targets else None, + "to_win_rate": to_wins / n_targets if n_targets else None, + "from_loss": from_loss, + "to_loss": to_loss, + "loss_delta": to_loss - from_loss, + "mean_weighted_term_delta": float( + np.mean([row["weighted_term_delta"] for row in group_rows]) + ), + } + if group_field is not None: + summary[group_field] = value + summaries.append(summary) + return summaries[0] if group_field is None else summaries + + +baseline_payload = compute(BASELINE_DATASET) +results = [] +for candidate_dataset in CANDIDATE_DATASETS: + candidate_payload = compute(candidate_dataset) + if baseline_payload["target_names"] != candidate_payload["target_names"]: + raise ValueError("Datasets 
produced different target names after filtering") + rows = build_target_rows(baseline_payload, candidate_payload) rows.sort(key=lambda row: row["weighted_term_delta"], reverse=True) results.append( { @@ -685,6 +915,10 @@ def compute(dataset_path: str): "period": PERIOD, "from_dataset": BASELINE_DATASET, "to_dataset": candidate_dataset, + "summary": summarize_target_rows(rows), + "family_summaries": summarize_target_rows(rows, group_field="target_family"), + "scope_summaries": summarize_target_rows(rows, group_field="target_scope"), + "targets": rows, "top_regressions": rows[:TOP_K], "top_improvements": list(reversed(rows[-TOP_K:])), } @@ -1796,6 +2030,298 @@ def compute_batch_us_pe_native_scores( return results +@dataclass(frozen=True) +class PENativeTargetLookupKey: + """Structured lookup key for a legacy PE-native target label.""" + + variable: str + count_children: int + agi_lower: float + agi_upper: float + + def as_tuple(self) -> tuple[str, int, float, float]: + return (self.variable, self.count_children, self.agi_lower, self.agi_upper) + + @staticmethod + def _json_safe_bound(value: float) -> float | str: + if value == float("inf"): + return "inf" + if value == float("-inf"): + return "-inf" + return value + + def expected_constraints(self) -> list[dict[str, str | float | int]]: + if self.count_children < 3: + child_constraint: dict[str, str | float | int] = { + "variable": "eitc_child_count", + "operation": "==", + "value": self.count_children, + } + else: + child_constraint = { + "variable": "eitc_child_count", + "operation": ">", + "value": 2, + } + return [ + {"variable": "tax_unit_is_filer", "operation": "==", "value": 1}, + {"variable": "eitc", "operation": ">", "value": 0}, + child_constraint, + { + "variable": "adjusted_gross_income", + "operation": ">=", + "value": self._json_safe_bound(self.agi_lower), + }, + { + "variable": "adjusted_gross_income", + "operation": "<", + "value": self._json_safe_bound(self.agi_upper), + }, + ] + + def 
expected_target(self) -> dict[str, Any]: + return { + "variable": self.variable, + "geo_level": "national", + "geographic_id": "US", + "domain_variable": _EITC_AGI_CHILD_DOMAIN_VARIABLE, + "constraints": self.expected_constraints(), + } + + +def _parse_pe_native_numeric_token(token: str) -> float: + if token == "-inf": + return float("-inf") + if token == "inf": + return float("inf") + multipliers = { + "bn": 1_000_000_000.0, + "m": 1_000_000.0, + "k": 1_000.0, + } + for suffix, multiplier in multipliers.items(): + if token.endswith(suffix): + return float(token[: -len(suffix)]) * multiplier + return float(token) + + +def parse_pe_native_target_lookup_key( + target_name: str, +) -> PENativeTargetLookupKey | None: + """Parse PE-native labels that now have structured DB equivalents.""" + + match = _EITC_AGI_CHILD_LABEL.match(target_name) + if match is None: + return None + metric = match.group("metric") + variable = "tax_unit_count" if metric == "returns" else "eitc" + return PENativeTargetLookupKey( + variable=variable, + count_children=int(match.group("count_children")), + agi_lower=_parse_pe_native_numeric_token(match.group("agi_lower")), + agi_upper=_parse_pe_native_numeric_token(match.group("agi_upper")), + ) + + +def _constraint_value_as_float(value: str) -> float | None: + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _target_lookup_key_from_policyengine_target( + target: Any, +) -> tuple[str, int, float, float] | None: + if target.geo_level != "national": + return None + if target.variable not in {"eitc", "tax_unit_count"}: + return None + if target.domain_variable != _EITC_AGI_CHILD_DOMAIN_VARIABLE: + return None + + agi_lower: float | None = None + agi_upper: float | None = None + count_children: int | None = None + has_eitc_positive_constraint = False + + for constraint in target.constraints: + value = str(constraint.value) + numeric_value = _constraint_value_as_float(value) + if ( + constraint.variable == 
"adjusted_gross_income" + and constraint.operation == ">=" + and numeric_value is not None + ): + agi_lower = numeric_value + elif ( + constraint.variable == "adjusted_gross_income" + and constraint.operation == "<" + and numeric_value is not None + ): + agi_upper = numeric_value + elif constraint.variable == "eitc" and constraint.operation == ">": + has_eitc_positive_constraint = numeric_value == 0 + elif constraint.variable == "eitc_child_count" and numeric_value is not None: + if constraint.operation == "==": + count_children = int(numeric_value) + elif constraint.operation == ">" and numeric_value == 2: + count_children = 3 + elif constraint.operation == ">=" and numeric_value == 3: + count_children = 3 + + if ( + agi_lower is None + or agi_upper is None + or count_children is None + or not has_eitc_positive_constraint + ): + return None + return (target.variable, count_children, agi_lower, agi_upper) + + +def _policyengine_target_payload(target: Any) -> dict[str, Any]: + return { + "target_id": target.target_id, + "variable": target.variable, + "period": target.period, + "value": target.value, + "source": target.source, + "notes": target.notes, + "geo_level": target.geo_level, + "geographic_id": target.geographic_id, + "domain_variable": target.domain_variable, + "constraints": [ + { + "variable": constraint.variable, + "operation": constraint.operation, + "value": constraint.value, + } + for constraint in target.constraints + ], + } + + +def _load_policyengine_target_match_index( + target_db_path: str | Path, + *, + period: int, +) -> dict[tuple[str, int, float, float], list[dict[str, Any]]]: + from microplex_us.policyengine.us import PolicyEngineUSDBTargetProvider + + provider = PolicyEngineUSDBTargetProvider(target_db_path, validate=False) + targets = provider.load_targets( + period=period, + variables=["eitc", "tax_unit_count"], + domain_variable_values=[_EITC_AGI_CHILD_DOMAIN_VARIABLE], + geo_levels=["national"], + ) + matches: dict[tuple[str, int, float, 
float], list[dict[str, Any]]] = {} + for target in targets: + key = _target_lookup_key_from_policyengine_target(target) + if key is None: + continue + matches.setdefault(key, []).append(_policyengine_target_payload(target)) + return matches + + +def _default_policyengine_targets_db_path( + policyengine_us_data_repo: str | Path | None, +) -> Path | None: + try: + repo = resolve_policyengine_us_data_repo_root(policyengine_us_data_repo) + except FileNotFoundError: + return None + path = repo / "policyengine_us_data" / "storage" / "calibration" / "policy_data.db" + return path if path.exists() else None + + +def annotate_pe_native_target_db_matches( + payload: dict[str, Any], + *, + target_db_path: str | Path | None, + period: int, +) -> dict[str, Any]: + """Attach structured PolicyEngine target DB matches to diagnostic rows.""" + + rows = list(payload.get("targets") or []) + resolved_db_path = Path(target_db_path).expanduser() if target_db_path else None + match_index: dict[tuple[str, int, float, float], list[dict[str, Any]]] = {} + target_db_error = None + if resolved_db_path is not None and resolved_db_path.exists(): + try: + match_index = _load_policyengine_target_match_index( + resolved_db_path, + period=period, + ) + except Exception as exc: # pragma: no cover - defensive diagnostic path + target_db_error = str(exc) + + counts = { + "matched": 0, + "legacy_only": 0, + "unparsed": 0, + "ambiguous": 0, + "db_unavailable": 0, + } + annotations_by_name: dict[str, dict[str, Any]] = {} + for row in rows: + target_name = str(row.get("target_name", "")) + key = parse_pe_native_target_lookup_key(target_name) + if key is None: + annotation: dict[str, Any] = {"policyengine_target_match": "unparsed"} + elif resolved_db_path is None or not resolved_db_path.exists() or target_db_error: + annotation = { + "policyengine_target_match": "db_unavailable", + "policyengine_target_expected": key.expected_target(), + } + else: + matches = match_index.get(key.as_tuple(), []) + if 
len(matches) == 1: + match = matches[0] + annotation = { + "policyengine_target_match": "matched", + "policyengine_target_id": match["target_id"], + "policyengine_target_variable": match["variable"], + "policyengine_target_period": match["period"], + "policyengine_target_value": match["value"], + "policyengine_target_source": match["source"], + "policyengine_target_domain_variable": match["domain_variable"], + "policyengine_target_constraints": match["constraints"], + } + elif len(matches) > 1: + annotation = { + "policyengine_target_match": "ambiguous", + "policyengine_target_match_count": len(matches), + "policyengine_target_matches": matches, + "policyengine_target_expected": key.expected_target(), + } + else: + annotation = { + "policyengine_target_match": "legacy_only", + "policyengine_target_expected": key.expected_target(), + } + counts[annotation["policyengine_target_match"]] += 1 + row.update(annotation) + annotations_by_name[target_name] = annotation + + for list_name in ("top_improvements", "top_regressions"): + for row in payload.get(list_name) or []: + annotation = annotations_by_name.get(str(row.get("target_name", ""))) + if annotation: + row.update(annotation) + + parsed_total = counts["matched"] + counts["legacy_only"] + counts["ambiguous"] + payload["target_db_summary"] = { + "target_db_path": str(resolved_db_path) if resolved_db_path else None, + "target_db_error": target_db_error, + **counts, + "parsed_targets": parsed_total, + "match_rate": counts["matched"] / parsed_total if parsed_total else None, + } + return payload + + def compare_us_pe_native_target_deltas( *, from_dataset_path: str | Path, @@ -2012,6 +2538,52 @@ def write_us_pe_native_scores( ) destination = Path(output_path) destination.parent.mkdir(parents=True, exist_ok=True) + destination.write_text( + json.dumps(payload, indent=2, sort_keys=True, allow_nan=False) + ) + return destination + + +def write_us_pe_native_target_diagnostics( + output_path: str | Path, + *, + 
from_dataset_path: str | Path, + to_dataset_path: str | Path, + period: int = 2024, + top_k: int = 50, + from_label: str = "policyengine-us-data", + to_label: str = "microplex-us", + policyengine_us_data_repo: str | Path | None = None, + policyengine_us_data_python: str | Path | None = None, + policyengine_targets_db_path: str | Path | None = None, +) -> Path: + """Write the full PE-native per-target diagnostic dataset to disk.""" + + payload = compare_us_pe_native_target_deltas( + from_dataset_path=from_dataset_path, + to_dataset_path=to_dataset_path, + period=period, + top_k=top_k, + policyengine_us_data_repo=policyengine_us_data_repo, + policyengine_us_data_python=policyengine_us_data_python, + ) + payload["diagnostic_schema_version"] = 1 + payload["dataset_labels"] = { + "from": from_label, + "to": to_label, + } + target_db_path = ( + Path(policyengine_targets_db_path).expanduser() + if policyengine_targets_db_path is not None + else _default_policyengine_targets_db_path(policyengine_us_data_repo) + ) + annotate_pe_native_target_db_matches( + payload, + target_db_path=target_db_path, + period=period, + ) + destination = Path(output_path) + destination.parent.mkdir(parents=True, exist_ok=True) destination.write_text(json.dumps(payload, indent=2, sort_keys=True)) return destination @@ -2040,5 +2612,42 @@ def main(argv: list[str] | None = None) -> int: return 0 +def main_target_diagnostics(argv: list[str] | None = None) -> int: + """CLI for full PE-native per-target diagnostics.""" + + parser = argparse.ArgumentParser( + description=( + "Write a full per-target PE-native diagnostic JSON comparing a " + "baseline dataset to a Microplex candidate." 
+ ) + ) + parser.add_argument("--from-dataset", required=True) + parser.add_argument("--to-dataset", required=True) + parser.add_argument("--output-path", required=True) + parser.add_argument("--period", type=int, default=2024) + parser.add_argument("--top-k", type=int, default=50) + parser.add_argument("--from-label", default="policyengine-us-data") + parser.add_argument("--to-label", default="microplex-us") + parser.add_argument("--policyengine-us-data-python") + parser.add_argument("--policyengine-us-data-repo") + parser.add_argument("--policyengine-targets-db") + args = parser.parse_args(argv) + + path = write_us_pe_native_target_diagnostics( + args.output_path, + from_dataset_path=args.from_dataset, + to_dataset_path=args.to_dataset, + period=args.period, + top_k=args.top_k, + from_label=args.from_label, + to_label=args.to_label, + policyengine_us_data_python=args.policyengine_us_data_python, + policyengine_us_data_repo=args.policyengine_us_data_repo, + policyengine_targets_db_path=args.policyengine_targets_db, + ) + print(str(path)) + return 0 + + if __name__ == "__main__": raise SystemExit(main(sys.argv[1:])) diff --git a/src/microplex_us/policyengine/target_profiles.py b/src/microplex_us/policyengine/target_profiles.py index 3e8aa78..0b54941 100644 --- a/src/microplex_us/policyengine/target_profiles.py +++ b/src/microplex_us/policyengine/target_profiles.py @@ -34,6 +34,11 @@ def to_provider_filter(self) -> dict[str, str | None]: PolicyEngineUSTargetCell("dividend_income", geo_level="national", domain_variable="dividend_income"), PolicyEngineUSTargetCell("eitc", geo_level="national"), PolicyEngineUSTargetCell("eitc", geo_level="national", domain_variable="eitc_child_count"), + PolicyEngineUSTargetCell( + "eitc", + geo_level="national", + domain_variable="adjusted_gross_income,eitc,eitc_child_count", + ), PolicyEngineUSTargetCell( "health_insurance_premiums_without_medicare_part_b", geo_level="national", @@ -116,6 +121,11 @@ def to_provider_filter(self) -> 
dict[str, str | None]: geo_level="national", domain_variable="eitc_child_count", ), + PolicyEngineUSTargetCell( + "tax_unit_count", + geo_level="national", + domain_variable="adjusted_gross_income,eitc,eitc_child_count", + ), PolicyEngineUSTargetCell("tax_unit_count", geo_level="national", domain_variable="income_tax"), PolicyEngineUSTargetCell( "tax_unit_count", diff --git a/tests/pipelines/test_pe_native_scores.py b/tests/pipelines/test_pe_native_scores.py index 4df19ea..5006097 100644 --- a/tests/pipelines/test_pe_native_scores.py +++ b/tests/pipelines/test_pe_native_scores.py @@ -8,6 +8,7 @@ from microplex_us.pipelines.pe_native_scores import ( PolicyEngineUSEnhancedCPSNativeScores, + annotate_pe_native_target_db_matches, build_policyengine_us_data_pythonpath, build_policyengine_us_data_subprocess_env, compare_us_pe_native_target_deltas, @@ -16,8 +17,10 @@ compute_batch_us_pe_native_target_deltas, compute_us_pe_native_scores, compute_us_pe_native_support_audit, + parse_pe_native_target_lookup_key, resolve_policyengine_us_data_python, write_us_pe_native_scores, + write_us_pe_native_target_diagnostics, ) @@ -309,9 +312,58 @@ def test_compare_us_pe_native_target_deltas_wraps_subprocess_payload( "period": 2024, "from_dataset": str(before), "to_dataset": str(after), + "summary": { + "n_targets": 2, + "from_wins": 1, + "to_wins": 1, + "ties": 0, + "from_win_rate": 0.5, + "to_win_rate": 0.5, + "from_loss": 0.5, + "to_loss": 1.0, + "loss_delta": 0.5, + "mean_weighted_term_delta": 0.5, + }, + "family_summaries": [ + { + "target_family": "national_irs_other", + "n_targets": 1, + "to_win_rate": 0.0, + "loss_delta": 1.5, + } + ], + "scope_summaries": [ + { + "target_scope": "national", + "n_targets": 1, + "to_win_rate": 0.0, + "loss_delta": 1.5, + } + ], + "targets": [ + { + "target_name": "nation/irs/example", + "target_family": "national_irs_other", + "target_scope": "national", + "winner": "from", + "weighted_term_delta": 1.5, + "from_weighted_term": 0.2, + 
"to_weighted_term": 1.7, + "target_value": 10.0, + "from_estimate": 1.0, + "to_estimate": 0.0, + "from_rel_error": 0.3, + "to_rel_error": 1.0, + "from_abs_pct_error": 90.0, + "to_abs_pct_error": 100.0, + } + ], "top_regressions": [ { "target_name": "nation/irs/example", + "target_family": "national_irs_other", + "target_scope": "national", + "winner": "from", "weighted_term_delta": 1.5, "from_weighted_term": 0.2, "to_weighted_term": 1.7, @@ -325,6 +377,9 @@ def test_compare_us_pe_native_target_deltas_wraps_subprocess_payload( "top_improvements": [ { "target_name": "state/example", + "target_family": "other", + "target_scope": "state", + "winner": "to", "weighted_term_delta": -0.5, "from_weighted_term": 0.8, "to_weighted_term": 0.3, @@ -361,6 +416,147 @@ def test_compare_us_pe_native_target_deltas_wraps_subprocess_payload( assert result["metric"] == "enhanced_cps_native_loss_target_delta" assert result["top_regressions"][0]["target_name"] == "nation/irs/example" + assert result["summary"]["to_win_rate"] == 0.5 + assert result["targets"][0]["target_family"] == "national_irs_other" + + +def test_parse_pe_native_target_lookup_key_maps_eitc_agi_child_labels() -> None: + amount_key = parse_pe_native_target_lookup_key( + "nation/irs/eitc/amount/c3_1_1k" + ) + returns_key = parse_pe_native_target_lookup_key( + "nation/irs/eitc/returns/c2_1_1k" + ) + + assert amount_key is not None + assert amount_key.variable == "eitc" + assert amount_key.count_children == 3 + assert amount_key.agi_lower == 1.0 + assert amount_key.agi_upper == 1_000.0 + assert amount_key.expected_target()["domain_variable"] == ( + "adjusted_gross_income,eitc,eitc_child_count" + ) + assert returns_key is not None + assert returns_key.variable == "tax_unit_count" + assert returns_key.count_children == 2 + + +def test_annotate_pe_native_target_db_matches_marks_matches_and_gaps( + monkeypatch, + tmp_path, +) -> None: + db_path = tmp_path / "policy_data.db" + db_path.write_text("stub") + matched_name = 
"nation/irs/eitc/amount/c3_1_1k" + matched_key = parse_pe_native_target_lookup_key(matched_name) + assert matched_key is not None + + monkeypatch.setattr( + "microplex_us.pipelines.pe_native_scores._load_policyengine_target_match_index", + lambda *_args, **_kwargs: { + matched_key.as_tuple(): [ + { + "target_id": 123, + "variable": "eitc", + "period": 2022, + "value": 2_079_000.0, + "source": "IRS SOI", + "notes": "Table 2.5", + "geo_level": "national", + "geographic_id": "US", + "domain_variable": ( + "adjusted_gross_income,eitc,eitc_child_count" + ), + "constraints": [ + { + "variable": "eitc_child_count", + "operation": ">", + "value": "2", + } + ], + } + ] + }, + ) + payload = { + "targets": [ + {"target_name": matched_name}, + {"target_name": "nation/irs/eitc/returns/c2_1_1k"}, + {"target_name": "nation/census/infants"}, + ], + "top_improvements": [{"target_name": matched_name}], + "top_regressions": [], + } + + annotate_pe_native_target_db_matches( + payload, + target_db_path=db_path, + period=2024, + ) + + assert payload["targets"][0]["policyengine_target_match"] == "matched" + assert payload["targets"][0]["policyengine_target_id"] == 123 + assert payload["targets"][1]["policyengine_target_match"] == "legacy_only" + assert payload["targets"][1]["policyengine_target_expected"]["variable"] == ( + "tax_unit_count" + ) + assert payload["targets"][2]["policyengine_target_match"] == "unparsed" + assert payload["top_improvements"][0]["policyengine_target_match"] == "matched" + assert payload["target_db_summary"]["matched"] == 1 + assert payload["target_db_summary"]["legacy_only"] == 1 + assert payload["target_db_summary"]["unparsed"] == 1 + assert payload["target_db_summary"]["match_rate"] == 0.5 + + +def test_write_us_pe_native_target_diagnostics_persists_full_payload( + monkeypatch, + tmp_path, +) -> None: + before = tmp_path / "before.h5" + after = tmp_path / "after.h5" + output_path = tmp_path / "diagnostics.json" + for path in (before, after): + 
path.write_text(path.stem) + + monkeypatch.setattr( + "microplex_us.pipelines.pe_native_scores.compare_us_pe_native_target_deltas", + lambda **_kwargs: { + "metric": "enhanced_cps_native_loss_target_delta", + "period": 2024, + "from_dataset": str(before), + "to_dataset": str(after), + "summary": {"n_targets": 1, "to_win_rate": 1.0}, + "family_summaries": [{"target_family": "national_irs_other"}], + "scope_summaries": [{"target_scope": "national"}], + "targets": [ + { + "target_name": "nation/irs/example", + "target_family": "national_irs_other", + "winner": "to", + "weighted_term_delta": -1.0, + } + ], + "top_regressions": [], + "top_improvements": [], + }, + ) + + written = write_us_pe_native_target_diagnostics( + output_path, + from_dataset_path=before, + to_dataset_path=after, + from_label="baseline", + to_label="candidate", + policyengine_targets_db_path=tmp_path / "missing.db", + ) + + payload = json.loads(written.read_text()) + assert written == output_path + assert payload["diagnostic_schema_version"] == 1 + assert payload["dataset_labels"] == {"from": "baseline", "to": "candidate"} + assert payload["targets"][0]["target_name"] == "nation/irs/example" + assert payload["targets"][0]["policyengine_target_match"] == "unparsed" + assert payload["target_db_summary"]["unparsed"] == 1 def test_compute_batch_us_pe_native_target_deltas_wraps_multiple_candidates(