diff --git a/packages/app/cypress/e2e/reproduce-drawer.cy.ts b/packages/app/cypress/e2e/reproduce-drawer.cy.ts new file mode 100644 index 00000000..d0cbe637 --- /dev/null +++ b/packages/app/cypress/e2e/reproduce-drawer.cy.ts @@ -0,0 +1,68 @@ +/** + * Tests for the Reproduce drawer — opens from the inference table row, + * scatter pinned tooltip, and GPU graph tooltip. Verifies drawer state is + * URL-safe (closing does not perturb chart zoom or query string). + */ +describe('Reproduce drawer', () => { + beforeEach(() => { + cy.window().then((win) => { + win.localStorage.setItem('inferencex-star-modal-dismissed', String(Date.now())); + }); + cy.visit('/inference'); + cy.get('[data-testid="scatter-graph"]') + .first() + .find('svg .dot-group') + .should('have.length.greaterThan', 0); + }); + + it('opens from the inference table Reproduce button and shows the three tabs', () => { + cy.get('[data-testid="inference-table-view-btn"]').first().click(); + cy.get('[data-testid="inference-results-table"]').should('be.visible'); + cy.get('[data-testid="inference-table-reproduce-btn"]').first().click(); + + cy.get('[data-testid="reproduce-drawer"]').should('be.visible'); + cy.contains('Reproduce this benchmark').should('be.visible'); + cy.contains('button', 'Command').should('be.visible'); + cy.contains('button', 'Config JSON').should('be.visible'); + cy.contains('button', 'Environment').should('be.visible'); + }); + + it('exposes a copy button on every tab', () => { + cy.get('[data-testid="inference-table-view-btn"]').first().click(); + cy.get('[data-testid="inference-table-reproduce-btn"]').first().click(); + cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible'); + cy.contains('button', 'Config JSON').click(); + cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible'); + cy.contains('button', 'Environment').click(); + cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible'); + }); + + it('Esc closes the drawer without changing the URL 
hash', () => { + cy.get('[data-testid="inference-table-view-btn"]').first().click(); + cy.url().then((before) => { + cy.get('[data-testid="inference-table-reproduce-btn"]').first().click(); + cy.get('[data-testid="reproduce-drawer"]').should('be.visible'); + cy.get('body').type('{esc}'); + cy.get('[data-testid="reproduce-drawer"]').should('not.exist'); + cy.url().should('eq', before); + }); + }); + + it('renders correctly for an unofficial-run overlay row when one is loaded', () => { + // Re-visit with the overlay query param. We do NOT assert which row is + // rendered — we only assert the drawer can be opened from whatever points + // appear for the official path on top of the overlay. The wiring is the + // same code path: clicking a Reproduce control feeds the InferenceData + // through to the drawer regardless of where the row originated. + const candidateRunId = '15000000000'; + cy.visit(`/inference?unofficialrun=${candidateRunId}`); + cy.get('[data-testid="scatter-graph"]') + .first() + .find('svg .dot-group') + .should('have.length.greaterThan', 0); + cy.get('[data-testid="inference-table-view-btn"]').first().click(); + cy.get('[data-testid="inference-results-table"]').should('be.visible'); + cy.get('[data-testid="inference-table-reproduce-btn"]').first().click(); + cy.get('[data-testid="reproduce-drawer"]').should('be.visible'); + }); +}); diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index 10d27f1e..04599d60 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -250,6 +250,9 @@ export function createMockInferenceContext( activePresetId: null, setActivePresetId: namedStub('setActivePresetId'), presetGuardRef: { current: false } as React.RefObject, + reproducePoint: null, + openReproduceDrawer: namedStub('openReproduceDrawer'), + closeReproduceDrawer: namedStub('closeReproduceDrawer'), ...overrides, }; } diff --git 
a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index 5d30b8a8..64f26d6e 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -146,6 +146,23 @@ export function InferenceProvider({ // --- Tracked configs state --- const [trackedConfigs, setTrackedConfigs] = useState([]); + // --- Reproduce drawer state --- + // Local-only — we do NOT sync this to the URL because closing the drawer + // should not perturb chart zoom or share-link state. + const [reproducePoint, setReproducePoint] = useState(null); + const openReproduceDrawer = useCallback((point: InferenceData, source: string) => { + setReproducePoint(point); + track('reproduce_drawer_open_clicked', { + source, + framework: point.framework, + hwKey: point.hwKey, + precision: point.precision, + tp: point.tp, + conc: point.conc, + }); + }, []); + const closeReproduceDrawer = useCallback(() => setReproducePoint(null), []); + // --- Favorite presets state --- const [pendingHwFilter, setPendingHwFilter] = useState(null); const [activePresetId, setActivePresetId] = useState(null); @@ -977,6 +994,9 @@ export function InferenceProvider({ activePresetId, setActivePresetId, presetGuardRef, + reproducePoint, + openReproduceDrawer, + closeReproduceDrawer, }), [ activeHwTypes, @@ -1030,6 +1050,9 @@ export function InferenceProvider({ removeTrackedConfig, clearTrackedConfigs, activePresetId, + reproducePoint, + openReproduceDrawer, + closeReproduceDrawer, ], ); diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index 0ea63fca..9a6dbee5 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -531,6 +531,12 @@ export interface InferenceChartContextType { activePresetId: string | null; setActivePresetId: (id: string | null) => void; presetGuardRef: 
React.RefObject; + /** The point currently shown in the Reproduce drawer, or null when closed. */ + reproducePoint: InferenceData | null; + /** Open the Reproduce drawer for a given chart point. */ + openReproduceDrawer: (point: InferenceData, source: string) => void; + /** Close the Reproduce drawer. */ + closeReproduceDrawer: () => void; } export interface CalculateUserCostsRequest { model: string; diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx index 0f7fa75b..40c71ba1 100644 --- a/packages/app/src/components/inference/ui/ChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx @@ -48,8 +48,11 @@ import ComparisonChangelog from './ComparisonChangelog'; import CustomCosts from './CustomCosts'; import CustomPowers from './CustomPowers'; import GPUGraph from './GPUGraph'; +import ReproduceDrawer from './ReproduceDrawer'; import TrendChart from './TrendChart'; +import { sequenceToIslOsl } from '@semianalysisai/inferencex-constants'; + const ModelArchitectureDiagram = dynamic(() => import('./ModelArchitectureDiagram'), { ssr: false, loading: () => , @@ -149,8 +152,15 @@ export default function ChartDisplay() { activeHwTypes, activeDates, setSelectedE2eXAxisMetric, + reproducePoint, + closeReproduceDrawer, } = useInference(); + const reproduceSequence = useMemo( + () => (selectedSequence ? sequenceToIslOsl(selectedSequence) : null) ?? 
undefined, + [selectedSequence], + ); + const { changelogs, loading: changelogsLoading, @@ -684,6 +694,13 @@ export default function ChartDisplay() { + + ); } diff --git a/packages/app/src/components/inference/ui/GPUGraph.tsx b/packages/app/src/components/inference/ui/GPUGraph.tsx index cafd3a82..fd90c978 100644 --- a/packages/app/src/components/inference/ui/GPUGraph.tsx +++ b/packages/app/src/components/inference/ui/GPUGraph.tsx @@ -73,6 +73,7 @@ const GPUGraph = React.memo( selectAllActiveDates, showLineLabels, setShowLineLabels, + openReproduceDrawer, } = useInference(); const { resolvedTheme } = useTheme(); const chartRef = useRef(null); @@ -692,6 +693,18 @@ const GPUGraph = React.memo( sel.select('.visible-shape') as any, getShapeKeyForPrecision(d.precision, selectedPrecisions), ), + onPointClick: (d: InferenceData) => { + const tooltipEl = chartRef.current?.getTooltipElement(); + if (!tooltipEl) return; + const reproduceBtn = tooltipEl.querySelector('[data-action="reproduce"]'); + if (!reproduceBtn) return; + reproduceBtn.addEventListener('click', (btnEvent) => { + btnEvent.stopPropagation(); + openReproduceDrawer(d, 'gpu_graph_tooltip'); + chartRef.current?.dismissTooltip(); + chartRef.current?.hideTooltip(); + }); + }, attachToLayer: 1, }} onRender={(ctx: RenderContext) => { diff --git a/packages/app/src/components/inference/ui/InferenceTable.tsx b/packages/app/src/components/inference/ui/InferenceTable.tsx index c300e60d..31075534 100644 --- a/packages/app/src/components/inference/ui/InferenceTable.tsx +++ b/packages/app/src/components/inference/ui/InferenceTable.tsx @@ -1,9 +1,12 @@ 'use client'; import { useMemo } from 'react'; +import { Wrench } from 'lucide-react'; +import { useInference } from '@/components/inference/InferenceContext'; import type { ChartDefinition, InferenceData } from '@/components/inference/types'; import { type DataTableColumn, DataTable } from '@/components/ui/data-table'; +import { track } from '@/lib/analytics'; import { 
getHardwareConfig } from '@/lib/constants'; import { getNestedYValue } from '@/lib/chart-utils'; import { type Precision, getPrecisionLabel } from '@/lib/data-mappings'; @@ -29,6 +32,7 @@ export default function InferenceTable({ chartDefinition, selectedYAxisMetric, }: InferenceTableProps) { + const { openReproduceDrawer } = useInference(); const yPath = chartDefinition[selectedYAxisMetric as keyof ChartDefinition] as string | undefined; const yLabel = chartDefinition[`${selectedYAxisMetric}_label` as keyof ChartDefinition] as string; const xLabel = chartDefinition.x_label; @@ -110,8 +114,34 @@ export default function InferenceTable({ sortValue: (row) => row.median_intvty ?? 0, className: 'tabular-nums', }, + { + header: '', + align: 'center', + cell: (row) => ( + + ), + className: 'whitespace-nowrap', + }, ], - [yPath, yLabel, xLabel], + [yPath, yLabel, xLabel, openReproduceDrawer], ); return ( diff --git a/packages/app/src/components/inference/ui/ReproduceDrawer.tsx b/packages/app/src/components/inference/ui/ReproduceDrawer.tsx new file mode 100644 index 00000000..f8888d2f --- /dev/null +++ b/packages/app/src/components/inference/ui/ReproduceDrawer.tsx @@ -0,0 +1,396 @@ +'use client'; + +import { useEffect, useMemo, useState } from 'react'; +import { Check, Copy, ExternalLink } from 'lucide-react'; + +import type { InferenceData } from '@/components/inference/types'; +import { Dialog, DialogContent, DialogTitle } from '@/components/ui/dialog'; +import { track } from '@/lib/analytics'; +import { getHardwareConfig } from '@/lib/constants'; +import { buildLaunchCommand } from '@/lib/reproduce-command'; +import { getDisplayLabel, updateRepoUrl } from '@/lib/utils'; + +type DrawerTab = 'command' | 'config' | 'environment'; + +interface ReproduceDrawerProps { + /** The point to reproduce, or null when the drawer is closed. */ + point: InferenceData | null; + /** ISL/OSL of the active sequence, used for command-line generation. 
*/ + sequence?: { isl: number; osl: number }; + /** Selected model display key, passed through to launch-command generation. */ + model?: string; + onClose: () => void; +} + +/** + * Drawer that explains how to reproduce a benchmark point: launch command, + * full config JSON, environment (image, framework SHA, GPU SKU, run URL). + * + * Exits on Esc and outside-click without disturbing chart zoom or URL state — + * the only state that lives outside this component is the selected `point`, + * which the caller wipes via `onClose`. + */ +export default function ReproduceDrawer({ point, sequence, model, onClose }: ReproduceDrawerProps) { + const open = point !== null; + const [activeTab, setActiveTab] = useState('command'); + + useEffect(() => { + if (!open) return; + setActiveTab('command'); + }, [point?.hwKey, point?.tp, point?.conc, point?.precision, open]); + + const launch = useMemo(() => { + if (!point) return null; + return buildLaunchCommand(point.framework ?? '', { + model, + precision: point.precision, + tp: point.tp, + ep: point.ep, + dp_attention: point.dp_attention, + spec_decoding: point.spec_decoding, + disagg: point.disagg, + prefill_tp: point.prefill_tp, + prefill_ep: point.prefill_ep, + prefill_dp_attention: point.prefill_dp_attention, + prefill_num_workers: point.prefill_num_workers, + num_prefill_gpu: point.num_prefill_gpu, + decode_tp: point.decode_tp, + decode_ep: point.decode_ep, + decode_dp_attention: point.decode_dp_attention, + decode_num_workers: point.decode_num_workers, + num_decode_gpu: point.num_decode_gpu, + conc: point.conc, + isl: sequence?.isl, + osl: sequence?.osl, + image: point.image, + }); + }, [point, sequence?.isl, sequence?.osl, model]); + + const configJson = useMemo(() => { + if (!point) return ''; + // Strip chart-only derived fields — keep the raw benchmark identity. This is + // the JSON the user can copy and feed back as a future config diff input. 
+ const { + x: _x, + y: _y, + hidden: _hidden, + tpPerGpu: _tpg, + tpPerMw: _tpm, + outputTputPerGpu: _otg, + inputTputPerGpu: _itg, + outputTputPerMw: _otm, + inputTputPerMw: _itm, + costh: _ch, + costn: _cn, + costr: _cr, + costhi: _chi, + costni: _cni, + costri: _cri, + costhOutput: _cho, + costnOutput: _cno, + costrOutput: _cro, + costUser: _cu, + powerUser: _pu, + jTotal: _jt, + jOutput: _jo, + jInput: _ji, + ...essentials + } = point; + return JSON.stringify(essentials, null, 2); + }, [point]); + + const runUrl = point?.run_url ? updateRepoUrl(point.run_url) : undefined; + const hwLabel = useMemo(() => { + if (!point) return ''; + try { + return getDisplayLabel(getHardwareConfig(point.hwKey)); + } catch { + return point.hwKey; + } + }, [point]); + + // Fire drawer-open analytics once per opened point. + useEffect(() => { + if (!point) return; + track('reproduce_drawer_opened', { + framework: point.framework, + hwKey: point.hwKey, + precision: point.precision, + tp: point.tp, + conc: point.conc, + disagg: Boolean(point.disagg), + }); + }, [point]); + + const copyTextForActiveTab = (): string => { + if (!point) return ''; + if (activeTab === 'config') return configJson; + if (activeTab === 'environment') { + return [ + `GPU: ${hwLabel}`, + `Framework: ${point.framework ?? '(unknown)'}`, + point.precision ? `Precision: ${point.precision.toUpperCase()}` : '', + point.image ? `Container image: ${point.image}` : '', + point.spec_decoding && point.spec_decoding !== 'none' + ? `Speculative decoding: ${point.spec_decoding}` + : '', + point.actualDate + ? `Run date: ${point.actualDate}` + : point.date + ? `Run date: ${point.date}` + : '', + runUrl ? 
`Run URL: ${runUrl}` : '', + ] + .filter(Boolean) + .join('\n'); + } + if (!launch) return ''; + if (launch.kind === 'single' && launch.command) return launch.command; + if (launch.kind === 'disagg' && launch.commands) { + return launch.commands.map((c) => `# ${c.label}\n${c.command}`).join('\n\n'); + } + return launch.fallbackReason ?? ''; + }; + + const handleCopy = async () => { + if (!point) return; + const text = copyTextForActiveTab(); + if (!text) return; + try { + await navigator.clipboard.writeText(text); + } catch { + // Clipboard can be unavailable in non-secure contexts; tracking still useful. + } + track('reproduce_copy', { + tab: activeTab, + framework: point.framework, + hwKey: point.hwKey, + precision: point.precision, + tp: point.tp, + conc: point.conc, + }); + }; + + return ( + { + if (!o) onClose(); + }} + > + +
+
+ Reproduce this benchmark + {point && ( +
+ {hwLabel} + {' · '} + TP{point.tp} + {' · '} + conc {point.conc} + {' · '} + {point.precision} + {point.disagg && · disagg} +
+ )} +
+
+ +
+ setActiveTab('command')} + /> + setActiveTab('config')} + /> + setActiveTab('environment')} + /> + +
+ +
+ {!point ? null : activeTab === 'command' ? ( + + ) : activeTab === 'config' ? ( + + ) : ( + + )} +
+
+
+ ); +} + +interface TabButtonProps { + label: string; + active: boolean; + onClick: () => void; +} + +function TabButton({ label, active, onClick }: TabButtonProps) { + return ( + + ); +} + +function CopyButton({ onCopy, testId }: { onCopy: () => void | Promise; testId?: string }) { + const [copied, setCopied] = useState(false); + return ( + + ); +} + +function CommandTab({ launch }: { launch: ReturnType | null }) { + if (!launch) return null; + if (launch.kind === 'fallback') { + return ( +
+

No launch command

+

{launch.fallbackReason}

+
+ ); + } + if (launch.kind === 'single' && launch.command) { + return ; + } + if (launch.kind === 'disagg' && launch.commands) { + return ( +
+ {launch.commands.map((cmd) => ( +
+
+ {cmd.label} +
+ +
+ ))} +
+ ); + } + return null; +} + +function EnvironmentTab({ + point, + hwLabel, + runUrl, +}: { + point: InferenceData; + hwLabel: string; + runUrl?: string; +}) { + const rows: { label: string; value: string | undefined }[] = [ + { label: 'GPU', value: hwLabel }, + { label: 'Framework', value: point.framework }, + { + label: 'Precision', + value: point.precision ? point.precision.toUpperCase() : undefined, + }, + { + label: 'Speculative decoding', + value: point.spec_decoding && point.spec_decoding !== 'none' ? point.spec_decoding : 'none', + }, + { label: 'Container image', value: point.image }, + { + label: 'Run date', + value: point.actualDate ?? point.date, + }, + { label: 'Workflow run', value: runUrl }, + ]; + return ( +
+ {rows.map(({ label, value }) => ( +
+
{label}
+
+ {value || (not recorded)} +
+
+ ))} +
+ ); +} + +function CodeBlock({ value, language: _language }: { value: string; language: 'bash' | 'json' }) { + return ( +
+      {value}
+    
+ ); +} diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 9cb8414b..fffa9c19 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -149,6 +149,7 @@ const ScatterGraph = React.memo( trackedConfigs, addTrackedConfig, removeTrackedConfig, + openReproduceDrawer, } = useInference(); const { @@ -676,6 +677,15 @@ const ScatterGraph = React.memo( }); }); } + const reproduceBtn = tooltipEl.querySelector('[data-action="reproduce"]'); + if (reproduceBtn) { + reproduceBtn.addEventListener('click', (btnEvent) => { + btnEvent.stopPropagation(); + openReproduceDrawer(d, 'scatter_tooltip'); + chartRef.current?.dismissTooltip(); + chartRef.current?.hideTooltip(); + }); + } } }, attachToLayer: 1, // scatter layer is index 1 (after rooflines at 0) @@ -690,6 +700,7 @@ const ScatterGraph = React.memo( removeTrackedConfig, chartDefinition.chartType, selectedPrecisions, + openReproduceDrawer, ], ); diff --git a/packages/app/src/components/inference/utils/tooltip-utils.test.ts b/packages/app/src/components/inference/utils/tooltip-utils.test.ts index bb8caafa..596a6ba4 100644 --- a/packages/app/src/components/inference/utils/tooltip-utils.test.ts +++ b/packages/app/src/components/inference/utils/tooltip-utils.test.ts @@ -264,6 +264,17 @@ describe('generateTooltipContent', () => { expect(html).toContain('Track Over Time'); expect(html).not.toContain('Untrack Over Time'); }); + + it('shows the Reproduce button when pinned', () => { + const html = generateTooltipContent(tooltipConfig({ isPinned: true })); + expect(html).toContain('data-action="reproduce"'); + expect(html).toContain('Reproduce'); + }); + + it('does not show the Reproduce button when not pinned', () => { + const html = generateTooltipContent(tooltipConfig({ isPinned: false })); + expect(html).not.toContain('data-action="reproduce"'); + }); }); // 
=========================================================================== @@ -365,4 +376,14 @@ describe('generateGPUGraphTooltipContent', () => { ); expect(html).toContain('vllm-v0.6.0
abc123'); }); + + it('shows the Reproduce button when pinned', () => { + const html = generateGPUGraphTooltipContent(tooltipConfig({ isPinned: true })); + expect(html).toContain('data-action="reproduce"'); + }); + + it('does not show the Reproduce button when not pinned', () => { + const html = generateGPUGraphTooltipContent(tooltipConfig({ isPinned: false })); + expect(html).not.toContain('data-action="reproduce"'); + }); }); diff --git a/packages/app/src/components/inference/utils/tooltipUtils.ts b/packages/app/src/components/inference/utils/tooltipUtils.ts index 0f004db6..9b4f404b 100644 --- a/packages/app/src/components/inference/utils/tooltipUtils.ts +++ b/packages/app/src/components/inference/utils/tooltipUtils.ts @@ -192,7 +192,12 @@ export const generateTooltipContent = (config: TooltipConfig): string => { margin-top: 8px; width: 100%; padding: 4px 8px; font-size: 11px; font-weight: 500; border: 1px solid var(--border); border-radius: 6px; cursor: pointer; background: var(--accent); color: var(--accent-foreground); - ">${config.isTracked ? 'Untrack Over Time' : 'Track Over Time'}` + ">${config.isTracked ? 'Untrack Over Time' : 'Track Over Time'} + ` : '' } @@ -301,6 +306,15 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string => Precision: ${d.precision.toUpperCase()} ${runLinkHTML(runUrl)} + ${ + isPinned + ? 
`` + : '' + } `; }; diff --git a/packages/app/src/lib/reproduce-command.test.ts b/packages/app/src/lib/reproduce-command.test.ts new file mode 100644 index 00000000..ee883d2e --- /dev/null +++ b/packages/app/src/lib/reproduce-command.test.ts @@ -0,0 +1,243 @@ +import { describe, expect, it } from 'vitest'; + +import { buildLaunchCommand } from './reproduce-command'; + +describe('buildLaunchCommand', () => { + describe('vllm', () => { + it('builds a basic single-process command', () => { + const result = buildLaunchCommand('vllm', { + model: 'deepseek-ai/DeepSeek-R1', + precision: 'fp8', + tp: 8, + conc: 64, + isl: 1024, + osl: 1024, + }); + expect(result.kind).toBe('single'); + expect(result.framework).toBe('vllm'); + expect(result.command).toContain('vllm serve'); + expect(result.command).toContain('--tensor-parallel-size 8'); + expect(result.command).toContain('--dtype fp8'); + expect(result.command).toContain('--max-num-seqs 64'); + expect(result.command).toContain('--max-model-len 2048'); + // Properly quotes the model when it contains a / + expect(result.command).toContain('deepseek-ai/DeepSeek-R1'); + }); + + it('emits expert-parallel and dp-attention flags when requested', () => { + const result = buildLaunchCommand('vllm', { + model: 'm', + precision: 'fp4', + tp: 8, + ep: 8, + dp_attention: true, + }); + expect(result.command).toContain('--expert-parallel-size 8'); + expect(result.command).toContain('--data-parallel-attention'); + }); + + it('emits a JSON speculative-config when spec_decoding is set', () => { + const result = buildLaunchCommand('vllm', { + model: 'm', + precision: 'fp8', + tp: 4, + spec_decoding: 'mtp', + }); + // JSON gets shell-quoted because of the curly braces / quotes. 
+ expect(result.command).toMatch(/--speculative-config '\{"method":"mtp"\}'/); + }); + + it('omits speculative-config when spec_decoding is "none"', () => { + const result = buildLaunchCommand('vllm', { + model: 'm', + precision: 'fp8', + tp: 1, + spec_decoding: 'none', + }); + expect(result.command).not.toContain('speculative-config'); + }); + }); + + describe('sglang', () => { + it('builds a basic single-process command', () => { + const result = buildLaunchCommand('sglang', { + model: 'meta-llama/Llama-3.3-70B', + precision: 'fp8', + tp: 4, + conc: 32, + isl: 8192, + osl: 1024, + }); + expect(result.kind).toBe('single'); + expect(result.command).toContain('python -m sglang.launch_server'); + expect(result.command).toContain('--tp 4'); + expect(result.command).toContain('--max-running-requests 32'); + expect(result.command).toContain('--context-length 9216'); + }); + + it('uses --enable-dp-attention for sglang', () => { + const result = buildLaunchCommand('sglang', { + model: 'm', + precision: 'fp8', + tp: 8, + dp_attention: true, + }); + expect(result.command).toContain('--enable-dp-attention'); + }); + + it('emits --speculative-algorithm when spec_decoding is set', () => { + const result = buildLaunchCommand('sglang', { + model: 'm', + precision: 'fp8', + tp: 4, + spec_decoding: 'eagle3', + }); + expect(result.command).toContain('--speculative-algorithm EAGLE3'); + }); + }); + + describe('trt / trtllm alias', () => { + it('treats trtllm as an alias for trt', () => { + const result = buildLaunchCommand('trtllm', { + model: 'm', + precision: 'fp4', + tp: 8, + }); + expect(result.framework).toBe('trt'); + expect(result.command).toContain('trtllm-serve'); + expect(result.command).toContain('--tp_size 8'); + }); + + it('builds a basic trt command', () => { + const result = buildLaunchCommand('trt', { + model: 'm', + precision: 'fp4', + tp: 4, + ep: 4, + conc: 16, + isl: 1024, + osl: 256, + }); + expect(result.command).toContain('--backend pytorch'); + 
expect(result.command).toContain('--tp_size 4'); + expect(result.command).toContain('--ep_size 4'); + expect(result.command).toContain('--max_batch_size 16'); + expect(result.command).toContain('--max_seq_len 1280'); + expect(result.command).toContain('--kv_cache_dtype fp4'); + }); + + it('emits --speculative_config={"decoding_type":...} for spec', () => { + const result = buildLaunchCommand('trt', { + model: 'm', + precision: 'fp4', + tp: 1, + spec_decoding: 'mtp', + }); + // The flag is a single token because --speculative_config=... has no + // space separator. The shell quoter kicks in because of the curly braces. + expect(result.command).toMatch(/--speculative_config=\{"decoding_type":"MTP"\}/); + }); + }); + + describe('disagg', () => { + it('returns two commands for vllm disagg with separate prefill / decode TPs', () => { + const result = buildLaunchCommand('vllm', { + model: 'm', + precision: 'fp8', + tp: 8, + disagg: true, + prefill_tp: 4, + prefill_num_workers: 2, + num_prefill_gpu: 8, + decode_tp: 16, + decode_num_workers: 1, + num_decode_gpu: 16, + }); + expect(result.kind).toBe('disagg'); + expect(result.commands).toHaveLength(2); + expect(result.commands?.[0].label).toContain('Prefill'); + expect(result.commands?.[0].command).toContain('--tensor-parallel-size 4'); + expect(result.commands?.[0].command).toContain('--disagg-role prefill'); + expect(result.commands?.[1].label).toContain('Decode'); + expect(result.commands?.[1].command).toContain('--tensor-parallel-size 16'); + expect(result.commands?.[1].command).toContain('--disagg-role decode'); + }); + + it('uses --disaggregate_role for trt disagg', () => { + const result = buildLaunchCommand('trt', { + model: 'm', + precision: 'fp4', + tp: 8, + disagg: true, + prefill_tp: 4, + decode_tp: 8, + }); + expect(result.kind).toBe('disagg'); + expect(result.commands?.[0].command).toContain('--disaggregate_role prefill'); + expect(result.commands?.[1].command).toContain('--disaggregate_role decode'); + }); + 
+ it('falls back to top-level tp when prefill_tp/decode_tp missing', () => { + const result = buildLaunchCommand('sglang', { + model: 'm', + precision: 'fp8', + tp: 4, + disagg: true, + }); + expect(result.kind).toBe('disagg'); + expect(result.commands?.[0].command).toContain('--tp 4'); + expect(result.commands?.[1].command).toContain('--tp 4'); + }); + }); + + describe('compound / orchestrator frameworks → fallback', () => { + const compounds = [ + ['atom', /ATOM/], + ['mori-sglang', /MoRI/], + ['dynamo-vllm', /Dynamo vLLM/], + ['dynamo-trt', /Dynamo TRT/], + ['dynamo-sglang', /Dynamo SGLang/], + ] as const; + + it.each(compounds)('returns kind="fallback" for %s', (fw, msgRe) => { + const result = buildLaunchCommand(fw, { + model: 'm', + precision: 'fp8', + tp: 8, + }); + expect(result.kind).toBe('fallback'); + expect(result.framework).toBe(fw); + expect(result.fallbackReason).toMatch(msgRe); + }); + + it('resolves the dynamo-trtllm alias before deciding fallback', () => { + const result = buildLaunchCommand('dynamo-trtllm', { + model: 'm', + precision: 'fp8', + tp: 8, + }); + expect(result.kind).toBe('fallback'); + expect(result.framework).toBe('dynamo-trt'); + }); + }); + + describe('unknown framework', () => { + it('returns a fallback with a clear reason for unknown frameworks', () => { + const result = buildLaunchCommand('made-up-framework', { + model: 'm', + precision: 'fp8', + tp: 1, + }); + expect(result.kind).toBe('fallback'); + expect(result.fallbackReason).toContain('made-up-framework'); + }); + }); + + describe('placeholders for missing fields', () => { + it('uses and placeholders when omitted', () => { + const result = buildLaunchCommand('vllm', { tp: 1 }); + expect(result.command).toContain(''); + expect(result.command).toContain(''); + }); + }); +}); diff --git a/packages/app/src/lib/reproduce-command.ts b/packages/app/src/lib/reproduce-command.ts new file mode 100644 index 00000000..05a46764 --- /dev/null +++ 
// Source path: packages/app/src/lib/reproduce-command.ts
import { resolveFrameworkAlias } from '@semianalysisai/inferencex-constants';

/**
 * Pure description of a benchmark config — the bits that actually feed the
 * launch command. Everything except `framework` is optional so callers can
 * pass a partial `InferenceData` row or a partial `AggDataEntry`.
 *
 * How the generators treat absent fields:
 * - `model` missing/empty → a quoted `'<model>'` placeholder is emitted so the
 *   gap is visible in the rendered command.
 * - `precision` missing/empty → the dtype flag is omitted entirely.
 * - `tp` missing → `1`; `conc` missing → `256`.
 * - the context-length flag is only emitted when both `isl` and `osl` are set.
 */
export interface ReproduceConfig {
  framework: string;
  model?: string;
  precision?: string;
  tp?: number;
  ep?: number;
  dp_attention?: boolean;
  spec_decoding?: string;
  disagg?: boolean;
  prefill_tp?: number;
  prefill_ep?: number;
  prefill_dp_attention?: boolean;
  prefill_num_workers?: number;
  num_prefill_gpu?: number;
  decode_tp?: number;
  decode_ep?: number;
  decode_dp_attention?: boolean;
  decode_num_workers?: number;
  num_decode_gpu?: number;
  conc?: number;
  isl?: number;
  osl?: number;
  image?: string;
}

export type LaunchCommandKind = 'single' | 'disagg' | 'fallback';

/** Result of `buildLaunchCommand`. */
export interface LaunchCommandResult {
  /** "single", "disagg" (prefill + decode workers), or "fallback" (no recipe). */
  kind: LaunchCommandKind;
  /** The canonical framework key the command was built for. */
  framework: string;
  /** Single-command output (kind === 'single'). */
  command?: string;
  /** Disagg output: ordered list of stitched commands (prefill first, then decode). */
  commands?: { label: string; command: string }[];
  /**
   * Fallback explanation shown in the drawer when we can't render a launch
   * command — either because the framework is a multi-process orchestrator
   * (Dynamo, ATOM, MoRI) or because no recipe is registered for it.
   */
  fallbackReason?: string;
}

/** Frameworks for which a single-process launch recipe exists in this module. */
type SimpleFramework = 'vllm' | 'sglang' | 'trt';

/** Frameworks that orchestrate multiple worker processes — too much to inline. */
const COMPOUND_FRAMEWORKS = new Set<string>([
  'atom',
  'mori-sglang',
  'dynamo-vllm',
  'dynamo-trt',
  'dynamo-sglang',
]);

/** Per-framework explanation returned with `kind: 'fallback'` for compound frameworks. */
const FALLBACK_REASONS: Record<string, string> = {
  atom: 'ATOM orchestrates several worker processes — see the Config JSON tab for the full launch graph.',
  'mori-sglang':
    'MoRI SGLang spans prefill / decode / scheduler workers — see the Config JSON tab for the full launch graph.',
  'dynamo-vllm':
    'Dynamo vLLM is launched via the Dynamo runtime against multiple workers — see the Config JSON tab for the full launch graph.',
  'dynamo-trt':
    'Dynamo TRT is launched via the Dynamo runtime against multiple workers — see the Config JSON tab for the full launch graph.',
  'dynamo-sglang':
    'Dynamo SGLang is launched via the Dynamo runtime against multiple workers — see the Config JSON tab for the full launch graph.',
};

/** Stand-in emitted where the model name belongs when the config has none. */
const MISSING_MODEL_PLACEHOLDER = '<model>';

/** Shell line continuation inserted between logical argument groups. */
const LINE_CONTINUATION = ' \\\n';

/** Leading whitespace for every continuation line of a multi-line command. */
const CONTINUATION_INDENT = ' ';

/** Matches any character that is not safe to leave unquoted in a POSIX shell word. */
const QUOTE_RE = /[^A-Za-z0-9._\-/=:,@%+]/;

/**
 * Single-quote `s` for a POSIX shell when it contains characters outside the
 * safe set. Tokens that start with `--` or contain a newline are treated as
 * pre-formatted and returned untouched, so callers that build a `--flag=value`
 * token must quote the value themselves before concatenating.
 */
const quoteIfNeeded = (s: string): string => {
  if (s === '') return "''";
  // Already a formatted block (e.g. a multi-flag chunk) — leave as-is.
  if (s.includes('\n') || s.startsWith('--')) return s;
  if (!QUOTE_RE.test(s)) return s;
  // Close the quote, emit an escaped literal quote, reopen: ' -> '\''
  return `'${s.replaceAll("'", String.raw`'\''`)}'`;
};

/**
 * Format a single-line CLI command from an array of args, escaping where
 * needed. Empty strings are dropped before quoting.
 */
const joinArgs = (args: string[]): string => args.filter(Boolean).map(quoteIfNeeded).join(' ');

/** Format a chunk of CLI args as one indented line per logical group. */
const formatChunks = (chunks: string[][]): string =>
  chunks
    .map((chunk, i) => (i === 0 ? joinArgs(chunk) : `${CONTINUATION_INDENT}${joinArgs(chunk)}`))
    .join(LINE_CONTINUATION);

/**
 * Build a `[flag, value]` chunk list, or nothing when the value is absent.
 * Prevents a dangling value-less flag (e.g. a bare `--dtype`) from reaching
 * the rendered command, since `joinArgs` would silently drop the empty value.
 */
const optionalFlag = (flag: string, value: string | undefined): string[][] =>
  value ? [[flag, value]] : [];

/**
 * Model argument for the command line. `||` is deliberate: an empty string is
 * as unusable as `undefined` here, and both become the visible placeholder.
 */
const modelArg = (cfg: ReproduceConfig): string => cfg.model || MISSING_MODEL_PLACEHOLDER;

/** `isl + osl` as a string, or `undefined` unless both lengths are known. */
const totalSequenceLength = (cfg: ReproduceConfig): string | undefined =>
  cfg.isl !== undefined && cfg.osl !== undefined ? String(cfg.isl + cfg.osl) : undefined;

/** True when a speculative-decoding method is set to something other than `'none'`. */
const hasSpecDecoding = (cfg: ReproduceConfig): cfg is ReproduceConfig & { spec_decoding: string } =>
  Boolean(cfg.spec_decoding) && cfg.spec_decoding !== 'none';

/** Render a `vllm serve …` launch command for `cfg`. */
const buildVllmCommand = (cfg: ReproduceConfig): string => {
  const chunks: string[][] = [
    ['vllm', 'serve', modelArg(cfg)],
    ...optionalFlag('--dtype', cfg.precision),
    ['--tensor-parallel-size', String(cfg.tp ?? 1)],
  ];
  if (cfg.ep !== undefined && cfg.ep > 0) {
    chunks.push(['--expert-parallel-size', String(cfg.ep)]);
  }
  if (cfg.dp_attention) chunks.push(['--data-parallel-attention']);
  if (hasSpecDecoding(cfg)) {
    // The JSON value is a separate arg, so `joinArgs` single-quotes it.
    chunks.push(['--speculative-config', JSON.stringify({ method: cfg.spec_decoding })]);
  }
  chunks.push(['--max-num-seqs', String(cfg.conc ?? 256)]);
  chunks.push(...optionalFlag('--max-model-len', totalSequenceLength(cfg)));
  return formatChunks(chunks);
};

/** Render a `python -m sglang.launch_server …` launch command for `cfg`. */
const buildSglangCommand = (cfg: ReproduceConfig): string => {
  const chunks: string[][] = [
    ['python', '-m', 'sglang.launch_server'],
    ['--model-path', modelArg(cfg)],
    ...optionalFlag('--dtype', cfg.precision),
    ['--tp', String(cfg.tp ?? 1)],
  ];
  if (cfg.ep !== undefined && cfg.ep > 0) {
    chunks.push(['--ep-size', String(cfg.ep)]);
  }
  if (cfg.dp_attention) chunks.push(['--enable-dp-attention']);
  if (hasSpecDecoding(cfg)) {
    chunks.push(['--speculative-algorithm', cfg.spec_decoding.toUpperCase()]);
  }
  chunks.push(['--max-running-requests', String(cfg.conc ?? 256)]);
  chunks.push(...optionalFlag('--context-length', totalSequenceLength(cfg)));
  return formatChunks(chunks);
};

/**
 * Render a `trtllm-serve …` launch command for `cfg`.
 *
 * Note: `dp_attention` is not translated into any flag by this recipe.
 */
const buildTrtCommand = (cfg: ReproduceConfig): string => {
  const chunks: string[][] = [
    ['trtllm-serve', modelArg(cfg)],
    ['--backend', 'pytorch'],
    ['--tp_size', String(cfg.tp ?? 1)],
    ...optionalFlag('--kv_cache_dtype', cfg.precision),
  ];
  if (cfg.ep !== undefined && cfg.ep > 0) {
    chunks.push(['--ep_size', String(cfg.ep)]);
  }
  if (hasSpecDecoding(cfg)) {
    // The whole token starts with `--`, so `joinArgs` will not quote it; quote
    // the JSON value here, otherwise a shell strips the double quotes on paste.
    const specJson = JSON.stringify({ decoding_type: cfg.spec_decoding.toUpperCase() });
    chunks.push([`--speculative_config=${quoteIfNeeded(specJson)}`]);
  }
  chunks.push(['--max_batch_size', String(cfg.conc ?? 256)]);
  chunks.push(...optionalFlag('--max_seq_len', totalSequenceLength(cfg)));
  return formatChunks(chunks);
};

/** Single-process command recipes, keyed by canonical framework name. */
const SIMPLE_BUILDERS: Record<SimpleFramework, (cfg: ReproduceConfig) => string> = {
  vllm: buildVllmCommand,
  sglang: buildSglangCommand,
  trt: buildTrtCommand,
};

/** Narrow a canonical framework name to one that has a recipe in `SIMPLE_BUILDERS`. */
const isSimpleFramework = (name: string): name is SimpleFramework =>
  name === 'vllm' || name === 'sglang' || name === 'trt';

/**
 * Build the prefill and decode launch commands for a disaggregated run.
 *
 * Each role reuses the single-process recipe with its own `tp` / `ep` /
 * `dp_attention` (falling back to the shared values when the role-specific
 * field is absent) and gets a trailing role flag appended.
 */
const buildDisaggCommands = (
  cfg: ReproduceConfig,
  framework: SimpleFramework,
): { label: string; command: string }[] => {
  const prefill: ReproduceConfig = {
    ...cfg,
    tp: cfg.prefill_tp ?? cfg.tp,
    ep: cfg.prefill_ep ?? cfg.ep,
    dp_attention: cfg.prefill_dp_attention ?? cfg.dp_attention,
  };
  const decode: ReproduceConfig = {
    ...cfg,
    tp: cfg.decode_tp ?? cfg.tp,
    ep: cfg.decode_ep ?? cfg.ep,
    dp_attention: cfg.decode_dp_attention ?? cfg.dp_attention,
  };
  const builder = SIMPLE_BUILDERS[framework];
  // Disagg launch lines append a role flag so the user can paste both into
  // separate terminals — this matches how SGLang & vLLM disagg expects
  // prefill / decode workers to be tagged.
  const roleFlag = framework === 'trt' ? '--disaggregate_role' : '--disagg-role';
  const withRole = (roleCfg: ReproduceConfig, role: 'prefill' | 'decode'): string =>
    `${builder(roleCfg)}${LINE_CONTINUATION}${CONTINUATION_INDENT}${roleFlag} ${role}`;
  const prefillWorkers = cfg.prefill_num_workers ?? 1;
  const decodeWorkers = cfg.decode_num_workers ?? 1;
  return [
    {
      label: `Prefill workers (×${prefillWorkers}, ${cfg.num_prefill_gpu ?? '?'} GPUs)`,
      command: withRole(prefill, 'prefill'),
    },
    {
      label: `Decode workers (×${decodeWorkers}, ${cfg.num_decode_gpu ?? '?'} GPUs)`,
      command: withRole(decode, 'decode'),
    },
  ];
};

/**
 * Pure function from `(framework, config)` → CLI launch command string.
 *
 * Returns one of three shapes:
 * - `kind: "single"` — a single command (most non-disagg runs).
 * - `kind: "disagg"` — two stitched commands for prefill / decode workers.
 * - `kind: "fallback"` — no launch command available; the drawer should
 *   point the user at the Config JSON tab. `fallbackReason` explains why.
 *
 * The function is intentionally side-effect-free so it can be unit-tested
 * per framework and reused for future diffing between runs.
 *
 * @param framework Framework name or alias; canonicalised via `resolveFrameworkAlias`.
 * @param cfg Benchmark config without `framework` (the canonical name is filled in here).
 * @returns The rendered command(s), or a fallback with a human-readable reason.
 */
export function buildLaunchCommand(
  framework: string,
  cfg: Omit<ReproduceConfig, 'framework'>,
): LaunchCommandResult {
  const canonical = resolveFrameworkAlias(framework);

  if (COMPOUND_FRAMEWORKS.has(canonical)) {
    return {
      kind: 'fallback',
      framework: canonical,
      fallbackReason:
        FALLBACK_REASONS[canonical] ??
        'This framework orchestrates several worker processes — see the Config JSON tab.',
    };
  }

  if (!isSimpleFramework(canonical)) {
    return {
      kind: 'fallback',
      framework: canonical,
      fallbackReason: `No launch-command recipe is registered for "${canonical}" yet — see the Config JSON tab.`,
    };
  }

  const fullCfg: ReproduceConfig = { ...cfg, framework: canonical };

  if (cfg.disagg) {
    return {
      kind: 'disagg',
      framework: canonical,
      commands: buildDisaggCommands(fullCfg, canonical),
    };
  }

  return {
    kind: 'single',
    framework: canonical,
    command: SIMPLE_BUILDERS[canonical](fullCfg),
  };
}