SemiAnalysisAI · Oseltamivir · May 5, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
@@ -1610,14 +1610,12 @@ dsv4-fp4-mi355x-sglang:
       - { tp: 8, dp-attn: true, conc-start: 16, conc-end: 128 }
       - { tp: 8, dp-attn: false, conc-start: 1, conc-end: 16 }
 
-# vLLM with AITER MLA decode for DSv4 on MI355X (vllm-project/vllm#40889,
-# stacked on #40871). Uses the ATOM MI355X image (ROCm 7.2.2, aiter with
-# MLA decode, MI355X GPU detection); vLLM is rebuilt from the PR branch
-# at runtime by benchmarks/single_node/dsv4_fp8_mi355x_vllm.sh at a
-# pinned SHA. Once both PRs merge into a release, switch to a vLLM ROCm
-# MI355X image and remove the build step.
+# vLLM DSv4-Pro FP8 on MI355X, following the recipes.vllm.ai
+# DeepSeek-V4-Pro page (vllm-project/vllm#40871 base + #41217 MLA
+# Indexer). Uses rocm/vllm-dev:deepseek-v4-latest, pinned by digest,
+# which ships the validated AITER + ROCm 7.2.1 build; no runtime PR overlay.
 dsv4-fp8-mi355x-vllm:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  image: rocm/vllm-dev:deepseek-v4-latest@sha256:d29cd950e683bbbd2a211812ce76db1f46401ca2c43bb6b9627a71a113723d3f
   model: deepseek-ai/DeepSeek-V4-Pro
   model-prefix: dsv4
   runner: mi355x
@@ -1629,11 +1627,11 @@ dsv4-fp8-mi355x-vllm:
     - isl: 1024
       osl: 1024
       search-space:
-      - { tp: 8, conc-start: 1, conc-end: 1 }
+      - { tp: 8, conc-start: 1, conc-end: 128 }
     - isl: 8192
       osl: 1024
       search-space:
-      - { tp: 8, conc-start: 1, conc-end: 1 }
+      - { tp: 8, conc-start: 1, conc-end: 128 }
 
 # Day-0 single-sequence marker for DeepSeek-V4 on ATOM (ROCm/ATOM#650).
 # PR1 of the ATOM DSv4 series still uses torch sparse-attention fallbacks