diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index b68a955f3..46d26b329 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -2004,7 +2004,7 @@ dsr1-fp8-b300-sglang:
 # DeepSeek-V4-Pro on B300 with sglang (non-MTP).
 # Uses nightly image with megamoe backend for high-concurrency profiles.
 dsv4-fp4-b300-sglang:
-  image: lmsysorg/sglang:nightly-dev-cu13-20260529-a8cfae0b
+  image: lmsysorg/sglang:nightly-dev-cu13-20260624-b2c8f7a2
   model: deepseek-ai/DeepSeek-V4-Pro
   model-prefix: dsv4
   runner: b300
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 116c2a7f5..e829cf997 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -4179,6 +4179,12 @@
     - "Uses the vllm/vllm-openai:minimax-m3-0618-x86_64-cu130 image and the TEP4/TEP8 8k1k topologies not covered by PR #1890."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1891
 
+- config-keys:
+    - dsv4-fp4-b300-sglang
+  description:
+    - "Update B300 FP4 SGLang (non-MTP) image to latest nightly: lmsysorg/sglang:nightly-dev-cu13-20260624-b2c8f7a2 (was nightly-dev-cu13-20260529-a8cfae0b)."
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1913
+
 - config-keys:
     - minimaxm3-fp8-mi355x-atom-disagg
   description: