diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 3caa5faae..4e9e06ebf 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -679,7 +679,7 @@ glm5-fp8-mi355x-atom:
       - { tp: 8, conc-start: 4, conc-end: 256 }
 
 glm5.1-fp4-mi355x-sglang:
-  image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529
+  image: lmsysorg/sglang-rocm:v0.5.13.post1-rocm720-mi35x-20260622
   model: amd/GLM-5.1-MXFP4
   model-prefix: glm5.1
   runner: mi355x
diff --git a/benchmarks/single_node/fixed_seq_len/glm5.1_fp4_mi355x.sh b/benchmarks/single_node/fixed_seq_len/glm5.1_fp4_mi355x.sh
index aada63d56..4e0d507c6 100644
--- a/benchmarks/single_node/fixed_seq_len/glm5.1_fp4_mi355x.sh
+++ b/benchmarks/single_node/fixed_seq_len/glm5.1_fp4_mi355x.sh
@@ -52,6 +52,7 @@ python3 -m sglang.launch_server \
     --nsa-decode-backend tilelang $EVAL_CONTEXT_ARGS  \
     --kv-cache-dtype fp8_e4m3 \
     --tokenizer-worker-num $((TP*2)) \
+    --enable-aiter-allreduce-fusion \
     --disable-radix-cache> $SERVER_LOG 2>&1 &
 
 SERVER_PID=$!
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index d6a5f35e4..e13e68c81 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -4041,6 +4041,13 @@
     - "No benchmark configuration change; reuse the exact 25-point fixed-sequence matrix and 2 eval jobs"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1869
 
+- config-keys:
+    - glm5.1-fp4-mi355x-sglang
+  description:
+    - "Bump SGLang ROCm image from v0.5.12.post1-rocm720-mi35x-20260529 to v0.5.13.post1-rocm720-mi35x-20260622"
+    - "Enable aiter allreduce fusion via --enable-aiter-allreduce-fusion in benchmarks/single_node/fixed_seq_len/glm5.1_fp4_mi355x.sh"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1905
+
 - config-keys:
     - minimaxm3-fp8-b300-dynamo-vllm
   description: