From 5b9c01ca82bc7fd7b728fd5719c450c83c231ad1 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 13 Apr 2026 19:12:02 -0700 Subject: [PATCH 1/2] Fix multiple calls of `ReduceOps::eval_mf` Previously, `ReduceOps::eval_mf` (the fused MultiFab path) always rewrote the device accumulation buffer from scratch. This produced incorrect results when `ReduceOps::eval_mf` was called multiple times. Now a block's buffer entry is initialized only if its index is at or beyond the block count recorded before the call. --- Src/Base/AMReX_Reduce.H | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index 2e57eb288a..49b3ca227e 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -631,7 +631,9 @@ public: auto pdst = reduce_data.devicePtr(stream); int nblocks_ec = std::min(nblocks, reduce_data.maxBlocks()); AMREX_ASSERT(Long(nblocks_ec)*2 <= Long(std::numeric_limits::max())); - reduce_data.nBlocks(stream) = nblocks_ec; + int& nblocks_ref = reduce_data.nBlocks(stream); + int old_nblocks = nblocks_ref; + nblocks_ref = amrex::max(nblocks_ref, nblocks_ec); reduce_data.updateMaxStreamIndex(stream); #ifdef AMREX_USE_SYCL @@ -651,7 +653,7 @@ public: ReduceTuple r; Reduce::detail::for_each_init<0, ReduceTuple, Ps...>(r); ReduceTuple& dst = pdst[blockIdx.x]; - if (threadIdx.x == 0) { + if (threadIdx.x == 0 && blockIdx.x >= old_nblocks) { dst = r; } for (int iblock = blockIdx.x; iblock < nblocks; iblock += nblocks_ec) { From 2f9385bbe193d1cfc91f79e793d4a3dbc0ffdba3 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 13 Apr 2026 19:27:32 -0700 Subject: [PATCH 2/2] Fix warning --- Src/Base/AMReX_Reduce.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index 49b3ca227e..4fcf80a158 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -632,7 +632,7 @@ public: int nblocks_ec = std::min(nblocks, reduce_data.maxBlocks()); AMREX_ASSERT(Long(nblocks_ec)*2 <= Long(std::numeric_limits::max())); int& nblocks_ref = 
reduce_data.nBlocks(stream); - int old_nblocks = nblocks_ref; + auto old_nblocks = static_cast(nblocks_ref); nblocks_ref = amrex::max(nblocks_ref, nblocks_ec); reduce_data.updateMaxStreamIndex(stream);