From 906a61365faf3407243825c2ce13534cb5ebe5bb Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Thu, 8 Jan 2026 12:15:22 +1100 Subject: [PATCH 01/12] Testing performance of fused EB Linear Solver. Updated Fapply --- .../MLMG/AMReX_MLEBABecLap_2D_K.H | 711 +++--- .../MLMG/AMReX_MLEBABecLap_3D_K.H | 2198 +++++++++-------- .../MLMG/AMReX_MLEBABecLap_F.cpp | 241 +- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H | 162 +- 4 files changed, 1774 insertions(+), 1538 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H index 1c4e6153927..aae4844fb84 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H @@ -7,33 +7,34 @@ namespace amrex { +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, - Array4 const& x, Array4 const& a, - Array4 const& bX, Array4 const& bY, +void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, + Array4 const& x, Array4 const& a, + Array4 const& bX, Array4 const& bY, Array4 const& flag, - Array4 const& vfrc, - Array4 const& apx, Array4 const& apy, - Array4 const& fcx, Array4 const& fcy, - Array4 const& ccent, Array4 const& ba, - Array4 const& bcent, Array4 const& beb, - Array4 const& phieb, + Array4 const& vfrc, + Array4 const& apx, Array4 const& apy, + Array4 const& fcx, Array4 const& fcy, + Array4 const& ccent, Array4 const& ba, + Array4 const& bcent, Array4 const& beb, + Array4 const& phieb, const int& domlo_x, const int& domlo_y, const int& domhi_x, const int& domhi_y, const bool& on_x_face, const bool& on_y_face, bool is_eb_dirichlet, bool is_eb_inhomog, - GpuArray const& dxinv, - Real alpha, Real beta, int ncomp) noexcept + GpuArray const& dxinv, + T alpha, T beta, int ncomp) noexcept { - Real dhx = beta*dxinv[0]*dxinv[0]; - Real dhy = beta*dxinv[1]*dxinv[1]; - Real dh = beta*dxinv[0]*dxinv[1]; + T dhx = beta*dxinv[0]*dxinv[0]; + T dhy = beta*dxinv[1]*dxinv[1]; + T dh = beta*dxinv[0]*dxinv[1]; amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept { if (flag(i,j,k).isCovered()) { - y(i,j,k,n) = Real(0.0); + y(i,j,k,n) = T(0.0); } else if (flag(i,j,k).isRegular() && ((flag(i-1,j ,k).isRegular() && flag(i+1,j ,k).isRegular() && @@ -49,11 +50,11 @@ void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, } else { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); // First get EB-aware slope that doesn't know about extdir bool needs_bdry_stencil = (i <= domlo_x) || (i >= domhi_x) || @@ -66,10 +67,10 @@ void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, // is actually needed for most cases but it will return the correct // value in all cases. - Real fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); - if ( (apxm != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i-1,j,k) != Real(1.0) || vfrc(i+1,j,k) != Real(1.0)) ) + T fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); + if ( (apxm != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0)) ) { - Real yloc_on_xface = fcx(i,j,k); + T yloc_on_xface = fcx(i,j,k); if(needs_bdry_stencil) { @@ -85,9 +86,9 @@ void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, fxm *= bX(i,j,k,n); } - Real fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); - if ( (apxp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i+1,j,k) != Real(1.0) || vfrc(i-1,j,k) != Real(1.0)) ) { - Real yloc_on_xface = fcx(i+1,j,k,0); + T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); + if ( (apxp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0)) ) { + T yloc_on_xface = fcx(i+1,j,k,0); if(needs_bdry_stencil) { fxp = grad_x_of_phi_on_centroids_extdir(i+1,j,k,n,x,phieb,flag,ccent,bcent,vfrc, yloc_on_xface,is_eb_dirichlet,is_eb_inhomog, @@ -102,9 +103,9 @@ void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, } - Real fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); - if ( (apym != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j-1,k) != Real(1.0) || vfrc(i,j+1,k) != Real(1.0)) ) { - Real xloc_on_yface = fcy(i,j,k,0); + T fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); + if ( (apym != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0)) ) { + T xloc_on_yface = fcy(i,j,k,0); if(needs_bdry_stencil) { @@ -120,9 +121,9 @@ void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, fym *= bY(i,j,k,n); } - Real fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); - if ( (apyp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j+1,k) != Real(1.0) || vfrc(i,j-1,k) != Real(1.0)) ) { - Real xloc_on_yface = fcy(i,j+1,k,0); + T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); + if ( (apyp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0)) ) { + T xloc_on_yface = fcy(i,j+1,k,0); if(needs_bdry_stencil) { fyp = grad_y_of_phi_on_centroids_extdir(i,j+1,k,n,x,phieb,flag,ccent,bcent,vfrc, xloc_on_yface,is_eb_dirichlet,is_eb_inhomog, @@ -136,15 +137,15 @@ void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, fyp *= bY(i,j+1,k,n); } - Real feb = Real(0.0); + T feb = T(0.0); if (is_eb_dirichlet && flag(i,j,k).isSingleValued()) { - Real dapx = (apxm-apxp)/dxinv[1]; - Real dapy = (apym-apyp)/dxinv[0]; - Real anorm = std::hypot(dapx,dapy); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; + T dapx = (apxm-apxp)/dxinv[1]; + T dapy = (apym-apyp)/dxinv[0]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; feb = grad_eb_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, @@ -156,29 +157,30 @@ void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, } - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (Real(1.0)/kappa) * + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * (dhx*(apxm*fxm-apxp*fxp) + dhy*(apym*fym-apyp*fyp) - dh*feb); } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx (Box const& box, Array4 const& y, - Array4 const& x, Array4 const& a, - Array4 const& bX, Array4 const& bY, +void mlebabeclap_adotx (Box const& box, Array4 const& y, + Array4 const& x, Array4 const& a, + Array4 const& bX, Array4 const& bY, Array4 const& ccm, Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& fcx, - Array4 const& fcy, Array4 const& ba, - Array4 const& bc, Array4 const& beb, - bool is_dirichlet, Array4 const& phieb, - bool is_inhomog, GpuArray const& dxinv, - Real alpha, Real beta, int ncomp, + Array4 const& vfrc, Array4 const& apx, + Array4 const& apy, Array4 const& fcx, + Array4 const& fcy, Array4 const& ba, + Array4 const& bc, Array4 const& beb, + bool is_dirichlet, Array4 const& phieb, + bool is_inhomog, GpuArray const& dxinv, + T alpha, T beta, int ncomp, bool beta_on_centroid, bool phi_on_centroid) noexcept { - Real dhx = beta*dxinv[0]*dxinv[0]; - Real dhy = beta*dxinv[1]*dxinv[1]; - Real dh = beta*dxinv[0]*dxinv[1]; + T dhx = beta*dxinv[0]*dxinv[0]; + T dhy = beta*dxinv[1]*dxinv[1]; + T dh = beta*dxinv[0]*dxinv[1]; bool beta_on_center = !(beta_on_centroid); @@ -188,7 +190,7 @@ void mlebabeclap_adotx (Box const& box, Array4 const& y, { if (flag(i,j,k).isCovered()) { - y(i,j,k,n) = Real(0.0); + y(i,j,k,n) = T(0.0); } else if (flag(i,j,k).isRegular()) { @@ -200,76 +202,76 @@ void mlebabeclap_adotx (Box const& box, Array4 const& y, } else { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); - - Real fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); - if (apxm != Real(0.0) && apxm != Real(1.0)) { - int jj = j + static_cast(std::copysign(Real(1.0),fcx(i,j,k))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k)) : Real(0.0); + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + + T fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); + if (apxm != T(0.0) && apxm != T(1.0)) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i,j,k))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k)) : T(0.0); if (beta_on_center && phi_on_center) { - fxm = (Real(1.0)-fracy)*fxm + fracy*bX(i,jj,k,n)*(x(i,jj,k,n)-x(i-1,jj,k,n)); + fxm = (T(1.0)-fracy)*fxm + fracy*bX(i,jj,k,n)*(x(i,jj,k,n)-x(i-1,jj,k,n)); } else if (beta_on_centroid && phi_on_center) { - fxm = bX(i,j,k,n) * ( (Real(1.0)-fracy)*(x(i, j,k,n)-x(i-1, j,k,n)) + fxm = bX(i,j,k,n) * ( (T(1.0)-fracy)*(x(i, j,k,n)-x(i-1, j,k,n)) + fracy *(x(i,jj,k,n)-x(i-1,jj,k,n)) ); } } - Real fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); - if (apxp != Real(0.0) && apxp != Real(1.0)) { - int jj = j + static_cast(std::copysign(Real(1.0),fcx(i+1,j,k))); - Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx(i+1,j,k)) : Real(0.0); + T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); + if (apxp != T(0.0) && apxp != T(1.0)) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k))); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx(i+1,j,k)) : T(0.0); if (beta_on_center && phi_on_center) { - fxp = (Real(1.0)-fracy)*fxp + fracy*bX(i+1,jj,k,n)*(x(i+1,jj,k,n)-x(i,jj,k,n)); + fxp = (T(1.0)-fracy)*fxp + fracy*bX(i+1,jj,k,n)*(x(i+1,jj,k,n)-x(i,jj,k,n)); } else if (beta_on_centroid && phi_on_center) { - fxp = bX(i+1,j,k,n) * ( (Real(1.0)-fracy)*(x(i+1, j,k,n)-x(i, j,k,n)) + fxp = bX(i+1,j,k,n) * ( (T(1.0)-fracy)*(x(i+1, j,k,n)-x(i, j,k,n)) + fracy *(x(i+1,jj,k,n)-x(i,jj,k,n)) ); } } - Real fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); - if (apym != Real(0.0) && apym != Real(1.0)) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j,k))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k)) : Real(0.0); + T fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); + if (apym != T(0.0) && apym != T(1.0)) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k)) : T(0.0); if (beta_on_center && phi_on_center) { - fym = (Real(1.0)-fracx)*fym + fracx*bY(ii,j,k,n)*(x(ii,j,k,n)-x(ii,j-1,k,n)); + fym = (T(1.0)-fracx)*fym + fracx*bY(ii,j,k,n)*(x(ii,j,k,n)-x(ii,j-1,k,n)); } else if (beta_on_centroid && phi_on_center) { - fym = bY(i,j,k,n) * ( (Real(1.0)-fracx)*(x( i,j,k,n)-x( i,j-1,k,n)) + fym = bY(i,j,k,n) * ( (T(1.0)-fracx)*(x( i,j,k,n)-x( i,j-1,k,n)) + fracx *(x(ii,j,k,n)-x(ii,j-1,k,n)) ); } } - Real fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); - if (apyp != Real(0.0) && apyp != Real(1.0)) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j+1,k))); - Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy(i,j+1,k)) : Real(0.0); + T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); + if (apyp != T(0.0) && apyp != T(1.0)) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k))); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy(i,j+1,k)) : T(0.0); if (beta_on_center && phi_on_center) { - fyp = (Real(1.0)-fracx)*fyp + fracx*bY(ii,j+1,k,n)*(x(ii,j+1,k,n)-x(ii,j,k,n)); + fyp = (T(1.0)-fracx)*fyp + fracx*bY(ii,j+1,k,n)*(x(ii,j+1,k,n)-x(ii,j,k,n)); } else if (beta_on_centroid && phi_on_center) { - fyp = bY(i,j+1,k,n) * ( (Real(1.0)-fracx)*(x( i,j+1,k,n)-x( i,j,k,n)) + fyp = bY(i,j+1,k,n) * ( (T(1.0)-fracx)*(x( i,j+1,k,n)-x( i,j,k,n)) + fracx *(x(ii,j+1,k,n)-x(ii,j,k,n)) ); } } - Real feb = Real(0.0); + T feb = T(0.0); if (is_dirichlet) { - Real dapx = (apxm-apxp)/dxinv[1]; - Real dapy = (apym-apyp)/dxinv[0]; - Real anorm = std::hypot(dapx,dapy); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; + T dapx = (apxm-apxp)/dxinv[1]; + T dapy = (apym-apyp)/dxinv[0]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; - Real phib = is_inhomog ? phieb(i,j,k,n) : Real(0.0); + T phib = is_inhomog ? phieb(i,j,k,n) : T(0.0); - Real bctx = bc(i,j,k,0); - Real bcty = bc(i,j,k,1); - Real dx_eb = get_dx_eb(kappa); + T bctx = bc(i,j,k,0); + T bcty = bc(i,j,k,1); + T dx_eb = get_dx_eb(kappa); - Real dg, gx, gy, sx, sy; + T dg, gx, gy, sx, sy; if (std::abs(anrmx) > std::abs(anrmy)) { dg = dx_eb / std::abs(anrmx); } else { @@ -277,73 +279,74 @@ void mlebabeclap_adotx (Box const& box, Array4 const& y, } gx = (bctx - dg*anrmx); gy = (bcty - dg*anrmy); - sx = std::copysign(Real(1.0),anrmx); - sy = std::copysign(Real(1.0),anrmy); + sx = std::copysign(T(1.0),anrmx); + sy = std::copysign(T(1.0),anrmy); int ii = i - static_cast(sx); int jj = j - static_cast(sy); - Real phig = (Real(1.0) + gx*sx + gy*sy + gx*gy*sx*sy) * x(i ,j ,k,n) + T phig = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy) * x(i ,j ,k,n) + ( - gx*sx - gx*gy*sx*sy) * x(ii,j ,k,n) + ( - gy*sy - gx*gy*sx*sy) * x(i ,jj,k,n) + ( + gx*gy*sx*sy) * x(ii,jj,k,n) ; - Real dphidn = (phib-phig) / dg; + T dphidn = (phib-phig) / dg; feb = dphidn * ba(i,j,k) * beb(i,j,k,n); } - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (Real(1.0)/kappa) * + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * (dhx*(apxm*fxm-apxp*fxp) + dhy*(apym*fym-apyp*fyp) - dh*feb); } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_ebflux (int i, int j, int k, int n, - Array4 const& feb, - Array4 const& x, + Array4 const& feb, + Array4 const& x, Array4 const& flag, - Array4 const& vfrc, - Array4 const& apx, - Array4 const& apy, - Array4 const& bc, - Array4 const& beb, - Array4 const& phieb, + Array4 const& vfrc, + Array4 const& apx, + Array4 const& apy, + Array4 const& bc, + Array4 const& beb, + Array4 const& phieb, bool is_inhomog, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv) noexcept { if (!flag(i,j,k).isSingleValued()) { - feb(i,j,k,n) = Real(0.0); + feb(i,j,k,n) = T(0.0); } else { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); - - Real dapx = (apxm-apxp)/dxinv[1]; - Real dapy = (apym-apyp)/dxinv[0]; - Real anorm = std::hypot(dapx,dapy); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - const Real bareascaling = std::sqrt( (anrmx/dxinv[0])*(anrmx/dxinv[0]) + + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + + T dapx = (apxm-apxp)/dxinv[1]; + T dapy = (apym-apyp)/dxinv[0]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + const T bareascaling = std::sqrt( (anrmx/dxinv[0])*(anrmx/dxinv[0]) + (anrmy/dxinv[1])*(anrmy/dxinv[1]) ); - Real phib = is_inhomog ? phieb(i,j,k,n) : Real(0.0); + T phib = is_inhomog ? phieb(i,j,k,n) : T(0.0); - Real bctx = bc(i,j,k,0); - Real bcty = bc(i,j,k,1); - Real dx_eb = get_dx_eb(kappa); + T bctx = bc(i,j,k,0); + T bcty = bc(i,j,k,1); + T dx_eb = get_dx_eb(kappa); - Real dg, gx, gy, sx, sy; + T dg, gx, gy, sx, sy; if (std::abs(anrmx) > std::abs(anrmy)) { dg = dx_eb / std::abs(anrmx); } else { @@ -351,34 +354,35 @@ void mlebabeclap_ebflux (int i, int j, int k, int n, } gx = bctx - dg*anrmx; gy = bcty - dg*anrmy; - sx = std::copysign(Real(1.0),anrmx); - sy = std::copysign(Real(1.0),anrmy); + sx = std::copysign(T(1.0),anrmx); + sy = std::copysign(T(1.0),anrmy); int ii = i - static_cast(sx); int jj = j - static_cast(sy); - Real phig = (Real(1.0) + gx*sx + gy*sy + gx*gy*sx*sy) * x(i ,j ,k,n) + T phig = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy) * x(i ,j ,k,n) + ( - gx*sx - gx*gy*sx*sy) * x(ii,j ,k,n) + ( - gy*sy - gx*gy*sx*sy) * x(i ,jj,k,n) + ( + gx*gy*sx*sy) * x(ii,jj,k,n) ; - Real dphidn = (phib-phig)/(dg * bareascaling); + T dphidn = (phib-phig)/(dg * bareascaling); feb(i,j,k,n) = -beb(i,j,k,n) * dphidn; } } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_gsrb (Box const& box, - Array4 const& phi, Array4 const& rhs, - Real alpha, Array4 const& a, - Real dhx, Real dhy, Real dh, - GpuArray const& dx, - Array4 const& bX, Array4 const& bY, + Array4 const& phi, Array4 const& rhs, + T alpha, Array4 const& a, + T dhx, T dhy, T dh, + GpuArray const& dx, + Array4 const& bX, Array4 const& bY, Array4 const& m0, Array4 const& m2, Array4 const& m1, Array4 const& m3, - Array4 const& f0, Array4 const& f2, - Array4 const& f1, Array4 const& f3, - Array4 const& ccm, Array4 const& beb, + Array4 const& f0, Array4 const& f2, + Array4 const& f1, Array4 const& f3, + Array4 const& ccm, Array4 const& beb, EBData const& ebdata, bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, Box const& vbox, int redblack, int ncomp) noexcept @@ -393,158 +397,158 @@ void mlebabeclap_gsrb (Box const& box, auto const flag = ebdata.get(i,j,k); if (flag.isCovered()) { - phi(i,j,k,n) = Real(0.0); + phi(i,j,k,n) = T(0.0); } else { - Real cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) - ? f0(vlo.x,j,k,n) : Real(0.0); - Real cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) - ? f1(i,vlo.y,k,n) : Real(0.0); - Real cf2 = (i == vhi.x && m2(vhi.x+1,j,k) > 0) - ? f2(vhi.x,j,k,n) : Real(0.0); - Real cf3 = (j == vhi.y && m3(i,vhi.y+1,k) > 0) - ? f3(i,vhi.y,k,n) : Real(0.0); + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (i == vhi.x && m2(vhi.x+1,j,k) > 0) + ? f2(vhi.x,j,k,n) : T(0.0); + T cf3 = (j == vhi.y && m3(i,vhi.y+1,k) > 0) + ? f3(i,vhi.y,k,n) : T(0.0); if (flag.isRegular()) { - Real gamma = alpha*a(i,j,k) + T gamma = alpha*a(i,j,k) + dhx * (bX(i+1,j,k,n) + bX(i,j,k,n)) + dhy * (bY(i,j+1,k,n) + bY(i,j,k,n)); - Real rho = dhx * (bX(i+1,j,k,n)*phi(i+1,j,k,n) + T rho = dhx * (bX(i+1,j,k,n)*phi(i+1,j,k,n) + bX(i ,j,k,n)*phi(i-1,j,k,n)) + dhy * (bY(i,j+1,k,n)*phi(i,j+1,k,n) + bY(i,j ,k,n)*phi(i,j-1,k,n)); - Real delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf2) + T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf2) + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf3); - Real res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); phi(i,j,k,n) += res/(gamma-delta); } else { - Real kappa = ebdata.get(i,j,k); - Real apxm = ebdata.get(i ,j ,k); - Real apxp = ebdata.get(i+1,j ,k); - Real apym = ebdata.get(i ,j ,k); - Real apyp = ebdata.get(i ,j+1,k); - - Real fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); - Real oxm = -bX(i,j,k,n)*cf0; - Real sxm = bX(i,j,k,n); - if (apxm != Real(0.0) && apxm != Real(1.0)) { - Real fcx = ebdata.get(i,j,k); - int jj = j + static_cast(std::copysign(Real(1.0),fcx)); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx) : Real(0.0); + T kappa = ebdata.get(i,j,k); + T apxm = ebdata.get(i ,j ,k); + T apxp = ebdata.get(i+1,j ,k); + T apym = ebdata.get(i ,j ,k); + T apyp = ebdata.get(i ,j+1,k); + + T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); + T oxm = -bX(i,j,k,n)*cf0; + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0)) { + T fcx = ebdata.get(i,j,k); + int jj = j + static_cast(std::copysign(T(1.0),fcx)); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fxm = (Real(1.0)-fracy)*fxm + + fxm = (T(1.0)-fracy)*fxm + fracy *bX(i,jj,k,n)*(phi(i,jj,k,n)-phi(i-1,jj,k,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fxm = (Real(1.0)-fracy)*( -phi(i-1,j,k,n)) + + fxm = (T(1.0)-fracy)*( -phi(i-1,j,k,n)) + fracy *(phi(i,jj,k,n)-phi(i-1,jj,k,n)); fxm *= bX(i,j,k,n); } - oxm = Real(0.0); - sxm = (Real(1.0)-fracy)*sxm; + oxm = T(0.0); + sxm = (T(1.0)-fracy)*sxm; } - Real fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); - Real oxp = bX(i+1,j,k,n)*cf2; - Real sxp = -bX(i+1,j,k,n); - if (apxp != Real(0.0) && apxp != Real(1.0)) { - Real fcx = ebdata.get(i+1,j,k); - int jj = j + static_cast(std::copysign(Real(1.0),fcx)); - Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx) : Real(0.0); + T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); + T oxp = bX(i+1,j,k,n)*cf2; + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0)) { + T fcx = ebdata.get(i+1,j,k); + int jj = j + static_cast(std::copysign(T(1.0),fcx)); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fxp = (Real(1.0)-fracy)*fxp + + fxp = (T(1.0)-fracy)*fxp + fracy *bX(i+1,jj,k,n)*(phi(i+1,jj,k,n)-phi(i,jj,k,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fxp = (Real(1.0)-fracy)*(phi(i+1,j,k,n) ) + + fxp = (T(1.0)-fracy)*(phi(i+1,j,k,n) ) + fracy *(phi(i+1,jj,k,n)-phi(i,jj,k,n)); fxp *= bX(i+1,j,k,n); } - oxp = Real(0.0); - sxp = (Real(1.0)-fracy)*sxp; + oxp = T(0.0); + sxp = (T(1.0)-fracy)*sxp; } - Real fym = -bY(i,j,k,n)*phi(i,j-1,k,n); - Real oym = -bY(i,j,k,n)*cf1; - Real sym = bY(i,j,k,n); - if (apym != Real(0.0) && apym != Real(1.0)) { - Real fcy = ebdata.get(i,j,k); - int ii = i + static_cast(std::copysign(Real(1.0),fcy)); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy) : Real(0.0); + T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); + T oym = -bY(i,j,k,n)*cf1; + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0)) { + T fcy = ebdata.get(i,j,k); + int ii = i + static_cast(std::copysign(T(1.0),fcy)); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fym = (Real(1.0)-fracx)*fym + + fym = (T(1.0)-fracx)*fym + fracx *bY(ii,j,k,n)*(phi(ii,j,k,n)-phi(ii,j-1,k,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fym = (Real(1.0)-fracx)*( -phi( i,j-1,k,n)) + + fym = (T(1.0)-fracx)*( -phi( i,j-1,k,n)) + fracx *(phi(ii,j,k,n)-phi(ii,j-1,k,n)); fym *= bY(i,j,k,n); } - oym = Real(0.0); - sym = (Real(1.0)-fracx)*sym; + oym = T(0.0); + sym = (T(1.0)-fracx)*sym; } - Real fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); - Real oyp = bY(i,j+1,k,n)*cf3; - Real syp = -bY(i,j+1,k,n); - if (apyp != Real(0.0) && apyp != Real(1.0)) { - Real fcy = ebdata.get(i,j+1,k); - int ii = i + static_cast(std::copysign(Real(1.0),fcy)); - Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy) : Real(0.0); + T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); + T oyp = bY(i,j+1,k,n)*cf3; + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0)) { + T fcy = ebdata.get(i,j+1,k); + int ii = i + static_cast(std::copysign(T(1.0),fcy)); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fyp = (Real(1.0)-fracx)*fyp + + fyp = (T(1.0)-fracx)*fyp + fracx*bY(ii,j+1,k,n)*(phi(ii,j+1,k,n)-phi(ii,j,k,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fyp = (Real(1.0)-fracx)*(phi(i,j+1,k,n) )+ + fyp = (T(1.0)-fracx)*(phi(i,j+1,k,n) )+ fracx *(phi(ii,j+1,k,n)-phi(ii,j,k,n)); fyp *= bY(i,j+1,k,n); } - oyp = Real(0.0); - syp = (Real(1.0)-fracx)*syp; + oyp = T(0.0); + syp = (T(1.0)-fracx)*syp; } - Real vfrcinv = (Real(1.0)/kappa); - Real gamma = alpha*a(i,j,k) + vfrcinv * + T vfrcinv = (T(1.0)/kappa); + T gamma = alpha*a(i,j,k) + vfrcinv * (dhx*(apxm*sxm-apxp*sxp) + dhy*(apym*sym-apyp*syp)); - Real rho = -vfrcinv * + T rho = -vfrcinv * (dhx*(apxm*fxm-apxp*fxp) + dhy*(apym*fym-apyp*fyp)); - Real delta = -vfrcinv * + T delta = -vfrcinv * (dhx*(apxm*oxm-apxp*oxp) + dhy*(apym*oym-apyp*oyp)); if (is_dirichlet) { - Real dapx = (apxm-apxp)*dx[1]; - Real dapy = (apym-apyp)*dx[0]; - Real anorm = std::hypot(dapx,dapy); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - - Real bctx = ebdata.get(i,j,k,0); - Real bcty = ebdata.get(i,j,k,1); - Real dx_eb = get_dx_eb(kappa); - - Real dg, gx, gy, sx, sy; + T dapx = (apxm-apxp)*dx[1]; + T dapy = (apym-apyp)*dx[0]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + + T bctx = ebdata.get(i,j,k,0); + T bcty = ebdata.get(i,j,k,1); + T dx_eb = get_dx_eb(kappa); + + T dg, gx, gy, sx, sy; if (std::abs(anrmx) > std::abs(anrmy)) { dg = dx_eb / std::abs(anrmx); } else { @@ -552,30 +556,30 @@ void mlebabeclap_gsrb (Box const& box, } gx = bctx - dg*anrmx; gy = bcty - dg*anrmy; - sx = std::copysign(Real(1.0),anrmx); - sy = std::copysign(Real(1.0),anrmy); + sx = std::copysign(T(1.0),anrmx); + sy = std::copysign(T(1.0),anrmy); int ii = i - static_cast(sx); int jj = j - static_cast(sy); - Real phig_gamma = (Real(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); - Real phig = ( - gx*sx - gx*gy*sx*sy) * phi(ii,j ,k,n) + T phig_gamma = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); + T phig = ( - gx*sx - gx*gy*sx*sy) * phi(ii,j ,k,n) + ( - gy*sy - gx*gy*sx*sy) * phi(i ,jj,k,n) + ( + gx*gy*sx*sy) * phi(ii,jj,k,n); // In gsrb we are always in residual-correction form so phib = 0 - Real dphidn = ( -phig)/dg; + T dphidn = ( -phig)/dg; - Real ba = ebdata.get(i,j,k); + T ba = ebdata.get(i,j,k); - Real feb = dphidn * ba * beb(i,j,k,n); + T feb = dphidn * ba * beb(i,j,k,n); rho += -vfrcinv*(-dh)*feb; - Real feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); + T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); gamma += vfrcinv*(-dh)*feb_gamma; } - Real res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); phi(i,j,k,n) += res/(gamma-delta); } } @@ -583,11 +587,12 @@ void mlebabeclap_gsrb (Box const& box, }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4 const& apx, - Array4 const& fcx, Array4 const& sol, - Array4 const& bX, Array4 const& ccm, - Real dhx, int face_only, int ncomp, Box const& xbox, +void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4 const& apx, + Array4 const& fcx, Array4 const& sol, + Array4 const& bX, Array4 const& ccm, + T dhx, int face_only, int ncomp, Box const& xbox, bool beta_on_centroid, bool phi_on_centroid) noexcept { int lof = xbox.smallEnd(0); @@ -595,19 +600,19 @@ void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4(std::copysign(Real(1.0),fcx(i,j,k))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k)) : Real(0.0); + T fxm = bX(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n)); + int jj = j + static_cast(std::copysign(T(1.0),fcx(i,j,k))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k)) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fxm = (Real(1.0)-fracy)*fxm + fracy*bX(i,jj,k,n)*(sol(i,jj,k,n)-sol(i-1,jj,k,n)); + fxm = (T(1.0)-fracy)*fxm + fracy*bX(i,jj,k,n)*(sol(i,jj,k,n)-sol(i-1,jj,k,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fxm = bX(i,j,k,n) * ( (Real(1.0)-fracy)*(sol(i, j,k,n)-sol(i-1, j,k,n)) + + fxm = bX(i,j,k,n) * ( (T(1.0)-fracy)*(sol(i, j,k,n)-sol(i-1, j,k,n)) + fracy *(sol(i,jj,k,n)-sol(i-1,jj,k,n)) ); } fx(i,j,k,n) = -fxm*dhx; @@ -616,11 +621,12 @@ void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4 const& apy, - Array4 const& fcy, Array4 const& sol, - Array4 const& bY, Array4 const& ccm, - Real dhy, int face_only, int ncomp, Box const& ybox, +void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4 const& apy, + Array4 const& fcy, Array4 const& sol, + Array4 const& bY, Array4 const& ccm, + T dhy, int face_only, int ncomp, Box const& ybox, bool beta_on_centroid, bool phi_on_centroid) noexcept { int lof = ybox.smallEnd(1); @@ -628,19 +634,19 @@ void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4(std::copysign(Real(1.0),fcy(i,j,k))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k)) : Real(0.0); + T fym = bY(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n)); + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k)) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fym = (Real(1.0)-fracx)*fym + fracx*bY(ii,j,k,n)*(sol(ii,j,k,n)-sol(ii,j-1,k,n)); + fym = (T(1.0)-fracx)*fym + fracx*bY(ii,j,k,n)*(sol(ii,j,k,n)-sol(ii,j-1,k,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fym = bY(i,j,k,n) * ( (Real(1.0)-fracx)*(sol( i,j,k,n)-sol( i,j-1,k,n)) + + fym = bY(i,j,k,n) * ( (T(1.0)-fracx)*(sol( i,j,k,n)-sol( i,j-1,k,n)) + fracx *(sol(ii,j,k,n)-sol(ii,j-1,k,n)) ); } fy(i,j,k,n) = -fym*dhy; @@ -649,18 +655,19 @@ void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_x_0 (Box const& box, Array4 const& fx, Array4 const& apx, - Array4 const& sol, Array4 const& bX, - Real dhx, int face_only, int ncomp, Box const& xbox) noexcept +void mlebabeclap_flux_x_0 (Box const& box, Array4 const& fx, Array4 const& apx, + Array4 const& sol, Array4 const& bX, + T dhx, int face_only, int ncomp, Box const& xbox) noexcept { int lof = xbox.smallEnd(0); int hif = xbox.bigEnd(0); amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { if (!face_only || lof == i || hif == i) { - if (apx(i,j,k) == Real(0.0)) { - fx(i,j,k,n) = Real(0.0); + if (apx(i,j,k) == T(0.0)) { + fx(i,j,k,n) = T(0.0); } else { fx(i,j,k,n) = -dhx*bX(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n)); } @@ -668,18 +675,19 @@ void mlebabeclap_flux_x_0 (Box const& box, Array4 const& fx, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_y_0 (Box const& box, Array4 const& fy, Array4 const& apy, - Array4 const& sol, Array4 const& bY, - Real dhy, int face_only, int ncomp, Box const& ybox) noexcept +void mlebabeclap_flux_y_0 (Box const& box, Array4 const& fy, Array4 const& apy, + Array4 const& sol, Array4 const& bY, + T dhy, int face_only, int ncomp, Box const& ybox) noexcept { int lof = ybox.smallEnd(1); int hif = ybox.bigEnd(1); amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { if (!face_only || lof == j || hif == j) { - if (apy(i,j,k) == Real(0.0)) { - fy(i,j,k,n) = Real(0.0); + if (apy(i,j,k) == T(0.0)) { + fy(i,j,k,n) = T(0.0); } else { fy(i,j,k,n) = -dhy*bY(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n)); } @@ -687,94 +695,99 @@ void mlebabeclap_flux_y_0 (Box const& box, Array4 const& fy, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_x (Box const& box, Array4 const& gx, Array4 const& sol, - Array4 const& apx, Array4 const& fcx, +void mlebabeclap_grad_x (Box const& box, Array4 const& gx, Array4 const& sol, + Array4 const& apx, Array4 const& fcx, Array4 const& ccm, - Real dxi, int ncomp, bool phi_on_centroid) noexcept + T dxi, int ncomp, bool phi_on_centroid) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apx(i,j,k) == Real(0.0)) { - gx(i,j,k,n) = Real(0.0); - } else if (apx(i,j,k) == Real(1.0)) { + if (apx(i,j,k) == T(0.0)) { + gx(i,j,k,n) = T(0.0); + } else if (apx(i,j,k) == T(1.0)) { gx(i,j,k,n) = dxi*(sol(i,j,k,n)-sol(i-1,j,k,n)); } else { - Real gxm = (sol(i,j,k,n)-sol(i-1,j,k,n)); - int jj = j + static_cast(std::copysign(Real(1.0),fcx(i,j,k))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k)) : Real(0.0); + T gxm = (sol(i,j,k,n)-sol(i-1,j,k,n)); + int jj = j + static_cast(std::copysign(T(1.0),fcx(i,j,k))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k)) : T(0.0); if (!phi_on_centroid) { - gxm = (Real(1.0)-fracy)*gxm + fracy*(sol(i,jj,k,n)-sol(i-1,jj,k,n)); + gxm = (T(1.0)-fracy)*gxm + fracy*(sol(i,jj,k,n)-sol(i-1,jj,k,n)); } gx(i,j,k,n) = gxm*dxi; } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_y (Box const& box, Array4 const& gy, Array4 const& sol, - Array4 const& apy, Array4 const& fcy, +void mlebabeclap_grad_y (Box const& box, Array4 const& gy, Array4 const& sol, + Array4 const& apy, Array4 const& fcy, Array4 const& ccm, - Real dyi, int ncomp, bool phi_on_centroid) noexcept + T dyi, int ncomp, bool phi_on_centroid) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apy(i,j,k) == Real(0.0)) { - gy(i,j,k,n) = Real(0.0); - } else if (apy(i,j,k) == Real(1.0)) { + if (apy(i,j,k) == T(0.0)) { + gy(i,j,k,n) = T(0.0); + } else if (apy(i,j,k) == T(1.0)) { gy(i,j,k,n) = dyi*(sol(i,j,k,n)-sol(i,j-1,k,n)); } else { - Real gym = (sol(i,j,k,n)-sol(i,j-1,k,n)); - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j,k))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k)) : Real(0.0); + T gym = (sol(i,j,k,n)-sol(i,j-1,k,n)); + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k)) : T(0.0); if (!phi_on_centroid) { - gym = (Real(1.0)-fracx)*gym + fracx*(sol(ii,j,k,n)-sol(ii,j-1,k,n)); + gym = (T(1.0)-fracx)*gym + fracx*(sol(ii,j,k,n)-sol(ii,j-1,k,n)); } gy(i,j,k,n) = gym*dyi; } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_x_0 (Box const& box, Array4 const& gx, Array4 const& sol, - Array4 const& apx, Real dxi, int ncomp) noexcept +void mlebabeclap_grad_x_0 (Box const& box, Array4 const& gx, Array4 const& sol, + Array4 const& apx, T dxi, int ncomp) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apx(i,j,k) == Real(0.0)) { - gx(i,j,k,n) = Real(0.0); + if (apx(i,j,k) == T(0.0)) { + gx(i,j,k,n) = T(0.0); } else { gx(i,j,k,n) = dxi*(sol(i,j,k,n)-sol(i-1,j,k,n)); } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_y_0 (Box const& box, Array4 const& gy, Array4 const& sol, - Array4 const& apy, Real dyi, int ncomp) noexcept +void mlebabeclap_grad_y_0 (Box const& box, Array4 const& gy, Array4 const& sol, + Array4 const& apy, T dyi, int ncomp) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apy(i,j,k) == Real(0.0)) { - gy(i,j,k,n) = Real(0.0); + if (apy(i,j,k) == T(0.0)) { + gy(i,j,k,n) = T(0.0); } else { gy(i,j,k,n) = dyi*(sol(i,j,k,n)-sol(i,j-1,k,n)); } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_normalize (Box const& box, Array4 const& phi, - Real alpha, Array4 const& a, - Real dhx, Real dhy, Real dh, - const amrex::GpuArray& dx, - Array4 const& bX, Array4 const& bY, +void mlebabeclap_normalize (Box const& box, Array4 const& phi, + T alpha, Array4 const& a, + T dhx, T dhy, T dh, + const amrex::GpuArray& dx, + Array4 const& bX, Array4 const& bY, Array4 const& ccm, Array4 const& flag, - Array4 const& vfrc, - Array4 const& apx, Array4 const& apy, - Array4 const& fcx, Array4 const& fcy, - Array4 const& ba, Array4 const& bc, - Array4 const& beb, + Array4 const& vfrc, + Array4 const& apx, Array4 const& apy, + Array4 const& fcx, Array4 const& fcy, + Array4 const& ba, Array4 const& bc, + Array4 const& beb, bool is_dirichlet, bool beta_on_centroid, int ncomp) noexcept { amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept @@ -786,62 +799,62 @@ void mlebabeclap_normalize (Box const& box, Array4 const& phi, } else if (flag(i,j,k).isSingleValued()) { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); - - Real sxm = bX(i,j,k,n); - if (apxm != Real(0.0) && apxm != Real(1.0) && !beta_on_centroid) { - int jj = j + static_cast(std::copysign(Real(1.0),fcx(i,j,k))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) - ? std::abs(fcx(i,j,k)) : Real(0.0); - sxm = (Real(1.0)-fracy)*sxm; + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0) && !beta_on_centroid) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i,j,k))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) + ? std::abs(fcx(i,j,k)) : T(0.0); + sxm = (T(1.0)-fracy)*sxm; } - Real sxp = -bX(i+1,j,k,n); - if (apxp != Real(0.0) && apxp != Real(1.0) && !beta_on_centroid) { - int jj = j + static_cast(std::copysign(Real(1.0),fcx(i+1,j,k))); - Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) - ? std::abs(fcx(i+1,j,k)) : Real(0.0); - sxp = (Real(1.0)-fracy)*sxp; + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0) && !beta_on_centroid) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k))); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) + ? std::abs(fcx(i+1,j,k)) : T(0.0); + sxp = (T(1.0)-fracy)*sxp; } - Real sym = bY(i,j,k,n); - if (apym != Real(0.0) && apym != Real(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j,k))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) - ? std::abs(fcy(i,j,k)) : Real(0.0); - sym = (Real(1.0)-fracx)*sym; + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) + ? std::abs(fcy(i,j,k)) : T(0.0); + sym = (T(1.0)-fracx)*sym; } - Real syp = -bY(i,j+1,k,n); - if (apyp != Real(0.0) && apyp != Real(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j+1,k))); - Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) - ? std::abs(fcy(i,j+1,k)) : Real(0.0); - syp = (Real(1.0)-fracx)*syp; + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k))); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) + ? std::abs(fcy(i,j+1,k)) : T(0.0); + syp = (T(1.0)-fracx)*syp; } - Real vfrcinv = (Real(1.0)/kappa); - Real gamma = alpha*a(i,j,k) + vfrcinv * + T vfrcinv = (T(1.0)/kappa); + T gamma = alpha*a(i,j,k) + vfrcinv * (dhx*(apxm*sxm-apxp*sxp) + dhy*(apym*sym-apyp*syp)); if (is_dirichlet) { - Real dapx = (apxm-apxp)*dx[1]; - Real dapy = (apym-apyp)*dx[0]; - Real anorm = std::hypot(dapx,dapy); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - - Real bctx = bc(i,j,k,0); - Real bcty = bc(i,j,k,1); - Real dx_eb = get_dx_eb(vfrc(i,j,k)); - - Real dg, gx, gy, sx, sy; + T dapx = (apxm-apxp)*dx[1]; + T dapy = (apym-apyp)*dx[0]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + + T bctx = bc(i,j,k,0); + T bcty = bc(i,j,k,1); + T dx_eb = get_dx_eb(vfrc(i,j,k)); + + T dg, gx, gy, sx, sy; if (std::abs(anrmx) > std::abs(anrmy)) { dg = dx_eb / std::abs(anrmx); } else { @@ -849,11 +862,11 @@ void mlebabeclap_normalize (Box const& box, Array4 const& phi, } gx = bctx - dg*anrmx; gy = bcty - dg*anrmy; - sx = std::copysign(Real(1.0),anrmx); - sy = std::copysign(Real(1.0),anrmy); + sx = std::copysign(T(1.0),anrmx); + sy = std::copysign(T(1.0),anrmy); - Real phig_gamma = (Real(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); - Real feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); + T phig_gamma = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); + T feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); gamma += vfrcinv*(-dh)*feb_gamma; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H index 4c914b59655..94e371ae408 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H @@ -7,516 +7,586 @@ namespace amrex { +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, - Array4 const& x, Array4 const& a, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, +void mlebabeclap_adotx_centroid (int i, int j, int k, int n, Array4 const& y, + Array4 const& x, Array4 const& a, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, - Array4 const& ccent, Array4 const& ba, - Array4 const& bcent, Array4 const& beb, - Array4 const& phieb, + Array4 const& vfrc, Array4 const& apx, + Array4 const& apy, Array4 const& apz, + Array4 const& fcx, Array4 const& fcy, + Array4 const& fcz, + Array4 const& ccent, Array4 const& ba, + Array4 const& bcent, Array4 const& beb, + Array4 const& phieb, const int& domlo_x, const int& domlo_y, const int& domlo_z, const int& domhi_x, const int& domhi_y, const int& domhi_z, const bool& on_x_face, const bool& on_y_face, const bool& on_z_face, bool is_eb_dirichlet, bool is_eb_inhomog, - GpuArray const& dxinv, - Real alpha, Real beta, int ncomp) noexcept + T alpha, T dhx, T dhy, T dhz) noexcept { - Real dhx = beta*dxinv[0]*dxinv[0]; - Real dhy = beta*dxinv[1]*dxinv[1]; - Real dhz = beta*dxinv[2]*dxinv[2]; - - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + if (flag(i,j,k).isCovered()) { - if (flag(i,j,k).isCovered()) - { - y(i,j,k,n) = Real(0.0); - } - else if (flag(i,j,k).isRegular() && - ((flag(i-1,j ,k ).isRegular() && flag(i+1,j ,k ).isRegular() && + y(i,j,k,n) = T(0.0); + } + else if (flag(i,j,k).isRegular() && + ((flag(i-1,j ,k ).isRegular() && flag(i+1,j ,k ).isRegular() && flag(i ,j-1,k ).isRegular() && flag(i ,j+1,k ).isRegular() && flag(i ,j ,k-1).isRegular() && flag(i ,j ,k+1).isRegular()) )) - { - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) - - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) - -bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) - - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) - -bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))) - - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k ,n)) - -bZ(i,j,k ,n)*(x(i,j,k ,n) - x(i,j,k-1,n))); - } - else - { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); - Real apzm = apz(i,j,k); - Real apzp = apz(i,j,k+1); - - // First get EB-aware slope that doesn't know about extdir - bool needs_bdry_stencil = (i <= domlo_x) || (i >= domhi_x) || - (j <= domlo_y) || (j >= domhi_y) || - (k <= domlo_z) || (k >= domhi_z); - - Real fxm = bX(i,j,k,n)*(x(i,j,k,n) - x(i-1,j,k,n)); - if ( (apxm != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i-1,j,k) != Real(1.0) || vfrc(i+1,j,k) != Real(1.0)) ) { - Real yloc_on_xface = fcx(i,j,k,0); - Real zloc_on_xface = fcx(i,j,k,1); - - if(needs_bdry_stencil) { - fxm = grad_x_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - yloc_on_xface,zloc_on_xface, - is_eb_dirichlet,is_eb_inhomog, - on_x_face,domlo_x,domhi_x, - on_y_face,domlo_y,domhi_y, - on_z_face,domlo_z,domhi_z); - } else { - fxm = grad_x_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, - yloc_on_xface,zloc_on_xface,is_eb_dirichlet,is_eb_inhomog); - } - - fxm *= bX(i,j,k,n); + { + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) + -bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) + - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) + -bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))) + - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k ,n)) + -bZ(i,j,k ,n)*(x(i,j,k ,n) - x(i,j,k-1,n))); + } + else + { + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + T apzm = apz(i,j,k); + T apzp = apz(i,j,k+1); + + // First get EB-aware slope that doesn't know about extdir + bool needs_bdry_stencil = (i <= domlo_x) || (i >= domhi_x) || + (j <= domlo_y) || (j >= domhi_y) || + (k <= domlo_z) || (k >= domhi_z); + + T fxm = bX(i,j,k,n)*(x(i,j,k,n) - x(i-1,j,k,n)); + if ( (apxm != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0)) ) { + T yloc_on_xface = fcx(i,j,k,0); + T zloc_on_xface = fcx(i,j,k,1); + + if(needs_bdry_stencil) { + fxm = grad_x_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + yloc_on_xface,zloc_on_xface, + is_eb_dirichlet,is_eb_inhomog, + on_x_face,domlo_x,domhi_x, + on_y_face,domlo_y,domhi_y, + on_z_face,domlo_z,domhi_z); + } else { + fxm = grad_x_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, + yloc_on_xface,zloc_on_xface,is_eb_dirichlet,is_eb_inhomog); } - Real fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i,j,k,n)); - if ( (apxp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i+1,j,k) != Real(1.0) || vfrc(i-1,j,k) != Real(1.0)) ) { - Real yloc_on_xface = fcx(i+1,j,k,0); - Real zloc_on_xface = fcx(i+1,j,k,1); - - if(needs_bdry_stencil) { - fxp = grad_x_of_phi_on_centroids_extdir(i+1,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - yloc_on_xface,zloc_on_xface, - is_eb_dirichlet,is_eb_inhomog, - on_x_face,domlo_x,domhi_x, - on_y_face,domlo_y,domhi_y, - on_z_face,domlo_z,domhi_z); - } else { - fxp = grad_x_of_phi_on_centroids(i+1,j,k,n,x,phieb,flag,ccent,bcent, - yloc_on_xface,zloc_on_xface,is_eb_dirichlet,is_eb_inhomog); - } + fxm *= bX(i,j,k,n); + } - fxp *= bX(i+1,j,k,n); + T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i,j,k,n)); + if ( (apxp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0)) ) { + T yloc_on_xface = fcx(i+1,j,k,0); + T zloc_on_xface = fcx(i+1,j,k,1); + + if(needs_bdry_stencil) { + fxp = grad_x_of_phi_on_centroids_extdir(i+1,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + yloc_on_xface,zloc_on_xface, + is_eb_dirichlet,is_eb_inhomog, + on_x_face,domlo_x,domhi_x, + on_y_face,domlo_y,domhi_y, + on_z_face,domlo_z,domhi_z); + } else { + fxp = grad_x_of_phi_on_centroids(i+1,j,k,n,x,phieb,flag,ccent,bcent, + yloc_on_xface,zloc_on_xface,is_eb_dirichlet,is_eb_inhomog); } - Real fym = bY(i,j,k,n)*(x(i,j,k,n) - x(i,j-1,k,n)); - if ( (apym != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j-1,k) != Real(1.0) || vfrc(i,j+1,k) != Real(1.0)) ) { - Real xloc_on_yface = fcy(i,j,k,0); - Real zloc_on_yface = fcy(i,j,k,1); - - if(needs_bdry_stencil) { - fym = grad_y_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - xloc_on_yface,zloc_on_yface, - is_eb_dirichlet,is_eb_inhomog, - on_x_face,domlo_x,domhi_x, - on_y_face,domlo_y,domhi_y, - on_z_face,domlo_z,domhi_z); - } else { - fym = grad_y_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, - xloc_on_yface,zloc_on_yface,is_eb_dirichlet,is_eb_inhomog); - } + fxp *= bX(i+1,j,k,n); + } - fym *= bY(i,j,k,n); + T fym = bY(i,j,k,n)*(x(i,j,k,n) - x(i,j-1,k,n)); + if ( (apym != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0)) ) { + T xloc_on_yface = fcy(i,j,k,0); + T zloc_on_yface = fcy(i,j,k,1); + + if(needs_bdry_stencil) { + fym = grad_y_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + xloc_on_yface,zloc_on_yface, + is_eb_dirichlet,is_eb_inhomog, + on_x_face,domlo_x,domhi_x, + on_y_face,domlo_y,domhi_y, + on_z_face,domlo_z,domhi_z); + } else { + fym = grad_y_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, + xloc_on_yface,zloc_on_yface,is_eb_dirichlet,is_eb_inhomog); } - Real fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j,k,n)); - if ( (apyp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j+1,k) != Real(1.0) || vfrc(i,j-1,k) != Real(1.0)) ) { - Real xloc_on_yface = fcy(i,j+1,k,0); - Real zloc_on_yface = fcy(i,j+1,k,1); - - if(needs_bdry_stencil) { - fyp = grad_y_of_phi_on_centroids_extdir(i,j+1,k,n,x,phieb,flag,ccent,bcent,vfrc, - xloc_on_yface,zloc_on_yface, - is_eb_dirichlet,is_eb_inhomog, - on_x_face,domlo_x,domhi_x, - on_y_face,domlo_y,domhi_y, - on_z_face,domlo_z,domhi_z); - } else { - fyp = grad_y_of_phi_on_centroids(i,j+1,k,n,x,phieb,flag,ccent,bcent, - xloc_on_yface,zloc_on_yface,is_eb_dirichlet,is_eb_inhomog); - } + fym *= bY(i,j,k,n); + } - fyp *= bY(i,j+1,k,n); + T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j,k,n)); + if ( (apyp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0)) ) { + T xloc_on_yface = fcy(i,j+1,k,0); + T zloc_on_yface = fcy(i,j+1,k,1); + + if(needs_bdry_stencil) { + fyp = grad_y_of_phi_on_centroids_extdir(i,j+1,k,n,x,phieb,flag,ccent,bcent,vfrc, + xloc_on_yface,zloc_on_yface, + is_eb_dirichlet,is_eb_inhomog, + on_x_face,domlo_x,domhi_x, + on_y_face,domlo_y,domhi_y, + on_z_face,domlo_z,domhi_z); + } else { + fyp = grad_y_of_phi_on_centroids(i,j+1,k,n,x,phieb,flag,ccent,bcent, + xloc_on_yface,zloc_on_yface,is_eb_dirichlet,is_eb_inhomog); } - Real fzm = bZ(i,j,k,n)*(x(i,j,k,n) - x(i,j,k-1,n)); - if ( (apzm != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j,k-1) != Real(1.0) || vfrc(i,j,k+1) != Real(1.0)) ) { - Real xloc_on_zface = fcz(i,j,k,0); - Real yloc_on_zface = fcz(i,j,k,1); - - if(needs_bdry_stencil) { - fzm = grad_z_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - xloc_on_zface,yloc_on_zface, - is_eb_dirichlet,is_eb_inhomog, - on_x_face,domlo_x,domhi_x, - on_y_face,domlo_y,domhi_y, - on_z_face,domlo_z,domhi_z); - } else { - fzm = grad_z_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, - xloc_on_zface,yloc_on_zface,is_eb_dirichlet,is_eb_inhomog); - } + fyp *= bY(i,j+1,k,n); + } - fzm *= bZ(i,j,k,n); + T fzm = bZ(i,j,k,n)*(x(i,j,k,n) - x(i,j,k-1,n)); + if ( (apzm != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j,k-1) != T(1.0) || vfrc(i,j,k+1) != T(1.0)) ) { + T xloc_on_zface = fcz(i,j,k,0); + T yloc_on_zface = fcz(i,j,k,1); + + if(needs_bdry_stencil) { + fzm = grad_z_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + xloc_on_zface,yloc_on_zface, + is_eb_dirichlet,is_eb_inhomog, + on_x_face,domlo_x,domhi_x, + on_y_face,domlo_y,domhi_y, + on_z_face,domlo_z,domhi_z); + } else { + fzm = grad_z_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, + xloc_on_zface,yloc_on_zface,is_eb_dirichlet,is_eb_inhomog); } - Real fzp = bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k,n)); - if ( (apzp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j,k+1) != Real(1.0) || vfrc(i,j,k-1) != Real(1.0)) ) { - Real xloc_on_zface = fcz(i,j,k+1,0); - Real yloc_on_zface = fcz(i,j,k+1,1); - - if(needs_bdry_stencil) { - fzp = grad_z_of_phi_on_centroids_extdir(i,j,k+1,n,x,phieb,flag,ccent,bcent,vfrc, - xloc_on_zface,yloc_on_zface, - is_eb_dirichlet,is_eb_inhomog, - on_x_face,domlo_x,domhi_x, - on_y_face,domlo_y,domhi_y, - on_z_face,domlo_z,domhi_z); - } else { - fzp = grad_z_of_phi_on_centroids(i,j,k+1,n,x,phieb,flag,ccent,bcent, - xloc_on_zface,yloc_on_zface,is_eb_dirichlet,is_eb_inhomog); - } + fzm *= bZ(i,j,k,n); + } - fzp *= bZ(i,j,k+1,n); + T fzp = bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k,n)); + if ( (apzp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j,k+1) != T(1.0) || vfrc(i,j,k-1) != T(1.0)) ) { + T xloc_on_zface = fcz(i,j,k+1,0); + T yloc_on_zface = fcz(i,j,k+1,1); + + if(needs_bdry_stencil) { + fzp = grad_z_of_phi_on_centroids_extdir(i,j,k+1,n,x,phieb,flag,ccent,bcent,vfrc, + xloc_on_zface,yloc_on_zface, + is_eb_dirichlet,is_eb_inhomog, + on_x_face,domlo_x,domhi_x, + on_y_face,domlo_y,domhi_y, + on_z_face,domlo_z,domhi_z); + } else { + fzp = grad_z_of_phi_on_centroids(i,j,k+1,n,x,phieb,flag,ccent,bcent, + xloc_on_zface,yloc_on_zface,is_eb_dirichlet,is_eb_inhomog); } - Real feb = Real(0.0); - if (is_eb_dirichlet && flag(i,j,k).isSingleValued()) { - Real dapx = apxm-apxp; - Real dapy = apym-apyp; - Real dapz = apzm-apzp; - Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - Real anrmz = dapz * anorminv; - - feb = grad_eb_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - anrmx,anrmy,anrmz,is_eb_inhomog, - on_x_face,domlo_x,domhi_x, - on_y_face,domlo_y,domhi_y, - on_z_face,domlo_z,domhi_z); - feb *= ba(i,j,k) * beb(i,j,k,n); - } + fzp *= bZ(i,j,k+1,n); + } - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (Real(1.0)/kappa) * - (dhx*(apxm*fxm - apxp*fxp) + - dhy*(apym*fym - apyp*fyp) + - dhz*(apzm*fzm - apzp*fzp) - dhx*feb); + T feb = T(0.0); + if (is_eb_dirichlet && flag(i,j,k).isSingleValued()) { + T dapx = apxm-apxp; + T dapy = apym-apyp; + T dapz = apzm-apzp; + T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + T anrmz = dapz * anorminv; + + feb = grad_eb_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + anrmx,anrmy,anrmz,is_eb_inhomog, + on_x_face,domlo_x,domhi_x, + on_y_face,domlo_y,domhi_y, + on_z_face,domlo_z,domhi_z); + feb *= ba(i,j,k) * beb(i,j,k,n); } + + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * + (dhx*(apxm*fxm - apxp*fxp) + + dhy*(apym*fym - apyp*fyp) + + dhz*(apzm*fzm - apzp*fzp) - dhx*feb); + } +} + + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, + Array4 const& x, Array4 const& a, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, + Array4 const& flag, + Array4 const& vfrc, Array4 const& apx, + Array4 const& apy, Array4 const& apz, + Array4 const& fcx, Array4 const& fcy, + Array4 const& fcz, + Array4 const& ccent, Array4 const& ba, + Array4 const& bcent, Array4 const& beb, + Array4 const& phieb, + const int& domlo_x, const int& domlo_y, const int& domlo_z, + const int& domhi_x, const int& domhi_y, const int& domhi_z, + const bool& on_x_face, const bool& on_y_face, const bool& on_z_face, + bool is_eb_dirichlet, bool is_eb_inhomog, + GpuArray const& dxinv, + T alpha, T beta, int ncomp) noexcept +{ + T dhx = beta*dxinv[0]*dxinv[0]; + T dhy = beta*dxinv[1]*dxinv[1]; + T dhz = beta*dxinv[2]*dxinv[2]; + + amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + { + mlebabeclap_adotx_centroid(i, j, k, n, + y, x, a, + bX, bY, bZ, + flag, vfrc, + apx, apy, apz, + fcx, fcy, fcz, + ccent, ba, bcent, beb, + phieb, + domlo_x, domlo_y, domlo_z, + domhi_x, domhi_y, domhi_z, + on_x_face, on_y_face, on_z_face, + is_eb_dirichlet, is_eb_inhomog, + alpha, dhx, dhy, dhz); }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx (Box const& box, Array4 const& y, - Array4 const& x, Array4 const& a, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, Array4 const& ccm, +void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, + Array4 const& x, Array4 const& a, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, Array4 const& ccm, Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, Array4 const& ba, - Array4 const& bc, Array4 const& beb, - bool is_dirichlet, Array4 const& phieb, - bool is_inhomog, GpuArray const& dxinv, - Real alpha, Real beta, int ncomp, + Array4 const& vfrc, Array4 const& apx, + Array4 const& apy, Array4 const& apz, + Array4 const& fcx, Array4 const& fcy, + Array4 const& fcz, Array4 const& ba, + Array4 const& bc, Array4 const& beb, + bool is_dirichlet, Array4 const& phieb, + bool is_inhomog, T alpha, T dhx, T dhy, T dhz, bool beta_on_centroid, bool phi_on_centroid) noexcept { - Real dhx = beta*dxinv[0]*dxinv[0]; - Real dhy = beta*dxinv[1]*dxinv[1]; - Real dhz = beta*dxinv[2]*dxinv[2]; - bool beta_on_center = !(beta_on_centroid); bool phi_on_center = !( phi_on_centroid); - - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + if (flag(i,j,k).isCovered()) { - if (flag(i,j,k).isCovered()) - { - y(i,j,k,n) = Real(0.0); - } - else if (flag(i,j,k).isRegular()) - { - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) - - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) - -bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) - - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) - -bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))) - - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k ,n)) - -bZ(i,j,k ,n)*(x(i,j,k ,n) - x(i,j,k-1,n))); - } - else - { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); - Real apzm = apz(i,j,k); - Real apzp = apz(i,j,k+1); - - Real fxm = bX(i,j,k,n)*(x(i,j,k,n) - x(i-1,j,k,n)); - if (apxm != Real(0.0) && apxm != Real(1.0)) { - int jj = j + static_cast(std::copysign(Real(1.0), fcx(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0), fcx(i,j,k,1))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : Real(0.0); - if (beta_on_center && phi_on_center) - { - fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxm + - fracy*(Real(1.0)-fracz)*bX(i,jj,k ,n)*(x(i,jj,k ,n)-x(i-1,jj,k ,n)) + - fracz*(Real(1.0)-fracy)*bX(i,j ,kk,n)*(x(i,j ,kk,n)-x(i-1,j ,kk,n)) + - fracy* fracz *bX(i,jj,kk,n)*(x(i,jj,kk,n)-x(i-1,jj,kk,n)); - } - else if (beta_on_centroid && phi_on_center) - { - fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(x(i, j, k,n)-x(i-1, j, k,n)) + - fracy *(Real(1.0)-fracz)*(x(i,jj, k,n)-x(i-1,jj, k,n)) + - fracz *(Real(1.0)-fracy)*(x(i, j,kk,n)-x(i-1, j,kk,n)) + - fracy * fracz *(x(i,jj,kk,n)-x(i-1,jj,kk,n)); - fxm *= bX(i,j,k,n); - } + y(i,j,k,n) = T(0.0); + } + else if (flag(i,j,k).isRegular()) + { + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) + -bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) + - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) + -bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))) + - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k ,n)) + -bZ(i,j,k ,n)*(x(i,j,k ,n) - x(i,j,k-1,n))); + } + else + { + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + T apzm = apz(i,j,k); + T apzp = apz(i,j,k+1); + + T fxm = bX(i,j,k,n)*(x(i,j,k,n) - x(i-1,j,k,n)); + if (apxm != T(0.0) && apxm != T(1.0)) { + int jj = j + static_cast(std::copysign(T(1.0), fcx(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0), fcx(i,j,k,1))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : T(0.0); + if (beta_on_center && phi_on_center) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm + + fracy*(T(1.0)-fracz)*bX(i,jj,k ,n)*(x(i,jj,k ,n)-x(i-1,jj,k ,n)) + + fracz*(T(1.0)-fracy)*bX(i,j ,kk,n)*(x(i,j ,kk,n)-x(i-1,j ,kk,n)) + + fracy* fracz *bX(i,jj,kk,n)*(x(i,jj,kk,n)-x(i-1,jj,kk,n)); } + else if (beta_on_centroid && phi_on_center) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*(x(i, j, k,n)-x(i-1, j, k,n)) + + fracy *(T(1.0)-fracz)*(x(i,jj, k,n)-x(i-1,jj, k,n)) + + fracz *(T(1.0)-fracy)*(x(i, j,kk,n)-x(i-1, j,kk,n)) + + fracy * fracz *(x(i,jj,kk,n)-x(i-1,jj,kk,n)); + fxm *= bX(i,j,k,n); + } + } - Real fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i,j,k,n)); - if (apxp != Real(0.0) && apxp != Real(1.0)) { - int jj = j + static_cast(std::copysign(Real(1.0),fcx(i+1,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcx(i+1,j,k,1))); - Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx(i+1,j,k,0)) : Real(0.0); - Real fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) ? std::abs(fcx(i+1,j,k,1)) : Real(0.0); - if (beta_on_center && phi_on_center) - { - fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxp + - fracy*(Real(1.0)-fracz)*bX(i+1,jj,k ,n)*(x(i+1,jj,k ,n)-x(i,jj,k ,n)) + - fracz*(Real(1.0)-fracy)*bX(i+1,j ,kk,n)*(x(i+1,j ,kk,n)-x(i,j ,kk,n)) + - fracy* fracz *bX(i+1,jj,kk,n)*(x(i+1,jj,kk,n)-x(i,jj,kk,n)); - } - else if (beta_on_centroid && phi_on_center) - { - fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(x(i+1, j, k,n)-x(i, j, k,n)) + - fracy *(Real(1.0)-fracz)*(x(i+1,jj, k,n)-x(i,jj, k,n)) + - fracz *(Real(1.0)-fracy)*(x(i+1, j,kk,n)-x(i, j,kk,n)) + - fracy * fracz *(x(i+1,jj,kk,n)-x(i,jj,kk,n)); - fxp *= bX(i+1,j,k,n); - - } + T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i,j,k,n)); + if (apxp != T(0.0) && apxp != T(1.0)) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcx(i+1,j,k,1))); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx(i+1,j,k,0)) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) ? std::abs(fcx(i+1,j,k,1)) : T(0.0); + if (beta_on_center && phi_on_center) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp + + fracy*(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(x(i+1,jj,k ,n)-x(i,jj,k ,n)) + + fracz*(T(1.0)-fracy)*bX(i+1,j ,kk,n)*(x(i+1,j ,kk,n)-x(i,j ,kk,n)) + + fracy* fracz *bX(i+1,jj,kk,n)*(x(i+1,jj,kk,n)-x(i,jj,kk,n)); } + else if (beta_on_centroid && phi_on_center) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(x(i+1, j, k,n)-x(i, j, k,n)) + + fracy *(T(1.0)-fracz)*(x(i+1,jj, k,n)-x(i,jj, k,n)) + + fracz *(T(1.0)-fracy)*(x(i+1, j,kk,n)-x(i, j,kk,n)) + + fracy * fracz *(x(i+1,jj,kk,n)-x(i,jj,kk,n)); + fxp *= bX(i+1,j,k,n); - Real fym = bY(i,j,k,n)*(x(i,j,k,n) - x(i,j-1,k,n)); - if (apym != Real(0.0) && apym != Real(1.0)) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcy(i,j,k,1))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : Real(0.0); - if (beta_on_center && phi_on_center) - { - fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fym + - fracx*(Real(1.0)-fracz)*bY(ii,j,k ,n)*(x(ii,j,k ,n)-x(ii,j-1,k ,n)) + - fracz*(Real(1.0)-fracx)*bY(i ,j,kk,n)*(x(i ,j,kk,n)-x(i ,j-1,kk,n)) + - fracx* fracz *bY(ii,j,kk,n)*(x(ii,j,kk,n)-x(ii,j-1,kk,n)); - } - else if (beta_on_centroid && phi_on_center) - { - fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(x( i,j, k,n)-x( i,j-1, k,n)) + - fracx *(Real(1.0)-fracz)*(x(ii,j, k,n)-x(ii,j-1, k,n)) + - fracz *(Real(1.0)-fracx)*(x(i ,j,kk,n)-x( i,j-1,kk,n)) + - fracx * fracz *(x(ii,j,kk,n)-x(ii,j-1,kk,n)); - fym *= bY(i,j,k,n); + } + } - } + T fym = bY(i,j,k,n)*(x(i,j,k,n) - x(i,j-1,k,n)); + if (apym != T(0.0) && apym != T(1.0)) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j,k,1))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : T(0.0); + if (beta_on_center && phi_on_center) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym + + fracx*(T(1.0)-fracz)*bY(ii,j,k ,n)*(x(ii,j,k ,n)-x(ii,j-1,k ,n)) + + fracz*(T(1.0)-fracx)*bY(i ,j,kk,n)*(x(i ,j,kk,n)-x(i ,j-1,kk,n)) + + fracx* fracz *bY(ii,j,kk,n)*(x(ii,j,kk,n)-x(ii,j-1,kk,n)); } + else if (beta_on_centroid && phi_on_center) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*(x( i,j, k,n)-x( i,j-1, k,n)) + + fracx *(T(1.0)-fracz)*(x(ii,j, k,n)-x(ii,j-1, k,n)) + + fracz *(T(1.0)-fracx)*(x(i ,j,kk,n)-x( i,j-1,kk,n)) + + fracx * fracz *(x(ii,j,kk,n)-x(ii,j-1,kk,n)); + fym *= bY(i,j,k,n); - Real fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j,k,n)); - if (apyp != Real(0.0) && apyp != Real(1.0)) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j+1,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcy(i,j+1,k,1))); - Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy(i,j+1,k,0)) : Real(0.0); - Real fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) ? std::abs(fcy(i,j+1,k,1)) : Real(0.0); - if (beta_on_center && phi_on_center) - { - fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fyp + - fracx*(Real(1.0)-fracz)*bY(ii,j+1,k ,n)*(x(ii,j+1,k ,n)-x(ii,j,k ,n)) + - fracz*(Real(1.0)-fracx)*bY(i ,j+1,kk,n)*(x(i ,j+1,kk,n)-x(i ,j,kk,n)) + - fracx* fracz *bY(ii,j+1,kk,n)*(x(ii,j+1,kk,n)-x(ii,j,kk,n)); - } - else if (beta_on_centroid && phi_on_center) - { - fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(x( i,j+1, k,n)-x( i,j, k,n)) + - fracx *(Real(1.0)-fracz)*(x(ii,j+1, k,n)-x(ii,j, k,n)) + - fracz *(Real(1.0)-fracx)*(x( i,j+1,kk,n)-x( i,j,kk,n)) + - fracx * fracz *(x(ii,j+1,kk,n)-x(ii,j,kk,n)); - fyp *= bY(i,j+1,k,n); + } + } - } + T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j,k,n)); + if (apyp != T(0.0) && apyp != T(1.0)) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j+1,k,1))); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy(i,j+1,k,0)) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) ? std::abs(fcy(i,j+1,k,1)) : T(0.0); + if (beta_on_center && phi_on_center) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp + + fracx*(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(x(ii,j+1,k ,n)-x(ii,j,k ,n)) + + fracz*(T(1.0)-fracx)*bY(i ,j+1,kk,n)*(x(i ,j+1,kk,n)-x(i ,j,kk,n)) + + fracx* fracz *bY(ii,j+1,kk,n)*(x(ii,j+1,kk,n)-x(ii,j,kk,n)); } + else if (beta_on_centroid && phi_on_center) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(x( i,j+1, k,n)-x( i,j, k,n)) + + fracx *(T(1.0)-fracz)*(x(ii,j+1, k,n)-x(ii,j, k,n)) + + fracz *(T(1.0)-fracx)*(x( i,j+1,kk,n)-x( i,j,kk,n)) + + fracx * fracz *(x(ii,j+1,kk,n)-x(ii,j,kk,n)); + fyp *= bY(i,j+1,k,n); - Real fzm = bZ(i,j,k,n)*(x(i,j,k,n) - x(i,j,k-1,n)); - if (apzm != Real(0.0) && apzm != Real(1.0)) { - int ii = i + static_cast(std::copysign(Real(1.0),fcz(i,j,k,0))); - int jj = j + static_cast(std::copysign(Real(1.0),fcz(i,j,k,1))); - Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : Real(0.0); - Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : Real(0.0); - if (beta_on_center && phi_on_center) - { - fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzm + - fracx*(Real(1.0)-fracy)*bZ(ii,j ,k,n)*(x(ii,j ,k,n)-x(ii,j ,k-1,n)) + - fracy*(Real(1.0)-fracx)*bZ(i ,jj,k,n)*(x(i ,jj,k,n)-x(i ,jj,k-1,n)) + - fracx* fracy *bZ(ii,jj,k,n)*(x(ii,jj,k,n)-x(ii,jj,k-1,n)); - } - else if (beta_on_centroid && phi_on_center) - { - fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(x( i, j,k,n)-x( i, j,k-1,n)) + - fracx *(Real(1.0)-fracy)*(x(ii, j,k,n)-x(ii, j,k-1,n)) + - fracy *(Real(1.0)-fracx)*(x( i,jj,k,n)-x( i,jj,k-1,n)) + - fracx * fracy *(x(ii,jj,k,n)-x(ii,jj,k-1,n)); - fzm *= bZ(i,j,k,n); + } + } - } + T fzm = bZ(i,j,k,n)*(x(i,j,k,n) - x(i,j,k-1,n)); + if (apzm != T(0.0) && apzm != T(1.0)) { + int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k,0))); + int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k,1))); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : T(0.0); + if (beta_on_center && phi_on_center) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm + + fracx*(T(1.0)-fracy)*bZ(ii,j ,k,n)*(x(ii,j ,k,n)-x(ii,j ,k-1,n)) + + fracy*(T(1.0)-fracx)*bZ(i ,jj,k,n)*(x(i ,jj,k,n)-x(i ,jj,k-1,n)) + + fracx* fracy *bZ(ii,jj,k,n)*(x(ii,jj,k,n)-x(ii,jj,k-1,n)); } + else if (beta_on_centroid && phi_on_center) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*(x( i, j,k,n)-x( i, j,k-1,n)) + + fracx *(T(1.0)-fracy)*(x(ii, j,k,n)-x(ii, j,k-1,n)) + + fracy *(T(1.0)-fracx)*(x( i,jj,k,n)-x( i,jj,k-1,n)) + + fracx * fracy *(x(ii,jj,k,n)-x(ii,jj,k-1,n)); + fzm *= bZ(i,j,k,n); - Real fzp = bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k,n)); - if (apzp != Real(0.0) && apzp != Real(1.0)) { - int ii = i + static_cast(std::copysign(Real(1.0),fcz(i,j,k+1,0))); - int jj = j + static_cast(std::copysign(Real(1.0),fcz(i,j,k+1,1))); - Real fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) ? std::abs(fcz(i,j,k+1,0)) : Real(0.0); - Real fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) ? std::abs(fcz(i,j,k+1,1)) : Real(0.0); - if (beta_on_center && phi_on_center) - { - fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzp + - fracx*(Real(1.0)-fracy)*bZ(ii,j ,k+1,n)*(x(ii,j ,k+1,n)-x(ii,j ,k,n)) + - fracy*(Real(1.0)-fracx)*bZ(i ,jj,k+1,n)*(x(i ,jj,k+1,n)-x(i ,jj,k,n)) + - fracx* fracy *bZ(ii,jj,k+1,n)*(x(ii,jj,k+1,n)-x(ii,jj,k,n)); - } - else if (beta_on_centroid && phi_on_center) - { - fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(x( i, j,k+1,n)-x( i, j,k,n)) + - fracx *(Real(1.0)-fracy)*(x(ii, j,k+1,n)-x(ii, j,k,n)) + - fracy *(Real(1.0)-fracx)*(x( i,jj,k+1,n)-x( i,jj,k,n)) + - fracx * fracy *(x(ii,jj,k+1,n)-x(ii,jj,k,n)); - fzp *= bZ(i,j,k+1,n); + } + } - } + T fzp = bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k,n)); + if (apzp != T(0.0) && apzp != T(1.0)) { + int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k+1,0))); + int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k+1,1))); + T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) ? std::abs(fcz(i,j,k+1,0)) : T(0.0); + T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) ? std::abs(fcz(i,j,k+1,1)) : T(0.0); + if (beta_on_center && phi_on_center) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp + + fracx*(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(x(ii,j ,k+1,n)-x(ii,j ,k,n)) + + fracy*(T(1.0)-fracx)*bZ(i ,jj,k+1,n)*(x(i ,jj,k+1,n)-x(i ,jj,k,n)) + + fracx* fracy *bZ(ii,jj,k+1,n)*(x(ii,jj,k+1,n)-x(ii,jj,k,n)); } + else if (beta_on_centroid && phi_on_center) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(x( i, j,k+1,n)-x( i, j,k,n)) + + fracx *(T(1.0)-fracy)*(x(ii, j,k+1,n)-x(ii, j,k,n)) + + fracy *(T(1.0)-fracx)*(x( i,jj,k+1,n)-x( i,jj,k,n)) + + fracx * fracy *(x(ii,jj,k+1,n)-x(ii,jj,k,n)); + fzp *= bZ(i,j,k+1,n); - Real feb = Real(0.0); - if (is_dirichlet) { - Real dapx = apxm-apxp; - Real dapy = apym-apyp; - Real dapz = apzm-apzp; - Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - Real anrmz = dapz * anorminv; - - Real phib = is_inhomog ? phieb(i,j,k,n) : Real(0.0); - - Real bctx = bc(i,j,k,0); - Real bcty = bc(i,j,k,1); - Real bctz = bc(i,j,k,2); - Real dx_eb = get_dx_eb(kappa); - - Real dg = dx_eb / amrex::max(std::abs(anrmx), std::abs(anrmy), - std::abs(anrmz)); - Real gx = bctx - dg*anrmx; - Real gy = bcty - dg*anrmy; - Real gz = bctz - dg*anrmz; - Real sx = std::copysign(Real(1.0),anrmx); - Real sy = std::copysign(Real(1.0),anrmy); - Real sz = std::copysign(Real(1.0),anrmz); - int ii = i - static_cast(sx); - int jj = j - static_cast(sy); - int kk = k - static_cast(sz); - - gx = sx*gx; - gy = sy*gy; - gz = sz*gz; - Real gxy = gx*gy; - Real gxz = gx*gz; - Real gyz = gy*gz; - Real gxyz = gx*gy*gz; - Real phig = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz) * x(i ,j ,k ,n) - + (-gz - gxz - gyz - gxyz) * x(i ,j ,kk,n) - + (-gy - gxy - gyz - gxyz) * x(i ,jj,k ,n) - + (gyz + gxyz) * x(i ,jj,kk,n) - + (-gx - gxy - gxz - gxyz) * x(ii,j ,k ,n) - + (gxz + gxyz) * x(ii,j ,kk,n) - + (gxy + gxyz) * x(ii,jj,k ,n) - + (-gxyz) * x(ii,jj,kk,n); - - Real dphidn = (phib-phig)/dg; - - feb = dphidn * ba(i,j,k) * beb(i,j,k,n); } + } - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (Real(1.0)/kappa) * - (dhx*(apxm*fxm - apxp*fxp) + - dhy*(apym*fym - apyp*fyp) + - dhz*(apzm*fzm - apzp*fzp) - dhx*feb); + T feb = T(0.0); + if (is_dirichlet) { + T dapx = apxm-apxp; + T dapy = apym-apyp; + T dapz = apzm-apzp; + T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + T anrmz = dapz * anorminv; + + T phib = is_inhomog ? phieb(i,j,k,n) : T(0.0); + + T bctx = bc(i,j,k,0); + T bcty = bc(i,j,k,1); + T bctz = bc(i,j,k,2); + T dx_eb = get_dx_eb(kappa); + + T dg = dx_eb / amrex::max(std::abs(anrmx), std::abs(anrmy), + std::abs(anrmz)); + T gx = bctx - dg*anrmx; + T gy = bcty - dg*anrmy; + T gz = bctz - dg*anrmz; + T sx = std::copysign(T(1.0),anrmx); + T sy = std::copysign(T(1.0),anrmy); + T sz = std::copysign(T(1.0),anrmz); + int ii = i - static_cast(sx); + int jj = j - static_cast(sy); + int kk = k - static_cast(sz); + + gx = sx*gx; + gy = sy*gy; + gz = sz*gz; + T gxy = gx*gy; + T gxz = gx*gz; + T gyz = gy*gz; + T gxyz = gx*gy*gz; + T phig = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz) * x(i ,j ,k ,n) + + (-gz - gxz - gyz - gxyz) * x(i ,j ,kk,n) + + (-gy - gxy - gyz - gxyz) * x(i ,jj,k ,n) + + (gyz + gxyz) * x(i ,jj,kk,n) + + (-gx - gxy - gxz - gxyz) * x(ii,j ,k ,n) + + (gxz + gxyz) * x(ii,j ,kk,n) + + (gxy + gxyz) * x(ii,jj,k ,n) + + (-gxyz) * x(ii,jj,kk,n); + + T dphidn = (phib-phig)/dg; + + feb = dphidn * ba(i,j,k) * beb(i,j,k,n); } + + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * + (dhx*(apxm*fxm - apxp*fxp) + + dhy*(apym*fym - apyp*fyp) + + dhz*(apzm*fzm - apzp*fzp) - dhx*feb); + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebabeclap_adotx (Box const& box, Array4 const& y, + Array4 const& x, Array4 const& a, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, Array4 const& ccm, + Array4 const& flag, + Array4 const& vfrc, Array4 const& apx, + Array4 const& apy, Array4 const& apz, + Array4 const& fcx, Array4 const& fcy, + Array4 const& fcz, Array4 const& ba, + Array4 const& bc, Array4 const& beb, + bool is_dirichlet, Array4 const& phieb, + bool is_inhomog, GpuArray const& dxinv, + T alpha, T beta, int ncomp, + bool beta_on_centroid, bool phi_on_centroid) noexcept +{ + T dhx = beta*dxinv[0]*dxinv[0]; + T dhy = beta*dxinv[1]*dxinv[1]; + T dhz = beta*dxinv[2]*dxinv[2]; + + bool beta_on_center = !(beta_on_centroid); + bool phi_on_center = !( phi_on_centroid); + + amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + { + mlebabeclap_adotx(i, j, k, n, + y, x, a, + bX, bY, bZ, + ccm, flag, vfrc, + apx, apy, apz, + fcx, fcy, fcz, + ba, bc, beb, + is_dirichlet, phieb, + is_inhomog, alpha, + dhx, dhy, dhz, + beta_on_centroid, phi_on_centroid); }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_ebflux (int i, int j, int k, int n, - Array4 const& feb, - Array4 const& x, + Array4 const& feb, + Array4 const& x, Array4 const& flag, - Array4 const& vfrc, - Array4 const& apx, - Array4 const& apy, - Array4 const& apz, - Array4 const& bc, - Array4 const& beb, - Array4 const& phieb, + Array4 const& vfrc, + Array4 const& apx, + Array4 const& apy, + Array4 const& apz, + Array4 const& bc, + Array4 const& beb, + Array4 const& phieb, bool is_inhomog, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv) noexcept { - Real dhx = dxinv[0]; + T dhx = dxinv[0]; if (!flag(i,j,k).isSingleValued()) { - feb(i,j,k,n) = Real(0.0); + feb(i,j,k,n) = T(0.0); } else { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); - Real apzm = apz(i,j,k); - Real apzp = apz(i,j,k+1); - - Real dapx = apxm-apxp; - Real dapy = apym-apyp; - Real dapz = apzm-apzp; - Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - Real anrmz = dapz * anorminv; - - Real phib = is_inhomog ? phieb(i,j,k,n) : Real(0.0); - - Real bctx = bc(i,j,k,0); - Real bcty = bc(i,j,k,1); - Real bctz = bc(i,j,k,2); - Real dx_eb = get_dx_eb(kappa); - - Real dg = dx_eb / amrex::max(std::abs(anrmx), std::abs(anrmy), std::abs(anrmz)); - Real gx = bctx - dg*anrmx; - Real gy = bcty - dg*anrmy; - Real gz = bctz - dg*anrmz; - Real sx = std::copysign(Real(1.0),anrmx); - Real sy = std::copysign(Real(1.0),anrmy); - Real sz = std::copysign(Real(1.0),anrmz); + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + T apzm = apz(i,j,k); + T apzp = apz(i,j,k+1); + + T dapx = apxm-apxp; + T dapy = apym-apyp; + T dapz = apzm-apzp; + T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + T anrmz = dapz * anorminv; + + T phib = is_inhomog ? phieb(i,j,k,n) : T(0.0); + + T bctx = bc(i,j,k,0); + T bcty = bc(i,j,k,1); + T bctz = bc(i,j,k,2); + T dx_eb = get_dx_eb(kappa); + + T dg = dx_eb / amrex::max(std::abs(anrmx), std::abs(anrmy), std::abs(anrmz)); + T gx = bctx - dg*anrmx; + T gy = bcty - dg*anrmy; + T gz = bctz - dg*anrmz; + T sx = std::copysign(T(1.0),anrmx); + T sy = std::copysign(T(1.0),anrmy); + T sz = std::copysign(T(1.0),anrmz); int ii = i - static_cast(sx); int jj = j - static_cast(sy); int kk = k - static_cast(sz); @@ -524,11 +594,11 @@ void mlebabeclap_ebflux (int i, int j, int k, int n, gx = sx*gx; gy = sy*gy; gz = sz*gz; - Real gxy = gx*gy; - Real gxz = gx*gz; - Real gyz = gy*gz; - Real gxyz = gx*gy*gz; - Real phig = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz) * x(i ,j ,k ,n) + T gxy = gx*gy; + T gxz = gx*gz; + T gyz = gy*gz; + T gxyz = gx*gy*gz; + T phig = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz) * x(i ,j ,k ,n) + (-gz - gxz - gyz - gxyz) * x(i ,j ,kk,n) + (-gy - gxy - gyz - gxyz) * x(i ,jj,k ,n) + (gyz + gxyz) * x(i ,jj,kk,n) @@ -537,373 +607,410 @@ void mlebabeclap_ebflux (int i, int j, int k, int n, + (gxy + gxyz) * x(ii,jj,k ,n) + (-gxyz) * x(ii,jj,kk,n); - Real dphidn = dhx*(phib-phig)/dg; + T dphidn = dhx*(phib-phig)/dg; feb(i,j,k,n) = -beb(i,j,k,n) * dphidn; } } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_gsrb (Box const& box, - Array4 const& phi, Array4 const& rhs, - Real alpha, Array4 const& a, - Real dhx, Real dhy, Real dhz, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, +void mlebabeclap_gsrb (int i, int j, int k, int n, + Array4 const& phi, Array4 const& rhs, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, Array4 const& m0, Array4 const& m2, Array4 const& m4, Array4 const& m1, Array4 const& m3, Array4 const& m5, - Array4 const& f0, Array4 const& f2, - Array4 const& f4, - Array4 const& f1, Array4 const& f3, - Array4 const& f5, - Array4 const& ccm, Array4 const& beb, + Array4 const& f0, Array4 const& f2, + Array4 const& f4, + Array4 const& f1, Array4 const& f3, + Array4 const& f5, + Array4 const& ccm, Array4 const& beb, EBData const& ebdata, bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, - Box const& vbox, int redblack, int ncomp) noexcept + Box const& vbox, int redblack) noexcept { - constexpr Real omega = 1.15; - - const auto vlo = amrex::lbound(vbox); - const auto vhi = amrex::ubound(vbox); - -// amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept - // amrex::Loop here causes gcc 8 to crash. - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); - for (int n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) + constexpr T omega = 1.15; + if ((i+j+k+redblack) % 2 == 0) { - if ((i+j+k+redblack) % 2 == 0) + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + auto const flag = ebdata.get(i,j,k); + if (flag.isCovered()) { - auto const flag = ebdata.get(i,j,k); - if (flag.isCovered()) + phi(i,j,k,n) = T(0.0); + } + else + { + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z,n) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? f3(vhi.x,j,k,n) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k,n) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? f5(i,j,vhi.z,n) : T(0.0); + + if (flag.isRegular()) { - phi(i,j,k,n) = Real(0.0); + T gamma = alpha*a(i,j,k) + + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n)) + + dhy*(bY(i,j+1,k,n) + bY(i,j,k,n)) + + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n)); + + T rho = dhx*(bX(i+1,j ,k ,n)*phi(i+1,j ,k ,n) + + bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) + + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + + bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) + + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + + bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); + + T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) + + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += omega*res/(gamma-delta); } else { - Real cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) - ? f0(vlo.x,j,k,n) : Real(0.0); - Real cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) - ? f1(i,vlo.y,k,n) : Real(0.0); - Real cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) - ? f2(i,j,vlo.z,n) : Real(0.0); - Real cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) - ? f3(vhi.x,j,k,n) : Real(0.0); - Real cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) - ? f4(i,vhi.y,k,n) : Real(0.0); - Real cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) - ? f5(i,j,vhi.z,n) : Real(0.0); - - if (flag.isRegular()) - { - Real gamma = alpha*a(i,j,k) - + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n)) - + dhy*(bY(i,j+1,k,n) + bY(i,j,k,n)) - + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n)); - - Real rho = dhx*(bX(i+1,j ,k ,n)*phi(i+1,j ,k ,n) + - bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) - + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + - bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) - + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + - bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); - - Real delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) - + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) - + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); - - Real res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += omega*res/(gamma-delta); + T kappa = ebdata.get(i,j,k); + T apxm = ebdata.get(i ,j ,k ); + T apxp = ebdata.get(i+1,j ,k ); + T apym = ebdata.get(i ,j ,k ); + T apyp = ebdata.get(i ,j+1,k ); + T apzm = ebdata.get(i ,j ,k ); + T apzp = ebdata.get(i ,j ,k+1); + + T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); + T oxm = -bX(i,j,k,n)*cf0; + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0)) { + auto fcx0 = ebdata.get(i,j,k,0); + auto fcx1 = ebdata.get(i,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0), fcx0)); + int kk = k + static_cast(std::copysign(T(1.0), fcx1)); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) + ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) + ? std::abs(fcx1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm + + fracy *(T(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n)) + +(T(1.0)-fracy)* fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n)) + + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*( -phi(i-1, j, k,n)) + + fracy *(T(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n)) + +(T(1.0)-fracy)* fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n)) + + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + fxm *= bX(i,j,k,n); + + } + oxm = T(0.0); + sxm = (T(1.0)-fracy)*(T(1.0)-fracz)*sxm; } - else - { - Real kappa = ebdata.get(i,j,k); - Real apxm = ebdata.get(i ,j ,k ); - Real apxp = ebdata.get(i+1,j ,k ); - Real apym = ebdata.get(i ,j ,k ); - Real apyp = ebdata.get(i ,j+1,k ); - Real apzm = ebdata.get(i ,j ,k ); - Real apzp = ebdata.get(i ,j ,k+1); - - Real fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); - Real oxm = -bX(i,j,k,n)*cf0; - Real sxm = bX(i,j,k,n); - if (apxm != Real(0.0) && apxm != Real(1.0)) { - auto fcx0 = ebdata.get(i,j,k,0); - auto fcx1 = ebdata.get(i,j,k,1); - int jj = j + static_cast(std::copysign(Real(1.0), fcx0)); - int kk = k + static_cast(std::copysign(Real(1.0), fcx1)); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) - ? std::abs(fcx0) : Real(0.0); - Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) - ? std::abs(fcx1) : Real(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxm - + fracy *(Real(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n)) - +(Real(1.0)-fracy)* fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n)) - + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*( -phi(i-1, j, k,n)) - + fracy *(Real(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n)) - +(Real(1.0)-fracy)* fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n)) - + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); - fxm *= bX(i,j,k,n); - - } - oxm = Real(0.0); - sxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxm; + + T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); + T oxp = bX(i+1,j,k,n)*cf3; + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0)) { + auto fcx0 = ebdata.get(i+1,j,k,0); + auto fcx1 = ebdata.get(i+1,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0),fcx0)); + int kk = k + static_cast(std::copysign(T(1.0),fcx1)); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) + ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) + ? std::abs(fcx1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp + + fracy *(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n)) + +(T(1.0)-fracy)* fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n)) + + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(phi(i+1, j, k,n) ) + + fracy *(T(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) + + fracz *(T(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) + + fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + fxp *= bX(i+1,j,k,n); + + } + + oxp = T(0.0); + sxp = (T(1.0)-fracy)*(T(1.0)-fracz)*sxp; + } + + T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); + T oym = -bY(i,j,k,n)*cf1; + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0)) { + auto fcy0 = ebdata.get(i,j,k,0); + auto fcy1 = ebdata.get(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) + ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) + ? std::abs(fcy1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym + + fracx *(T(1.0)-fracz)*bY(ii,j,k ,n)*(phi(ii,j,k ,n)-phi(ii,j-1,k ,n)) + + (T(1.0)-fracx)* fracz *bY(i ,j,kk,n)*(phi(i ,j,kk,n)-phi(i ,j-1,kk,n)) + + fracx * fracz *bY(ii,j,kk,n)*(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); } + else if (beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*( -phi( i,j-1, k,n)) + + fracx *(T(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n)) + + (T(1.0)-fracx)* fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n)) + + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); + fym *= bY(i,j,k,n); - Real fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); - Real oxp = bX(i+1,j,k,n)*cf3; - Real sxp = -bX(i+1,j,k,n); - if (apxp != Real(0.0) && apxp != Real(1.0)) { - auto fcx0 = ebdata.get(i+1,j,k,0); - auto fcx1 = ebdata.get(i+1,j,k,1); - int jj = j + static_cast(std::copysign(Real(1.0),fcx0)); - int kk = k + static_cast(std::copysign(Real(1.0),fcx1)); - Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) - ? std::abs(fcx0) : Real(0.0); - Real fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) - ? std::abs(fcx1) : Real(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxp - + fracy *(Real(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n)) - +(Real(1.0)-fracy)* fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n)) - + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(phi(i+1, j, k,n) ) + - fracy *(Real(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) + - fracz *(Real(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) + - fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); - fxp *= bX(i+1,j,k,n); - - } - - oxp = Real(0.0); - sxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxp; } + oym = T(0.0); + sym = (T(1.0)-fracx)*(T(1.0)-fracz)*sym; + } - Real fym = -bY(i,j,k,n)*phi(i,j-1,k,n); - Real oym = -bY(i,j,k,n)*cf1; - Real sym = bY(i,j,k,n); - if (apym != Real(0.0) && apym != Real(1.0)) { - auto fcy0 = ebdata.get(i,j,k,0); - auto fcy1 = ebdata.get(i,j,k,1); - int ii = i + static_cast(std::copysign(Real(1.0),fcy0)); - int kk = k + static_cast(std::copysign(Real(1.0),fcy1)); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) - ? std::abs(fcy0) : Real(0.0); - Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) - ? std::abs(fcy1) : Real(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fym - + fracx *(Real(1.0)-fracz)*bY(ii,j,k ,n)*(phi(ii,j,k ,n)-phi(ii,j-1,k ,n)) - + (Real(1.0)-fracx)* fracz *bY(i ,j,kk,n)*(phi(i ,j,kk,n)-phi(i ,j-1,kk,n)) - + fracx * fracz *bY(ii,j,kk,n)*(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*( -phi( i,j-1, k,n)) - + fracx *(Real(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n)) - + (Real(1.0)-fracx)* fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n)) - + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); - fym *= bY(i,j,k,n); - - } - oym = Real(0.0); - sym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*sym; + T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); + T oyp = bY(i,j+1,k,n)*cf4; + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0)) { + auto fcy0 = ebdata.get(i,j+1,k,0); + auto fcy1 = ebdata.get(i,j+1,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) + ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) + ? std::abs(fcy1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp + + fracx *(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n)) + + (T(1.0)-fracx)* fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n)) + + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); } + else if (beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(phi( i,j+1, k,n) ) + + fracx *(T(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n)) + + (T(1.0)-fracx)* fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n)) + + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); + fyp *= bY(i,j+1,k,n); - Real fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); - Real oyp = bY(i,j+1,k,n)*cf4; - Real syp = -bY(i,j+1,k,n); - if (apyp != Real(0.0) && apyp != Real(1.0)) { - auto fcy0 = ebdata.get(i,j+1,k,0); - auto fcy1 = ebdata.get(i,j+1,k,1); - int ii = i + static_cast(std::copysign(Real(1.0),fcy0)); - int kk = k + static_cast(std::copysign(Real(1.0),fcy1)); - Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) - ? std::abs(fcy0) : Real(0.0); - Real fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) - ? std::abs(fcy1) : Real(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fyp - + fracx *(Real(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n)) - + (Real(1.0)-fracx)* fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n)) - + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(phi( i,j+1, k,n) ) - + fracx *(Real(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n)) - + (Real(1.0)-fracx)* fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n)) - + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); - fyp *= bY(i,j+1,k,n); - - } - oyp = Real(0.0); - syp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*syp; } + oyp = T(0.0); + syp = (T(1.0)-fracx)*(T(1.0)-fracz)*syp; + } - Real fzm = -bZ(i,j,k,n)*phi(i,j,k-1,n); - Real ozm = -bZ(i,j,k,n)*cf2; - Real szm = bZ(i,j,k,n); - if (apzm != Real(0.0) && apzm != Real(1.0)) { - auto fcz0 = ebdata.get(i,j,k,0); - auto fcz1 = ebdata.get(i,j,k,1); - int ii = i + static_cast(std::copysign(Real(1.0),fcz0)); - int jj = j + static_cast(std::copysign(Real(1.0),fcz1)); - Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) - ? std::abs(fcz0) : Real(0.0); - Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) - ? std::abs(fcz1) : Real(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzm - + fracx *(Real(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) - +(Real(1.0)-fracx)* fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n)) - + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*( -phi( i, j,k-1,n)) - + fracx *(Real(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) - + (Real(1.0)-fracx)* fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n)) - + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); - fzm *= bZ(i,j,k,n); - - } - ozm = Real(0.0); - szm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szm; + T fzm = -bZ(i,j,k,n)*phi(i,j,k-1,n); + T ozm = -bZ(i,j,k,n)*cf2; + T szm = bZ(i,j,k,n); + if (apzm != T(0.0) && apzm != T(1.0)) { + auto fcz0 = ebdata.get(i,j,k,0); + auto fcz1 = ebdata.get(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) + ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) + ? std::abs(fcz1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm + + fracx *(T(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) + +(T(1.0)-fracx)* fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n)) + + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); } + else if (beta_on_centroid && !phi_on_centroid) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*( -phi( i, j,k-1,n)) + + fracx *(T(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) + + (T(1.0)-fracx)* fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n)) + + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + fzm *= bZ(i,j,k,n); - Real fzp = bZ(i,j,k+1,n)*phi(i,j,k+1,n); - Real ozp = bZ(i,j,k+1,n)*cf5; - Real szp = -bZ(i,j,k+1,n); - if (apzp != Real(0.0) && apzp != Real(1.0)) { - auto fcz0 = ebdata.get(i,j,k+1,0); - auto fcz1 = ebdata.get(i,j,k+1,1); - int ii = i + static_cast(std::copysign(Real(1.0),fcz0)); - int jj = j + static_cast(std::copysign(Real(1.0),fcz1)); - Real fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) - ? std::abs(fcz0) : Real(0.0); - Real fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) - ? std::abs(fcz1) : Real(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzp - + fracx *(Real(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n)) - + (Real(1.0)-fracx)* fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n)) - + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(phi( i, j,k+1,n) ) - + fracx *(Real(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n)) - + (Real(1.0)-fracx)* fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n)) - + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); - fzp *= bZ(i,j,k+1,n); - - } - ozp = Real(0.0); - szp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szp; } + ozm = T(0.0); + szm = (T(1.0)-fracx)*(T(1.0)-fracy)*szm; + } - Real vfrcinv = Real(1.0)/kappa; - Real gamma = alpha*a(i,j,k) + vfrcinv * - (dhx*(apxm*sxm-apxp*sxp) + - dhy*(apym*sym-apyp*syp) + - dhz*(apzm*szm-apzp*szp)); - - Real rho = -vfrcinv * - (dhx*(apxm*fxm-apxp*fxp) + - dhy*(apym*fym-apyp*fyp) + - dhz*(apzm*fzm-apzp*fzp)); - - Real delta = -vfrcinv * - (dhx*(apxm*oxm-apxp*oxp) + - dhy*(apym*oym-apyp*oyp) + - dhz*(apzm*ozm-apzp*ozp)); - - if (is_dirichlet) { - Real dapx = apxm-apxp; - Real dapy = apym-apyp; - Real dapz = apzm-apzp; - Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - Real anrmz = dapz * anorminv; - Real bctx = ebdata.get(i,j,k,0); - Real bcty = ebdata.get(i,j,k,1); - Real bctz = ebdata.get(i,j,k,2); - Real dx_eb = get_dx_eb(kappa); - - Real dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), - std::abs(anrmz)); - - Real gx = bctx - dg*anrmx; - Real gy = bcty - dg*anrmy; - Real gz = bctz - dg*anrmz; - Real sx = std::copysign(Real(1.0),anrmx); - Real sy = std::copysign(Real(1.0),anrmy); - Real sz = std::copysign(Real(1.0),anrmz); - int ii = i - static_cast(sx); - int jj = j - static_cast(sy); - int kk = k - static_cast(sz); - - gx *= sx; - gy *= sy; - gz *= sz; - Real gxy = gx*gy; - Real gxz = gx*gz; - Real gyz = gy*gz; - Real gxyz = gx*gy*gz; - Real phig_gamma = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); - Real phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n) - + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n) - + (gyz + gxyz) * phi(i,jj,kk,n) - + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n) - + (gxz + gxyz) * phi(ii,j,kk,n) - + (gxy + gxyz) * phi(ii,jj,k,n) - + (-gxyz) * phi(ii,jj,kk,n); - - Real ba = ebdata.get(i,j,k); - - Real dphidn = ( -phig)/dg; - Real feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); - gamma += vfrcinv*(-dhx)*feb_gamma; - Real feb = dphidn * ba * beb(i,j,k,n); - rho += -vfrcinv*(-dhx)*feb; + T fzp = bZ(i,j,k+1,n)*phi(i,j,k+1,n); + T ozp = bZ(i,j,k+1,n)*cf5; + T szp = -bZ(i,j,k+1,n); + if (apzp != T(0.0) && apzp != T(1.0)) { + auto fcz0 = ebdata.get(i,j,k+1,0); + auto fcz1 = ebdata.get(i,j,k+1,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) + ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) + ? std::abs(fcz1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp + + fracx *(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n)) + + (T(1.0)-fracx)* fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n)) + + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); } + else if (beta_on_centroid && !phi_on_centroid) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(phi( i, j,k+1,n) ) + + fracx *(T(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n)) + + (T(1.0)-fracx)* fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n)) + + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + fzp *= bZ(i,j,k+1,n); - Real res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += omega*res/(gamma-delta); + } + ozp = T(0.0); + szp = (T(1.0)-fracx)*(T(1.0)-fracy)*szp; } + + T vfrcinv = T(1.0)/kappa; + T gamma = alpha*a(i,j,k) + vfrcinv * + (dhx*(apxm*sxm-apxp*sxp) + + dhy*(apym*sym-apyp*syp) + + dhz*(apzm*szm-apzp*szp)); + + T rho = -vfrcinv * + (dhx*(apxm*fxm-apxp*fxp) + + dhy*(apym*fym-apyp*fyp) + + dhz*(apzm*fzm-apzp*fzp)); + + T delta = -vfrcinv * + (dhx*(apxm*oxm-apxp*oxp) + + dhy*(apym*oym-apyp*oyp) + + dhz*(apzm*ozm-apzp*ozp)); + + if (is_dirichlet) { + T dapx = apxm-apxp; + T dapy = apym-apyp; + T dapz = apzm-apzp; + T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + T anrmz = dapz * anorminv; + T bctx = ebdata.get(i,j,k,0); + T bcty = ebdata.get(i,j,k,1); + T bctz = ebdata.get(i,j,k,2); + T dx_eb = get_dx_eb(kappa); + + T dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), + std::abs(anrmz)); + + T gx = bctx - dg*anrmx; + T gy = bcty - dg*anrmy; + T gz = bctz - dg*anrmz; + T sx = std::copysign(T(1.0),anrmx); + T sy = std::copysign(T(1.0),anrmy); + T sz = std::copysign(T(1.0),anrmz); + int ii = i - static_cast(sx); + int jj = j - static_cast(sy); + int kk = k - static_cast(sz); + + gx *= sx; + gy *= sy; + gz *= sz; + T gxy = gx*gy; + T gxz = gx*gz; + T gyz = gy*gz; + T gxyz = gx*gy*gz; + T phig_gamma = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); + T phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n) + + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n) + + (gyz + gxyz) * phi(i,jj,kk,n) + + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n) + + (gxz + gxyz) * phi(ii,j,kk,n) + + (gxy + gxyz) * phi(ii,jj,k,n) + + (-gxyz) * phi(ii,jj,kk,n); + + T ba = ebdata.get(i,j,k); + + T dphidn = ( -phig)/dg; + T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); + gamma += vfrcinv*(-dhx)*feb_gamma; + T feb = dphidn * ba * beb(i,j,k,n); + rho += -vfrcinv*(-dhx)*feb; + } + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += omega*res/(gamma-delta); } } + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebabeclap_gsrb (Box const& box, + Array4 const& phi, Array4 const& rhs, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, + Array4 const& m0, Array4 const& m2, + Array4 const& m4, + Array4 const& m1, Array4 const& m3, + Array4 const& m5, + Array4 const& f0, Array4 const& f2, + Array4 const& f4, + Array4 const& f1, Array4 const& f3, + Array4 const& f5, + Array4 const& ccm, Array4 const& beb, + EBData const& ebdata, + bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, + Box const& vbox, int redblack, int ncomp) noexcept +{ +// amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + // amrex::Loop here causes gcc 8 to crash. + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + for (int n = 0; n < ncomp; ++n) { + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) + { + mlebabeclap_gsrb(i, j, k, n, + phi, rhs, + alpha, a, + dhx, dhy, dhz, + bX, bY, bZ, + m0, m2, m4, + m1, m3, m5, + f0, f2, f4, + f1, f3, f5, + ccm, beb, + ebdata, + is_dirichlet, beta_on_centroid, phi_on_centroid, + vbox, redblack); + }}}} // }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4 const& apx, - Array4 const& fcx, Array4 const& sol, - Array4 const& bX, Array4 const& ccm, - Real dhx, int face_only, int ncomp, Box const& xbox, +void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4 const& apx, + Array4 const& fcx, Array4 const& sol, + Array4 const& bX, Array4 const& ccm, + T dhx, int face_only, int ncomp, Box const& xbox, bool beta_on_centroid, bool phi_on_centroid) noexcept { int lof = xbox.smallEnd(0); @@ -911,28 +1018,28 @@ void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4(std::copysign(Real(1.0), fcx(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0), fcx(i,j,k,1))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : Real(0.0); + T fxm = bX(i,j,k,n)*(sol(i,j,k,n) - sol(i-1,j,k,n)); + int jj = j + static_cast(std::copysign(T(1.0), fcx(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0), fcx(i,j,k,1))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxm + - fracy *(Real(1.0)-fracz)*bX(i,jj,k ,n)*(sol(i,jj,k ,n)-sol(i-1,jj,k ,n)) + - fracz *(Real(1.0)-fracy)*bX(i,j ,kk,n)*(sol(i,j ,kk,n)-sol(i-1,j ,kk,n)) + + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm + + fracy *(T(1.0)-fracz)*bX(i,jj,k ,n)*(sol(i,jj,k ,n)-sol(i-1,jj,k ,n)) + + fracz *(T(1.0)-fracy)*bX(i,j ,kk,n)*(sol(i,j ,kk,n)-sol(i-1,j ,kk,n)) + fracy* fracz *bX(i,jj,kk,n)*(sol(i,jj,kk,n)-sol(i-1,jj,kk,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(sol(i, j, k,n)-sol(i-1, j, k,n)) + - fracy *(Real(1.0)-fracz)*(sol(i,jj, k,n)-sol(i-1,jj, k,n)) + - fracz *(Real(1.0)-fracy)*(sol(i, j,kk,n)-sol(i-1, j,kk,n)) + + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*(sol(i, j, k,n)-sol(i-1, j, k,n)) + + fracy *(T(1.0)-fracz)*(sol(i,jj, k,n)-sol(i-1,jj, k,n)) + + fracz *(T(1.0)-fracy)*(sol(i, j,kk,n)-sol(i-1, j,kk,n)) + fracy* fracz *(sol(i,jj,kk,n)-sol(i-1,jj,kk,n)); fxm *= bX(i,j,k,n); @@ -943,11 +1050,12 @@ void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4 const& apy, - Array4 const& fcy, Array4 const& sol, - Array4 const& bY, Array4 const& ccm, - Real dhy, int face_only, int ncomp, Box const& ybox, +void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4 const& apy, + Array4 const& fcy, Array4 const& sol, + Array4 const& bY, Array4 const& ccm, + T dhy, int face_only, int ncomp, Box const& ybox, bool beta_on_centroid, bool phi_on_centroid) noexcept { int lof = ybox.smallEnd(1); @@ -955,28 +1063,28 @@ void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4(std::copysign(Real(1.0),fcy(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcy(i,j,k,1))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : Real(0.0); + T fym = bY(i,j,k,n)*(sol(i,j,k,n) - sol(i,j-1,k,n)); + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j,k,1))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fym + - fracx *(Real(1.0)-fracz)*bY(ii,j,k ,n)*(sol(ii,j,k ,n)-sol(ii,j-1,k ,n)) + - fracz *(Real(1.0)-fracx)*bY(i ,j,kk,n)*(sol(i ,j,kk,n)-sol(i ,j-1,kk,n)) + + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym + + fracx *(T(1.0)-fracz)*bY(ii,j,k ,n)*(sol(ii,j,k ,n)-sol(ii,j-1,k ,n)) + + fracz *(T(1.0)-fracx)*bY(i ,j,kk,n)*(sol(i ,j,kk,n)-sol(i ,j-1,kk,n)) + fracx * fracz *bY(ii,j,kk,n)*(sol(ii,j,kk,n)-sol(ii,j-1,kk,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(sol( i,j, k,n)-sol( i,j-1, k,n)) + - fracx *(Real(1.0)-fracz)*(sol(ii,j, k,n)-sol(ii,j-1, k,n)) + - fracz *(Real(1.0)-fracx)*(sol( i,j,kk,n)-sol( i,j-1,kk,n)) + + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*(sol( i,j, k,n)-sol( i,j-1, k,n)) + + fracx *(T(1.0)-fracz)*(sol(ii,j, k,n)-sol(ii,j-1, k,n)) + + fracz *(T(1.0)-fracx)*(sol( i,j,kk,n)-sol( i,j-1,kk,n)) + fracx * fracz *(sol(ii,j,kk,n)-sol(ii,j-1,kk,n)); fym *= bY(i,j,k,n); } @@ -987,11 +1095,12 @@ void mlebabeclap_flux_y (Box const& box, Array4 const& fy, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_z (Box const& box, Array4 const& fz, Array4 const& apz, - Array4 const& fcz, Array4 const& sol, - Array4 const& bZ, Array4 const& ccm, - Real dhz, int face_only, int ncomp, Box const& zbox, +void mlebabeclap_flux_z (Box const& box, Array4 const& fz, Array4 const& apz, + Array4 const& fcz, Array4 const& sol, + Array4 const& bZ, Array4 const& ccm, + T dhz, int face_only, int ncomp, Box const& zbox, bool beta_on_centroid, bool phi_on_centroid) noexcept { int lof = zbox.smallEnd(2); @@ -999,31 +1108,30 @@ void mlebabeclap_flux_z (Box const& box, Array4 const& fz, Array4(std::copysign(Real(1.0),fcz(i,j,k,0))); - int jj = j + static_cast(std::copysign(Real(1.0),fcz(i,j,k,1))); - Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : Real(0.0); - Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : Real(0.0); + T fzm = bZ(i,j,k,n)*(sol(i,j,k,n) - sol(i,j,k-1,n)); + int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k,0))); + int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k,1))); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : T(0.0); if (!beta_on_centroid && !phi_on_centroid) { - fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzm + - fracx*(Real(1.0)-fracy)*bZ(ii,j ,k,n)*(sol(ii,j ,k,n)-sol(ii,j ,k-1,n)) + - fracy*(Real(1.0)-fracx)*bZ(i ,jj,k,n)*(sol(i ,jj,k,n)-sol(i ,jj,k-1,n)) + - fracx* fracy *bZ(ii,jj,k,n)*(sol(ii,jj,k,n)-sol(ii,jj,k-1,n)); + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm + + fracx*(T(1.0)-fracy)*bZ(ii,j ,k,n)*(sol(ii,j ,k,n)-sol(ii,j ,k-1,n)) + + fracy*(T(1.0)-fracx)*bZ(i ,jj,k,n)*(sol(i ,jj,k,n)-sol(i ,jj,k-1,n)) + + fracx* fracy *bZ(ii,jj,k,n)*(sol(ii,jj,k,n)-sol(ii,jj,k-1,n)); } else if (beta_on_centroid && !phi_on_centroid) { - fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(sol( i, j,k,n)-sol( i, j,k-1,n)) + - fracx *(Real(1.0)-fracy)*(sol(ii, j,k,n)-sol(ii, j,k-1,n)) + - fracy *(Real(1.0)-fracx)*(sol( i,jj,k,n)-sol( i,jj,k-1,n)) + - fracx * fracy *(sol(ii,jj,k,n)-sol(ii,jj,k-1,n)); + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*(sol( i, j,k,n)-sol( i, j,k-1,n)) + + fracx *(T(1.0)-fracy)*(sol(ii, j,k,n)-sol(ii, j,k-1,n)) + + fracy *(T(1.0)-fracx)*(sol( i,jj,k,n)-sol( i,jj,k-1,n)) + + fracx * fracy *(sol(ii,jj,k,n)-sol(ii,jj,k-1,n)); fzm *= bZ(i,j,k,n); - } fz(i,j,k,n) = -dhz*fzm; @@ -1032,18 +1140,19 @@ void mlebabeclap_flux_z (Box const& box, Array4 const& fz, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_x_0 (Box const& box, Array4 const& fx, Array4 const& apx, - Array4 const& sol, Array4 const& bX, - Real dhx, int face_only, int ncomp, Box const& xbox) noexcept +void mlebabeclap_flux_x_0 (Box const& box, Array4 const& fx, Array4 const& apx, + Array4 const& sol, Array4 const& bX, + T dhx, int face_only, int ncomp, Box const& xbox) noexcept { int lof = xbox.smallEnd(0); int hif = xbox.bigEnd(0); amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { if (!face_only || lof == i || hif == i) { - if (apx(i,j,k) == Real(0.0)) { - fx(i,j,k,n) = Real(0.0); + if (apx(i,j,k) == T(0.0)) { + fx(i,j,k,n) = T(0.0); } else { fx(i,j,k,n) = -dhx*bX(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n)); } @@ -1051,18 +1160,19 @@ void mlebabeclap_flux_x_0 (Box const& box, Array4 const& fx, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_y_0 (Box const& box, Array4 const& fy, Array4 const& apy, - Array4 const& sol, Array4 const& bY, - Real dhy, int face_only, int ncomp, Box const& ybox) noexcept +void mlebabeclap_flux_y_0 (Box const& box, Array4 const& fy, Array4 const& apy, + Array4 const& sol, Array4 const& bY, + T dhy, int face_only, int ncomp, Box const& ybox) noexcept { int lof = ybox.smallEnd(1); int hif = ybox.bigEnd(1); amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { if (!face_only || lof == j || hif == j) { - if (apy(i,j,k) == Real(0.0)) { - fy(i,j,k,n) = Real(0.0); + if (apy(i,j,k) == T(0.0)) { + fy(i,j,k,n) = T(0.0); } else { fy(i,j,k,n) = -dhy*bY(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n)); } @@ -1070,18 +1180,19 @@ void mlebabeclap_flux_y_0 (Box const& box, Array4 const& fy, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_flux_z_0 (Box const& box, Array4 const& fz, Array4 const& apz, - Array4 const& sol, Array4 const& bZ, - Real dhz, int face_only, int ncomp, Box const& zbox) noexcept +void mlebabeclap_flux_z_0 (Box const& box, Array4 const& fz, Array4 const& apz, + Array4 const& sol, Array4 const& bZ, + T dhz, int face_only, int ncomp, Box const& zbox) noexcept { int lof = zbox.smallEnd(2); int hif = zbox.bigEnd(2); amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { if (!face_only || lof == k || hif == k) { - if (apz(i,j,k) == Real(0.0)) { - fz(i,j,k,n) = Real(0.0); + if (apz(i,j,k) == T(0.0)) { + fz(i,j,k,n) = T(0.0); } else { fz(i,j,k,n) = -dhz*bZ(i,j,k,n)*(sol(i,j,k,n)-sol(i,j,k-1,n)); } @@ -1089,89 +1200,92 @@ void mlebabeclap_flux_z_0 (Box const& box, Array4 const& fz, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_x (Box const& box, Array4 const& gx, Array4 const& sol, - Array4 const& apx, Array4 const& fcx, +void mlebabeclap_grad_x (Box const& box, Array4 const& gx, Array4 const& sol, + Array4 const& apx, Array4 const& fcx, Array4 const& ccm, - Real dxi, int ncomp, bool phi_on_centroid) noexcept + T dxi, int ncomp, bool phi_on_centroid) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apx(i,j,k) == Real(0.0)) { - gx(i,j,k,n) = Real(0.0); - } else if (apx(i,j,k) == Real(1.0)) { + if (apx(i,j,k) == T(0.0)) { + gx(i,j,k,n) = T(0.0); + } else if (apx(i,j,k) == T(1.0)) { gx(i,j,k,n) = dxi*(sol(i,j,k,n)-sol(i-1,j,k,n)); } else { - Real gxm = (sol(i,j,k,n) - sol(i-1,j,k,n)); - int jj = j + static_cast(std::copysign(Real(1.0), fcx(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0), fcx(i,j,k,1))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : Real(0.0); + T gxm = (sol(i,j,k,n) - sol(i-1,j,k,n)); + int jj = j + static_cast(std::copysign(T(1.0), fcx(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0), fcx(i,j,k,1))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : T(0.0); if (!phi_on_centroid) { - gxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*gxm + - fracy*(Real(1.0)-fracz)*(sol(i,jj,k ,n)-sol(i-1,jj,k ,n)) + - fracz*(Real(1.0)-fracy)*(sol(i,j ,kk,n)-sol(i-1,j ,kk,n)) + - fracy* fracz *(sol(i,jj,kk,n)-sol(i-1,jj,kk,n)); + gxm = (T(1.0)-fracy)*(T(1.0)-fracz)*gxm + + fracy*(T(1.0)-fracz)*(sol(i,jj,k ,n)-sol(i-1,jj,k ,n)) + + fracz*(T(1.0)-fracy)*(sol(i,j ,kk,n)-sol(i-1,j ,kk,n)) + + fracy* fracz *(sol(i,jj,kk,n)-sol(i-1,jj,kk,n)); } gx(i,j,k,n) = dxi*gxm; } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_y (Box const& box, Array4 const& gy, Array4 const& sol, - Array4 const& apy, Array4 const& fcy, +void mlebabeclap_grad_y (Box const& box, Array4 const& gy, Array4 const& sol, + Array4 const& apy, Array4 const& fcy, Array4 const& ccm, - Real dyi, int ncomp, bool phi_on_centroid) noexcept + T dyi, int ncomp, bool phi_on_centroid) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apy(i,j,k) == Real(0.0)) { - gy(i,j,k,n) = Real(0.0); - } else if (apy(i,j,k) == Real(1.0)) { + if (apy(i,j,k) == T(0.0)) { + gy(i,j,k,n) = T(0.0); + } else if (apy(i,j,k) == T(1.0)) { gy(i,j,k,n) = dyi*(sol(i,j,k,n)-sol(i,j-1,k,n)); } else { - Real gym = (sol(i,j,k,n) - sol(i,j-1,k,n)); - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcy(i,j,k,1))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : Real(0.0); + T gym = (sol(i,j,k,n) - sol(i,j-1,k,n)); + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j,k,1))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : T(0.0); if (!phi_on_centroid) { - gym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*gym + - fracx*(Real(1.0)-fracz)*(sol(ii,j,k ,n)-sol(ii,j-1,k ,n)) + - fracz*(Real(1.0)-fracx)*(sol(i ,j,kk,n)-sol(i ,j-1,kk,n)) + - fracx* fracz *(sol(ii,j,kk,n)-sol(ii,j-1,kk,n)); + gym = (T(1.0)-fracx)*(T(1.0)-fracz)*gym + + fracx*(T(1.0)-fracz)*(sol(ii,j,k ,n)-sol(ii,j-1,k ,n)) + + fracz*(T(1.0)-fracx)*(sol(i ,j,kk,n)-sol(i ,j-1,kk,n)) + + fracx* fracz *(sol(ii,j,kk,n)-sol(ii,j-1,kk,n)); } gy(i,j,k,n) = dyi*gym; } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_z (Box const& box, Array4 const& gz, Array4 const& sol, - Array4 const& apz, Array4 const& fcz, +void mlebabeclap_grad_z (Box const& box, Array4 const& gz, Array4 const& sol, + Array4 const& apz, Array4 const& fcz, Array4 const& ccm, - Real dzi, int ncomp, bool phi_on_centroid) noexcept + T dzi, int ncomp, bool phi_on_centroid) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apz(i,j,k) == Real(0.0)) { - gz(i,j,k,n) = Real(0.0); - } else if (apz(i,j,k) == Real(1.0)) { + if (apz(i,j,k) == T(0.0)) { + gz(i,j,k,n) = T(0.0); + } else if (apz(i,j,k) == T(1.0)) { gz(i,j,k,n) = dzi*(sol(i,j,k,n)-sol(i,j,k-1,n)); } else { - Real gzm = (sol(i,j,k,n) - sol(i,j,k-1,n)); - int ii = i + static_cast(std::copysign(Real(1.0),fcz(i,j,k,0))); - int jj = j + static_cast(std::copysign(Real(1.0),fcz(i,j,k,1))); - Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : Real(0.0); - Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : Real(0.0); + T gzm = (sol(i,j,k,n) - sol(i,j,k-1,n)); + int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k,0))); + int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k,1))); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : T(0.0); if (!phi_on_centroid) { - gzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*gzm + - fracx*(Real(1.0)-fracy)*(sol(ii,j ,k,n)-sol(ii,j ,k-1,n)) + - fracy*(Real(1.0)-fracx)*(sol(i ,jj,k,n)-sol(i ,jj,k-1,n)) + + gzm = (T(1.0)-fracx)*(T(1.0)-fracy)*gzm + + fracx*(T(1.0)-fracy)*(sol(ii,j ,k,n)-sol(ii,j ,k-1,n)) + + fracy*(T(1.0)-fracx)*(sol(i ,jj,k,n)-sol(i ,jj,k-1,n)) + fracx* fracy *(sol(ii,jj,k,n)-sol(ii,jj,k-1,n)); } gz(i,j,k,n) = dzi*gzm; @@ -1179,192 +1293,226 @@ void mlebabeclap_grad_z (Box const& box, Array4 const& gz, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_x_0 (Box const& box, Array4 const& gx, Array4 const& sol, - Array4 const& apx, Real dxi, int ncomp) noexcept +void mlebabeclap_grad_x_0 (Box const& box, Array4 const& gx, Array4 const& sol, + Array4 const& apx, T dxi, int ncomp) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apx(i,j,k) == Real(0.0)) { - gx(i,j,k,n) = Real(0.0); + if (apx(i,j,k) == T(0.0)) { + gx(i,j,k,n) = T(0.0); } else { gx(i,j,k,n) = dxi*(sol(i,j,k,n)-sol(i-1,j,k,n)); } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_y_0 (Box const& box, Array4 const& gy, Array4 const& sol, - Array4 const& apy, Real dyi, int ncomp) noexcept +void mlebabeclap_grad_y_0 (Box const& box, Array4 const& gy, Array4 const& sol, + Array4 const& apy, T dyi, int ncomp) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apy(i,j,k) == Real(0.0)) { - gy(i,j,k,n) = Real(0.0); + if (apy(i,j,k) == T(0.0)) { + gy(i,j,k,n) = T(0.0); } else { gy(i,j,k,n) = dyi*(sol(i,j,k,n)-sol(i,j-1,k,n)); } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_grad_z_0 (Box const& box, Array4 const& gz, Array4 const& sol, - Array4 const& apz, Real dzi, int ncomp) noexcept +void mlebabeclap_grad_z_0 (Box const& box, Array4 const& gz, Array4 const& sol, + Array4 const& apz, T dzi, int ncomp) noexcept { amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept { - if (apz(i,j,k) == Real(0.0)) { - gz(i,j,k,n) = Real(0.0); + if (apz(i,j,k) == T(0.0)) { + gz(i,j,k,n) = T(0.0); } else { gz(i,j,k,n) = dzi*(sol(i,j,k,n)-sol(i,j,k-1,n)); } }); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_normalize (Box const& box, Array4 const& phi, - Real alpha, Array4 const& a, - Real dhx, Real dhy, Real dhz, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, +void mlebabeclap_normalize (int i, int j, int k, int n, Array4 const& phi, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, Array4 const& ccm, Array4 const& flag, - Array4 const& vfrc, - Array4 const& apx, Array4 const& apy, - Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, - Array4 const& ba, Array4 const& bc, - Array4 const& beb, - bool is_dirichlet, bool beta_on_centroid, int ncomp) noexcept + Array4 const& vfrc, + Array4 const& apx, Array4 const& apy, + Array4 const& apz, + Array4 const& fcx, Array4 const& fcy, + Array4 const& fcz, + Array4 const& ba, Array4 const& bc, + Array4 const& beb, + bool is_dirichlet, bool beta_on_centroid) noexcept { - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + if (flag(i,j,k).isRegular()) { - if (flag(i,j,k).isRegular()) - { - phi(i,j,k,n) /= alpha*a(i,j,k) + dhx*(bX(i,j,k,n) + bX(i+1,j,k,n)) - + dhy*(bY(i,j,k,n) + bY(i,j+1,k,n)) - + dhz*(bZ(i,j,k,n) + bZ(i,j,k+1,n)); + phi(i,j,k,n) /= alpha*a(i,j,k) + dhx*(bX(i,j,k,n) + bX(i+1,j,k,n)) + + dhy*(bY(i,j,k,n) + bY(i,j+1,k,n)) + + dhz*(bZ(i,j,k,n) + bZ(i,j,k+1,n)); + } + else if (flag(i,j,k).isSingleValued()) + { + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + T apzm = apz(i,j,k); + T apzp = apz(i,j,k+1); + + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0) && !beta_on_centroid) { + int jj = j + static_cast(std::copysign(T(1.0), fcx(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0), fcx(i,j,k,1))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) + ? std::abs(fcx(i,j,k,0)) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) + ? std::abs(fcx(i,j,k,1)) : T(0.0); + sxm = (T(1.0)-fracy)*(T(1.0)-fracz)*sxm; } - else if (flag(i,j,k).isSingleValued()) - { - Real kappa = vfrc(i,j,k); - Real apxm = apx(i,j,k); - Real apxp = apx(i+1,j,k); - Real apym = apy(i,j,k); - Real apyp = apy(i,j+1,k); - Real apzm = apz(i,j,k); - Real apzp = apz(i,j,k+1); - - Real sxm = bX(i,j,k,n); - if (apxm != Real(0.0) && apxm != Real(1.0) && !beta_on_centroid) { - int jj = j + static_cast(std::copysign(Real(1.0), fcx(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0), fcx(i,j,k,1))); - Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) - ? std::abs(fcx(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) - ? std::abs(fcx(i,j,k,1)) : Real(0.0); - sxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxm; - } - - Real sxp = -bX(i+1,j,k,n); - if (apxp != Real(0.0) && apxp != Real(1.0) && !beta_on_centroid) { - int jj = j + static_cast(std::copysign(Real(1.0),fcx(i+1,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcx(i+1,j,k,1))); - Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) - ? std::abs(fcx(i+1,j,k,0)) : Real(0.0); - Real fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) - ? std::abs(fcx(i+1,j,k,1)) : Real(0.0); - sxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxp; - } - Real sym = bY(i,j,k,n); - if (apym != Real(0.0) && apym != Real(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcy(i,j,k,1))); - Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) - ? std::abs(fcy(i,j,k,0)) : Real(0.0); - Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) - ? std::abs(fcy(i,j,k,1)) : Real(0.0); - sym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*sym; - } + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0) && !beta_on_centroid) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcx(i+1,j,k,1))); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) + ? std::abs(fcx(i+1,j,k,0)) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) + ? std::abs(fcx(i+1,j,k,1)) : T(0.0); + sxp = (T(1.0)-fracy)*(T(1.0)-fracz)*sxp; + } - Real syp = -bY(i,j+1,k,n); - if (apyp != Real(0.0) && apyp != Real(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(Real(1.0),fcy(i,j+1,k,0))); - int kk = k + static_cast(std::copysign(Real(1.0),fcy(i,j+1,k,1))); - Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) - ? std::abs(fcy(i,j+1,k,0)) : Real(0.0); - Real fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) - ? std::abs(fcy(i,j+1,k,1)) : Real(0.0); - syp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*syp; - } + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j,k,1))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) + ? std::abs(fcy(i,j,k,0)) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) + ? std::abs(fcy(i,j,k,1)) : T(0.0); + sym = (T(1.0)-fracx)*(T(1.0)-fracz)*sym; + } - Real szm = bZ(i,j,k,n); - if (apzm != Real(0.0) && apzm != Real(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(Real(1.0),fcz(i,j,k,0))); - int jj = j + static_cast(std::copysign(Real(1.0),fcz(i,j,k,1))); - Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) - ? std::abs(fcz(i,j,k,0)) : Real(0.0); - Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) - ? std::abs(fcz(i,j,k,1)) : Real(0.0); - szm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szm; - } + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k,0))); + int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j+1,k,1))); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) + ? std::abs(fcy(i,j+1,k,0)) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) + ? std::abs(fcy(i,j+1,k,1)) : T(0.0); + syp = (T(1.0)-fracx)*(T(1.0)-fracz)*syp; + } - Real szp = -bZ(i,j,k+1,n); - if (apzp != Real(0.0) && apzp != Real(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(Real(1.0),fcz(i,j,k+1,0))); - int jj = j + static_cast(std::copysign(Real(1.0),fcz(i,j,k+1,1))); - Real fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) - ? std::abs(fcz(i,j,k+1,0)) : Real(0.0); - Real fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) - ? std::abs(fcz(i,j,k+1,1)) : Real(0.0); - szp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szp; - } + T szm = bZ(i,j,k,n); + if (apzm != T(0.0) && apzm != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k,0))); + int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k,1))); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) + ? std::abs(fcz(i,j,k,0)) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) + ? std::abs(fcz(i,j,k,1)) : T(0.0); + szm = (T(1.0)-fracx)*(T(1.0)-fracy)*szm; + } - Real vfrcinv = Real(1.0)/kappa; - Real gamma = alpha*a(i,j,k) + vfrcinv * - (dhx*(apxm*sxm-apxp*sxp) + - dhy*(apym*sym-apyp*syp) + - dhz*(apzm*szm-apzp*szp)); - - if (is_dirichlet) { - Real dapx = apxm-apxp; - Real dapy = apym-apyp; - Real dapz = apzm-apzp; - Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); - Real anorminv = Real(1.0)/anorm; - Real anrmx = dapx * anorminv; - Real anrmy = dapy * anorminv; - Real anrmz = dapz * anorminv; - Real bctx = bc(i,j,k,0); - Real bcty = bc(i,j,k,1); - Real bctz = bc(i,j,k,2); - Real dx_eb = get_dx_eb(vfrc(i,j,k)); - - Real dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), - std::abs(anrmz)); - - Real gx = bctx - dg*anrmx; - Real gy = bcty - dg*anrmy; - Real gz = bctz - dg*anrmz; - Real sx = std::copysign(Real(1.0),anrmx); - Real sy = std::copysign(Real(1.0),anrmy); - Real sz = std::copysign(Real(1.0),anrmz); - - gx *= sx; - gy *= sy; - gz *= sz; - Real gxy = gx*gy; - Real gxz = gx*gz; - Real gyz = gy*gz; - Real gxyz = gx*gy*gz; - Real phig_gamma = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); - Real feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); - gamma += vfrcinv*(-dhx)*feb_gamma; - } + T szp = -bZ(i,j,k+1,n); + if (apzp != T(0.0) && apzp != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k+1,0))); + int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k+1,1))); + T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) + ? std::abs(fcz(i,j,k+1,0)) : T(0.0); + T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) + ? std::abs(fcz(i,j,k+1,1)) : T(0.0); + szp = (T(1.0)-fracx)*(T(1.0)-fracy)*szp; + } - phi(i,j,k,n) /= gamma; + T vfrcinv = T(1.0)/kappa; + T gamma = alpha*a(i,j,k) + vfrcinv * + (dhx*(apxm*sxm-apxp*sxp) + + dhy*(apym*sym-apyp*syp) + + dhz*(apzm*szm-apzp*szp)); + + if (is_dirichlet) { + T dapx = apxm-apxp; + T dapy = apym-apyp; + T dapz = apzm-apzp; + T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + T anrmz = dapz * anorminv; + T bctx = bc(i,j,k,0); + T bcty = bc(i,j,k,1); + T bctz = bc(i,j,k,2); + T dx_eb = get_dx_eb(vfrc(i,j,k)); + + T dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), + std::abs(anrmz)); + + T gx = bctx - dg*anrmx; + T gy = bcty - dg*anrmy; + T gz = bctz - dg*anrmz; + T sx = std::copysign(T(1.0),anrmx); + T sy = std::copysign(T(1.0),anrmy); + T sz = std::copysign(T(1.0),anrmz); + + gx *= sx; + gy *= sy; + gz *= sz; + T gxy = gx*gy; + T gxz = gx*gz; + T gyz = gy*gz; + T gxyz = gx*gy*gz; + T phig_gamma = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); + T feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); + gamma += vfrcinv*(-dhx)*feb_gamma; } + + phi(i,j,k,n) /= gamma; + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebabeclap_normalize (Box const& box, Array4 const& phi, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, + Array4 const& ccm, Array4 const& flag, + Array4 const& vfrc, + Array4 const& apx, Array4 const& apy, + Array4 const& apz, + Array4 const& fcx, Array4 const& fcy, + Array4 const& fcz, + Array4 const& ba, Array4 const& bc, + Array4 const& beb, + bool is_dirichlet, bool beta_on_centroid, int ncomp) noexcept +{ + amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + { + mlebabeclap_normalize(i, j, k, n, phi, + alpha, a, + dhx, dhy, dhz, + bX, bY, bZ, + ccm, flag, + vfrc, + apx, apy, apz, + fcx, fcy, fcz, + ba, bc, + beb, + is_dirichlet, beta_on_centroid); }); } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 940bfa045a2..62ded4543e8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -37,8 +37,6 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const int ncomp = getNComp(); - Array4 foo; - const Real ascalar = m_a_scalar; const Real bscalar = m_b_scalar; @@ -57,97 +55,168 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const bool extdir_y = !(m_geom[amrlev][mglev].isPeriodic(1));, const bool extdir_z = !(m_geom[amrlev][mglev].isPeriodic(2));); - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling().SetDynamic(true); } -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(out, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - Array4 const& xfab = in.const_array(mfi); - Array4 const& yfab = out.array(mfi); - Array4 const& afab = acoef.const_array(mfi); - AMREX_D_TERM(Array4 const& bxfab = bxcoef.const_array(mfi);, - Array4 const& byfab = bycoef.const_array(mfi);, - Array4 const& bzfab = bzcoef.const_array(mfi);); - - auto fabtyp = (flags) ? (*flags)[mfi].getType(bx) : FabType::regular; - - if (fabtyp == FabType::covered) { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n, +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && in.isFusingCandidate()) { + MultiArray4 foo; + const auto& xma = in.arrays(); + const auto& yma = out.arrays(); + const auto& ama = acoef.const_arrays(); + AMREX_D_TERM(const auto& bxma = bxcoef.const_arrays();, + const auto& byma = bycoef.const_arrays();, + const auto& bzma = bzcoef.const_arrays();); + auto const& ccmma = ccmask.const_arrays(); + auto const& flagma = flags->const_arrays(); + auto const& vfracma = vfrac->const_arrays(); + AMREX_D_TERM(auto const& apxma = area[0]->const_arrays();, + auto const& apyma = area[1]->const_arrays();, + auto const& apzma = area[2]->const_arrays();); + AMREX_D_TERM(auto const& fcxma = fcent[0]->const_arrays();, + auto const& fcyma = fcent[1]->const_arrays();, + auto const& fczma = fcent[2]->const_arrays();); + auto const& bama = barea->const_arrays(); + auto const& bcma = bcent->const_arrays(); + auto const& ccentma = ccent->const_arrays(); + auto const& bebma = (is_eb_dirichlet) + ? m_eb_b_coeffs[amrlev][mglev]->const_arrays() : foo; + auto const& phiebma = (is_eb_dirichlet && is_eb_inhomog) + ? m_eb_phi[amrlev]->const_arrays() : foo; + + bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); + bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); + + bool treat_phi_as_on_centroid = ( phi_on_centroid && (mglev == 0) ); + + Real dhx = bscalar*dxinvarr[0]*dxinvarr[0]; + Real dhy = bscalar*dxinvarr[1]*dxinvarr[1]; + Real dhz = bscalar*dxinvarr[2]*dxinvarr[2]; + if (treat_phi_as_on_centroid) { + amrex::ParallelFor(out, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept { - yfab(i,j,k,n) = 0.0; + mlebabeclap_adotx_centroid(i,j,k,n, yma[box_no], xma[box_no], ama[box_no], + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + flagma[box_no], vfracma[box_no], + AMREX_D_DECL(apxma[box_no],apyma[box_no],apzma[box_no]), + AMREX_D_DECL(fcxma[box_no],fcyma[box_no],fczma[box_no]), + ccentma[box_no], bama[box_no], bcma[box_no], + bebma[box_no], phiebma[box_no], + AMREX_D_DECL(domlo_x, domlo_y, domlo_z), + AMREX_D_DECL(domhi_x, domhi_y, domhi_z), + AMREX_D_DECL(extdir_x, extdir_y, extdir_z), + is_eb_dirichlet, is_eb_inhomog, + ascalar, dhx, dhy, dhz); }); - } else if (fabtyp == FabType::regular) { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n, + } else { + amrex::ParallelFor(out, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept { - mlabeclap_adotx(i,j,k,n, yfab, xfab, afab, - AMREX_D_DECL(bxfab,byfab,bzfab), - dxinvarr, ascalar, bscalar); + mlebabeclap_adotx(i,j,k,n, yma[box_no], xma[box_no], ama[box_no], + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + ccmma[box_no], flagma[box_no], vfracma[box_no], + AMREX_D_DECL(apxma[box_no],apyma[box_no],apzma[box_no]), + AMREX_D_DECL(fcxma[box_no],fcyma[box_no],fczma[box_no]), + bama[box_no], bcma[box_no], bebma[box_no], + is_eb_dirichlet, phiebma[box_no], + is_eb_inhomog, ascalar, dhx, dhy, dhz, + beta_on_centroid, phi_on_centroid); }); - } else { - Array4 const& ccmfab = ccmask.const_array(mfi); - Array4 const& flagfab = flags->const_array(mfi); - Array4 const& vfracfab = vfrac->const_array(mfi); - AMREX_D_TERM(Array4 const& apxfab = area[0]->const_array(mfi);, - Array4 const& apyfab = area[1]->const_array(mfi);, - Array4 const& apzfab = area[2]->const_array(mfi);); - AMREX_D_TERM(Array4 const& fcxfab = fcent[0]->const_array(mfi);, - Array4 const& fcyfab = fcent[1]->const_array(mfi);, - Array4 const& fczfab = fcent[2]->const_array(mfi);); - Array4 const& bafab = barea->const_array(mfi); - Array4 const& bcfab = bcent->const_array(mfi); - Array4 const& ccfab = ccent->const_array(mfi); - Array4 const& bebfab = (is_eb_dirichlet) - ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo; - Array4 const& phiebfab = (is_eb_dirichlet && is_eb_inhomog) - ? m_eb_phi[amrlev]->const_array(mfi) : foo; - - bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); - bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); - - bool treat_phi_as_on_centroid = ( phi_on_centroid && (mglev == 0) ); - - if (treat_phi_as_on_centroid) { + } + } else +#endif + { + Array4 foo; + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling().SetDynamic(true); } +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(out, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& xfab = in.const_array(mfi); + Array4 const& yfab = out.array(mfi); + Array4 const& afab = acoef.const_array(mfi); + AMREX_D_TERM(Array4 const& bxfab = bxcoef.const_array(mfi);, + Array4 const& byfab = bycoef.const_array(mfi);, + Array4 const& bzfab = bzcoef.const_array(mfi);); + + auto fabtyp = (flags) ? (*flags)[mfi].getType(bx) : FabType::regular; + + if (fabtyp == FabType::covered) { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n, + { + yfab(i,j,k,n) = 0.0; + }); + } else if (fabtyp == FabType::regular) { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n, + { + mlabeclap_adotx(i,j,k,n, yfab, xfab, afab, + AMREX_D_DECL(bxfab,byfab,bzfab), + dxinvarr, ascalar, bscalar); + }); + } else { + Array4 const& ccmfab = ccmask.const_array(mfi); + Array4 const& flagfab = flags->const_array(mfi); + Array4 const& vfracfab = vfrac->const_array(mfi); + AMREX_D_TERM(Array4 const& apxfab = area[0]->const_array(mfi);, + Array4 const& apyfab = area[1]->const_array(mfi);, + Array4 const& apzfab = area[2]->const_array(mfi);); + AMREX_D_TERM(Array4 const& fcxfab = fcent[0]->const_array(mfi);, + Array4 const& fcyfab = fcent[1]->const_array(mfi);, + Array4 const& fczfab = fcent[2]->const_array(mfi);); + Array4 const& bafab = barea->const_array(mfi); + Array4 const& bcfab = bcent->const_array(mfi); + Array4 const& ccfab = ccent->const_array(mfi); + Array4 const& bebfab = (is_eb_dirichlet) + ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo; + Array4 const& phiebfab = (is_eb_dirichlet && is_eb_inhomog) + ? m_eb_phi[amrlev]->const_array(mfi) : foo; + + bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); + bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); + + bool treat_phi_as_on_centroid = ( phi_on_centroid && (mglev == 0) ); + + if (treat_phi_as_on_centroid) { #ifdef AMREX_USE_HIP - // This causes an abort in HIP 4.5 but works in earlier versions - // A follow-up release should fix this. - // Error message: - // lld: error: ran out of registers during register allocation - amrex::Abort("MLEBABecLap::Fapply: phi on centroid not supported for HIP"); - amrex::ignore_unused(AMREX_D_DECL(domlo_x, domlo_y, domlo_z), - AMREX_D_DECL(domhi_x, domhi_y, domhi_z), - AMREX_D_DECL(extdir_x, extdir_y, extdir_z)); - amrex::ignore_unused(ccfab); + // This causes an abort in HIP 4.5 but works in earlier versions + // A follow-up release should fix this. + // Error message: + // lld: error: ran out of registers during register allocation + amrex::Abort("MLEBABecLap::Fapply: phi on centroid not supported for HIP"); + amrex::ignore_unused(AMREX_D_DECL(domlo_x, domlo_y, domlo_z), + AMREX_D_DECL(domhi_x, domhi_y, domhi_z), + AMREX_D_DECL(extdir_x, extdir_y, extdir_z)); + amrex::ignore_unused(ccfab); #else - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, - { - mlebabeclap_adotx_centroid(tbx, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), - flagfab, vfracfab, - AMREX_D_DECL(apxfab,apyfab,apzfab), - AMREX_D_DECL(fcxfab,fcyfab,fczfab), - ccfab, bafab, bcfab, bebfab, phiebfab, - AMREX_D_DECL(domlo_x, domlo_y, domlo_z), - AMREX_D_DECL(domhi_x, domhi_y, domhi_z), - AMREX_D_DECL(extdir_x, extdir_y, extdir_z), - is_eb_dirichlet, is_eb_inhomog, dxinvarr, - ascalar, bscalar, ncomp); - }); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + { + mlebabeclap_adotx_centroid(tbx, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), + flagfab, vfracfab, + AMREX_D_DECL(apxfab,apyfab,apzfab), + AMREX_D_DECL(fcxfab,fcyfab,fczfab), + ccfab, bafab, bcfab, bebfab, phiebfab, + AMREX_D_DECL(domlo_x, domlo_y, domlo_z), + AMREX_D_DECL(domhi_x, domhi_y, domhi_z), + AMREX_D_DECL(extdir_x, extdir_y, extdir_z), + is_eb_dirichlet, is_eb_inhomog, dxinvarr, + ascalar, bscalar, ncomp); + }); #endif - } else { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, - { - mlebabeclap_adotx(tbx, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), - ccmfab, flagfab, vfracfab, - AMREX_D_DECL(apxfab,apyfab,apzfab), - AMREX_D_DECL(fcxfab,fcyfab,fczfab), - bafab, bcfab, bebfab, - is_eb_dirichlet, - phiebfab, - is_eb_inhomog, dxinvarr, - ascalar, bscalar, ncomp, beta_on_centroid, phi_on_centroid); - }); + } else { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + { + mlebabeclap_adotx(tbx, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), + ccmfab, flagfab, vfracfab, + AMREX_D_DECL(apxfab,apyfab,apzfab), + AMREX_D_DECL(fcxfab,fcyfab,fczfab), + bafab, bcfab, bebfab, + is_eb_dirichlet, + phiebfab, + is_eb_inhomog, dxinvarr, + ascalar, bscalar, ncomp, beta_on_centroid, phi_on_centroid); + }); + } } } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H index 53d36b58840..63be3b0dcfc 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_K.H @@ -17,14 +17,15 @@ namespace amrex { // note that the mask in these functions is different from masks in bndry registers // 1 means valid data, 0 means invalid data +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_apply_bc_x (int side, Box const& box, int blen, - Array4 const& phi, + Array4 const& phi, Array4 const& mask, - Array4 const& area, - BoundCond bct, Real bcl, - Array4 const& bcval, - int maxorder, Real dxinv, int inhomog, int icomp) noexcept + Array4 const& area, + BoundCond bct, T bcl, + Array4 const& bcval, + int maxorder, T dxinv, int inhomog, int icomp) noexcept { const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); @@ -56,10 +57,10 @@ void mlebabeclap_apply_bc_x (int side, Box const& box, int blen, case AMREX_LO_DIRICHLET: { const int NX = amrex::min(blen+1, maxorder); - GpuArray x{-bcl * dxinv, Real(0.5), Real(1.5), Real(2.5)}; - Array2D coef{}; + GpuArray x{-bcl * dxinv, T(0.5), T(1.5), T(2.5)}; + Array2D coef{}; for (int r = 0; r <= maxorder-2; ++r) { - poly_interp_coeff(-Real(0.5), x.data(), r+2, &(coef(0,r))); + poly_interp_coeff(-T(0.5), x.data(), r+2, &(coef(0,r))); } for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { @@ -67,10 +68,10 @@ void mlebabeclap_apply_bc_x (int side, Box const& box, int blen, int order = 1; bool has_cutfaces = false; for (int r = 0; r <= NX-2; ++r) { - Real a = area(i+(1-side)+s*r,j,k); - if (a > Real(0.0)) { + T a = area(i+(1-side)+s*r,j,k); + if (a > T(0.0)) { ++order; - if (a < Real(1.0)) { + if (a < T(1.0)) { has_cutfaces = true; } } else { @@ -82,10 +83,10 @@ void mlebabeclap_apply_bc_x (int side, Box const& box, int blen, if (inhomog) { phi(i,j,k,icomp) = bcval(i,j,k,icomp); } else { - phi(i,j,k,icomp) = Real(0.0); + phi(i,j,k,icomp) = T(0.0); } } else { - Real tmp = Real(0.0); + T tmp = T(0.0); for (int m = 1; m < order; ++m) { tmp += phi(i+m*s,j,k,icomp) * coef(m,order-2); } @@ -103,14 +104,15 @@ void mlebabeclap_apply_bc_x (int side, Box const& box, int blen, } } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_apply_bc_y (int side, Box const& box, int blen, - Array4 const& phi, + Array4 const& phi, Array4 const& mask, - Array4 const& area, - BoundCond bct, Real bcl, - Array4 const& bcval, - int maxorder, Real dyinv, int inhomog, int icomp) noexcept + Array4 const& area, + BoundCond bct, T bcl, + Array4 const& bcval, + int maxorder, T dyinv, int inhomog, int icomp) noexcept { const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); @@ -142,10 +144,10 @@ void mlebabeclap_apply_bc_y (int side, Box const& box, int blen, case AMREX_LO_DIRICHLET: { const int NX = amrex::min(blen+1, maxorder); - GpuArray x{-bcl * dyinv, Real(0.5), Real(1.5), Real(2.5)}; - Array2D coef{}; + GpuArray x{-bcl * dyinv, T(0.5), T(1.5), T(2.5)}; + Array2D coef{}; for (int r = 0; r <= maxorder-2; ++r) { - poly_interp_coeff(-Real(0.5), x.data(), r+2, &(coef(0,r))); + poly_interp_coeff(-T(0.5), x.data(), r+2, &(coef(0,r))); } for (int k = lo.z; k <= hi.z; ++k) { for (int i = lo.x; i <= hi.x; ++i) { @@ -153,10 +155,10 @@ void mlebabeclap_apply_bc_y (int side, Box const& box, int blen, int order = 1; bool has_cutfaces = false; for (int r = 0; r <= NX-2; ++r) { - Real a = area(i,j+(1-side)+s*r,k); - if (a > Real(0.0)) { + T a = area(i,j+(1-side)+s*r,k); + if (a > T(0.0)) { ++order; - if (a < Real(1.0)) { + if (a < T(1.0)) { has_cutfaces = true; } } else { @@ -168,10 +170,10 @@ void mlebabeclap_apply_bc_y (int side, Box const& box, int blen, if (inhomog) { phi(i,j,k,icomp) = bcval(i,j,k,icomp); } else { - phi(i,j,k,icomp) = Real(0.0); + phi(i,j,k,icomp) = T(0.0); } } else { - Real tmp = Real(0.0); + T tmp = T(0.0); for (int m = 1; m < order; ++m) { tmp += phi(i,j+m*s,k,icomp) * coef(m,order-2); } @@ -189,14 +191,15 @@ void mlebabeclap_apply_bc_y (int side, Box const& box, int blen, } } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_apply_bc_z (int side, Box const& box, int blen, - Array4 const& phi, + Array4 const& phi, Array4 const& mask, - Array4 const& area, - BoundCond bct, Real bcl, - Array4 const& bcval, - int maxorder, Real dzinv, int inhomog, int icomp) noexcept + Array4 const& area, + BoundCond bct, T bcl, + Array4 const& bcval, + int maxorder, T dzinv, int inhomog, int icomp) noexcept { const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); @@ -228,10 +231,10 @@ void mlebabeclap_apply_bc_z (int side, Box const& box, int blen, case AMREX_LO_DIRICHLET: { const int NX = amrex::min(blen+1, maxorder); - GpuArray x{-bcl * dzinv, Real(0.5), Real(1.5), Real(2.5)}; - Array2D coef{}; + GpuArray x{-bcl * dzinv, T(0.5), T(1.5), T(2.5)}; + Array2D coef{}; for (int r = 0; r <= maxorder-2; ++r) { - poly_interp_coeff(-Real(0.5), x.data(), r+2, &(coef(0,r))); + poly_interp_coeff(-T(0.5), x.data(), r+2, &(coef(0,r))); } for (int j = lo.y; j <= hi.y; ++j) { for (int i = lo.x; i <= hi.x; ++i) { @@ -239,10 +242,10 @@ void mlebabeclap_apply_bc_z (int side, Box const& box, int blen, int order = 1; bool has_cutfaces = false; for (int r = 0; r <= NX-2; ++r) { - Real a = area(i,j,k+(1-side)+s*r); - if (a > Real(0.0)) { + T a = area(i,j,k+(1-side)+s*r); + if (a > T(0.0)) { ++order; - if (a < Real(1.0)) { + if (a < T(1.0)) { has_cutfaces = true; } } else { @@ -254,10 +257,10 @@ void mlebabeclap_apply_bc_z (int side, Box const& box, int blen, if (inhomog) { phi(i,j,k,icomp) = bcval(i,j,k,icomp); } else { - phi(i,j,k,icomp) = Real(0.0); + phi(i,j,k,icomp) = T(0.0); } } else { - Real tmp = Real(0.0); + T tmp = T(0.0); for (int m = 1; m < order; ++m) { tmp += phi(i,j,k+m*s,icomp) * coef(m,order-2); } @@ -275,14 +278,15 @@ void mlebabeclap_apply_bc_z (int side, Box const& box, int blen, } } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_apply_bc_x (int side, int i, int j, int k, int blen, - Array4 const& phi, + Array4 const& phi, Array4 const& mask, - Array4 const& area, - BoundCond bct, Real bcl, - Array4 const& bcval, - int maxorder, Real dxinv, int inhomog, int icomp) noexcept + Array4 const& area, + BoundCond bct, T bcl, + Array4 const& bcval, + int maxorder, T dxinv, int inhomog, int icomp) noexcept { const int s = 1-2*side; // +1 for lo and -1 for hi switch (bct) { @@ -303,19 +307,19 @@ void mlebabeclap_apply_bc_x (int side, int i, int j, int k, int blen, case AMREX_LO_DIRICHLET: { const int NX = amrex::min(blen+1, maxorder); - GpuArray x{-bcl * dxinv, Real(0.5), Real(1.5), Real(2.5)}; - Array2D coef{}; + GpuArray x{-bcl * dxinv, T(0.5), T(1.5), T(2.5)}; + Array2D coef{}; for (int r = 0; r <= maxorder-2; ++r) { - poly_interp_coeff(-Real(0.5), x.data(), r+2, &(coef(0,r))); + poly_interp_coeff(-T(0.5), x.data(), r+2, &(coef(0,r))); } if (mask(i,j,k) == 0 && mask(i+s,j,k) == 1) { int order = 1; bool has_cutfaces = false; for (int r = 0; r <= NX-2; ++r) { - Real a = area(i+(1-side)+s*r,j,k); - if (a > Real(0.0)) { + T a = area(i+(1-side)+s*r,j,k); + if (a > T(0.0)) { ++order; - if (a < Real(1.0)) { + if (a < T(1.0)) { has_cutfaces = true; } } else { @@ -327,10 +331,10 @@ void mlebabeclap_apply_bc_x (int side, int i, int j, int k, int blen, if (inhomog) { phi(i,j,k,icomp) = bcval(i,j,k,icomp); } else { - phi(i,j,k,icomp) = Real(0.0); + phi(i,j,k,icomp) = T(0.0); } } else { - Real tmp = Real(0.0); + T tmp = T(0.0); for (int m = 1; m < order; ++m) { tmp += phi(i+m*s,j,k,icomp) * coef(m,order-2); } @@ -346,14 +350,15 @@ void mlebabeclap_apply_bc_x (int side, int i, int j, int k, int blen, } } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_apply_bc_y (int side, int i, int j, int k, int blen, - Array4 const& phi, + Array4 const& phi, Array4 const& mask, - Array4 const& area, - BoundCond bct, Real bcl, - Array4 const& bcval, - int maxorder, Real dyinv, int inhomog, int icomp) noexcept + Array4 const& area, + BoundCond bct, T bcl, + Array4 const& bcval, + int maxorder, T dyinv, int inhomog, int icomp) noexcept { const int s = 1-2*side; // +1 for lo and -1 for hi switch (bct) { @@ -374,19 +379,19 @@ void mlebabeclap_apply_bc_y (int side, int i, int j, int k, int blen, case AMREX_LO_DIRICHLET: { const int NX = amrex::min(blen+1, maxorder); - GpuArray x{-bcl * dyinv, Real(0.5), Real(1.5), Real(2.5)}; - Array2D coef{}; + GpuArray x{-bcl * dyinv, T(0.5), T(1.5), T(2.5)}; + Array2D coef{}; for (int r = 0; r <= maxorder-2; ++r) { - poly_interp_coeff(-Real(0.5), x.data(), r+2, &(coef(0,r))); + poly_interp_coeff(-T(0.5), x.data(), r+2, &(coef(0,r))); } if (mask(i,j,k) == 0 && mask(i,j+s,k) == 1) { int order = 1; bool has_cutfaces = false; for (int r = 0; r <= NX-2; ++r) { - Real a = area(i,j+(1-side)+s*r,k); - if (a > Real(0.0)) { + T a = area(i,j+(1-side)+s*r,k); + if (a > T(0.0)) { ++order; - if (a < Real(1.0)) { + if (a < T(1.0)) { has_cutfaces = true; } } else { @@ -398,10 +403,10 @@ void mlebabeclap_apply_bc_y (int side, int i, int j, int k, int blen, if (inhomog) { phi(i,j,k,icomp) = bcval(i,j,k,icomp); } else { - phi(i,j,k,icomp) = Real(0.0); + phi(i,j,k,icomp) = T(0.0); } } else { - Real tmp = Real(0.0); + T tmp = T(0.0); for (int m = 1; m < order; ++m) { tmp += phi(i,j+m*s,k,icomp) * coef(m,order-2); } @@ -417,14 +422,15 @@ void mlebabeclap_apply_bc_y (int side, int i, int j, int k, int blen, } } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_apply_bc_z (int side, int i, int j, int k, int blen, - Array4 const& phi, + Array4 const& phi, Array4 const& mask, - Array4 const& area, - BoundCond bct, Real bcl, - Array4 const& bcval, - int maxorder, Real dzinv, int inhomog, int icomp) noexcept + Array4 const& area, + BoundCond bct, T bcl, + Array4 const& bcval, + int maxorder, T dzinv, int inhomog, int icomp) noexcept { const int s = 1-2*side; // +1 for lo and -1 for hi switch (bct) { @@ -445,19 +451,19 @@ void mlebabeclap_apply_bc_z (int side, int i, int j, int k, int blen, case AMREX_LO_DIRICHLET: { const int NX = amrex::min(blen+1, maxorder); - GpuArray x{-bcl * dzinv, Real(0.5), Real(1.5), Real(2.5)}; - Array2D coef{}; + GpuArray x{-bcl * dzinv, T(0.5), T(1.5), T(2.5)}; + Array2D coef{}; for (int r = 0; r <= maxorder-2; ++r) { - poly_interp_coeff(-Real(0.5), x.data(), r+2, &(coef(0,r))); + poly_interp_coeff(-T(0.5), x.data(), r+2, &(coef(0,r))); } if (mask(i,j,k) == 0 && mask(i,j,k+s) == 1) { int order = 1; bool has_cutfaces = false; for (int r = 0; r <= NX-2; ++r) { - Real a = area(i,j,k+(1-side)+s*r); - if (a > Real(0.0)) { + T a = area(i,j,k+(1-side)+s*r); + if (a > T(0.0)) { ++order; - if (a < Real(1.0)) { + if (a < T(1.0)) { has_cutfaces = true; } } else { @@ -469,10 +475,10 @@ void mlebabeclap_apply_bc_z (int side, int i, int j, int k, int blen, if (inhomog) { phi(i,j,k,icomp) = bcval(i,j,k,icomp); } else { - phi(i,j,k,icomp) = Real(0.0); + phi(i,j,k,icomp) = T(0.0); } } else { - Real tmp = Real(0.0); + T tmp = T(0.0); for (int m = 1; m < order; ++m) { tmp += phi(i,j,k+m*s,icomp) * coef(m,order-2); } From ac3913128a929d93b4295f44ccd69267a806e8b4 Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Thu, 8 Jan 2026 13:41:35 +1100 Subject: [PATCH 02/12] Added GPU sync statement --- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 62ded4543e8..56022785c0b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -121,6 +121,9 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c beta_on_centroid, phi_on_centroid); }); } + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { From 465a114eac43a73196545165bcfc7d3efe1490b3 Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Thu, 8 Jan 2026 14:25:35 +1100 Subject: [PATCH 03/12] Updated GSRB and Normalize for MLEBABecLap --- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp | 141 ++++--- .../MLMG/AMReX_MLEBABecLap_3D_K.H | 350 +++++++++++++++++- .../MLMG/AMReX_MLEBABecLap_F.cpp | 221 +++++++---- 3 files changed, 591 insertions(+), 121 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index 836cda7c2fe..9ace60c0052 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -883,66 +883,109 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const bool is_eb_dirichlet = isEBDirichlet(); - Array4 foo; - const Real ascalar = m_a_scalar; const Real bscalar = m_b_scalar; const int ncomp = getNComp(); - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling(); } +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && mf.isFusingCandidate()) { + MultiArray4 foo; + const auto& xma = mf.arrays(); + const auto& ama = acoef.const_arrays(); + AMREX_D_TERM(const auto& bxma = bxcoef.const_arrays();, + const auto& byma = bycoef.const_arrays();, + const auto& bzma = bzcoef.const_arrays();); + auto const& ccmma = ccmask.const_arrays(); + auto const& flagma = flags->const_arrays(); + auto const& vfracma = vfrac->const_arrays(); + AMREX_D_TERM(auto const& apxma = area[0]->const_arrays();, + auto const& apyma = area[1]->const_arrays();, + auto const& apzma = area[2]->const_arrays();); + AMREX_D_TERM(auto const& fcxma = fcent[0]->const_arrays();, + auto const& fcyma = fcent[1]->const_arrays();, + auto const& fczma = fcent[2]->const_arrays();); + auto const& bama = barea->const_arrays(); + auto const& bcma = bcent->const_arrays(); + auto const& bebma = (is_eb_dirichlet) + ? m_eb_b_coeffs[amrlev][mglev]->const_arrays() : foo; + + bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); + bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); + amrex::ParallelFor(mf, IntVect(0), ncomp, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + mlebabeclap_normalize(i, j, k, n, + xma[box_no], ascalar, ama[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no], byma[box_no], bzma[box_no]), + ccmma[box_no], flagma[box_no], vfracma[box_no], + AMREX_D_DECL(apxma[box_no], apyma[box_no], apzma[box_no]), + AMREX_D_DECL(fcxma[box_no], fcyma[box_no], fczma[box_no]), + bama[box_no], bcma[box_no], bebma[box_no], + is_eb_dirichlet, + beta_on_centroid); + }); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } + } else +#endif + { + Array4 foo; + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling(); } #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(mf, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - Array4 const& fab = mf.array(mfi); - Array4 const& afab = acoef.const_array(mfi); - AMREX_D_TERM(Array4 const& bxfab = bxcoef.const_array(mfi);, - Array4 const& byfab = bycoef.const_array(mfi);, - Array4 const& bzfab = bzcoef.const_array(mfi);); + for (MFIter mfi(mf, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& fab = mf.array(mfi); + Array4 const& afab = acoef.const_array(mfi); + AMREX_D_TERM(Array4 const& bxfab = bxcoef.const_array(mfi);, + Array4 const& byfab = bycoef.const_array(mfi);, + Array4 const& bzfab = bzcoef.const_array(mfi);); - auto fabtyp = (flags) ? (*flags)[mfi].getType(bx) : FabType::regular; + auto fabtyp = (flags) ? (*flags)[mfi].getType(bx) : FabType::regular; - if (fabtyp == FabType::regular) - { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, + if (fabtyp == FabType::regular) { - mlabeclap_normalize(i,j,k,n, fab, afab, AMREX_D_DECL(bxfab, byfab, bzfab), - dxinvarray, ascalar, bscalar); - }); - } - else if (fabtyp == FabType::singlevalued) - { - Array4 const& bebfab - = (is_eb_dirichlet) ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo; - Array4 const& ccmfab = ccmask.const_array(mfi); - Array4 const& flagfab = flags->const_array(mfi); - Array4 const& vfracfab = vfrac->const_array(mfi); - AMREX_D_TERM(Array4 const& apxfab = area[0]->const_array(mfi);, - Array4 const& apyfab = area[1]->const_array(mfi);, - Array4 const& apzfab = area[2]->const_array(mfi);); - AMREX_D_TERM(Array4 const& fcxfab = fcent[0]->const_array(mfi);, - Array4 const& fcyfab = fcent[1]->const_array(mfi);, - Array4 const& fczfab = fcent[2]->const_array(mfi);); - Array4 const& bafab = barea->const_array(mfi); - Array4 const& bcfab = bcent->const_array(mfi); - - bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); - - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, + { + mlabeclap_normalize(i,j,k,n, fab, afab, AMREX_D_DECL(bxfab, byfab, bzfab), + dxinvarray, ascalar, bscalar); + }); + } + else if (fabtyp == FabType::singlevalued) { - mlebabeclap_normalize(tbx, fab, ascalar, afab, - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_2D_ONLY_ARGS(dh, dxarray) - AMREX_D_DECL(bxfab, byfab, bzfab), - ccmfab, flagfab, vfracfab, - AMREX_D_DECL(apxfab,apyfab,apzfab), - AMREX_D_DECL(fcxfab,fcyfab,fczfab), - bafab, bcfab, bebfab, is_eb_dirichlet, - beta_on_centroid, ncomp); - }); + Array4 const& bebfab + = (is_eb_dirichlet) ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo; + Array4 const& ccmfab = ccmask.const_array(mfi); + Array4 const& flagfab = flags->const_array(mfi); + Array4 const& vfracfab = vfrac->const_array(mfi); + AMREX_D_TERM(Array4 const& apxfab = area[0]->const_array(mfi);, + Array4 const& apyfab = area[1]->const_array(mfi);, + Array4 const& apzfab = area[2]->const_array(mfi);); + AMREX_D_TERM(Array4 const& fcxfab = fcent[0]->const_array(mfi);, + Array4 const& fcyfab = fcent[1]->const_array(mfi);, + Array4 const& fczfab = fcent[2]->const_array(mfi);); + Array4 const& bafab = barea->const_array(mfi); + Array4 const& bcfab = bcent->const_array(mfi); + + bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + { + mlebabeclap_normalize(tbx, fab, ascalar, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_2D_ONLY_ARGS(dh, dxarray) + AMREX_D_DECL(bxfab, byfab, bzfab), + ccmfab, flagfab, vfracfab, + AMREX_D_DECL(apxfab,apyfab,apzfab), + AMREX_D_DECL(fcxfab,fcyfab,fczfab), + bafab, bcfab, bebfab, is_eb_dirichlet, + beta_on_centroid, ncomp); + }); + } } } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H index 94e371ae408..4b0fa00b9cb 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H @@ -513,9 +513,6 @@ void mlebabeclap_adotx (Box const& box, Array4 const& y, T dhy = beta*dxinv[1]*dxinv[1]; T dhz = beta*dxinv[2]*dxinv[2]; - bool beta_on_center = !(beta_on_centroid); - bool phi_on_center = !( phi_on_centroid); - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept { mlebabeclap_adotx(i, j, k, n, @@ -612,6 +609,353 @@ void mlebabeclap_ebflux (int i, int j, int k, int n, } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebabeclap_gsrb (int i, int j, int k, int n, + Array4 const& phi, Array4 const& rhs, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, + Array4 const& m0, Array4 const& m2, + Array4 const& m4, + Array4 const& m1, Array4 const& m3, + Array4 const& m5, + Array4 const& f0, Array4 const& f2, + Array4 const& f4, + Array4 const& f1, Array4 const& f3, + Array4 const& f5, + Array4 const& ccm, Array4 const& beb, + Array4 const& flag, + Array4 const& vfrc, Array4 const& apx, + Array4 const& apy, Array4 const& apz, + Array4 const& fcx, Array4 const& fcy, + Array4 const& fcz, Array4 const& ba, + Array4 const& bcent, + bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, + Box const& vbox, int redblack) noexcept +{ + constexpr T omega = 1.15; + if ((i+j+k+redblack) % 2 == 0) + { + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + if (flag(i,j,k).isCovered()) + { + phi(i,j,k,n) = T(0.0); + } + else + { + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z,n) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? f3(vhi.x,j,k,n) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k,n) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? f5(i,j,vhi.z,n) : T(0.0); + + if (flag(i,j,k).isRegular()) + { + T gamma = alpha*a(i,j,k) + + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n)) + + dhy*(bY(i,j+1,k,n) + bY(i,j,k,n)) + + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n)); + + T rho = dhx*(bX(i+1,j ,k ,n)*phi(i+1,j ,k ,n) + + bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) + + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + + bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) + + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + + bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); + + T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) + + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += omega*res/(gamma-delta); + } + else + { + T kappa = vfrc(i,j,k); + T apxm = apx(i ,j ,k ); + T apxp = apx(i+1,j ,k ); + T apym = apy(i ,j ,k ); + T apyp = apy(i ,j+1,k ); + T apzm = apz(i ,j ,k ); + T apzp = apz(i ,j ,k+1); + + T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); + T oxm = -bX(i,j,k,n)*cf0; + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0)) { + auto fcx0 = fcx(i,j,k,0); + auto fcx1 = fcx(i,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0), fcx0)); + int kk = k + static_cast(std::copysign(T(1.0), fcx1)); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) + ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) + ? std::abs(fcx1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm + + fracy *(T(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n)) + +(T(1.0)-fracy)* fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n)) + + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*( -phi(i-1, j, k,n)) + + fracy *(T(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n)) + +(T(1.0)-fracy)* fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n)) + + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + fxm *= bX(i,j,k,n); + + } + oxm = T(0.0); + sxm = (T(1.0)-fracy)*(T(1.0)-fracz)*sxm; + } + + T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); + T oxp = bX(i+1,j,k,n)*cf3; + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0)) { + auto fcx0 = fcx(i+1,j,k,0); + auto fcx1 = fcx(i+1,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0),fcx0)); + int kk = k + static_cast(std::copysign(T(1.0),fcx1)); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) + ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) + ? std::abs(fcx1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp + + fracy *(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n)) + +(T(1.0)-fracy)* fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n)) + + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(phi(i+1, j, k,n) ) + + fracy *(T(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) + + fracz *(T(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) + + fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + fxp *= bX(i+1,j,k,n); + + } + + oxp = T(0.0); + sxp = (T(1.0)-fracy)*(T(1.0)-fracz)*sxp; + } + + T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); + T oym = -bY(i,j,k,n)*cf1; + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0)) { + auto fcy0 = fcy(i,j,k,0); + auto fcy1 = fcy(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) + ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) + ? std::abs(fcy1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym + + fracx *(T(1.0)-fracz)*bY(ii,j,k ,n)*(phi(ii,j,k ,n)-phi(ii,j-1,k ,n)) + + (T(1.0)-fracx)* fracz *bY(i ,j,kk,n)*(phi(i ,j,kk,n)-phi(i ,j-1,kk,n)) + + fracx * fracz *bY(ii,j,kk,n)*(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*( -phi( i,j-1, k,n)) + + fracx *(T(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n)) + + (T(1.0)-fracx)* fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n)) + + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); + fym *= bY(i,j,k,n); + + } + oym = T(0.0); + sym = (T(1.0)-fracx)*(T(1.0)-fracz)*sym; + } + + T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); + T oyp = bY(i,j+1,k,n)*cf4; + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0)) { + auto fcy0 = fcy(i,j+1,k,0); + auto fcy1 = fcy(i,j+1,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) + ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) + ? std::abs(fcy1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp + + fracx *(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n)) + + (T(1.0)-fracx)* fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n)) + + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(phi( i,j+1, k,n) ) + + fracx *(T(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n)) + + (T(1.0)-fracx)* fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n)) + + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); + fyp *= bY(i,j+1,k,n); + + } + oyp = T(0.0); + syp = (T(1.0)-fracx)*(T(1.0)-fracz)*syp; + } + + T fzm = -bZ(i,j,k,n)*phi(i,j,k-1,n); + T ozm = -bZ(i,j,k,n)*cf2; + T szm = bZ(i,j,k,n); + if (apzm != T(0.0) && apzm != T(1.0)) { + auto fcz0 = fcz(i,j,k,0); + auto fcz1 = fcz(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) + ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) + ? std::abs(fcz1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm + + fracx *(T(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) + +(T(1.0)-fracx)* fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n)) + + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*( -phi( i, j,k-1,n)) + + fracx *(T(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) + + (T(1.0)-fracx)* fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n)) + + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + fzm *= bZ(i,j,k,n); + + } + ozm = T(0.0); + szm = (T(1.0)-fracx)*(T(1.0)-fracy)*szm; + } + + T fzp = bZ(i,j,k+1,n)*phi(i,j,k+1,n); + T ozp = bZ(i,j,k+1,n)*cf5; + T szp = -bZ(i,j,k+1,n); + if (apzp != T(0.0) && apzp != T(1.0)) { + auto fcz0 = fcz(i,j,k+1,0); + auto fcz1 = fcz(i,j,k+1,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) + ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) + ? std::abs(fcz1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp + + fracx *(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n)) + + (T(1.0)-fracx)* fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n)) + + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(phi( i, j,k+1,n) ) + + fracx *(T(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n)) + + (T(1.0)-fracx)* fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n)) + + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + fzp *= bZ(i,j,k+1,n); + + } + ozp = T(0.0); + szp = (T(1.0)-fracx)*(T(1.0)-fracy)*szp; + } + + T vfrcinv = T(1.0)/kappa; + T gamma = alpha*a(i,j,k) + vfrcinv * + (dhx*(apxm*sxm-apxp*sxp) + + dhy*(apym*sym-apyp*syp) + + dhz*(apzm*szm-apzp*szp)); + + T rho = -vfrcinv * + (dhx*(apxm*fxm-apxp*fxp) + + dhy*(apym*fym-apyp*fyp) + + dhz*(apzm*fzm-apzp*fzp)); + + T delta = -vfrcinv * + (dhx*(apxm*oxm-apxp*oxp) + + dhy*(apym*oym-apyp*oyp) + + dhz*(apzm*ozm-apzp*ozp)); + + if (is_dirichlet) { + T dapx = apxm-apxp; + T dapy = apym-apyp; + T dapz = apzm-apzp; + T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + T anrmz = dapz * anorminv; + T bctx = bcent(i,j,k,0); + T bcty = bcent(i,j,k,1); + T bctz = bcent(i,j,k,2); + T dx_eb = get_dx_eb(kappa); + + T dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), + std::abs(anrmz)); + + T gx = bctx - dg*anrmx; + T gy = bcty - dg*anrmy; + T gz = bctz - dg*anrmz; + T sx = std::copysign(T(1.0),anrmx); + T sy = std::copysign(T(1.0),anrmy); + T sz = std::copysign(T(1.0),anrmz); + int ii = i - static_cast(sx); + int jj = j - static_cast(sy); + int kk = k - static_cast(sz); + + gx *= sx; + gy *= sy; + gz *= sz; + T gxy = gx*gy; + T gxz = gx*gz; + T gyz = gy*gz; + T gxyz = gx*gy*gz; + T phig_gamma = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); + T phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n) + + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n) + + (gyz + gxyz) * phi(i,jj,kk,n) + + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n) + + (gxz + gxyz) * phi(ii,j,kk,n) + + (gxy + gxyz) * phi(ii,jj,k,n) + + (-gxyz) * phi(ii,jj,kk,n); + + T dphidn = ( -phig)/dg; + T feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); + gamma += vfrcinv*(-dhx)*feb_gamma; + T feb = dphidn * ba(i,j,k) * beb(i,j,k,n); + rho += -vfrcinv*(-dhx)*feb; + } + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += omega*res/(gamma-delta); + } + } + } +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_gsrb (int i, int j, int k, int n, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 56022785c0b..60572e1d357 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -275,99 +275,182 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; + const MultiFab* vfrac = (factory) ? &(factory->getVolFrac()) : nullptr; + const auto area = (factory) ? factory->getAreaFrac() + : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; + const auto fcent = (factory) ? factory->getFaceCent() + : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; + const MultiCutFab* barea = (factory) ? &(factory->getBndryArea()) : nullptr; + const MultiCutFab* bcent = (factory) ? &(factory->getBndryCent()) : nullptr; + const auto *const ccent = (factory) ? &(factory->getCentroid()) : nullptr; + + amrex::ignore_unused(vfrac, area, fcent, barea, bcent, ccent); bool is_eb_dirichlet = isEBDirichlet(); - Array4 foo; +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && sol.isFusingCandidate()) { + MultiArray4 foo; + const auto& m0 = mm0.const_arrays(); + const auto& m1 = mm1.const_arrays(); +#if (AMREX_SPACEDIM > 1) + const auto& m2 = mm2.const_arrays(); + const auto& m3 = mm3.const_arrays(); +#if (AMREX_SPACEDIM > 2) + const auto& m4 = mm4.const_arrays(); + const auto& m5 = mm5.const_arrays(); +#endif +#endif + const auto& solma = sol.arrays(); + const auto& rhsma = rhs.const_arrays(); + const auto& ama = acoef.const_arrays(); + + AMREX_D_TERM(const auto& bxma = bxcoef.const_arrays();, + const auto& byma = bycoef.const_arrays();, + const auto& bzma = bzcoef.const_arrays();); - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) { mfi_info.SetDynamic(true); } + const auto& f0ma = f0.const_arrays(); + const auto& f1ma = f1.const_arrays(); +#if (AMREX_SPACEDIM > 1) + const auto& f2ma = f2.const_arrays(); + const auto& f3ma = f3.const_arrays(); +#if (AMREX_SPACEDIM > 2) + const auto& f4ma = f4.const_arrays(); + const auto& f5ma = f5.const_arrays(); +#endif +#endif + // auto const& ebflags_ma = factory->getMultiEBCellFlagFab().const_arrays(); + auto const& flagma = flags->const_arrays(); + auto const& vfracma = vfrac->const_arrays(); + AMREX_D_TERM(auto const& apxma = area[0]->const_arrays();, + auto const& apyma = area[1]->const_arrays();, + auto const& apzma = area[2]->const_arrays();); + AMREX_D_TERM(auto const& fcxma = fcent[0]->const_arrays();, + auto const& fcyma = fcent[1]->const_arrays();, + auto const& fczma = fcent[2]->const_arrays();); + auto const& bama = barea->const_arrays(); + auto const& bcma = bcent->const_arrays(); + + auto const& ccmma = ccmask.const_arrays(); + auto const& bebfab = (is_eb_dirichlet) ? + m_eb_b_coeffs[amrlev][mglev]->const_arrays() : foo; + + bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); + bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); + + amrex::ParallelFor(sol, IntVect(0), nc, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + Box vbx(ama[box_no]); + mlebabeclap_gsrb(i,j,k,n, solma[box_no], rhsma[box_no], alpha, + ama[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + AMREX_D_DECL(m0[box_no],m2[box_no],m4[box_no]), + AMREX_D_DECL(m1[box_no],m3[box_no],m5[box_no]), + AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), + AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), + ccmma[box_no], bebfab[box_no], flagma[box_no], vfracma[box_no], + AMREX_D_DECL(apxma[box_no],apyma[box_no],apzma[box_no]), + AMREX_D_DECL(fcxma[box_no],fcyma[box_no],fczma[box_no]), + bama[box_no], bcma[box_no], + is_eb_dirichlet, beta_on_centroid, phi_on_centroid, + vbx, redblack); + }); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } + } else +#endif + { + Array4 foo; + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) { mfi_info.SetDynamic(true); } #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) - { - const auto& m0 = mm0.array(mfi); - const auto& m1 = mm1.array(mfi); + for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) + { + const auto& m0 = mm0.array(mfi); + const auto& m1 = mm1.array(mfi); #if (AMREX_SPACEDIM > 1) - const auto& m2 = mm2.array(mfi); - const auto& m3 = mm3.array(mfi); + const auto& m2 = mm2.array(mfi); + const auto& m3 = mm3.array(mfi); #if (AMREX_SPACEDIM > 2) - const auto& m4 = mm4.array(mfi); - const auto& m5 = mm5.array(mfi); + const auto& m4 = mm4.array(mfi); + const auto& m5 = mm5.array(mfi); #endif #endif + const Box& vbx = mfi.validbox(); + const auto& solnfab = sol.array(mfi); + const auto& rhsfab = rhs.const_array(mfi); + const auto& afab = acoef.const_array(mfi); - const Box& vbx = mfi.validbox(); - const auto& solnfab = sol.array(mfi); - const auto& rhsfab = rhs.const_array(mfi); - const auto& afab = acoef.const_array(mfi); - - AMREX_D_TERM(const auto& bxfab = bxcoef.const_array(mfi);, - const auto& byfab = bycoef.const_array(mfi);, - const auto& bzfab = bzcoef.const_array(mfi);); + AMREX_D_TERM(const auto& bxfab = bxcoef.const_array(mfi);, + const auto& byfab = bycoef.const_array(mfi);, + const auto& bzfab = bzcoef.const_array(mfi);); - const auto& f0fab = f0.const_array(mfi); - const auto& f1fab = f1.const_array(mfi); + const auto& f0fab = f0.const_array(mfi); + const auto& f1fab = f1.const_array(mfi); #if (AMREX_SPACEDIM > 1) - const auto& f2fab = f2.const_array(mfi); - const auto& f3fab = f3.const_array(mfi); + const auto& f2fab = f2.const_array(mfi); + const auto& f3fab = f3.const_array(mfi); #if (AMREX_SPACEDIM > 2) - const auto& f4fab = f4.const_array(mfi); - const auto& f5fab = f5.const_array(mfi); + const auto& f4fab = f4.const_array(mfi); + const auto& f5fab = f5.const_array(mfi); #endif #endif + auto fabtyp = (flags) ? (*flags)[mfi].getType(vbx) : FabType::regular; - auto fabtyp = (flags) ? (*flags)[mfi].getType(vbx) : FabType::regular; - - if (fabtyp == FabType::covered) - { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D ( vbx, nc, i, j, k, n, + if (fabtyp == FabType::covered) { - solnfab(i,j,k,n) = 0.0; - }); - } - else if (fabtyp == FabType::regular) - { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D(vbx, nc, i, j, k, n, + AMREX_HOST_DEVICE_PARALLEL_FOR_4D ( vbx, nc, i, j, k, n, + { + solnfab(i,j,k,n) = 0.0; + }); + } + else if (fabtyp == FabType::regular) { - abec_gsrb(i,j,k,n, solnfab, rhsfab, alpha, afab, - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_D_DECL(bxfab, byfab, bzfab), - AMREX_D_DECL(m0,m2,m4), - AMREX_D_DECL(m1,m3,m5), - AMREX_D_DECL(f0fab,f2fab,f4fab), - AMREX_D_DECL(f1fab,f3fab,f5fab), - vbx, redblack); - }); - } - else - { - Array4 const& ccmfab = ccmask.const_array(mfi); - Array4 const& bebfab = (is_eb_dirichlet) - ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo; + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(vbx, nc, i, j, k, n, + { + abec_gsrb(i,j,k,n, solnfab, rhsfab, alpha, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxfab, byfab, bzfab), + AMREX_D_DECL(m0,m2,m4), + AMREX_D_DECL(m1,m3,m5), + AMREX_D_DECL(f0fab,f2fab,f4fab), + AMREX_D_DECL(f1fab,f3fab,f5fab), + vbx, redblack); + }); + } + else + { + Array4 const& ccmfab = ccmask.const_array(mfi); + Array4 const& bebfab = (is_eb_dirichlet) + ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo; - auto const& ebdata = factory->getEBData(mfi); + auto const& ebdata = factory->getEBData(mfi); - bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); - bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); + bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); + bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); - if (phi_on_centroid) { amrex::Abort("phi_on_centroid is still a WIP"); } + if (phi_on_centroid) { amrex::Abort("phi_on_centroid is still a WIP"); } - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( vbx, thread_box, - { - mlebabeclap_gsrb(thread_box, solnfab, rhsfab, alpha, afab, - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_2D_ONLY_ARGS(dh,h) - AMREX_D_DECL(bxfab,byfab,bzfab), - AMREX_D_DECL(m0,m2,m4), - AMREX_D_DECL(m1,m3,m5), - AMREX_D_DECL(f0fab,f2fab,f4fab), - AMREX_D_DECL(f1fab,f3fab,f5fab), - ccmfab, bebfab, ebdata, - is_eb_dirichlet, beta_on_centroid, phi_on_centroid, - vbx, redblack, nc); - }); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( vbx, thread_box, + { + mlebabeclap_gsrb(thread_box, solnfab, rhsfab, alpha, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_2D_ONLY_ARGS(dh,h) + AMREX_D_DECL(bxfab,byfab,bzfab), + AMREX_D_DECL(m0,m2,m4), + AMREX_D_DECL(m1,m3,m5), + AMREX_D_DECL(f0fab,f2fab,f4fab), + AMREX_D_DECL(f1fab,f3fab,f5fab), + ccmfab, bebfab, ebdata, + is_eb_dirichlet, beta_on_centroid, phi_on_centroid, + vbx, redblack, nc); + }); + } } } } From c7b1d99a34dfc337de8106c227cce5e3ab318b16 Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Thu, 8 Jan 2026 14:27:02 +1100 Subject: [PATCH 04/12] Remove unused var --- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index 9ace60c0052..ab327325da6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -910,7 +910,7 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const ? m_eb_b_coeffs[amrlev][mglev]->const_arrays() : foo; bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); - bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); + amrex::ParallelFor(mf, IntVect(0), ncomp, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept { mlebabeclap_normalize(i, j, k, n, From ed26761a505b961f2e15c817d030afa63bc2ffc4 Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Thu, 8 Jan 2026 15:14:04 +1100 Subject: [PATCH 05/12] Revert back old GSRB 3D --- .../MLMG/AMReX_MLEBABecLap_3D_K.H | 655 +++++++++--------- 1 file changed, 310 insertions(+), 345 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H index 4b0fa00b9cb..00bbf6d4d8e 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H @@ -958,7 +958,7 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_gsrb (int i, int j, int k, int n, +void mlebabeclap_gsrb (Box const& box, Array4 const& phi, Array4 const& rhs, T alpha, Array4 const& a, T dhx, T dhy, T dhz, @@ -975,376 +975,341 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, Array4 const& ccm, Array4 const& beb, EBData const& ebdata, bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, - Box const& vbox, int redblack) noexcept + Box const& vbox, int redblack, int ncomp) noexcept { constexpr T omega = 1.15; - if ((i+j+k+redblack) % 2 == 0) + + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + +// amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + // amrex::Loop here causes gcc 8 to crash. + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + for (int n = 0; n < ncomp; ++n) { + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { - const auto vlo = amrex::lbound(vbox); - const auto vhi = amrex::ubound(vbox); - auto const flag = ebdata.get(i,j,k); - if (flag.isCovered()) - { - phi(i,j,k,n) = T(0.0); - } - else + if ((i+j+k+redblack) % 2 == 0) { - T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) - ? f0(vlo.x,j,k,n) : T(0.0); - T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) - ? f1(i,vlo.y,k,n) : T(0.0); - T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) - ? f2(i,j,vlo.z,n) : T(0.0); - T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) - ? f3(vhi.x,j,k,n) : T(0.0); - T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) - ? f4(i,vhi.y,k,n) : T(0.0); - T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) - ? f5(i,j,vhi.z,n) : T(0.0); - - if (flag.isRegular()) + auto const flag = ebdata.get(i,j,k); + if (flag.isCovered()) { - T gamma = alpha*a(i,j,k) - + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n)) - + dhy*(bY(i,j+1,k,n) + bY(i,j,k,n)) - + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n)); - - T rho = dhx*(bX(i+1,j ,k ,n)*phi(i+1,j ,k ,n) + - bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) - + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + - bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) - + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + - bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); - - T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) - + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) - + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); - - T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += omega*res/(gamma-delta); + phi(i,j,k,n) = T(0.0); } else { - T kappa = ebdata.get(i,j,k); - T apxm = ebdata.get(i ,j ,k ); - T apxp = ebdata.get(i+1,j ,k ); - T apym = ebdata.get(i ,j ,k ); - T apyp = ebdata.get(i ,j+1,k ); - T apzm = ebdata.get(i ,j ,k ); - T apzp = ebdata.get(i ,j ,k+1); - - T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); - T oxm = -bX(i,j,k,n)*cf0; - T sxm = bX(i,j,k,n); - if (apxm != T(0.0) && apxm != T(1.0)) { - auto fcx0 = ebdata.get(i,j,k,0); - auto fcx1 = ebdata.get(i,j,k,1); - int jj = j + static_cast(std::copysign(T(1.0), fcx0)); - int kk = k + static_cast(std::copysign(T(1.0), fcx1)); - T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) - ? std::abs(fcx0) : T(0.0); - T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) - ? std::abs(fcx1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm - + fracy *(T(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n)) - +(T(1.0)-fracy)* fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n)) - + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*( -phi(i-1, j, k,n)) - + fracy *(T(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n)) - +(T(1.0)-fracy)* fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n)) - + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); - fxm *= bX(i,j,k,n); - - } - oxm = T(0.0); - sxm = (T(1.0)-fracy)*(T(1.0)-fracz)*sxm; - } - - T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); - T oxp = bX(i+1,j,k,n)*cf3; - T sxp = -bX(i+1,j,k,n); - if (apxp != T(0.0) && apxp != T(1.0)) { - auto fcx0 = ebdata.get(i+1,j,k,0); - auto fcx1 = ebdata.get(i+1,j,k,1); - int jj = j + static_cast(std::copysign(T(1.0),fcx0)); - int kk = k + static_cast(std::copysign(T(1.0),fcx1)); - T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) - ? std::abs(fcx0) : T(0.0); - T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) - ? std::abs(fcx1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp - + fracy *(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n)) - +(T(1.0)-fracy)* fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n)) - + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(phi(i+1, j, k,n) ) + - fracy *(T(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) + - fracz *(T(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) + - fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); - fxp *= bX(i+1,j,k,n); - - } - - oxp = T(0.0); - sxp = (T(1.0)-fracy)*(T(1.0)-fracz)*sxp; + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z,n) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? f3(vhi.x,j,k,n) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k,n) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? f5(i,j,vhi.z,n) : T(0.0); + + if (flag.isRegular()) + { + T gamma = alpha*a(i,j,k) + + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n)) + + dhy*(bY(i,j+1,k,n) + bY(i,j,k,n)) + + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n)); + + T rho = dhx*(bX(i+1,j ,k ,n)*phi(i+1,j ,k ,n) + + bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) + + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + + bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) + + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + + bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); + + T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) + + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += omega*res/(gamma-delta); } - - T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); - T oym = -bY(i,j,k,n)*cf1; - T sym = bY(i,j,k,n); - if (apym != T(0.0) && apym != T(1.0)) { - auto fcy0 = ebdata.get(i,j,k,0); - auto fcy1 = ebdata.get(i,j,k,1); - int ii = i + static_cast(std::copysign(T(1.0),fcy0)); - int kk = k + static_cast(std::copysign(T(1.0),fcy1)); - T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) - ? std::abs(fcy0) : T(0.0); - T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) - ? std::abs(fcy1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym - + fracx *(T(1.0)-fracz)*bY(ii,j,k ,n)*(phi(ii,j,k ,n)-phi(ii,j-1,k ,n)) - + (T(1.0)-fracx)* fracz *bY(i ,j,kk,n)*(phi(i ,j,kk,n)-phi(i ,j-1,kk,n)) - + fracx * fracz *bY(ii,j,kk,n)*(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); + else + { + T kappa = ebdata.get(i,j,k); + T apxm = ebdata.get(i ,j ,k ); + T apxp = ebdata.get(i+1,j ,k ); + T apym = ebdata.get(i ,j ,k ); + T apyp = ebdata.get(i ,j+1,k ); + T apzm = ebdata.get(i ,j ,k ); + T apzp = ebdata.get(i ,j ,k+1); + + T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); + T oxm = -bX(i,j,k,n)*cf0; + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0)) { + auto fcx0 = ebdata.get(i,j,k,0); + auto fcx1 = ebdata.get(i,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0), fcx0)); + int kk = k + static_cast(std::copysign(T(1.0), fcx1)); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) + ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) + ? std::abs(fcx1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm + + fracy *(T(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n)) + +(T(1.0)-fracy)* fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n)) + + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*( -phi(i-1, j, k,n)) + + fracy *(T(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n)) + +(T(1.0)-fracy)* fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n)) + + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + fxm *= bX(i,j,k,n); + + } + oxm = T(0.0); + sxm = (T(1.0)-fracy)*(T(1.0)-fracz)*sxm; } - else if (beta_on_centroid && !phi_on_centroid) - { - fym = (T(1.0)-fracx)*(T(1.0)-fracz)*( -phi( i,j-1, k,n)) - + fracx *(T(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n)) - + (T(1.0)-fracx)* fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n)) - + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); - fym *= bY(i,j,k,n); + T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); + T oxp = bX(i+1,j,k,n)*cf3; + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0)) { + auto fcx0 = ebdata.get(i+1,j,k,0); + auto fcx1 = ebdata.get(i+1,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0),fcx0)); + int kk = k + static_cast(std::copysign(T(1.0),fcx1)); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) + ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) + ? std::abs(fcx1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp + + fracy *(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n)) + +(T(1.0)-fracy)* fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n)) + + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(phi(i+1, j, k,n) ) + + fracy *(T(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) + + fracz *(T(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) + + fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + fxp *= bX(i+1,j,k,n); + + } + + oxp = T(0.0); + sxp = (T(1.0)-fracy)*(T(1.0)-fracz)*sxp; } - oym = T(0.0); - sym = (T(1.0)-fracx)*(T(1.0)-fracz)*sym; - } - - T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); - T oyp = bY(i,j+1,k,n)*cf4; - T syp = -bY(i,j+1,k,n); - if (apyp != T(0.0) && apyp != T(1.0)) { - auto fcy0 = ebdata.get(i,j+1,k,0); - auto fcy1 = ebdata.get(i,j+1,k,1); - int ii = i + static_cast(std::copysign(T(1.0),fcy0)); - int kk = k + static_cast(std::copysign(T(1.0),fcy1)); - T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) - ? std::abs(fcy0) : T(0.0); - T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) - ? std::abs(fcy1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp - + fracx *(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n)) - + (T(1.0)-fracx)* fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n)) - + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(phi( i,j+1, k,n) ) - + fracx *(T(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n)) - + (T(1.0)-fracx)* fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n)) - + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); - fyp *= bY(i,j+1,k,n); + T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); + T oym = -bY(i,j,k,n)*cf1; + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0)) { + auto fcy0 = ebdata.get(i,j,k,0); + auto fcy1 = ebdata.get(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) + ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) + ? std::abs(fcy1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym + + fracx *(T(1.0)-fracz)*bY(ii,j,k ,n)*(phi(ii,j,k ,n)-phi(ii,j-1,k ,n)) + + (T(1.0)-fracx)* fracz *bY(i ,j,kk,n)*(phi(i ,j,kk,n)-phi(i ,j-1,kk,n)) + + fracx * fracz *bY(ii,j,kk,n)*(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*(T(1.0)-fracz)*( -phi( i,j-1, k,n)) + + fracx *(T(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n)) + + (T(1.0)-fracx)* fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n)) + + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); + fym *= bY(i,j,k,n); + + } + oym = T(0.0); + sym = (T(1.0)-fracx)*(T(1.0)-fracz)*sym; } - oyp = T(0.0); - syp = (T(1.0)-fracx)*(T(1.0)-fracz)*syp; - } - T fzm = -bZ(i,j,k,n)*phi(i,j,k-1,n); - T ozm = -bZ(i,j,k,n)*cf2; - T szm = bZ(i,j,k,n); - if (apzm != T(0.0) && apzm != T(1.0)) { - auto fcz0 = ebdata.get(i,j,k,0); - auto fcz1 = ebdata.get(i,j,k,1); - int ii = i + static_cast(std::copysign(T(1.0),fcz0)); - int jj = j + static_cast(std::copysign(T(1.0),fcz1)); - T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) - ? std::abs(fcz0) : T(0.0); - T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) - ? std::abs(fcz1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm - + fracx *(T(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) - +(T(1.0)-fracx)* fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n)) - + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); + T oyp = bY(i,j+1,k,n)*cf4; + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0)) { + auto fcy0 = ebdata.get(i,j+1,k,0); + auto fcy1 = ebdata.get(i,j+1,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) + ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) + ? std::abs(fcy1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp + + fracx *(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n)) + + (T(1.0)-fracx)* fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n)) + + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(phi( i,j+1, k,n) ) + + fracx *(T(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n)) + + (T(1.0)-fracx)* fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n)) + + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); + fyp *= bY(i,j+1,k,n); + + } + oyp = T(0.0); + syp = (T(1.0)-fracx)*(T(1.0)-fracz)*syp; } - else if (beta_on_centroid && !phi_on_centroid) - { - fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*( -phi( i, j,k-1,n)) - + fracx *(T(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) - + (T(1.0)-fracx)* fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n)) - + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); - fzm *= bZ(i,j,k,n); + T fzm = -bZ(i,j,k,n)*phi(i,j,k-1,n); + T ozm = -bZ(i,j,k,n)*cf2; + T szm = bZ(i,j,k,n); + if (apzm != T(0.0) && apzm != T(1.0)) { + auto fcz0 = ebdata.get(i,j,k,0); + auto fcz1 = ebdata.get(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) + ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) + ? std::abs(fcz1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm + + fracx *(T(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) + +(T(1.0)-fracx)* fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n)) + + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*( -phi( i, j,k-1,n)) + + fracx *(T(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) + + (T(1.0)-fracx)* fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n)) + + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + fzm *= bZ(i,j,k,n); + + } + ozm = T(0.0); + szm = (T(1.0)-fracx)*(T(1.0)-fracy)*szm; } - ozm = T(0.0); - szm = (T(1.0)-fracx)*(T(1.0)-fracy)*szm; - } - T fzp = bZ(i,j,k+1,n)*phi(i,j,k+1,n); - T ozp = bZ(i,j,k+1,n)*cf5; - T szp = -bZ(i,j,k+1,n); - if (apzp != T(0.0) && apzp != T(1.0)) { - auto fcz0 = ebdata.get(i,j,k+1,0); - auto fcz1 = ebdata.get(i,j,k+1,1); - int ii = i + static_cast(std::copysign(T(1.0),fcz0)); - int jj = j + static_cast(std::copysign(T(1.0),fcz1)); - T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) - ? std::abs(fcz0) : T(0.0); - T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) - ? std::abs(fcz1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp - + fracx *(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n)) - + (T(1.0)-fracx)* fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n)) - + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + T fzp = bZ(i,j,k+1,n)*phi(i,j,k+1,n); + T ozp = bZ(i,j,k+1,n)*cf5; + T szp = -bZ(i,j,k+1,n); + if (apzp != T(0.0) && apzp != T(1.0)) { + auto fcz0 = ebdata.get(i,j,k+1,0); + auto fcz1 = ebdata.get(i,j,k+1,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) + ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) + ? std::abs(fcz1) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp + + fracx *(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n)) + + (T(1.0)-fracx)* fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n)) + + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(phi( i, j,k+1,n) ) + + fracx *(T(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n)) + + (T(1.0)-fracx)* fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n)) + + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + fzp *= bZ(i,j,k+1,n); + + } + ozp = T(0.0); + szp = (T(1.0)-fracx)*(T(1.0)-fracy)*szp; } - else if (beta_on_centroid && !phi_on_centroid) - { - fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(phi( i, j,k+1,n) ) - + fracx *(T(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n)) - + (T(1.0)-fracx)* fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n)) - + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); - fzp *= bZ(i,j,k+1,n); + T vfrcinv = T(1.0)/kappa; + T gamma = alpha*a(i,j,k) + vfrcinv * + (dhx*(apxm*sxm-apxp*sxp) + + dhy*(apym*sym-apyp*syp) + + dhz*(apzm*szm-apzp*szp)); + + T rho = -vfrcinv * + (dhx*(apxm*fxm-apxp*fxp) + + dhy*(apym*fym-apyp*fyp) + + dhz*(apzm*fzm-apzp*fzp)); + + T delta = -vfrcinv * + (dhx*(apxm*oxm-apxp*oxp) + + dhy*(apym*oym-apyp*oyp) + + dhz*(apzm*ozm-apzp*ozp)); + + if (is_dirichlet) { + T dapx = apxm-apxp; + T dapy = apym-apyp; + T dapz = apzm-apzp; + T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + T anrmz = dapz * anorminv; + T bctx = ebdata.get(i,j,k,0); + T bcty = ebdata.get(i,j,k,1); + T bctz = ebdata.get(i,j,k,2); + T dx_eb = get_dx_eb(kappa); + + T dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), + std::abs(anrmz)); + + T gx = bctx - dg*anrmx; + T gy = bcty - dg*anrmy; + T gz = bctz - dg*anrmz; + T sx = std::copysign(T(1.0),anrmx); + T sy = std::copysign(T(1.0),anrmy); + T sz = std::copysign(T(1.0),anrmz); + int ii = i - static_cast(sx); + int jj = j - static_cast(sy); + int kk = k - static_cast(sz); + + gx *= sx; + gy *= sy; + gz *= sz; + T gxy = gx*gy; + T gxz = gx*gz; + T gyz = gy*gz; + T gxyz = gx*gy*gz; + T phig_gamma = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); + T phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n) + + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n) + + (gyz + gxyz) * phi(i,jj,kk,n) + + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n) + + (gxz + gxyz) * phi(ii,j,kk,n) + + (gxy + gxyz) * phi(ii,jj,k,n) + + (-gxyz) * phi(ii,jj,kk,n); + + T ba = ebdata.get(i,j,k); + + T dphidn = ( -phig)/dg; + T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); + gamma += vfrcinv*(-dhx)*feb_gamma; + T feb = dphidn * ba * beb(i,j,k,n); + rho += -vfrcinv*(-dhx)*feb; } - ozp = T(0.0); - szp = (T(1.0)-fracx)*(T(1.0)-fracy)*szp; - } - - T vfrcinv = T(1.0)/kappa; - T gamma = alpha*a(i,j,k) + vfrcinv * - (dhx*(apxm*sxm-apxp*sxp) + - dhy*(apym*sym-apyp*syp) + - dhz*(apzm*szm-apzp*szp)); - - T rho = -vfrcinv * - (dhx*(apxm*fxm-apxp*fxp) + - dhy*(apym*fym-apyp*fyp) + - dhz*(apzm*fzm-apzp*fzp)); - - T delta = -vfrcinv * - (dhx*(apxm*oxm-apxp*oxp) + - dhy*(apym*oym-apyp*oyp) + - dhz*(apzm*ozm-apzp*ozp)); - if (is_dirichlet) { - T dapx = apxm-apxp; - T dapy = apym-apyp; - T dapz = apzm-apzp; - T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); - T anorminv = T(1.0)/anorm; - T anrmx = dapx * anorminv; - T anrmy = dapy * anorminv; - T anrmz = dapz * anorminv; - T bctx = ebdata.get(i,j,k,0); - T bcty = ebdata.get(i,j,k,1); - T bctz = ebdata.get(i,j,k,2); - T dx_eb = get_dx_eb(kappa); - - T dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), - std::abs(anrmz)); - - T gx = bctx - dg*anrmx; - T gy = bcty - dg*anrmy; - T gz = bctz - dg*anrmz; - T sx = std::copysign(T(1.0),anrmx); - T sy = std::copysign(T(1.0),anrmy); - T sz = std::copysign(T(1.0),anrmz); - int ii = i - static_cast(sx); - int jj = j - static_cast(sy); - int kk = k - static_cast(sz); - - gx *= sx; - gy *= sy; - gz *= sz; - T gxy = gx*gy; - T gxz = gx*gz; - T gyz = gy*gz; - T gxyz = gx*gy*gz; - T phig_gamma = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); - T phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n) - + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n) - + (gyz + gxyz) * phi(i,jj,kk,n) - + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n) - + (gxz + gxyz) * phi(ii,j,kk,n) - + (gxy + gxyz) * phi(ii,jj,k,n) - + (-gxyz) * phi(ii,jj,kk,n); - - T ba = ebdata.get(i,j,k); - - T dphidn = ( -phig)/dg; - T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); - gamma += vfrcinv*(-dhx)*feb_gamma; - T feb = dphidn * ba * beb(i,j,k,n); - rho += -vfrcinv*(-dhx)*feb; + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += omega*res/(gamma-delta); } - - T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += omega*res/(gamma-delta); } } - } -} - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_gsrb (Box const& box, - Array4 const& phi, Array4 const& rhs, - T alpha, Array4 const& a, - T dhx, T dhy, T dhz, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, - Array4 const& m0, Array4 const& m2, - Array4 const& m4, - Array4 const& m1, Array4 const& m3, - Array4 const& m5, - Array4 const& f0, Array4 const& f2, - Array4 const& f4, - Array4 const& f1, Array4 const& f3, - Array4 const& f5, - Array4 const& ccm, Array4 const& beb, - EBData const& ebdata, - bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, - Box const& vbox, int redblack, int ncomp) noexcept -{ -// amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept - // amrex::Loop here causes gcc 8 to crash. - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); - for (int n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) - { - mlebabeclap_gsrb(i, j, k, n, - phi, rhs, - alpha, a, - dhx, dhy, dhz, - bX, bY, bZ, - m0, m2, m4, - m1, m3, m5, - f0, f2, f4, - f1, f3, f5, - ccm, beb, - ebdata, - is_dirichlet, beta_on_centroid, phi_on_centroid, - vbox, redblack); - }}}} // }); } From b3ed9a9edd359ce4f687b18ce2ede1e1508fa875 Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Mon, 19 Jan 2026 22:15:52 +1100 Subject: [PATCH 06/12] Removed box 3D version of Fapply functions --- .../MLMG/AMReX_MLEBABecLap_3D_K.H | 81 ------------------- .../MLMG/AMReX_MLEBABecLap_F.cpp | 33 ++++---- 2 files changed, 17 insertions(+), 97 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H index 00bbf6d4d8e..e3bb0b08025 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H @@ -205,50 +205,6 @@ void mlebabeclap_adotx_centroid (int i, int j, int k, int n, Array4 const& y, } } - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, - Array4 const& x, Array4 const& a, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, - Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, - Array4 const& ccent, Array4 const& ba, - Array4 const& bcent, Array4 const& beb, - Array4 const& phieb, - const int& domlo_x, const int& domlo_y, const int& domlo_z, - const int& domhi_x, const int& domhi_y, const int& domhi_z, - const bool& on_x_face, const bool& on_y_face, const bool& on_z_face, - bool is_eb_dirichlet, bool is_eb_inhomog, - GpuArray const& dxinv, - T alpha, T beta, int ncomp) noexcept -{ - T dhx = beta*dxinv[0]*dxinv[0]; - T dhy = beta*dxinv[1]*dxinv[1]; - T dhz = beta*dxinv[2]*dxinv[2]; - - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept - { - mlebabeclap_adotx_centroid(i, j, k, n, - y, x, a, - bX, bY, bZ, - flag, vfrc, - apx, apy, apz, - fcx, fcy, fcz, - ccent, ba, bcent, beb, - phieb, - domlo_x, domlo_y, domlo_z, - domhi_x, domhi_y, domhi_z, - on_x_face, on_y_face, on_z_face, - is_eb_dirichlet, is_eb_inhomog, - alpha, dhx, dhy, dhz); - }); -} - template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, @@ -492,43 +448,6 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, } } -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx (Box const& box, Array4 const& y, - Array4 const& x, Array4 const& a, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, Array4 const& ccm, - Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, Array4 const& ba, - Array4 const& bc, Array4 const& beb, - bool is_dirichlet, Array4 const& phieb, - bool is_inhomog, GpuArray const& dxinv, - T alpha, T beta, int ncomp, - bool beta_on_centroid, bool phi_on_centroid) noexcept -{ - T dhx = beta*dxinv[0]*dxinv[0]; - T dhy = beta*dxinv[1]*dxinv[1]; - T dhz = beta*dxinv[2]*dxinv[2]; - - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept - { - mlebabeclap_adotx(i, j, k, n, - y, x, a, - bX, bY, bZ, - ccm, flag, vfrc, - apx, apy, apz, - fcx, fcy, fcz, - ba, bc, beb, - is_dirichlet, phieb, - is_inhomog, alpha, - dhx, dhy, dhz, - beta_on_centroid, phi_on_centroid); - }); -} - template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_ebflux (int i, int j, int k, int n, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 60572e1d357..9e865098096 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -55,6 +55,10 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const bool extdir_y = !(m_geom[amrlev][mglev].isPeriodic(1));, const bool extdir_z = !(m_geom[amrlev][mglev].isPeriodic(2));); + AMREX_D_TERM(const Real dhx = bscalar*dxinvarr[0]*dxinvarr[0];, + const Real dhy = bscalar*dxinvarr[1]*dxinvarr[1];, + const Real dhz = bscalar*dxinvarr[2]*dxinvarr[2];) + #ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && in.isFusingCandidate()) { MultiArray4 foo; @@ -85,10 +89,6 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c bool phi_on_centroid = (m_phi_loc == Location::CellCentroid); bool treat_phi_as_on_centroid = ( phi_on_centroid && (mglev == 0) ); - - Real dhx = bscalar*dxinvarr[0]*dxinvarr[0]; - Real dhy = bscalar*dxinvarr[1]*dxinvarr[1]; - Real dhz = bscalar*dxinvarr[2]*dxinvarr[2]; if (treat_phi_as_on_centroid) { amrex::ParallelFor(out, IntVect(0), ncomp, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept @@ -192,32 +192,33 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c AMREX_D_DECL(extdir_x, extdir_y, extdir_z)); amrex::ignore_unused(ccfab); #else - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, { - mlebabeclap_adotx_centroid(tbx, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), + mlebabeclap_adotx_centroid(i,j,k,n, yfab, xfab, afab, + AMREX_D_DECL(bxfab,byfab,bzfab), flagfab, vfracfab, AMREX_D_DECL(apxfab,apyfab,apzfab), AMREX_D_DECL(fcxfab,fcyfab,fczfab), - ccfab, bafab, bcfab, bebfab, phiebfab, + ccfab, bafab, bcfab, bebfab,phiebfab, AMREX_D_DECL(domlo_x, domlo_y, domlo_z), AMREX_D_DECL(domhi_x, domhi_y, domhi_z), AMREX_D_DECL(extdir_x, extdir_y, extdir_z), - is_eb_dirichlet, is_eb_inhomog, dxinvarr, - ascalar, bscalar, ncomp); - }); + is_eb_dirichlet, is_eb_inhomog, + ascalar, dhx, dhy, dhz); + }) #endif } else { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n, { - mlebabeclap_adotx(tbx, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), + mlebabeclap_adotx(i,j,k,n, yfab, xfab, afab, + AMREX_D_DECL(bxfab,byfab,bzfab), ccmfab, flagfab, vfracfab, AMREX_D_DECL(apxfab,apyfab,apzfab), AMREX_D_DECL(fcxfab,fcyfab,fczfab), bafab, bcfab, bebfab, - is_eb_dirichlet, - phiebfab, - is_eb_inhomog, dxinvarr, - ascalar, bscalar, ncomp, beta_on_centroid, phi_on_centroid); + is_eb_dirichlet, phiebfab, + is_eb_inhomog, ascalar, dhx, dhy, dhz, + beta_on_centroid, phi_on_centroid); }); } } From 00362d10b0fd357e1ed0ee099bb22814bc215b2f Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Mon, 19 Jan 2026 22:53:16 +1100 Subject: [PATCH 07/12] Mostly completed 3D --- Src/EB/AMReX_EBData.H | 50 +- Src/EB/AMReX_EBFabFactory.H | 2 + Src/EB/AMReX_EBFabFactory.cpp | 12 + Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp | 5 +- .../MLMG/AMReX_MLEBABecLap_3D_K.H | 659 +++++------------- .../MLMG/AMReX_MLEBABecLap_F.cpp | 114 +-- 6 files changed, 269 insertions(+), 573 deletions(-) diff --git a/Src/EB/AMReX_EBData.H b/Src/EB/AMReX_EBData.H index 91172f9829d..75ac6dcd3ed 100644 --- a/Src/EB/AMReX_EBData.H +++ b/Src/EB/AMReX_EBData.H @@ -34,6 +34,17 @@ struct EBData return m_real_data[static_cast(T)](i,j,k); } } + + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto get () const noexcept + { + if constexpr (T == EBData_t::cellflag) { + return *m_cell_flag; + } else { + return m_real_data[static_cast(T)]; + } + } template (EBData_t::cellflag); Array4 const* m_cell_flag = nullptr; - Array4 const* m_real_data = nullptr; + Array4 const* AMREX_RESTRICT m_real_data = nullptr; +}; + +struct EBDataArrays +{ + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto get (int box_no, int i, int j, int k) const noexcept + { + if constexpr (T == EBData_t::cellflag) { + return m_cell_flag[box_no](i,j,k); + } else { + return m_real_data[(box_no*real_data_size) + static_cast(T)](i,j,k); + } + } + + template = 0> + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto get (int box_no, int i, int j, int k, int n) const noexcept + { + return m_real_data[(box_no*real_data_size) + static_cast(T)](i,j,k,n); + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + EBData get (int box_no) const noexcept + { + return EBData{m_cell_flag + box_no, m_real_data + (box_no * real_data_size)}; + } + + static constexpr int real_data_size = static_cast(EBData_t::cellflag); + + Array4 const * AMREX_RESTRICT m_cell_flag = nullptr; + Array4 const * AMREX_RESTRICT m_real_data = nullptr; }; } diff --git a/Src/EB/AMReX_EBFabFactory.H b/Src/EB/AMReX_EBFabFactory.H index 2b9f4e79143..8e269628812 100644 --- a/Src/EB/AMReX_EBFabFactory.H +++ b/Src/EB/AMReX_EBFabFactory.H @@ -94,6 +94,8 @@ public: [[nodiscard]] EBData getEBData (MFIter const& mfi) const noexcept; + [[nodiscard]] EBDataArrays getEBDataArrays () const noexcept; + private: EBSupport m_support; diff --git a/Src/EB/AMReX_EBFabFactory.cpp b/Src/EB/AMReX_EBFabFactory.cpp index 1ea28c96711..f1c0807499b 100644 --- a/Src/EB/AMReX_EBFabFactory.cpp +++ b/Src/EB/AMReX_EBFabFactory.cpp @@ -198,6 +198,18 @@ EBFArrayBoxFactory::getEBData (MFIter const& mfi) const noexcept return EBData{pebflag, m_eb_data.data()+EBData::real_data_size*li}; } +EBDataArrays +EBFArrayBoxFactory::getEBDataArrays () const noexcept +{ + auto const& ebflags_ma = this->getMultiEBCellFlagFab().const_arrays(); +#ifdef AMREX_USE_GPU + auto const* pebflag = ebflags_ma.dp; +#else + auto const* pebflag = ebflags_ma.hp; +#endif + return EBDataArrays{pebflag, m_eb_data.data()}; +} + std::unique_ptr makeEBFabFactory (const Geometry& a_geom, const BoxArray& a_ba, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index ab327325da6..00296c0f7a1 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -887,7 +887,7 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const const Real bscalar = m_b_scalar; const int ncomp = getNComp(); -#ifdef AMREX_USE_GPU +// #ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && mf.isFusingCandidate()) { MultiArray4 foo; const auto& xma = mf.arrays(); @@ -916,6 +916,7 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const mlebabeclap_normalize(i, j, k, n, xma[box_no], ascalar, ama[box_no], AMREX_D_DECL(dhx, dhy, dhz), + AMREX_2D_ONLY_ARGS(dh, dxarray) AMREX_D_DECL(bxma[box_no], byma[box_no], bzma[box_no]), ccmma[box_no], flagma[box_no], vfracma[box_no], AMREX_D_DECL(apxma[box_no], apyma[box_no], apzma[box_no]), @@ -928,7 +929,7 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const Gpu::streamSynchronize(); } } else -#endif +// #endif { Array4 foo; MFItInfo mfi_info; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H index e3bb0b08025..9bc8c879efe 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H @@ -2,6 +2,7 @@ #define AMREX_MLEBABECLAP_3D_K_H_ #include #include +#include #include @@ -13,20 +14,35 @@ void mlebabeclap_adotx_centroid (int i, int j, int k, int n, Array4 const& y, Array4 const& x, Array4 const& a, Array4 const& bX, Array4 const& bY, Array4 const& bZ, - Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, - Array4 const& ccent, Array4 const& ba, - Array4 const& bcent, Array4 const& beb, + EBData const& ebdata, + Array4 const& beb, Array4 const& phieb, const int& domlo_x, const int& domlo_y, const int& domlo_z, const int& domhi_x, const int& domhi_y, const int& domhi_z, const bool& on_x_face, const bool& on_y_face, const bool& on_z_face, bool is_eb_dirichlet, bool is_eb_inhomog, - T alpha, T dhx, T dhy, T dhz) noexcept + GpuArray const& dxinv, + T alpha, T beta) noexcept { + const T dhx = beta*dxinv[0]*dxinv[0]; + const T dhy = beta*dxinv[1]*dxinv[1]; + const T dhz = beta*dxinv[2]*dxinv[2]; + + Array4 const& flag = ebdata.get(); + Array4 const& vfrc = ebdata.get(); + + Array4 const& apx = ebdata.get(); + Array4 const& apy = ebdata.get(); + Array4 const& apz = ebdata.get(); + + Array4 const& fcx = ebdata.get(); + Array4 const& fcy = ebdata.get(); + Array4 const& fcz = ebdata.get(); + + Array4 const& ccent = ebdata.get(); + Array4 const& bcent = ebdata.get(); + Array4 const& ba = ebdata.get(); + if (flag(i,j,k).isCovered()) { y(i,j,k,n) = T(0.0); @@ -211,23 +227,26 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, Array4 const& x, Array4 const& a, Array4 const& bX, Array4 const& bY, Array4 const& bZ, Array4 const& ccm, - Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, Array4 const& ba, - Array4 const& bc, Array4 const& beb, + EBData const& ebdata, + Array4 const& beb, bool is_dirichlet, Array4 const& phieb, - bool is_inhomog, T alpha, T dhx, T dhy, T dhz, + bool is_inhomog, T alpha, T beta, + GpuArray const& dxinv, bool beta_on_centroid, bool phi_on_centroid) noexcept { + const T dhx = beta*dxinv[0]*dxinv[0]; + const T dhy = beta*dxinv[1]*dxinv[1]; + const T dhz = beta*dxinv[2]*dxinv[2]; + bool beta_on_center = !(beta_on_centroid); bool phi_on_center = !( phi_on_centroid); - if (flag(i,j,k).isCovered()) + + auto const flag = ebdata.get(i,j,k); + if (flag.isCovered()) { y(i,j,k,n) = T(0.0); } - else if (flag(i,j,k).isRegular()) + else if (flag.isRegular()) { y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) @@ -239,56 +258,60 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, } else { - T kappa = vfrc(i,j,k); - T apxm = apx(i,j,k); - T apxp = apx(i+1,j,k); - T apym = apy(i,j,k); - T apyp = apy(i,j+1,k); - T apzm = apz(i,j,k); - T apzp = apz(i,j,k+1); + T kappa = ebdata.get(i,j,k); + T apxm = ebdata.get(i ,j ,k ); + T apxp = ebdata.get(i+1,j ,k ); + T apym = ebdata.get(i ,j ,k ); + T apyp = ebdata.get(i ,j+1,k ); + T apzm = ebdata.get(i ,j ,k ); + T apzp = ebdata.get(i ,j ,k+1); T fxm = bX(i,j,k,n)*(x(i,j,k,n) - x(i-1,j,k,n)); if (apxm != T(0.0) && apxm != T(1.0)) { - int jj = j + static_cast(std::copysign(T(1.0), fcx(i,j,k,0))); - int kk = k + static_cast(std::copysign(T(1.0), fcx(i,j,k,1))); - T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : T(0.0); - T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : T(0.0); + auto fcx0 = ebdata.get(i,j,k,0); + auto fcx1 = ebdata.get(i,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0), fcx0)); + int kk = k + static_cast(std::copysign(T(1.0), fcx1)); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx1) : T(0.0); if (beta_on_center && phi_on_center) { fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm + fracy*(T(1.0)-fracz)*bX(i,jj,k ,n)*(x(i,jj,k ,n)-x(i-1,jj,k ,n)) + fracz*(T(1.0)-fracy)*bX(i,j ,kk,n)*(x(i,j ,kk,n)-x(i-1,j ,kk,n)) + - fracy* fracz *bX(i,jj,kk,n)*(x(i,jj,kk,n)-x(i-1,jj,kk,n)); + fracy* fracz *bX(i,jj,kk,n)*(x(i,jj,kk,n)-x(i-1,jj,kk,n)); } else if (beta_on_centroid && phi_on_center) { fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*(x(i, j, k,n)-x(i-1, j, k,n)) + fracy *(T(1.0)-fracz)*(x(i,jj, k,n)-x(i-1,jj, k,n)) + fracz *(T(1.0)-fracy)*(x(i, j,kk,n)-x(i-1, j,kk,n)) + - fracy * fracz *(x(i,jj,kk,n)-x(i-1,jj,kk,n)); + fracy * fracz *(x(i,jj,kk,n)-x(i-1,jj,kk,n)); fxm *= bX(i,j,k,n); } } T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i,j,k,n)); if (apxp != T(0.0) && apxp != T(1.0)) { - int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k,0))); - int kk = k + static_cast(std::copysign(T(1.0),fcx(i+1,j,k,1))); - T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx(i+1,j,k,0)) : T(0.0); - T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) ? std::abs(fcx(i+1,j,k,1)) : T(0.0); + auto fcx0 = ebdata.get(i+1,j,k,0); + auto fcx1 = ebdata.get(i+1,j,k,1); + int jj = j + static_cast(std::copysign(T(1.0),fcx0)); + int kk = k + static_cast(std::copysign(T(1.0),fcx1)); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) ? std::abs(fcx1) : T(0.0); if (beta_on_center && phi_on_center) { fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp + fracy*(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(x(i+1,jj,k ,n)-x(i,jj,k ,n)) + fracz*(T(1.0)-fracy)*bX(i+1,j ,kk,n)*(x(i+1,j ,kk,n)-x(i,j ,kk,n)) + - fracy* fracz *bX(i+1,jj,kk,n)*(x(i+1,jj,kk,n)-x(i,jj,kk,n)); + fracy* fracz *bX(i+1,jj,kk,n)*(x(i+1,jj,kk,n)-x(i,jj,kk,n)); } else if (beta_on_centroid && phi_on_center) { fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(x(i+1, j, k,n)-x(i, j, k,n)) + fracy *(T(1.0)-fracz)*(x(i+1,jj, k,n)-x(i,jj, k,n)) + fracz *(T(1.0)-fracy)*(x(i+1, j,kk,n)-x(i, j,kk,n)) + - fracy * fracz *(x(i+1,jj,kk,n)-x(i,jj,kk,n)); + fracy * fracz *(x(i+1,jj,kk,n)-x(i,jj,kk,n)); fxp *= bX(i+1,j,k,n); } @@ -296,23 +319,25 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, T fym = bY(i,j,k,n)*(x(i,j,k,n) - x(i,j-1,k,n)); if (apym != T(0.0) && apym != T(1.0)) { - int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k,0))); - int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j,k,1))); - T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : T(0.0); - T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : T(0.0); + auto fcy0 = ebdata.get(i,j,k,0); + auto fcy1 = ebdata.get(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy1) : T(0.0); if (beta_on_center && phi_on_center) { fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym + fracx*(T(1.0)-fracz)*bY(ii,j,k ,n)*(x(ii,j,k ,n)-x(ii,j-1,k ,n)) + fracz*(T(1.0)-fracx)*bY(i ,j,kk,n)*(x(i ,j,kk,n)-x(i ,j-1,kk,n)) + - fracx* fracz *bY(ii,j,kk,n)*(x(ii,j,kk,n)-x(ii,j-1,kk,n)); + fracx* fracz *bY(ii,j,kk,n)*(x(ii,j,kk,n)-x(ii,j-1,kk,n)); } else if (beta_on_centroid && phi_on_center) { fym = (T(1.0)-fracx)*(T(1.0)-fracz)*(x( i,j, k,n)-x( i,j-1, k,n)) + fracx *(T(1.0)-fracz)*(x(ii,j, k,n)-x(ii,j-1, k,n)) + fracz *(T(1.0)-fracx)*(x(i ,j,kk,n)-x( i,j-1,kk,n)) + - fracx * fracz *(x(ii,j,kk,n)-x(ii,j-1,kk,n)); + fracx * fracz *(x(ii,j,kk,n)-x(ii,j-1,kk,n)); fym *= bY(i,j,k,n); } @@ -320,23 +345,25 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j,k,n)); if (apyp != T(0.0) && apyp != T(1.0)) { - int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k,0))); - int kk = k + static_cast(std::copysign(T(1.0),fcy(i,j+1,k,1))); - T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy(i,j+1,k,0)) : T(0.0); - T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) ? std::abs(fcy(i,j+1,k,1)) : T(0.0); + auto fcy0 = ebdata.get(i,j+1,k,0); + auto fcy1 = ebdata.get(i,j+1,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcy0)); + int kk = k + static_cast(std::copysign(T(1.0),fcy1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy0) : T(0.0); + T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) ? std::abs(fcy1) : T(0.0); if (beta_on_center && phi_on_center) { fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp + fracx*(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(x(ii,j+1,k ,n)-x(ii,j,k ,n)) + fracz*(T(1.0)-fracx)*bY(i ,j+1,kk,n)*(x(i ,j+1,kk,n)-x(i ,j,kk,n)) + - fracx* fracz *bY(ii,j+1,kk,n)*(x(ii,j+1,kk,n)-x(ii,j,kk,n)); + fracx* fracz *bY(ii,j+1,kk,n)*(x(ii,j+1,kk,n)-x(ii,j,kk,n)); } else if (beta_on_centroid && phi_on_center) { fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(x( i,j+1, k,n)-x( i,j, k,n)) + fracx *(T(1.0)-fracz)*(x(ii,j+1, k,n)-x(ii,j, k,n)) + fracz *(T(1.0)-fracx)*(x( i,j+1,kk,n)-x( i,j,kk,n)) + - fracx * fracz *(x(ii,j+1,kk,n)-x(ii,j,kk,n)); + fracx * fracz *(x(ii,j+1,kk,n)-x(ii,j,kk,n)); fyp *= bY(i,j+1,k,n); } @@ -344,23 +371,25 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, T fzm = bZ(i,j,k,n)*(x(i,j,k,n) - x(i,j,k-1,n)); if (apzm != T(0.0) && apzm != T(1.0)) { - int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k,0))); - int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k,1))); - T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : T(0.0); - T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : T(0.0); + auto fcz0 = ebdata.get(i,j,k,0); + auto fcz1 = ebdata.get(i,j,k,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz1) : T(0.0); if (beta_on_center && phi_on_center) { fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm + fracx*(T(1.0)-fracy)*bZ(ii,j ,k,n)*(x(ii,j ,k,n)-x(ii,j ,k-1,n)) + fracy*(T(1.0)-fracx)*bZ(i ,jj,k,n)*(x(i ,jj,k,n)-x(i ,jj,k-1,n)) + - fracx* fracy *bZ(ii,jj,k,n)*(x(ii,jj,k,n)-x(ii,jj,k-1,n)); + fracx* fracy *bZ(ii,jj,k,n)*(x(ii,jj,k,n)-x(ii,jj,k-1,n)); } else if (beta_on_centroid && phi_on_center) { fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*(x( i, j,k,n)-x( i, j,k-1,n)) + fracx *(T(1.0)-fracy)*(x(ii, j,k,n)-x(ii, j,k-1,n)) + fracy *(T(1.0)-fracx)*(x( i,jj,k,n)-x( i,jj,k-1,n)) + - fracx * fracy *(x(ii,jj,k,n)-x(ii,jj,k-1,n)); + fracx * fracy *(x(ii,jj,k,n)-x(ii,jj,k-1,n)); fzm *= bZ(i,j,k,n); } @@ -368,23 +397,25 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, T fzp = bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k,n)); if (apzp != T(0.0) && apzp != T(1.0)) { - int ii = i + static_cast(std::copysign(T(1.0),fcz(i,j,k+1,0))); - int jj = j + static_cast(std::copysign(T(1.0),fcz(i,j,k+1,1))); - T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) ? std::abs(fcz(i,j,k+1,0)) : T(0.0); - T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) ? std::abs(fcz(i,j,k+1,1)) : T(0.0); + auto fcz0 = ebdata.get(i,j,k+1,0); + auto fcz1 = ebdata.get(i,j,k+1,1); + int ii = i + static_cast(std::copysign(T(1.0),fcz0)); + int jj = j + static_cast(std::copysign(T(1.0),fcz1)); + T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) ? std::abs(fcz0) : T(0.0); + T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) ? std::abs(fcz1) : T(0.0); if (beta_on_center && phi_on_center) { fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp + fracx*(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(x(ii,j ,k+1,n)-x(ii,j ,k,n)) + fracy*(T(1.0)-fracx)*bZ(i ,jj,k+1,n)*(x(i ,jj,k+1,n)-x(i ,jj,k,n)) + - fracx* fracy *bZ(ii,jj,k+1,n)*(x(ii,jj,k+1,n)-x(ii,jj,k,n)); + fracx* fracy *bZ(ii,jj,k+1,n)*(x(ii,jj,k+1,n)-x(ii,jj,k,n)); } else if (beta_on_centroid && phi_on_center) { fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(x( i, j,k+1,n)-x( i, j,k,n)) + fracx *(T(1.0)-fracy)*(x(ii, j,k+1,n)-x(ii, j,k,n)) + fracy *(T(1.0)-fracx)*(x( i,jj,k+1,n)-x( i,jj,k,n)) + - fracx * fracy *(x(ii,jj,k+1,n)-x(ii,jj,k,n)); + fracx * fracy *(x(ii,jj,k+1,n)-x(ii,jj,k,n)); fzp *= bZ(i,j,k+1,n); } @@ -403,9 +434,9 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, T phib = is_inhomog ? phieb(i,j,k,n) : T(0.0); - T bctx = bc(i,j,k,0); - T bcty = bc(i,j,k,1); - T bctz = bc(i,j,k,2); + T bctx = ebdata.get(i,j,k,0); + T bcty = ebdata.get(i,j,k,1); + T bctz = ebdata.get(i,j,k,2); T dx_eb = get_dx_eb(kappa); T dg = dx_eb / amrex::max(std::abs(anrmx), std::abs(anrmy), @@ -438,7 +469,8 @@ void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, T dphidn = (phib-phig)/dg; - feb = dphidn * ba(i,j,k) * beb(i,j,k,n); + T ba = ebdata.get(i,j,k); + feb = dphidn * ba * beb(i,j,k,n); } y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * @@ -533,7 +565,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_gsrb (int i, int j, int k, int n, Array4 const& phi, Array4 const& rhs, T alpha, Array4 const& a, - T dhx, T dhy, T dhz, + T beta, GpuArray const& dxinv, Array4 const& bX, Array4 const& bY, Array4 const& bZ, Array4 const& m0, Array4 const& m2, @@ -545,21 +577,22 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, Array4 const& f1, Array4 const& f3, Array4 const& f5, Array4 const& ccm, Array4 const& beb, - Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, Array4 const& ba, - Array4 const& bcent, + EBData const& ebdata, bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, Box const& vbox, int redblack) noexcept { constexpr T omega = 1.15; + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + const T dhx = beta*dxinv[0]*dxinv[0]; + const T dhy = beta*dxinv[1]*dxinv[1]; + const T dhz = beta*dxinv[2]*dxinv[2]; + if ((i+j+k+redblack) % 2 == 0) { - const auto vlo = amrex::lbound(vbox); - const auto vhi = amrex::ubound(vbox); - if (flag(i,j,k).isCovered()) + auto const flag = ebdata.get(i,j,k); + if (flag.isCovered()) { phi(i,j,k,n) = T(0.0); } @@ -578,7 +611,7 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) ? f5(i,j,vhi.z,n) : T(0.0); - if (flag(i,j,k).isRegular()) + if (flag.isRegular()) { T gamma = alpha*a(i,j,k) + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n)) @@ -586,35 +619,35 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n)); T rho = dhx*(bX(i+1,j ,k ,n)*phi(i+1,j ,k ,n) + - bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) - + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + - bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) - + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + - bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); + bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) + + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + + bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) + + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + + bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) - + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) - + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) + + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); phi(i,j,k,n) += omega*res/(gamma-delta); } else { - T kappa = vfrc(i,j,k); - T apxm = apx(i ,j ,k ); - T apxp = apx(i+1,j ,k ); - T apym = apy(i ,j ,k ); - T apyp = apy(i ,j+1,k ); - T apzm = apz(i ,j ,k ); - T apzp = apz(i ,j ,k+1); + T kappa = ebdata.get(i,j,k); + T apxm = ebdata.get(i ,j ,k ); + T apxp = ebdata.get(i+1,j ,k ); + T apym = ebdata.get(i ,j ,k ); + T apyp = ebdata.get(i ,j+1,k ); + T apzm = ebdata.get(i ,j ,k ); + T apzp = ebdata.get(i ,j ,k+1); T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); T oxm = -bX(i,j,k,n)*cf0; T sxm = bX(i,j,k,n); if (apxm != T(0.0) && apxm != T(1.0)) { - auto fcx0 = fcx(i,j,k,0); - auto fcx1 = fcx(i,j,k,1); + auto fcx0 = ebdata.get(i,j,k,0); + auto fcx1 = ebdata.get(i,j,k,1); int jj = j + static_cast(std::copysign(T(1.0), fcx0)); int kk = k + static_cast(std::copysign(T(1.0), fcx1)); T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) @@ -626,14 +659,14 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm + fracy *(T(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n)) +(T(1.0)-fracy)* fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n)) - + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); } else if (beta_on_centroid && !phi_on_centroid) { fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*( -phi(i-1, j, k,n)) + fracy *(T(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n)) +(T(1.0)-fracy)* fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n)) - + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); + + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); fxm *= bX(i,j,k,n); } @@ -645,8 +678,8 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T oxp = bX(i+1,j,k,n)*cf3; T sxp = -bX(i+1,j,k,n); if (apxp != T(0.0) && apxp != T(1.0)) { - auto fcx0 = fcx(i+1,j,k,0); - auto fcx1 = fcx(i+1,j,k,1); + auto fcx0 = ebdata.get(i+1,j,k,0); + auto fcx1 = ebdata.get(i+1,j,k,1); int jj = j + static_cast(std::copysign(T(1.0),fcx0)); int kk = k + static_cast(std::copysign(T(1.0),fcx1)); T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) @@ -658,18 +691,16 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp + fracy *(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n)) +(T(1.0)-fracy)* fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n)) - + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); } else if (beta_on_centroid && !phi_on_centroid) { fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(phi(i+1, j, k,n) ) + fracy *(T(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) + fracz *(T(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) + - fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); + fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); fxp *= bX(i+1,j,k,n); - } - oxp = T(0.0); sxp = (T(1.0)-fracy)*(T(1.0)-fracz)*sxp; } @@ -678,8 +709,8 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T oym = -bY(i,j,k,n)*cf1; T sym = bY(i,j,k,n); if (apym != T(0.0) && apym != T(1.0)) { - auto fcy0 = fcy(i,j,k,0); - auto fcy1 = fcy(i,j,k,1); + auto fcy0 = ebdata.get(i,j,k,0); + auto fcy1 = ebdata.get(i,j,k,1); int ii = i + static_cast(std::copysign(T(1.0),fcy0)); int kk = k + static_cast(std::copysign(T(1.0),fcy1)); T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) @@ -698,9 +729,8 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, fym = (T(1.0)-fracx)*(T(1.0)-fracz)*( -phi( i,j-1, k,n)) + fracx *(T(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n)) + (T(1.0)-fracx)* fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n)) - + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); + + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); fym *= bY(i,j,k,n); - } oym = T(0.0); sym = (T(1.0)-fracx)*(T(1.0)-fracz)*sym; @@ -710,8 +740,8 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T oyp = bY(i,j+1,k,n)*cf4; T syp = -bY(i,j+1,k,n); if (apyp != T(0.0) && apyp != T(1.0)) { - auto fcy0 = fcy(i,j+1,k,0); - auto fcy1 = fcy(i,j+1,k,1); + auto fcy0 = ebdata.get(i,j+1,k,0); + auto fcy1 = ebdata.get(i,j+1,k,1); int ii = i + static_cast(std::copysign(T(1.0),fcy0)); int kk = k + static_cast(std::copysign(T(1.0),fcy1)); T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) @@ -723,16 +753,15 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp + fracx *(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n)) + (T(1.0)-fracx)* fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n)) - + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); + + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); } else if (beta_on_centroid && !phi_on_centroid) { fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(phi( i,j+1, k,n) ) + fracx *(T(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n)) + (T(1.0)-fracx)* fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n)) - + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); + + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); fyp *= bY(i,j+1,k,n); - } oyp = T(0.0); syp = (T(1.0)-fracx)*(T(1.0)-fracz)*syp; @@ -742,8 +771,8 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T ozm = -bZ(i,j,k,n)*cf2; T szm = bZ(i,j,k,n); if (apzm != T(0.0) && apzm != T(1.0)) { - auto fcz0 = fcz(i,j,k,0); - auto fcz1 = fcz(i,j,k,1); + auto fcz0 = ebdata.get(i,j,k,0); + auto fcz1 = ebdata.get(i,j,k,1); int ii = i + static_cast(std::copysign(T(1.0),fcz0)); int jj = j + static_cast(std::copysign(T(1.0),fcz1)); T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) @@ -755,14 +784,14 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm + fracx *(T(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) +(T(1.0)-fracx)* fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n)) - + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); } else if (beta_on_centroid && !phi_on_centroid) { fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*( -phi( i, j,k-1,n)) + fracx *(T(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) + (T(1.0)-fracx)* fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n)) - + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); + + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); fzm *= bZ(i,j,k,n); } @@ -774,8 +803,8 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T ozp = bZ(i,j,k+1,n)*cf5; T szp = -bZ(i,j,k+1,n); if (apzp != T(0.0) && apzp != T(1.0)) { - auto fcz0 = fcz(i,j,k+1,0); - auto fcz1 = fcz(i,j,k+1,1); + auto fcz0 = ebdata.get(i,j,k+1,0); + auto fcz1 = ebdata.get(i,j,k+1,1); int ii = i + static_cast(std::copysign(T(1.0),fcz0)); int jj = j + static_cast(std::copysign(T(1.0),fcz1)); T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) @@ -787,14 +816,14 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp + fracx *(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n)) + (T(1.0)-fracx)* fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n)) - + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); } else if (beta_on_centroid && !phi_on_centroid) { fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(phi( i, j,k+1,n) ) + fracx *(T(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n)) + (T(1.0)-fracx)* fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n)) - + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); + + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); fzp *= bZ(i,j,k+1,n); } @@ -805,18 +834,18 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T vfrcinv = T(1.0)/kappa; T gamma = alpha*a(i,j,k) + vfrcinv * (dhx*(apxm*sxm-apxp*sxp) + - dhy*(apym*sym-apyp*syp) + - dhz*(apzm*szm-apzp*szp)); + dhy*(apym*sym-apyp*syp) + + dhz*(apzm*szm-apzp*szp)); T rho = -vfrcinv * (dhx*(apxm*fxm-apxp*fxp) + - dhy*(apym*fym-apyp*fyp) + - dhz*(apzm*fzm-apzp*fzp)); + dhy*(apym*fym-apyp*fyp) + + dhz*(apzm*fzm-apzp*fzp)); T delta = -vfrcinv * (dhx*(apxm*oxm-apxp*oxp) + - dhy*(apym*oym-apyp*oyp) + - dhz*(apzm*ozm-apzp*ozp)); + dhy*(apym*oym-apyp*oyp) + + dhz*(apzm*ozm-apzp*ozp)); if (is_dirichlet) { T dapx = apxm-apxp; @@ -827,13 +856,13 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T anrmx = dapx * anorminv; T anrmy = dapy * anorminv; T anrmz = dapz * anorminv; - T bctx = bcent(i,j,k,0); - T bcty = bcent(i,j,k,1); - T bctz = bcent(i,j,k,2); + T bctx = ebdata.get(i,j,k,0); + T bcty = ebdata.get(i,j,k,1); + T bctz = ebdata.get(i,j,k,2); T dx_eb = get_dx_eb(kappa); T dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), - std::abs(anrmz)); + std::abs(anrmz)); T gx = bctx - dg*anrmx; T gy = bcty - dg*anrmy; @@ -853,18 +882,20 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, T gyz = gy*gz; T gxyz = gx*gy*gz; T phig_gamma = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); - T phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n) - + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n) - + (gyz + gxyz) * phi(i,jj,kk,n) - + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n) - + (gxz + gxyz) * phi(ii,j,kk,n) - + (gxy + gxyz) * phi(ii,jj,k,n) - + (-gxyz) * phi(ii,jj,kk,n); + T phig = (-gz - gxz - gyz - gxyz) * phi(i ,j ,kk,n) + + (-gy - gxy - gyz - gxyz) * phi(i ,jj,k ,n) + + (gyz + gxyz) * phi(i ,jj,kk,n) + + (-gx - gxy - gxz - gxyz) * phi(ii,j ,k ,n) + + (gxz + gxyz) * phi(ii,j ,kk,n) + + (gxy + gxyz) * phi(ii,jj,k ,n) + + (-gxyz) * phi(ii,jj,kk,n); + + T ba = ebdata.get(i,j,k); T dphidn = ( -phig)/dg; - T feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); + T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); gamma += vfrcinv*(-dhx)*feb_gamma; - T feb = dphidn * ba(i,j,k) * beb(i,j,k,n); + T feb = dphidn * ba * beb(i,j,k,n); rho += -vfrcinv*(-dhx)*feb; } @@ -875,364 +906,6 @@ void mlebabeclap_gsrb (int i, int j, int k, int n, } } -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_gsrb (Box const& box, - Array4 const& phi, Array4 const& rhs, - T alpha, Array4 const& a, - T dhx, T dhy, T dhz, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, - Array4 const& m0, Array4 const& m2, - Array4 const& m4, - Array4 const& m1, Array4 const& m3, - Array4 const& m5, - Array4 const& f0, Array4 const& f2, - Array4 const& f4, - Array4 const& f1, Array4 const& f3, - Array4 const& f5, - Array4 const& ccm, Array4 const& beb, - EBData const& ebdata, - bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, - Box const& vbox, int redblack, int ncomp) noexcept -{ - constexpr T omega = 1.15; - - const auto vlo = amrex::lbound(vbox); - const auto vhi = amrex::ubound(vbox); - -// amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept - // amrex::Loop here causes gcc 8 to crash. - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); - for (int n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) - { - if ((i+j+k+redblack) % 2 == 0) - { - auto const flag = ebdata.get(i,j,k); - if (flag.isCovered()) - { - phi(i,j,k,n) = T(0.0); - } - else - { - T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) - ? f0(vlo.x,j,k,n) : T(0.0); - T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) - ? f1(i,vlo.y,k,n) : T(0.0); - T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) - ? f2(i,j,vlo.z,n) : T(0.0); - T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) - ? f3(vhi.x,j,k,n) : T(0.0); - T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) - ? f4(i,vhi.y,k,n) : T(0.0); - T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) - ? f5(i,j,vhi.z,n) : T(0.0); - - if (flag.isRegular()) - { - T gamma = alpha*a(i,j,k) - + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n)) - + dhy*(bY(i,j+1,k,n) + bY(i,j,k,n)) - + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n)); - - T rho = dhx*(bX(i+1,j ,k ,n)*phi(i+1,j ,k ,n) + - bX(i ,j ,k ,n)*phi(i-1,j ,k ,n)) - + dhy*(bY(i ,j+1,k ,n)*phi(i ,j+1,k ,n) + - bY(i ,j ,k ,n)*phi(i ,j-1,k ,n)) - + dhz*(bZ(i ,j ,k+1,n)*phi(i ,j ,k+1,n) + - bZ(i ,j ,k ,n)*phi(i ,j ,k-1,n)); - - T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) - + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) - + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5); - - T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += omega*res/(gamma-delta); - } - else - { - T kappa = ebdata.get(i,j,k); - T apxm = ebdata.get(i ,j ,k ); - T apxp = ebdata.get(i+1,j ,k ); - T apym = ebdata.get(i ,j ,k ); - T apyp = ebdata.get(i ,j+1,k ); - T apzm = ebdata.get(i ,j ,k ); - T apzp = ebdata.get(i ,j ,k+1); - - T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); - T oxm = -bX(i,j,k,n)*cf0; - T sxm = bX(i,j,k,n); - if (apxm != T(0.0) && apxm != T(1.0)) { - auto fcx0 = ebdata.get(i,j,k,0); - auto fcx1 = ebdata.get(i,j,k,1); - int jj = j + static_cast(std::copysign(T(1.0), fcx0)); - int kk = k + static_cast(std::copysign(T(1.0), fcx1)); - T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) - ? std::abs(fcx0) : T(0.0); - T fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) - ? std::abs(fcx1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*fxm - + fracy *(T(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n)) - +(T(1.0)-fracy)* fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n)) - + fracy * fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxm = (T(1.0)-fracy)*(T(1.0)-fracz)*( -phi(i-1, j, k,n)) - + fracy *(T(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n)) - +(T(1.0)-fracy)* fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n)) - + fracy * fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n)); - fxm *= bX(i,j,k,n); - - } - oxm = T(0.0); - sxm = (T(1.0)-fracy)*(T(1.0)-fracz)*sxm; - } - - T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); - T oxp = bX(i+1,j,k,n)*cf3; - T sxp = -bX(i+1,j,k,n); - if (apxp != T(0.0) && apxp != T(1.0)) { - auto fcx0 = ebdata.get(i+1,j,k,0); - auto fcx1 = ebdata.get(i+1,j,k,1); - int jj = j + static_cast(std::copysign(T(1.0),fcx0)); - int kk = k + static_cast(std::copysign(T(1.0),fcx1)); - T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) - ? std::abs(fcx0) : T(0.0); - T fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) - ? std::abs(fcx1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*fxp - + fracy *(T(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n)) - +(T(1.0)-fracy)* fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n)) - + fracy * fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxp = (T(1.0)-fracy)*(T(1.0)-fracz)*(phi(i+1, j, k,n) ) + - fracy *(T(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) + - fracz *(T(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) + - fracy * fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n)); - fxp *= bX(i+1,j,k,n); - - } - - oxp = T(0.0); - sxp = (T(1.0)-fracy)*(T(1.0)-fracz)*sxp; - } - - T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); - T oym = -bY(i,j,k,n)*cf1; - T sym = bY(i,j,k,n); - if (apym != T(0.0) && apym != T(1.0)) { - auto fcy0 = ebdata.get(i,j,k,0); - auto fcy1 = ebdata.get(i,j,k,1); - int ii = i + static_cast(std::copysign(T(1.0),fcy0)); - int kk = k + static_cast(std::copysign(T(1.0),fcy1)); - T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) - ? std::abs(fcy0) : T(0.0); - T fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) - ? std::abs(fcy1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fym = (T(1.0)-fracx)*(T(1.0)-fracz)*fym - + fracx *(T(1.0)-fracz)*bY(ii,j,k ,n)*(phi(ii,j,k ,n)-phi(ii,j-1,k ,n)) - + (T(1.0)-fracx)* fracz *bY(i ,j,kk,n)*(phi(i ,j,kk,n)-phi(i ,j-1,kk,n)) - + fracx * fracz *bY(ii,j,kk,n)*(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fym = (T(1.0)-fracx)*(T(1.0)-fracz)*( -phi( i,j-1, k,n)) - + fracx *(T(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n)) - + (T(1.0)-fracx)* fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n)) - + fracx * fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n)); - fym *= bY(i,j,k,n); - - } - oym = T(0.0); - sym = (T(1.0)-fracx)*(T(1.0)-fracz)*sym; - } - - T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); - T oyp = bY(i,j+1,k,n)*cf4; - T syp = -bY(i,j+1,k,n); - if (apyp != T(0.0) && apyp != T(1.0)) { - auto fcy0 = ebdata.get(i,j+1,k,0); - auto fcy1 = ebdata.get(i,j+1,k,1); - int ii = i + static_cast(std::copysign(T(1.0),fcy0)); - int kk = k + static_cast(std::copysign(T(1.0),fcy1)); - T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) - ? std::abs(fcy0) : T(0.0); - T fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) - ? std::abs(fcy1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*fyp - + fracx *(T(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n)) - + (T(1.0)-fracx)* fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n)) - + fracx * fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fyp = (T(1.0)-fracx)*(T(1.0)-fracz)*(phi( i,j+1, k,n) ) - + fracx *(T(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n)) - + (T(1.0)-fracx)* fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n)) - + fracx * fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n)); - fyp *= bY(i,j+1,k,n); - - } - oyp = T(0.0); - syp = (T(1.0)-fracx)*(T(1.0)-fracz)*syp; - } - - T fzm = -bZ(i,j,k,n)*phi(i,j,k-1,n); - T ozm = -bZ(i,j,k,n)*cf2; - T szm = bZ(i,j,k,n); - if (apzm != T(0.0) && apzm != T(1.0)) { - auto fcz0 = ebdata.get(i,j,k,0); - auto fcz1 = ebdata.get(i,j,k,1); - int ii = i + static_cast(std::copysign(T(1.0),fcz0)); - int jj = j + static_cast(std::copysign(T(1.0),fcz1)); - T fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) - ? std::abs(fcz0) : T(0.0); - T fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) - ? std::abs(fcz1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*fzm - + fracx *(T(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) - +(T(1.0)-fracx)* fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n)) - + fracx * fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fzm = (T(1.0)-fracx)*(T(1.0)-fracy)*( -phi( i, j,k-1,n)) - + fracx *(T(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n)) - + (T(1.0)-fracx)* fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n)) - + fracx * fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n)); - fzm *= bZ(i,j,k,n); - - } - ozm = T(0.0); - szm = (T(1.0)-fracx)*(T(1.0)-fracy)*szm; - } - - T fzp = bZ(i,j,k+1,n)*phi(i,j,k+1,n); - T ozp = bZ(i,j,k+1,n)*cf5; - T szp = -bZ(i,j,k+1,n); - if (apzp != T(0.0) && apzp != T(1.0)) { - auto fcz0 = ebdata.get(i,j,k+1,0); - auto fcz1 = ebdata.get(i,j,k+1,1); - int ii = i + static_cast(std::copysign(T(1.0),fcz0)); - int jj = j + static_cast(std::copysign(T(1.0),fcz1)); - T fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) - ? std::abs(fcz0) : T(0.0); - T fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) - ? std::abs(fcz1) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*fzp - + fracx *(T(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n)) - + (T(1.0)-fracx)* fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n)) - + fracx * fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fzp = (T(1.0)-fracx)*(T(1.0)-fracy)*(phi( i, j,k+1,n) ) - + fracx *(T(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n)) - + (T(1.0)-fracx)* fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n)) - + fracx * fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n)); - fzp *= bZ(i,j,k+1,n); - - } - ozp = T(0.0); - szp = (T(1.0)-fracx)*(T(1.0)-fracy)*szp; - } - - T vfrcinv = T(1.0)/kappa; - T gamma = alpha*a(i,j,k) + vfrcinv * - (dhx*(apxm*sxm-apxp*sxp) + - dhy*(apym*sym-apyp*syp) + - dhz*(apzm*szm-apzp*szp)); - - T rho = -vfrcinv * - (dhx*(apxm*fxm-apxp*fxp) + - dhy*(apym*fym-apyp*fyp) + - dhz*(apzm*fzm-apzp*fzp)); - - T delta = -vfrcinv * - (dhx*(apxm*oxm-apxp*oxp) + - dhy*(apym*oym-apyp*oyp) + - dhz*(apzm*ozm-apzp*ozp)); - - if (is_dirichlet) { - T dapx = apxm-apxp; - T dapy = apym-apyp; - T dapz = apzm-apzp; - T anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz); - T anorminv = T(1.0)/anorm; - T anrmx = dapx * anorminv; - T anrmy = dapy * anorminv; - T anrmz = dapz * anorminv; - T bctx = ebdata.get(i,j,k,0); - T bcty = ebdata.get(i,j,k,1); - T bctz = ebdata.get(i,j,k,2); - T dx_eb = get_dx_eb(kappa); - - T dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy), - std::abs(anrmz)); - - T gx = bctx - dg*anrmx; - T gy = bcty - dg*anrmy; - T gz = bctz - dg*anrmz; - T sx = std::copysign(T(1.0),anrmx); - T sy = std::copysign(T(1.0),anrmy); - T sz = std::copysign(T(1.0),anrmz); - int ii = i - static_cast(sx); - int jj = j - static_cast(sy); - int kk = k - static_cast(sz); - - gx *= sx; - gy *= sy; - gz *= sz; - T gxy = gx*gy; - T gxz = gx*gz; - T gyz = gy*gz; - T gxyz = gx*gy*gz; - T phig_gamma = (T(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz); - T phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n) - + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n) - + (gyz + gxyz) * phi(i,jj,kk,n) - + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n) - + (gxz + gxyz) * phi(ii,j,kk,n) - + (gxy + gxyz) * phi(ii,jj,k,n) - + (-gxyz) * phi(ii,jj,kk,n); - - T ba = ebdata.get(i,j,k); - - T dphidn = ( -phig)/dg; - T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); - gamma += vfrcinv*(-dhx)*feb_gamma; - T feb = dphidn * ba * beb(i,j,k,n); - rho += -vfrcinv*(-dhx)*feb; - } - - T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += omega*res/(gamma-delta); - } - } - } - }}}} -// }); -} - template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebabeclap_flux_x (Box const& box, Array4 const& fx, Array4 const& apx, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 9e865098096..521ca64cf17 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -59,7 +59,7 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const Real dhy = bscalar*dxinvarr[1]*dxinvarr[1];, const Real dhz = bscalar*dxinvarr[2]*dxinvarr[2];) -#ifdef AMREX_USE_GPU +// #ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && in.isFusingCandidate()) { MultiArray4 foo; const auto& xma = in.arrays(); @@ -69,17 +69,7 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const auto& byma = bycoef.const_arrays();, const auto& bzma = bzcoef.const_arrays();); auto const& ccmma = ccmask.const_arrays(); - auto const& flagma = flags->const_arrays(); - auto const& vfracma = vfrac->const_arrays(); - AMREX_D_TERM(auto const& apxma = area[0]->const_arrays();, - auto const& apyma = area[1]->const_arrays();, - auto const& apzma = area[2]->const_arrays();); - AMREX_D_TERM(auto const& fcxma = fcent[0]->const_arrays();, - auto const& fcyma = fcent[1]->const_arrays();, - auto const& fczma = fcent[2]->const_arrays();); - auto const& bama = barea->const_arrays(); - auto const& bcma = bcent->const_arrays(); - auto const& ccentma = ccent->const_arrays(); + auto const& ebdata_ma = factory->getEBDataArrays(); auto const& bebma = (is_eb_dirichlet) ? m_eb_b_coeffs[amrlev][mglev]->const_arrays() : foo; auto const& phiebma = (is_eb_dirichlet && is_eb_inhomog) @@ -95,16 +85,13 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c { mlebabeclap_adotx_centroid(i,j,k,n, yma[box_no], xma[box_no], ama[box_no], AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), - flagma[box_no], vfracma[box_no], - AMREX_D_DECL(apxma[box_no],apyma[box_no],apzma[box_no]), - AMREX_D_DECL(fcxma[box_no],fcyma[box_no],fczma[box_no]), - ccentma[box_no], bama[box_no], bcma[box_no], + ebdata_ma.get(box_no), bebma[box_no], phiebma[box_no], AMREX_D_DECL(domlo_x, domlo_y, domlo_z), AMREX_D_DECL(domhi_x, domhi_y, domhi_z), AMREX_D_DECL(extdir_x, extdir_y, extdir_z), - is_eb_dirichlet, is_eb_inhomog, - ascalar, dhx, dhy, dhz); + is_eb_dirichlet, is_eb_inhomog, dxinvarr, + ascalar, bscalar); }); } else { amrex::ParallelFor(out, IntVect(0), ncomp, @@ -112,20 +99,18 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c { mlebabeclap_adotx(i,j,k,n, yma[box_no], xma[box_no], ama[box_no], AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), - ccmma[box_no], flagma[box_no], vfracma[box_no], - AMREX_D_DECL(apxma[box_no],apyma[box_no],apzma[box_no]), - AMREX_D_DECL(fcxma[box_no],fcyma[box_no],fczma[box_no]), - bama[box_no], bcma[box_no], bebma[box_no], + ccmma[box_no], ebdata_ma.get(box_no), + bebma[box_no], is_eb_dirichlet, phiebma[box_no], - is_eb_inhomog, ascalar, dhx, dhy, dhz, - beta_on_centroid, phi_on_centroid); + is_eb_inhomog, ascalar, bscalar, + dxinvarr, beta_on_centroid, phi_on_centroid); }); } if (!Gpu::inNoSyncRegion()) { Gpu::streamSynchronize(); } } else -#endif +// #endif { Array4 foo; MFItInfo mfi_info; @@ -159,17 +144,7 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c }); } else { Array4 const& ccmfab = ccmask.const_array(mfi); - Array4 const& flagfab = flags->const_array(mfi); - Array4 const& vfracfab = vfrac->const_array(mfi); - AMREX_D_TERM(Array4 const& apxfab = area[0]->const_array(mfi);, - Array4 const& apyfab = area[1]->const_array(mfi);, - Array4 const& apzfab = area[2]->const_array(mfi);); - AMREX_D_TERM(Array4 const& fcxfab = fcent[0]->const_array(mfi);, - Array4 const& fcyfab = fcent[1]->const_array(mfi);, - Array4 const& fczfab = fcent[2]->const_array(mfi);); - Array4 const& bafab = barea->const_array(mfi); - Array4 const& bcfab = bcent->const_array(mfi); - Array4 const& ccfab = ccent->const_array(mfi); + const auto& ebdata = factory->getEBData(mfi); Array4 const& bebfab = (is_eb_dirichlet) ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo; Array4 const& phiebfab = (is_eb_dirichlet && is_eb_inhomog) @@ -196,15 +171,13 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c { mlebabeclap_adotx_centroid(i,j,k,n, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), - flagfab, vfracfab, - AMREX_D_DECL(apxfab,apyfab,apzfab), - AMREX_D_DECL(fcxfab,fcyfab,fczfab), - ccfab, bafab, bcfab, bebfab,phiebfab, + ebdata, + bebfab,phiebfab, AMREX_D_DECL(domlo_x, domlo_y, domlo_z), AMREX_D_DECL(domhi_x, domhi_y, domhi_z), AMREX_D_DECL(extdir_x, extdir_y, extdir_z), is_eb_dirichlet, is_eb_inhomog, - ascalar, dhx, dhy, dhz); + dxinvarr, ascalar, bscalar); }) #endif } else { @@ -212,12 +185,10 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c { mlebabeclap_adotx(i,j,k,n, yfab, xfab, afab, AMREX_D_DECL(bxfab,byfab,bzfab), - ccmfab, flagfab, vfracfab, - AMREX_D_DECL(apxfab,apyfab,apzfab), - AMREX_D_DECL(fcxfab,fcyfab,fczfab), - bafab, bcfab, bebfab, + ccmfab, ebdata, bebfab, is_eb_dirichlet, phiebfab, - is_eb_inhomog, ascalar, dhx, dhy, dhz, + is_eb_inhomog, ascalar, bscalar, + dxinvarr, beta_on_centroid, phi_on_centroid); }); } @@ -264,15 +235,17 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, #endif const int nc = getNComp(); - const auto h = m_geom[amrlev][mglev].CellSizeArray(); - AMREX_D_TERM(const Real dhx = m_b_scalar/(h[0]*h[0]);, - const Real dhy = m_b_scalar/(h[1]*h[1]);, - const Real dhz = m_b_scalar/(h[2]*h[2])); + const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); + AMREX_D_TERM(const Real dhx = m_b_scalar * dxinv[0]*dxinv[0];, + const Real dhy = m_b_scalar * dxinv[1]*dxinv[1];, + const Real dhz = m_b_scalar * dxinv[2]*dxinv[2]); #if (AMREX_SPACEDIM == 2) + const auto h = m_geom[amrlev][mglev].CellSizeArray(); const Real dh = m_b_scalar/(AMREX_D_TERM(h[0],*h[1],*h[2])); #endif - const Real alpha = m_a_scalar; + const Real ascalar = m_a_scalar; + const Real bscalar = m_b_scalar; const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; @@ -289,7 +262,7 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, bool is_eb_dirichlet = isEBDirichlet(); -#ifdef AMREX_USE_GPU +// #ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && sol.isFusingCandidate()) { MultiArray4 foo; const auto& m0 = mm0.const_arrays(); @@ -320,17 +293,7 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, const auto& f5ma = f5.const_arrays(); #endif #endif - // auto const& ebflags_ma = factory->getMultiEBCellFlagFab().const_arrays(); - auto const& flagma = flags->const_arrays(); - auto const& vfracma = vfrac->const_arrays(); - AMREX_D_TERM(auto const& apxma = area[0]->const_arrays();, - auto const& apyma = area[1]->const_arrays();, - auto const& apzma = area[2]->const_arrays();); - AMREX_D_TERM(auto const& fcxma = fcent[0]->const_arrays();, - auto const& fcyma = fcent[1]->const_arrays();, - auto const& fczma = fcent[2]->const_arrays();); - auto const& bama = barea->const_arrays(); - auto const& bcma = bcent->const_arrays(); + const auto& ebdata_ma = factory->getEBDataArrays(); auto const& ccmma = ccmask.const_arrays(); auto const& bebfab = (is_eb_dirichlet) ? @@ -343,18 +306,15 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept { Box vbx(ama[box_no]); - mlebabeclap_gsrb(i,j,k,n, solma[box_no], rhsma[box_no], alpha, + mlebabeclap_gsrb(i,j,k,n, solma[box_no], rhsma[box_no], ascalar, ama[box_no], - AMREX_D_DECL(dhx, dhy, dhz), + bscalar, dxinv, AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), AMREX_D_DECL(m0[box_no],m2[box_no],m4[box_no]), AMREX_D_DECL(m1[box_no],m3[box_no],m5[box_no]), AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), - ccmma[box_no], bebfab[box_no], flagma[box_no], vfracma[box_no], - AMREX_D_DECL(apxma[box_no],apyma[box_no],apzma[box_no]), - AMREX_D_DECL(fcxma[box_no],fcyma[box_no],fczma[box_no]), - bama[box_no], bcma[box_no], + ccmma[box_no], bebfab[box_no], ebdata_ma.get(box_no), is_eb_dirichlet, beta_on_centroid, phi_on_centroid, vbx, redblack); }); @@ -362,7 +322,7 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, Gpu::streamSynchronize(); } } else -#endif +// #endif { Array4 foo; MFItInfo mfi_info; @@ -414,7 +374,7 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, { AMREX_HOST_DEVICE_PARALLEL_FOR_4D(vbx, nc, i, j, k, n, { - abec_gsrb(i,j,k,n, solnfab, rhsfab, alpha, afab, + abec_gsrb(i,j,k,n, solnfab, rhsfab, ascalar, afab, AMREX_D_DECL(dhx, dhy, dhz), AMREX_D_DECL(bxfab, byfab, bzfab), AMREX_D_DECL(m0,m2,m4), @@ -437,19 +397,19 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, if (phi_on_centroid) { amrex::Abort("phi_on_centroid is still a WIP"); } - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( vbx, thread_box, + AMREX_HOST_DEVICE_PARALLEL_FOR_4D( vbx, nc, i, j, k, n, { - mlebabeclap_gsrb(thread_box, solnfab, rhsfab, alpha, afab, - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_2D_ONLY_ARGS(dh,h) - AMREX_D_DECL(bxfab,byfab,bzfab), + mlebabeclap_gsrb(i,j,k,n, solnfab, rhsfab, ascalar, + afab, + bscalar, dxinv, + AMREX_D_DECL(bxfab, byfab, bzfab), AMREX_D_DECL(m0,m2,m4), AMREX_D_DECL(m1,m3,m5), AMREX_D_DECL(f0fab,f2fab,f4fab), AMREX_D_DECL(f1fab,f3fab,f5fab), ccmfab, bebfab, ebdata, is_eb_dirichlet, beta_on_centroid, phi_on_centroid, - vbx, redblack, nc); + vbx, redblack); }); } } From fd70e3b5ab563f7cc0b1fadbbebbb1798b071f95 Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Mon, 19 Jan 2026 23:48:36 +1100 Subject: [PATCH 08/12] Fixes for 2D --- Src/EB/AMReX_EBData.H | 10 +- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp | 18 +- .../MLMG/AMReX_MLEBABecLap_2D_K.H | 971 +++++++++--------- .../MLMG/AMReX_MLEBABecLap_3D_K.H | 33 - .../MLMG/AMReX_MLEBABecLap_F.cpp | 26 +- 5 files changed, 505 insertions(+), 553 deletions(-) diff --git a/Src/EB/AMReX_EBData.H b/Src/EB/AMReX_EBData.H index 75ac6dcd3ed..81b9c8694c7 100644 --- a/Src/EB/AMReX_EBData.H +++ b/Src/EB/AMReX_EBData.H @@ -34,10 +34,10 @@ struct EBData return m_real_data[static_cast(T)](i,j,k); } } - + template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - auto get () const noexcept + const auto& get () const noexcept { if constexpr (T == EBData_t::cellflag) { return *m_cell_flag; @@ -210,15 +210,15 @@ struct EBDataArrays { return m_real_data[(box_no*real_data_size) + static_cast(T)](i,j,k,n); } - + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - EBData get (int box_no) const noexcept + EBData get (int box_no) const noexcept { return EBData{m_cell_flag + box_no, m_real_data + (box_no * real_data_size)}; } static constexpr int real_data_size = static_cast(EBData_t::cellflag); - + Array4 const * AMREX_RESTRICT m_cell_flag = nullptr; Array4 const * AMREX_RESTRICT m_real_data = nullptr; }; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index 00296c0f7a1..fb96ff21547 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -887,7 +887,7 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const const Real bscalar = m_b_scalar; const int ncomp = getNComp(); -// #ifdef AMREX_USE_GPU +#ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && mf.isFusingCandidate()) { MultiArray4 foo; const auto& xma = mf.arrays(); @@ -929,7 +929,7 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const Gpu::streamSynchronize(); } } else -// #endif +#endif { Array4 foo; MFItInfo mfi_info; @@ -974,17 +974,19 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, { - mlebabeclap_normalize(tbx, fab, ascalar, afab, + mlebabeclap_normalize(i, j, k, n, + fab, ascalar, afab, AMREX_D_DECL(dhx, dhy, dhz), AMREX_2D_ONLY_ARGS(dh, dxarray) AMREX_D_DECL(bxfab, byfab, bzfab), ccmfab, flagfab, vfracfab, - AMREX_D_DECL(apxfab,apyfab,apzfab), - AMREX_D_DECL(fcxfab,fcyfab,fczfab), - bafab, bcfab, bebfab, is_eb_dirichlet, - beta_on_centroid, ncomp); + AMREX_D_DECL(apxfab, apyfab, apzfab), + AMREX_D_DECL(fcxfab, fcyfab, fczfab), + bafab, bcfab, bebfab, + is_eb_dirichlet, + beta_on_centroid); }); } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H index aae4844fb84..f68dd4f9b06 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H @@ -2,304 +2,309 @@ #define AMREX_MLEBABECLAP_2D_K_H_ #include #include +#include #include namespace amrex { -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx_centroid (Box const& box, Array4 const& y, +void mlebabeclap_adotx_centroid (int i, int j, int k, int n, Array4 const& y, Array4 const& x, Array4 const& a, Array4 const& bX, Array4 const& bY, - Array4 const& flag, - Array4 const& vfrc, - Array4 const& apx, Array4 const& apy, - Array4 const& fcx, Array4 const& fcy, - Array4 const& ccent, Array4 const& ba, - Array4 const& bcent, Array4 const& beb, + EBData const& ebdata, + Array4 const& beb, Array4 const& phieb, const int& domlo_x, const int& domlo_y, const int& domhi_x, const int& domhi_y, const bool& on_x_face, const bool& on_y_face, bool is_eb_dirichlet, bool is_eb_inhomog, - GpuArray const& dxinv, - T alpha, T beta, int ncomp) noexcept + GpuArray const& dxinv, + T alpha, T beta) noexcept { T dhx = beta*dxinv[0]*dxinv[0]; T dhy = beta*dxinv[1]*dxinv[1]; - T dh = beta*dxinv[0]*dxinv[1]; + T dh = beta*dxinv[0]*dxinv[0]; - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept - { - if (flag(i,j,k).isCovered()) - { - y(i,j,k,n) = T(0.0); - } - else if (flag(i,j,k).isRegular() && - ((flag(i-1,j ,k).isRegular() && flag(i+1,j ,k).isRegular() && - flag(i ,j-1,k).isRegular() && flag(i ,j+1,k).isRegular()) )) - { - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) - - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) - - bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) - - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) - - bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))); + Array4 const& flag = ebdata.get(); + Array4 const& vfrc = ebdata.get(); + Array4 const& apx = ebdata.get(); + Array4 const& apy = ebdata.get(); - } - else - { - T kappa = vfrc(i,j,k); - T apxm = apx(i,j,k); - T apxp = apx(i+1,j,k); - T apym = apy(i,j,k); - T apyp = apy(i,j+1,k); - - // First get EB-aware slope that doesn't know about extdir - bool needs_bdry_stencil = (i <= domlo_x) || (i >= domhi_x) || - (j <= domlo_y) || (j >= domhi_y); - - // if phi_on_centroid -- A second order least squares fit is used - // to approximate the slope on the high and low faces. Note that if - // any of the three cells --e.g., (i-1,j), (i,j), or (i-1,j)-- are - // cut, then the least squares fit is needed. This is a bit more than - // is actually needed for most cases but it will return the correct - // value in all cases. - - T fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); - if ( (apxm != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0)) ) - { - T yloc_on_xface = fcx(i,j,k); + Array4 const& fcx = ebdata.get(); + Array4 const& fcy = ebdata.get(); - if(needs_bdry_stencil) { + Array4 const& ccent = ebdata.get(); + Array4 const& bcent = ebdata.get(); + Array4 const& ba = ebdata.get(); - fxm = grad_x_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - yloc_on_xface,is_eb_dirichlet,is_eb_inhomog, - on_x_face, domlo_x, domhi_x, - on_y_face, domlo_y, domhi_y); + if (flag(i,j,k).isCovered()) + { + y(i,j,k,n) = T(0.0); + } + else if (flag(i,j,k).isRegular() && + ((flag(i-1,j ,k).isRegular() && flag(i+1,j ,k).isRegular() && + flag(i ,j-1,k).isRegular() && flag(i ,j+1,k).isRegular()) )) + { + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) + - bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) + - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) + - bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))); - } else { - fxm = grad_x_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, - yloc_on_xface,is_eb_dirichlet,is_eb_inhomog); - } - fxm *= bX(i,j,k,n); - } + } + else + { + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); - T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); - if ( (apxp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0)) ) { - T yloc_on_xface = fcx(i+1,j,k,0); - if(needs_bdry_stencil) { - fxp = grad_x_of_phi_on_centroids_extdir(i+1,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - yloc_on_xface,is_eb_dirichlet,is_eb_inhomog, - on_x_face, domlo_x, domhi_x, - on_y_face, domlo_y, domhi_y); - - } else { - fxp = grad_x_of_phi_on_centroids(i+1,j,k,n,x,phieb,flag,ccent,bcent, - yloc_on_xface,is_eb_dirichlet,is_eb_inhomog); - } - fxp *= bX(i+1,j,k,n); + // First get EB-aware slope that doesn't know about extdir + bool needs_bdry_stencil = (i <= domlo_x) || (i >= domhi_x) || + (j <= domlo_y) || (j >= domhi_y); - } + // if phi_on_centroid -- A second order least squares fit is used + // to approximate the slope on the high and low faces. Note that if + // any of the three cells --e.g., (i-1,j), (i,j), or (i-1,j)-- are + // cut, then the least squares fit is needed. This is a bit more than + // is actually needed for most cases but it will return the correct + // value in all cases. - T fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); - if ( (apym != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0)) ) { - T xloc_on_yface = fcy(i,j,k,0); + T fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); + if ( (apxm != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0)) ) + { + T yloc_on_xface = fcx(i,j,k); - if(needs_bdry_stencil) { + if(needs_bdry_stencil) { - fym = grad_y_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - xloc_on_yface,is_eb_dirichlet,is_eb_inhomog, - on_x_face, domlo_x, domhi_x, - on_y_face, domlo_y, domhi_y); + fxm = grad_x_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + yloc_on_xface,is_eb_dirichlet,is_eb_inhomog, + on_x_face, domlo_x, domhi_x, + on_y_face, domlo_y, domhi_y); - } else { - fym = grad_y_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, - xloc_on_yface,is_eb_dirichlet,is_eb_inhomog); - } - fym *= bY(i,j,k,n); + } else { + fxm = grad_x_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, + yloc_on_xface,is_eb_dirichlet,is_eb_inhomog); } + fxm *= bX(i,j,k,n); + } - T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); - if ( (apyp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0)) ) { - T xloc_on_yface = fcy(i,j+1,k,0); - if(needs_bdry_stencil) { - fyp = grad_y_of_phi_on_centroids_extdir(i,j+1,k,n,x,phieb,flag,ccent,bcent,vfrc, - xloc_on_yface,is_eb_dirichlet,is_eb_inhomog, - on_x_face, domlo_x, domhi_x, - on_y_face, domlo_y, domhi_y); - - } else { - fyp = grad_y_of_phi_on_centroids(i,j+1,k,n,x,phieb,flag,ccent,bcent, - xloc_on_yface,is_eb_dirichlet,is_eb_inhomog); - } - fyp *= bY(i,j+1,k,n); + T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); + if ( (apxp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i+1,j,k) != T(1.0) || vfrc(i-1,j,k) != T(1.0)) ) { + T yloc_on_xface = fcx(i+1,j,k,0); + if(needs_bdry_stencil) { + fxp = grad_x_of_phi_on_centroids_extdir(i+1,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + yloc_on_xface,is_eb_dirichlet,is_eb_inhomog, + on_x_face, domlo_x, domhi_x, + on_y_face, domlo_y, domhi_y); + + } else { + fxp = grad_x_of_phi_on_centroids(i+1,j,k,n,x,phieb,flag,ccent,bcent, + yloc_on_xface,is_eb_dirichlet,is_eb_inhomog); } + fxp *= bX(i+1,j,k,n); - T feb = T(0.0); - if (is_eb_dirichlet && flag(i,j,k).isSingleValued()) - { - T dapx = (apxm-apxp)/dxinv[1]; - T dapy = (apym-apyp)/dxinv[0]; - T anorm = std::hypot(dapx,dapy); - T anorminv = T(1.0)/anorm; - T anrmx = dapx * anorminv; - T anrmy = dapy * anorminv; + } + T fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); + if ( (apym != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0)) ) { + T xloc_on_yface = fcy(i,j,k,0); - feb = grad_eb_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, - anrmx,anrmy,is_eb_inhomog, - on_x_face, domlo_x, domhi_x, - on_y_face, domlo_y, domhi_y); + if(needs_bdry_stencil) { - feb *= ba(i,j,k) * beb(i,j,k,n); + fym = grad_y_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + xloc_on_yface,is_eb_dirichlet,is_eb_inhomog, + on_x_face, domlo_x, domhi_x, + on_y_face, domlo_y, domhi_y); + + } else { + fym = grad_y_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent, + xloc_on_yface,is_eb_dirichlet,is_eb_inhomog); } + fym *= bY(i,j,k,n); + } + T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); + if ( (apyp != T(0.0)) && (vfrc(i,j,k) != T(1.0) || vfrc(i,j+1,k) != T(1.0) || vfrc(i,j-1,k) != T(1.0)) ) { + T xloc_on_yface = fcy(i,j+1,k,0); + if(needs_bdry_stencil) { + fyp = grad_y_of_phi_on_centroids_extdir(i,j+1,k,n,x,phieb,flag,ccent,bcent,vfrc, + xloc_on_yface,is_eb_dirichlet,is_eb_inhomog, + on_x_face, domlo_x, domhi_x, + on_y_face, domlo_y, domhi_y); - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * - (dhx*(apxm*fxm-apxp*fxp) + dhy*(apym*fym-apyp*fyp) - dh*feb); + } else { + fyp = grad_y_of_phi_on_centroids(i,j+1,k,n,x,phieb,flag,ccent,bcent, + xloc_on_yface,is_eb_dirichlet,is_eb_inhomog); + } + fyp *= bY(i,j+1,k,n); } - }); + + T feb = T(0.0); + if (is_eb_dirichlet && flag(i,j,k).isSingleValued()) + { + T dapx = (apxm-apxp) / dxinv[0]; + T dapy = (apym-apyp) / dxinv[1]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + + feb = grad_eb_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc, + anrmx,anrmy,is_eb_inhomog, + on_x_face, domlo_x, domhi_x, + on_y_face, domlo_y, domhi_y); + + feb *= ba(i,j,k) * beb(i,j,k,n); + } + + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * + (dhx*(apxm*fxm-apxp*fxp) + dhy*(apym*fym-apyp*fyp) - dh*feb); + } } template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_adotx (Box const& box, Array4 const& y, +void mlebabeclap_adotx (int i, int j, int k, int n, Array4 const& y, Array4 const& x, Array4 const& a, Array4 const& bX, Array4 const& bY, - Array4 const& ccm, Array4 const& flag, - Array4 const& vfrc, Array4 const& apx, - Array4 const& apy, Array4 const& fcx, - Array4 const& fcy, Array4 const& ba, - Array4 const& bc, Array4 const& beb, + Array4 const& ccm, + EBData const& ebdata, + Array4 const& beb, bool is_dirichlet, Array4 const& phieb, - bool is_inhomog, GpuArray const& dxinv, - T alpha, T beta, int ncomp, + bool is_inhomog, T alpha,T beta, + GpuArray const& dxinv, bool beta_on_centroid, bool phi_on_centroid) noexcept { T dhx = beta*dxinv[0]*dxinv[0]; T dhy = beta*dxinv[1]*dxinv[1]; T dh = beta*dxinv[0]*dxinv[1]; - bool beta_on_center = !(beta_on_centroid); bool phi_on_center = !( phi_on_centroid); + auto const flag = ebdata.get(i,j,k); - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + if (flag.isCovered()) { - if (flag(i,j,k).isCovered()) - { - y(i,j,k,n) = T(0.0); - } - else if (flag(i,j,k).isRegular()) - { - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) - - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) - - bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) - - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) - - bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))); - } - else - { - T kappa = vfrc(i,j,k); - T apxm = apx(i,j,k); - T apxp = apx(i+1,j,k); - T apym = apy(i,j,k); - T apyp = apy(i,j+1,k); - - T fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); - if (apxm != T(0.0) && apxm != T(1.0)) { - int jj = j + static_cast(std::copysign(T(1.0),fcx(i,j,k))); - T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k)) : T(0.0); - if (beta_on_center && phi_on_center) { - fxm = (T(1.0)-fracy)*fxm + fracy*bX(i,jj,k,n)*(x(i,jj,k,n)-x(i-1,jj,k,n)); - } else if (beta_on_centroid && phi_on_center) { - fxm = bX(i,j,k,n) * ( (T(1.0)-fracy)*(x(i, j,k,n)-x(i-1, j,k,n)) - + fracy *(x(i,jj,k,n)-x(i-1,jj,k,n)) ); - } + y(i,j,k,n) = T(0.0); + } + else if (flag.isRegular()) + { + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n)) + - bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n))) + - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n)) + - bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n))); + } + else + { + T kappa = ebdata.get(i,j,k); + T apxm = ebdata.get(i ,j ,k ); + T apxp = ebdata.get(i+1,j ,k ); + T apym = ebdata.get(i ,j ,k ); + T apyp = ebdata.get(i ,j+1,k ); + + T fxm = bX(i,j,k,n) * (x(i,j,k,n)-x(i-1,j,k,n)); + if (apxm != T(0.0) && apxm != T(1.0)) { + T fcx = ebdata.get(i,j,k); + int jj = j + static_cast(std::copysign(T(1.0),fcx)); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx) : T(0.0); + if (beta_on_center && phi_on_center) { + fxm = (T(1.0)-fracy)*fxm + fracy*bX(i,jj,k,n)*(x(i,jj,k,n)-x(i-1,jj,k,n)); + } else if (beta_on_centroid && phi_on_center) { + fxm = bX(i,j,k,n) * ( (T(1.0)-fracy)*(x(i, j,k,n)-x(i-1, j,k,n)) + + fracy *(x(i,jj,k,n)-x(i-1,jj,k,n)) ); } + } - T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); - if (apxp != T(0.0) && apxp != T(1.0)) { - int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k))); - T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx(i+1,j,k)) : T(0.0); - if (beta_on_center && phi_on_center) { - fxp = (T(1.0)-fracy)*fxp + fracy*bX(i+1,jj,k,n)*(x(i+1,jj,k,n)-x(i,jj,k,n)); - } else if (beta_on_centroid && phi_on_center) { - fxp = bX(i+1,j,k,n) * ( (T(1.0)-fracy)*(x(i+1, j,k,n)-x(i, j,k,n)) - + fracy *(x(i+1,jj,k,n)-x(i,jj,k,n)) ); - } + T fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n)-x(i,j,k,n)); + if (apxp != T(0.0) && apxp != T(1.0)) { + T fcx = ebdata.get(i+1,j,k); + int jj = j + static_cast(std::copysign(T(1.0),fcx)); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx) : T(0.0); + if (beta_on_center && phi_on_center) { + fxp = (T(1.0)-fracy)*fxp + fracy*bX(i+1,jj,k,n)*(x(i+1,jj,k,n)-x(i,jj,k,n)); + } else if (beta_on_centroid && phi_on_center) { + fxp = bX(i+1,j,k,n) * ( (T(1.0)-fracy)*(x(i+1, j,k,n)-x(i, j,k,n)) + + fracy *(x(i+1,jj,k,n)-x(i,jj,k,n)) ); } + } - T fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); - if (apym != T(0.0) && apym != T(1.0)) { - int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k))); - T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k)) : T(0.0); - if (beta_on_center && phi_on_center) { - fym = (T(1.0)-fracx)*fym + fracx*bY(ii,j,k,n)*(x(ii,j,k,n)-x(ii,j-1,k,n)); - } else if (beta_on_centroid && phi_on_center) { - fym = bY(i,j,k,n) * ( (T(1.0)-fracx)*(x( i,j,k,n)-x( i,j-1,k,n)) - + fracx *(x(ii,j,k,n)-x(ii,j-1,k,n)) ); - } + T fym = bY(i,j,k,n)*(x(i,j,k,n)-x(i,j-1,k,n)); + if (apym != T(0.0) && apym != T(1.0)) { + T fcy = ebdata.get(i,j,k); + int ii = i + static_cast(std::copysign(T(1.0),fcy)); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy) : T(0.0); + if (beta_on_center && phi_on_center) { + fym = (T(1.0)-fracx)*fym + fracx*bY(ii,j,k,n)*(x(ii,j,k,n)-x(ii,j-1,k,n)); + } else if (beta_on_centroid && phi_on_center) { + fym = bY(i,j,k,n) * ( (T(1.0)-fracx)*(x( i,j,k,n)-x( i,j-1,k,n)) + + fracx *(x(ii,j,k,n)-x(ii,j-1,k,n)) ); } + } - T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); - if (apyp != T(0.0) && apyp != T(1.0)) { - int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k))); - T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy(i,j+1,k)) : T(0.0); - if (beta_on_center && phi_on_center) { - fyp = (T(1.0)-fracx)*fyp + fracx*bY(ii,j+1,k,n)*(x(ii,j+1,k,n)-x(ii,j,k,n)); - } else if (beta_on_centroid && phi_on_center) { - fyp = bY(i,j+1,k,n) * ( (T(1.0)-fracx)*(x( i,j+1,k,n)-x( i,j,k,n)) - + fracx *(x(ii,j+1,k,n)-x(ii,j,k,n)) ); - } + T fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n)-x(i,j,k,n)); + if (apyp != T(0.0) && apyp != T(1.0)) { + T fcy = ebdata.get(i,j+1,k); + int ii = i + static_cast(std::copysign(T(1.0),fcy)); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy) : T(0.0); + if (beta_on_center && phi_on_center) { + fyp = (T(1.0)-fracx)*fyp + fracx*bY(ii,j+1,k,n)*(x(ii,j+1,k,n)-x(ii,j,k,n)); + } else if (beta_on_centroid && phi_on_center) { + fyp = bY(i,j+1,k,n) * ( (T(1.0)-fracx)*(x( i,j+1,k,n)-x( i,j,k,n)) + + fracx *(x(ii,j+1,k,n)-x(ii,j,k,n)) ); } + } - T feb = T(0.0); - if (is_dirichlet) { - T dapx = (apxm-apxp)/dxinv[1]; - T dapy = (apym-apyp)/dxinv[0]; - T anorm = std::hypot(dapx,dapy); - T anorminv = T(1.0)/anorm; - T anrmx = dapx * anorminv; - T anrmy = dapy * anorminv; - - T phib = is_inhomog ? phieb(i,j,k,n) : T(0.0); - - T bctx = bc(i,j,k,0); - T bcty = bc(i,j,k,1); - T dx_eb = get_dx_eb(kappa); - - T dg, gx, gy, sx, sy; - if (std::abs(anrmx) > std::abs(anrmy)) { - dg = dx_eb / std::abs(anrmx); - } else { - dg = dx_eb / std::abs(anrmy); - } - gx = (bctx - dg*anrmx); - gy = (bcty - dg*anrmy); - sx = std::copysign(T(1.0),anrmx); - sy = std::copysign(T(1.0),anrmy); - - int ii = i - static_cast(sx); - int jj = j - static_cast(sy); + T feb = T(0.0); + if (is_dirichlet) { + T dapx = (apxm-apxp) / dxinv[0]; + T dapy = (apym-apyp) / dxinv[1]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; - T phig = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy) * x(i ,j ,k,n) - + ( - gx*sx - gx*gy*sx*sy) * x(ii,j ,k,n) - + ( - gy*sy - gx*gy*sx*sy) * x(i ,jj,k,n) - + ( + gx*gy*sx*sy) * x(ii,jj,k,n) ; + T phib = is_inhomog ? phieb(i,j,k,n) : T(0.0); - T dphidn = (phib-phig) / dg; + T bctx = ebdata.get(i,j,k,0); + T bcty = ebdata.get(i,j,k,1); + T dx_eb = get_dx_eb(kappa); - feb = dphidn * ba(i,j,k) * beb(i,j,k,n); + T dg, gx, gy, sx, sy; + if (std::abs(anrmx) > std::abs(anrmy)) { + dg = dx_eb / std::abs(anrmx); + } else { + dg = dx_eb / std::abs(anrmy); } + gx = (bctx - dg*anrmx); + gy = (bcty - dg*anrmy); + sx = std::copysign(T(1.0),anrmx); + sy = std::copysign(T(1.0),anrmy); + + int ii = i - static_cast(sx); + int jj = j - static_cast(sy); + + T phig = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy) * x(i ,j ,k,n) + + ( - gx*sx - gx*gy*sx*sy) * x(ii,j ,k,n) + + ( - gy*sy - gx*gy*sx*sy) * x(i ,jj,k,n) + + ( + gx*gy*sx*sy) * x(ii,jj,k,n) ; + + T dphidn = (phib-phig) / dg; + + T ba = ebdata.get(i,j,k); - y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * - (dhx*(apxm*fxm-apxp*fxp) + - dhy*(apym*fym-apyp*fyp) - dh*feb); + feb = dphidn * ba * beb(i,j,k,n); } - }); + + y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (T(1.0)/kappa) * + (dhx*(apxm*fxm-apxp*fxp) + + dhy*(apym*fym-apyp*fyp) - dh*feb); + } } template @@ -370,13 +375,13 @@ void mlebabeclap_ebflux (int i, int j, int k, int n, } } + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_gsrb (Box const& box, +void mlebabeclap_gsrb (int i, int j, int k, int n, Array4 const& phi, Array4 const& rhs, - T alpha, Array4 const& a, - T dhx, T dhy, T dh, - GpuArray const& dx, + T alpha, Array4 const& a, T beta, + GpuArray const& dxinv, Array4 const& bX, Array4 const& bY, Array4 const& m0, Array4 const& m2, Array4 const& m1, Array4 const& m3, @@ -385,206 +390,206 @@ void mlebabeclap_gsrb (Box const& box, Array4 const& ccm, Array4 const& beb, EBData const& ebdata, bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid, - Box const& vbox, int redblack, int ncomp) noexcept + Box const& vbox, int redblack) noexcept { const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); + T dhx = beta*dxinv[0]*dxinv[0]; + T dhy = beta*dxinv[1]*dxinv[1]; + T dh = beta*dxinv[0]*dxinv[1]; - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + if ((i+j+k+redblack) % 2 == 0) { - if ((i+j+k+redblack) % 2 == 0) + auto const flag = ebdata.get(i,j,k); + if (flag.isCovered()) { - auto const flag = ebdata.get(i,j,k); - if (flag.isCovered()) + phi(i,j,k,n) = T(0.0); + } + else + { + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (i == vhi.x && m2(vhi.x+1,j,k) > 0) + ? f2(vhi.x,j,k,n) : T(0.0); + T cf3 = (j == vhi.y && m3(i,vhi.y+1,k) > 0) + ? f3(i,vhi.y,k,n) : T(0.0); + + if (flag.isRegular()) { - phi(i,j,k,n) = T(0.0); + T gamma = alpha*a(i,j,k) + + dhx * (bX(i+1,j,k,n) + bX(i,j,k,n)) + + dhy * (bY(i,j+1,k,n) + bY(i,j,k,n)); + + T rho = dhx * (bX(i+1,j,k,n)*phi(i+1,j,k,n) + + bX(i ,j,k,n)*phi(i-1,j,k,n)) + + dhy * (bY(i,j+1,k,n)*phi(i,j+1,k,n) + + bY(i,j ,k,n)*phi(i,j-1,k,n)); + + T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf2) + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf3); + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += res/(gamma-delta); } else { - T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) - ? f0(vlo.x,j,k,n) : T(0.0); - T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) - ? f1(i,vlo.y,k,n) : T(0.0); - T cf2 = (i == vhi.x && m2(vhi.x+1,j,k) > 0) - ? f2(vhi.x,j,k,n) : T(0.0); - T cf3 = (j == vhi.y && m3(i,vhi.y+1,k) > 0) - ? f3(i,vhi.y,k,n) : T(0.0); - - if (flag.isRegular()) - { - T gamma = alpha*a(i,j,k) - + dhx * (bX(i+1,j,k,n) + bX(i,j,k,n)) - + dhy * (bY(i,j+1,k,n) + bY(i,j,k,n)); + T kappa = ebdata.get(i,j,k); + T apxm = ebdata.get(i ,j ,k); + T apxp = ebdata.get(i+1,j ,k); + T apym = ebdata.get(i ,j ,k); + T apyp = ebdata.get(i ,j+1,k); + + T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); + T oxm = -bX(i,j,k,n)*cf0; + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0)) { + T fcx = ebdata.get(i,j,k); + int jj = j + static_cast(std::copysign(T(1.0),fcx)); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*fxm + + fracy *bX(i,jj,k,n)*(phi(i,jj,k,n)-phi(i-1,jj,k,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxm = (T(1.0)-fracy)*( -phi(i-1,j,k,n)) + + fracy *(phi(i,jj,k,n)-phi(i-1,jj,k,n)); + fxm *= bX(i,j,k,n); + } + oxm = T(0.0); + sxm = (T(1.0)-fracy)*sxm; + } - T rho = dhx * (bX(i+1,j,k,n)*phi(i+1,j,k,n) - + bX(i ,j,k,n)*phi(i-1,j,k,n)) - + dhy * (bY(i,j+1,k,n)*phi(i,j+1,k,n) - + bY(i,j ,k,n)*phi(i,j-1,k,n)); + T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); + T oxp = bX(i+1,j,k,n)*cf2; + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0)) { + T fcx = ebdata.get(i+1,j,k); + int jj = j + static_cast(std::copysign(T(1.0),fcx)); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*fxp + + fracy *bX(i+1,jj,k,n)*(phi(i+1,jj,k,n)-phi(i,jj,k,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fxp = (T(1.0)-fracy)*(phi(i+1,j,k,n) ) + + fracy *(phi(i+1,jj,k,n)-phi(i,jj,k,n)); + fxp *= bX(i+1,j,k,n); + } + oxp = T(0.0); + sxp = (T(1.0)-fracy)*sxp; + } - T delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf2) - + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf3); + T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); + T oym = -bY(i,j,k,n)*cf1; + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0)) { + T fcy = ebdata.get(i,j,k); + int ii = i + static_cast(std::copysign(T(1.0),fcy)); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*fym + + fracx *bY(ii,j,k,n)*(phi(ii,j,k,n)-phi(ii,j-1,k,n)); + } + else if (beta_on_centroid && !phi_on_centroid) + { + fym = (T(1.0)-fracx)*( -phi( i,j-1,k,n)) + + fracx *(phi(ii,j,k,n)-phi(ii,j-1,k,n)); + fym *= bY(i,j,k,n); + } - T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += res/(gamma-delta); + oym = T(0.0); + sym = (T(1.0)-fracx)*sym; } - else - { - T kappa = ebdata.get(i,j,k); - T apxm = ebdata.get(i ,j ,k); - T apxp = ebdata.get(i+1,j ,k); - T apym = ebdata.get(i ,j ,k); - T apyp = ebdata.get(i ,j+1,k); - - T fxm = -bX(i,j,k,n)*phi(i-1,j,k,n); - T oxm = -bX(i,j,k,n)*cf0; - T sxm = bX(i,j,k,n); - if (apxm != T(0.0) && apxm != T(1.0)) { - T fcx = ebdata.get(i,j,k); - int jj = j + static_cast(std::copysign(T(1.0),fcx)); - T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxm = (T(1.0)-fracy)*fxm + - fracy *bX(i,jj,k,n)*(phi(i,jj,k,n)-phi(i-1,jj,k,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxm = (T(1.0)-fracy)*( -phi(i-1,j,k,n)) + - fracy *(phi(i,jj,k,n)-phi(i-1,jj,k,n)); - fxm *= bX(i,j,k,n); - } - oxm = T(0.0); - sxm = (T(1.0)-fracy)*sxm; - } - T fxp = bX(i+1,j,k,n)*phi(i+1,j,k,n); - T oxp = bX(i+1,j,k,n)*cf2; - T sxp = -bX(i+1,j,k,n); - if (apxp != T(0.0) && apxp != T(1.0)) { - T fcx = ebdata.get(i+1,j,k); - int jj = j + static_cast(std::copysign(T(1.0),fcx)); - T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fxp = (T(1.0)-fracy)*fxp + - fracy *bX(i+1,jj,k,n)*(phi(i+1,jj,k,n)-phi(i,jj,k,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fxp = (T(1.0)-fracy)*(phi(i+1,j,k,n) ) + - fracy *(phi(i+1,jj,k,n)-phi(i,jj,k,n)); - fxp *= bX(i+1,j,k,n); - } - oxp = T(0.0); - sxp = (T(1.0)-fracy)*sxp; + T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); + T oyp = bY(i,j+1,k,n)*cf3; + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0)) { + T fcy = ebdata.get(i,j+1,k); + int ii = i + static_cast(std::copysign(T(1.0),fcy)); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy) : T(0.0); + if (!beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*fyp + + fracx*bY(ii,j+1,k,n)*(phi(ii,j+1,k,n)-phi(ii,j,k,n)); } - - T fym = -bY(i,j,k,n)*phi(i,j-1,k,n); - T oym = -bY(i,j,k,n)*cf1; - T sym = bY(i,j,k,n); - if (apym != T(0.0) && apym != T(1.0)) { - T fcy = ebdata.get(i,j,k); - int ii = i + static_cast(std::copysign(T(1.0),fcy)); - T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fym = (T(1.0)-fracx)*fym + - fracx *bY(ii,j,k,n)*(phi(ii,j,k,n)-phi(ii,j-1,k,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fym = (T(1.0)-fracx)*( -phi( i,j-1,k,n)) + - fracx *(phi(ii,j,k,n)-phi(ii,j-1,k,n)); - fym *= bY(i,j,k,n); - } - - oym = T(0.0); - sym = (T(1.0)-fracx)*sym; + else if (beta_on_centroid && !phi_on_centroid) + { + fyp = (T(1.0)-fracx)*(phi(i,j+1,k,n) )+ + fracx *(phi(ii,j+1,k,n)-phi(ii,j,k,n)); + fyp *= bY(i,j+1,k,n); } + oyp = T(0.0); + syp = (T(1.0)-fracx)*syp; + } - T fyp = bY(i,j+1,k,n)*phi(i,j+1,k,n); - T oyp = bY(i,j+1,k,n)*cf3; - T syp = -bY(i,j+1,k,n); - if (apyp != T(0.0) && apyp != T(1.0)) { - T fcy = ebdata.get(i,j+1,k); - int ii = i + static_cast(std::copysign(T(1.0),fcy)); - T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy) : T(0.0); - if (!beta_on_centroid && !phi_on_centroid) - { - fyp = (T(1.0)-fracx)*fyp + - fracx*bY(ii,j+1,k,n)*(phi(ii,j+1,k,n)-phi(ii,j,k,n)); - } - else if (beta_on_centroid && !phi_on_centroid) - { - fyp = (T(1.0)-fracx)*(phi(i,j+1,k,n) )+ - fracx *(phi(ii,j+1,k,n)-phi(ii,j,k,n)); - fyp *= bY(i,j+1,k,n); - } - oyp = T(0.0); - syp = (T(1.0)-fracx)*syp; + T vfrcinv = (T(1.0)/kappa); + T gamma = alpha*a(i,j,k) + vfrcinv * + (dhx*(apxm*sxm-apxp*sxp) + + dhy*(apym*sym-apyp*syp)); + T rho = -vfrcinv * + (dhx*(apxm*fxm-apxp*fxp) + + dhy*(apym*fym-apyp*fyp)); + + T delta = -vfrcinv * + (dhx*(apxm*oxm-apxp*oxp) + + dhy*(apym*oym-apyp*oyp)); + + if (is_dirichlet) { + T dapx = (apxm-apxp) / dxinv[0]; + T dapy = (apym-apyp) / dxinv[1]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + + T bctx = ebdata.get(i,j,k,0); + T bcty = ebdata.get(i,j,k,1); + T dx_eb = get_dx_eb(kappa); + + T dg, gx, gy, sx, sy; + if (std::abs(anrmx) > std::abs(anrmy)) { + dg = dx_eb / std::abs(anrmx); + } else { + dg = dx_eb / std::abs(anrmy); } + gx = bctx - dg*anrmx; + gy = bcty - dg*anrmy; + sx = std::copysign(T(1.0),anrmx); + sy = std::copysign(T(1.0),anrmy); - T vfrcinv = (T(1.0)/kappa); - T gamma = alpha*a(i,j,k) + vfrcinv * - (dhx*(apxm*sxm-apxp*sxp) + - dhy*(apym*sym-apyp*syp)); - T rho = -vfrcinv * - (dhx*(apxm*fxm-apxp*fxp) + - dhy*(apym*fym-apyp*fyp)); - - T delta = -vfrcinv * - (dhx*(apxm*oxm-apxp*oxp) + - dhy*(apym*oym-apyp*oyp)); - - if (is_dirichlet) { - T dapx = (apxm-apxp)*dx[1]; - T dapy = (apym-apyp)*dx[0]; - T anorm = std::hypot(dapx,dapy); - T anorminv = T(1.0)/anorm; - T anrmx = dapx * anorminv; - T anrmy = dapy * anorminv; - - T bctx = ebdata.get(i,j,k,0); - T bcty = ebdata.get(i,j,k,1); - T dx_eb = get_dx_eb(kappa); - - T dg, gx, gy, sx, sy; - if (std::abs(anrmx) > std::abs(anrmy)) { - dg = dx_eb / std::abs(anrmx); - } else { - dg = dx_eb / std::abs(anrmy); - } - gx = bctx - dg*anrmx; - gy = bcty - dg*anrmy; - sx = std::copysign(T(1.0),anrmx); - sy = std::copysign(T(1.0),anrmy); - - int ii = i - static_cast(sx); - int jj = j - static_cast(sy); - - T phig_gamma = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); - T phig = ( - gx*sx - gx*gy*sx*sy) * phi(ii,j ,k,n) - + ( - gy*sy - gx*gy*sx*sy) * phi(i ,jj,k,n) - + ( + gx*gy*sx*sy) * phi(ii,jj,k,n); - - // In gsrb we are always in residual-correction form so phib = 0 - T dphidn = ( -phig)/dg; - - T ba = ebdata.get(i,j,k); - - T feb = dphidn * ba * beb(i,j,k,n); - rho += -vfrcinv*(-dh)*feb; - - T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); - gamma += vfrcinv*(-dh)*feb_gamma; - } + int ii = i - static_cast(sx); + int jj = j - static_cast(sy); + + T phig_gamma = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); + T phig = ( - gx*sx - gx*gy*sx*sy) * phi(ii,j ,k,n) + + ( - gy*sy - gx*gy*sx*sy) * phi(i ,jj,k,n) + + ( + gx*gy*sx*sy) * phi(ii,jj,k,n); + + // In gsrb we are always in residual-correction form so phib = 0 + T dphidn = ( -phig)/dg; + + T ba = ebdata.get(i,j,k); + + T feb = dphidn * ba * beb(i,j,k,n); + rho += -vfrcinv*(-dh)*feb; - T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); - phi(i,j,k,n) += res/(gamma-delta); + T feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n); + gamma += vfrcinv*(-dh)*feb_gamma; } + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) += res/(gamma-delta); } } - }); + } } template @@ -777,7 +782,7 @@ void mlebabeclap_grad_y_0 (Box const& box, Array4 const& gy, Array4 template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_normalize (Box const& box, Array4 const& phi, +void mlebabeclap_normalize (int i, int j, int k, int n, Array4 const& phi, T alpha, Array4 const& a, T dhx, T dhy, T dh, const amrex::GpuArray& dx, @@ -788,91 +793,87 @@ void mlebabeclap_normalize (Box const& box, Array4 const& phi, Array4 const& fcx, Array4 const& fcy, Array4 const& ba, Array4 const& bc, Array4 const& beb, - bool is_dirichlet, bool beta_on_centroid, int ncomp) noexcept + bool is_dirichlet, bool beta_on_centroid) noexcept { - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept + if (flag(i,j,k).isRegular()) { - if (flag(i,j,k).isRegular()) - { - phi(i,j,k,n) /= alpha*a(i,j,k) + dhx*(bX(i,j,k,n) + bX(i+1,j,k,n)) - + dhy*(bY(i,j,k,n) + bY(i,j+1,k,n)); + phi(i,j,k,n) /= alpha*a(i,j,k) + dhx*(bX(i,j,k,n) + bX(i+1,j,k,n)) + + dhy*(bY(i,j,k,n) + bY(i,j+1,k,n)); + } + else if (flag(i,j,k).isSingleValued()) + { + T kappa = vfrc(i,j,k); + T apxm = apx(i,j,k); + T apxp = apx(i+1,j,k); + T apym = apy(i,j,k); + T apyp = apy(i,j+1,k); + + T sxm = bX(i,j,k,n); + if (apxm != T(0.0) && apxm != T(1.0) && !beta_on_centroid) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i,j,k))); + T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) + ? std::abs(fcx(i,j,k)) : T(0.0); + sxm = (T(1.0)-fracy)*sxm; } - else if (flag(i,j,k).isSingleValued()) - { - T kappa = vfrc(i,j,k); - T apxm = apx(i,j,k); - T apxp = apx(i+1,j,k); - T apym = apy(i,j,k); - T apyp = apy(i,j+1,k); - - T sxm = bX(i,j,k,n); - if (apxm != T(0.0) && apxm != T(1.0) && !beta_on_centroid) { - int jj = j + static_cast(std::copysign(T(1.0),fcx(i,j,k))); - T fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) - ? std::abs(fcx(i,j,k)) : T(0.0); - sxm = (T(1.0)-fracy)*sxm; - } - T sxp = -bX(i+1,j,k,n); - if (apxp != T(0.0) && apxp != T(1.0) && !beta_on_centroid) { - int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k))); - T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) - ? std::abs(fcx(i+1,j,k)) : T(0.0); - sxp = (T(1.0)-fracy)*sxp; - } + T sxp = -bX(i+1,j,k,n); + if (apxp != T(0.0) && apxp != T(1.0) && !beta_on_centroid) { + int jj = j + static_cast(std::copysign(T(1.0),fcx(i+1,j,k))); + T fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) + ? std::abs(fcx(i+1,j,k)) : T(0.0); + sxp = (T(1.0)-fracy)*sxp; + } - T sym = bY(i,j,k,n); - if (apym != T(0.0) && apym != T(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k))); - T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) - ? std::abs(fcy(i,j,k)) : T(0.0); - sym = (T(1.0)-fracx)*sym; - } + T sym = bY(i,j,k,n); + if (apym != T(0.0) && apym != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j,k))); + T fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) + ? std::abs(fcy(i,j,k)) : T(0.0); + sym = (T(1.0)-fracx)*sym; + } - T syp = -bY(i,j+1,k,n); - if (apyp != T(0.0) && apyp != T(1.0) && !beta_on_centroid) { - int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k))); - T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) - ? std::abs(fcy(i,j+1,k)) : T(0.0); - syp = (T(1.0)-fracx)*syp; - } + T syp = -bY(i,j+1,k,n); + if (apyp != T(0.0) && apyp != T(1.0) && !beta_on_centroid) { + int ii = i + static_cast(std::copysign(T(1.0),fcy(i,j+1,k))); + T fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) + ? std::abs(fcy(i,j+1,k)) : T(0.0); + syp = (T(1.0)-fracx)*syp; + } - T vfrcinv = (T(1.0)/kappa); - T gamma = alpha*a(i,j,k) + vfrcinv * - (dhx*(apxm*sxm-apxp*sxp) + - dhy*(apym*sym-apyp*syp)); - - if (is_dirichlet) { - T dapx = (apxm-apxp)*dx[1]; - T dapy = (apym-apyp)*dx[0]; - T anorm = std::hypot(dapx,dapy); - T anorminv = T(1.0)/anorm; - T anrmx = dapx * anorminv; - T anrmy = dapy * anorminv; - - T bctx = bc(i,j,k,0); - T bcty = bc(i,j,k,1); - T dx_eb = get_dx_eb(vfrc(i,j,k)); - - T dg, gx, gy, sx, sy; - if (std::abs(anrmx) > std::abs(anrmy)) { - dg = dx_eb / std::abs(anrmx); - } else { - dg = dx_eb / std::abs(anrmy); - } - gx = bctx - dg*anrmx; - gy = bcty - dg*anrmy; - sx = std::copysign(T(1.0),anrmx); - sy = std::copysign(T(1.0),anrmy); - - T phig_gamma = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); - T feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); - gamma += vfrcinv*(-dh)*feb_gamma; + T vfrcinv = (T(1.0)/kappa); + T gamma = alpha*a(i,j,k) + vfrcinv * + (dhx*(apxm*sxm-apxp*sxp) + + dhy*(apym*sym-apyp*syp)); + + if (is_dirichlet) { + T dapx = (apxm-apxp)*dx[1]; + T dapy = (apym-apyp)*dx[0]; + T anorm = std::hypot(dapx,dapy); + T anorminv = T(1.0)/anorm; + T anrmx = dapx * anorminv; + T anrmy = dapy * anorminv; + + T bctx = bc(i,j,k,0); + T bcty = bc(i,j,k,1); + T dx_eb = get_dx_eb(vfrc(i,j,k)); + + T dg, gx, gy, sx, sy; + if (std::abs(anrmx) > std::abs(anrmy)) { + dg = dx_eb / std::abs(anrmx); + } else { + dg = dx_eb / std::abs(anrmy); } - - phi(i,j,k,n) /= gamma; + gx = bctx - dg*anrmx; + gy = bcty - dg*anrmy; + sx = std::copysign(T(1.0),anrmx); + sy = std::copysign(T(1.0),anrmy); + + T phig_gamma = (T(1.0) + gx*sx + gy*sy + gx*gy*sx*sy); + T feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n); + gamma += vfrcinv*(-dh)*feb_gamma; } - }); + phi(i,j,k,n) /= gamma; + } } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H index 9bc8c879efe..e17309b7cc7 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_3D_K.H @@ -1384,39 +1384,6 @@ void mlebabeclap_normalize (int i, int j, int k, int n, Array4 const& phi, } } -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlebabeclap_normalize (Box const& box, Array4 const& phi, - T alpha, Array4 const& a, - T dhx, T dhy, T dhz, - Array4 const& bX, Array4 const& bY, - Array4 const& bZ, - Array4 const& ccm, Array4 const& flag, - Array4 const& vfrc, - Array4 const& apx, Array4 const& apy, - Array4 const& apz, - Array4 const& fcx, Array4 const& fcy, - Array4 const& fcz, - Array4 const& ba, Array4 const& bc, - Array4 const& beb, - bool is_dirichlet, bool beta_on_centroid, int ncomp) noexcept -{ - amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept - { - mlebabeclap_normalize(i, j, k, n, phi, - alpha, a, - dhx, dhy, dhz, - bX, bY, bZ, - ccm, flag, - vfrc, - apx, apy, apz, - fcx, fcy, fcz, - ba, bc, - beb, - is_dirichlet, beta_on_centroid); - }); -} - } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 521ca64cf17..55cd8ff015f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -23,14 +23,6 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; - const MultiFab* vfrac = (factory) ? &(factory->getVolFrac()) : nullptr; - auto area = (factory) ? factory->getAreaFrac() - : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; - auto fcent = (factory) ? factory->getFaceCent() - : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; - const MultiCutFab* barea = (factory) ? &(factory->getBndryArea()) : nullptr; - const MultiCutFab* bcent = (factory) ? &(factory->getBndryCent()) : nullptr; - const auto *const ccent = (factory) ? &(factory->getCentroid()) : nullptr; const bool is_eb_dirichlet = isEBDirichlet(); const bool is_eb_inhomog = m_is_eb_inhomog && (!this->m_precond_mode); @@ -59,7 +51,7 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const Real dhy = bscalar*dxinvarr[1]*dxinvarr[1];, const Real dhz = bscalar*dxinvarr[2]*dxinvarr[2];) -// #ifdef AMREX_USE_GPU +#ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && in.isFusingCandidate()) { MultiArray4 foo; const auto& xma = in.arrays(); @@ -110,7 +102,7 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c Gpu::streamSynchronize(); } } else -// #endif +#endif { Array4 foo; MFItInfo mfi_info; @@ -249,20 +241,10 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; - const MultiFab* vfrac = (factory) ? &(factory->getVolFrac()) : nullptr; - const auto area = (factory) ? factory->getAreaFrac() - : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; - const auto fcent = (factory) ? factory->getFaceCent() - : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; - const MultiCutFab* barea = (factory) ? &(factory->getBndryArea()) : nullptr; - const MultiCutFab* bcent = (factory) ? &(factory->getBndryCent()) : nullptr; - const auto *const ccent = (factory) ? &(factory->getCentroid()) : nullptr; - - amrex::ignore_unused(vfrac, area, fcent, barea, bcent, ccent); bool is_eb_dirichlet = isEBDirichlet(); -// #ifdef AMREX_USE_GPU +#ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && sol.isFusingCandidate()) { MultiArray4 foo; const auto& m0 = mm0.const_arrays(); @@ -322,7 +304,7 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, Gpu::streamSynchronize(); } } else -// #endif +#endif { Array4 foo; MFItInfo mfi_info; From b0ef68947005a0dde95471ea32356ae6e157f78f Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Mon, 19 Jan 2026 23:50:24 +1100 Subject: [PATCH 09/12] Remove whitespace --- Src/EB/AMReX_EBFabFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/EB/AMReX_EBFabFactory.cpp b/Src/EB/AMReX_EBFabFactory.cpp index f1c0807499b..2b47a115792 100644 --- a/Src/EB/AMReX_EBFabFactory.cpp +++ b/Src/EB/AMReX_EBFabFactory.cpp @@ -206,7 +206,7 @@ EBFArrayBoxFactory::getEBDataArrays () const noexcept auto const* pebflag = ebflags_ma.dp; #else auto const* pebflag = ebflags_ma.hp; -#endif +#endif return EBDataArrays{pebflag, m_eb_data.data()}; } From 58c702b509a70ef52071f791990cce73d5b4564e Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Tue, 20 Jan 2026 00:16:05 +1100 Subject: [PATCH 10/12] Remove unused vars --- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 55cd8ff015f..69796aeb0be 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -47,10 +47,6 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const bool extdir_y = !(m_geom[amrlev][mglev].isPeriodic(1));, const bool extdir_z = !(m_geom[amrlev][mglev].isPeriodic(2));); - AMREX_D_TERM(const Real dhx = bscalar*dxinvarr[0]*dxinvarr[0];, - const Real dhy = bscalar*dxinvarr[1]*dxinvarr[1];, - const Real dhz = bscalar*dxinvarr[2]*dxinvarr[2];) - #ifdef AMREX_USE_GPU if (Gpu::inLaunchRegion() && in.isFusingCandidate()) { MultiArray4 foo; @@ -231,11 +227,6 @@ MLEBABecLap::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, AMREX_D_TERM(const Real dhx = m_b_scalar * dxinv[0]*dxinv[0];, const Real dhy = m_b_scalar * dxinv[1]*dxinv[1];, const Real dhz = m_b_scalar * dxinv[2]*dxinv[2]); - -#if (AMREX_SPACEDIM == 2) - const auto h = m_geom[amrlev][mglev].CellSizeArray(); - const Real dh = m_b_scalar/(AMREX_D_TERM(h[0],*h[1],*h[2])); -#endif const Real ascalar = m_a_scalar; const Real bscalar = m_b_scalar; From c9a84a0f2a1716e71ca3a2b1460dafa8da31b8eb Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Tue, 20 Jan 2026 00:41:19 +1100 Subject: [PATCH 11/12] HIP fix --- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index 69796aeb0be..692c1618757 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -153,7 +153,6 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c amrex::ignore_unused(AMREX_D_DECL(domlo_x, domlo_y, domlo_z), AMREX_D_DECL(domhi_x, domhi_y, domhi_z), AMREX_D_DECL(extdir_x, extdir_y, extdir_z)); - amrex::ignore_unused(ccfab); #else AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, { From 037d97439496e661c0575e5292fe2a7857bf3953 Mon Sep 17 00:00:00 2001 From: Ankith A Das Date: Sat, 24 Jan 2026 16:25:51 +1100 Subject: [PATCH 12/12] Removed restrict as it is not required --- Src/EB/AMReX_EBData.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/EB/AMReX_EBData.H b/Src/EB/AMReX_EBData.H index 81b9c8694c7..8935e941cd3 100644 --- a/Src/EB/AMReX_EBData.H +++ b/Src/EB/AMReX_EBData.H @@ -183,7 +183,7 @@ struct EBData static constexpr int real_data_size = static_cast(EBData_t::cellflag); Array4 const* m_cell_flag = nullptr; - Array4 const* AMREX_RESTRICT m_real_data = nullptr; + Array4 const* m_real_data = nullptr; }; struct EBDataArrays