Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 95 additions & 49 deletions Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -883,66 +883,112 @@ MLEBABecLap::normalize (int amrlev, int mglev, MultiFab& mf) const

bool is_eb_dirichlet = isEBDirichlet();

Array4<Real const> foo;

const Real ascalar = m_a_scalar;
const Real bscalar = m_b_scalar;
const int ncomp = getNComp();

MFItInfo mfi_info;
if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling(); }
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion() && mf.isFusingCandidate()) {
MultiArray4<Real const> foo;
const auto& xma = mf.arrays();
const auto& ama = acoef.const_arrays();
AMREX_D_TERM(const auto& bxma = bxcoef.const_arrays();,
const auto& byma = bycoef.const_arrays();,
const auto& bzma = bzcoef.const_arrays(););
auto const& ccmma = ccmask.const_arrays();
auto const& flagma = flags->const_arrays();
auto const& vfracma = vfrac->const_arrays();
AMREX_D_TERM(auto const& apxma = area[0]->const_arrays();,
auto const& apyma = area[1]->const_arrays();,
auto const& apzma = area[2]->const_arrays(););
AMREX_D_TERM(auto const& fcxma = fcent[0]->const_arrays();,
auto const& fcyma = fcent[1]->const_arrays();,
auto const& fczma = fcent[2]->const_arrays(););
auto const& bama = barea->const_arrays();
auto const& bcma = bcent->const_arrays();
auto const& bebma = (is_eb_dirichlet)
? m_eb_b_coeffs[amrlev][mglev]->const_arrays() : foo;

bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid);

amrex::ParallelFor(mf, IntVect(0), ncomp, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
mlebabeclap_normalize(i, j, k, n,
xma[box_no], ascalar, ama[box_no],
AMREX_D_DECL(dhx, dhy, dhz),
AMREX_2D_ONLY_ARGS(dh, dxarray)
AMREX_D_DECL(bxma[box_no], byma[box_no], bzma[box_no]),
ccmma[box_no], flagma[box_no], vfracma[box_no],
AMREX_D_DECL(apxma[box_no], apyma[box_no], apzma[box_no]),
AMREX_D_DECL(fcxma[box_no], fcyma[box_no], fczma[box_no]),
bama[box_no], bcma[box_no], bebma[box_no],
is_eb_dirichlet,
beta_on_centroid);
});
if (!Gpu::inNoSyncRegion()) {
Gpu::streamSynchronize();
}
} else
#endif
{
Array4<Real const> foo;
MFItInfo mfi_info;
if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling(); }
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(mf, mfi_info); mfi.isValid(); ++mfi)
{
const Box& bx = mfi.tilebox();
Array4<Real> const& fab = mf.array(mfi);
Array4<Real const> const& afab = acoef.const_array(mfi);
AMREX_D_TERM(Array4<Real const> const& bxfab = bxcoef.const_array(mfi);,
Array4<Real const> const& byfab = bycoef.const_array(mfi);,
Array4<Real const> const& bzfab = bzcoef.const_array(mfi););
for (MFIter mfi(mf, mfi_info); mfi.isValid(); ++mfi)
{
const Box& bx = mfi.tilebox();
Array4<Real> const& fab = mf.array(mfi);
Array4<Real const> const& afab = acoef.const_array(mfi);
AMREX_D_TERM(Array4<Real const> const& bxfab = bxcoef.const_array(mfi);,
Array4<Real const> const& byfab = bycoef.const_array(mfi);,
Array4<Real const> const& bzfab = bzcoef.const_array(mfi););

auto fabtyp = (flags) ? (*flags)[mfi].getType(bx) : FabType::regular;
auto fabtyp = (flags) ? (*flags)[mfi].getType(bx) : FabType::regular;

if (fabtyp == FabType::regular)
{
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
if (fabtyp == FabType::regular)
{
mlabeclap_normalize(i,j,k,n, fab, afab, AMREX_D_DECL(bxfab, byfab, bzfab),
dxinvarray, ascalar, bscalar);
});
}
else if (fabtyp == FabType::singlevalued)
{
Array4<Real const> const& bebfab
= (is_eb_dirichlet) ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo;
Array4<int const> const& ccmfab = ccmask.const_array(mfi);
Array4<EBCellFlag const> const& flagfab = flags->const_array(mfi);
Array4<Real const> const& vfracfab = vfrac->const_array(mfi);
AMREX_D_TERM(Array4<Real const> const& apxfab = area[0]->const_array(mfi);,
Array4<Real const> const& apyfab = area[1]->const_array(mfi);,
Array4<Real const> const& apzfab = area[2]->const_array(mfi););
AMREX_D_TERM(Array4<Real const> const& fcxfab = fcent[0]->const_array(mfi);,
Array4<Real const> const& fcyfab = fcent[1]->const_array(mfi);,
Array4<Real const> const& fczfab = fcent[2]->const_array(mfi););
Array4<Real const> const& bafab = barea->const_array(mfi);
Array4<Real const> const& bcfab = bcent->const_array(mfi);

bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid);

AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
mlabeclap_normalize(i,j,k,n, fab, afab, AMREX_D_DECL(bxfab, byfab, bzfab),
dxinvarray, ascalar, bscalar);
});
}
else if (fabtyp == FabType::singlevalued)
{
mlebabeclap_normalize(tbx, fab, ascalar, afab,
AMREX_D_DECL(dhx, dhy, dhz),
AMREX_2D_ONLY_ARGS(dh, dxarray)
AMREX_D_DECL(bxfab, byfab, bzfab),
ccmfab, flagfab, vfracfab,
AMREX_D_DECL(apxfab,apyfab,apzfab),
AMREX_D_DECL(fcxfab,fcyfab,fczfab),
bafab, bcfab, bebfab, is_eb_dirichlet,
beta_on_centroid, ncomp);
});
Array4<Real const> const& bebfab
= (is_eb_dirichlet) ? m_eb_b_coeffs[amrlev][mglev]->const_array(mfi) : foo;
Array4<int const> const& ccmfab = ccmask.const_array(mfi);
Array4<EBCellFlag const> const& flagfab = flags->const_array(mfi);
Array4<Real const> const& vfracfab = vfrac->const_array(mfi);
AMREX_D_TERM(Array4<Real const> const& apxfab = area[0]->const_array(mfi);,
Array4<Real const> const& apyfab = area[1]->const_array(mfi);,
Array4<Real const> const& apzfab = area[2]->const_array(mfi););
AMREX_D_TERM(Array4<Real const> const& fcxfab = fcent[0]->const_array(mfi);,
Array4<Real const> const& fcyfab = fcent[1]->const_array(mfi);,
Array4<Real const> const& fczfab = fcent[2]->const_array(mfi););
Array4<Real const> const& bafab = barea->const_array(mfi);
Array4<Real const> const& bcfab = bcent->const_array(mfi);

bool beta_on_centroid = (m_beta_loc == Location::FaceCentroid);

AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
mlebabeclap_normalize(i, j, k, n,
fab, ascalar, afab,
AMREX_D_DECL(dhx, dhy, dhz),
AMREX_2D_ONLY_ARGS(dh, dxarray)
AMREX_D_DECL(bxfab, byfab, bzfab),
ccmfab, flagfab, vfracfab,
AMREX_D_DECL(apxfab, apyfab, apzfab),
AMREX_D_DECL(fcxfab, fcyfab, fczfab),
bafab, bcfab, bebfab,
is_eb_dirichlet,
beta_on_centroid);
});
}
}
}
}
Expand Down
151 changes: 74 additions & 77 deletions Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_2D_K.H
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ void mlebabeclap_grad_y_0 (Box const& box, Array4<Real> const& gy, Array4<Real c
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_normalize (Box const& box, Array4<Real> const& phi,
void mlebabeclap_normalize (int i, int j, int k, int n, Array4<Real> const& phi,
Real alpha, Array4<Real const> const& a,
Real dhx, Real dhy, Real dh,
const amrex::GpuArray<Real, AMREX_SPACEDIM>& dx,
Expand All @@ -775,91 +775,88 @@ void mlebabeclap_normalize (Box const& box, Array4<Real> const& phi,
Array4<Real const> const& fcx, Array4<Real const> const& fcy,
Array4<Real const> const& ba, Array4<Real const> const& bc,
Array4<Real const> const& beb,
bool is_dirichlet, bool beta_on_centroid, int ncomp) noexcept
bool is_dirichlet, bool beta_on_centroid) noexcept
{
amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept
if (flag(i,j,k).isRegular())
{
if (flag(i,j,k).isRegular())
{
phi(i,j,k,n) /= alpha*a(i,j,k) + dhx*(bX(i,j,k,n) + bX(i+1,j,k,n))
+ dhy*(bY(i,j,k,n) + bY(i,j+1,k,n));
}
else if (flag(i,j,k).isSingleValued())
{
Real kappa = vfrc(i,j,k);
Real apxm = apx(i,j,k);
Real apxp = apx(i+1,j,k);
Real apym = apy(i,j,k);
Real apyp = apy(i,j+1,k);

Real sxm = bX(i,j,k,n);
if (apxm != Real(0.0) && apxm != Real(1.0) && !beta_on_centroid) {
int jj = j + static_cast<int>(std::copysign(Real(1.0),fcx(i,j,k)));
Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k))
? std::abs(fcx(i,j,k)) : Real(0.0);
sxm = (Real(1.0)-fracy)*sxm;
}

Real sxp = -bX(i+1,j,k,n);
if (apxp != Real(0.0) && apxp != Real(1.0) && !beta_on_centroid) {
int jj = j + static_cast<int>(std::copysign(Real(1.0),fcx(i+1,j,k)));
Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k))
? std::abs(fcx(i+1,j,k)) : Real(0.0);
sxp = (Real(1.0)-fracy)*sxp;
}

Real sym = bY(i,j,k,n);
if (apym != Real(0.0) && apym != Real(1.0) && !beta_on_centroid) {
int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j,k)));
Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k))
? std::abs(fcy(i,j,k)) : Real(0.0);
sym = (Real(1.0)-fracx)*sym;
}

Real syp = -bY(i,j+1,k,n);
if (apyp != Real(0.0) && apyp != Real(1.0) && !beta_on_centroid) {
int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j+1,k)));
Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k))
? std::abs(fcy(i,j+1,k)) : Real(0.0);
syp = (Real(1.0)-fracx)*syp;
}
phi(i,j,k,n) /= alpha*a(i,j,k) + dhx*(bX(i,j,k,n) + bX(i+1,j,k,n))
+ dhy*(bY(i,j,k,n) + bY(i,j+1,k,n));
}
else if (flag(i,j,k).isSingleValued())
{
Real kappa = vfrc(i,j,k);
Real apxm = apx(i,j,k);
Real apxp = apx(i+1,j,k);
Real apym = apy(i,j,k);
Real apyp = apy(i,j+1,k);

Real vfrcinv = (Real(1.0)/kappa);
Real gamma = alpha*a(i,j,k) + vfrcinv *
(dhx*(apxm*sxm-apxp*sxp) +
dhy*(apym*sym-apyp*syp));
Real sxm = bX(i,j,k,n);
if (apxm != Real(0.0) && apxm != Real(1.0) && !beta_on_centroid) {
int jj = j + static_cast<int>(std::copysign(Real(1.0),fcx(i,j,k)));
Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k))
? std::abs(fcx(i,j,k)) : Real(0.0);
sxm = (Real(1.0)-fracy)*sxm;
}

if (is_dirichlet) {
Real dapx = (apxm-apxp)*dx[1];
Real dapy = (apym-apyp)*dx[0];
Real anorm = std::hypot(dapx,dapy);
Real anorminv = Real(1.0)/anorm;
Real anrmx = dapx * anorminv;
Real anrmy = dapy * anorminv;
Real sxp = -bX(i+1,j,k,n);
if (apxp != Real(0.0) && apxp != Real(1.0) && !beta_on_centroid) {
int jj = j + static_cast<int>(std::copysign(Real(1.0),fcx(i+1,j,k)));
Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k))
? std::abs(fcx(i+1,j,k)) : Real(0.0);
sxp = (Real(1.0)-fracy)*sxp;
}

Real bctx = bc(i,j,k,0);
Real bcty = bc(i,j,k,1);
Real dx_eb = get_dx_eb(vfrc(i,j,k));
Real sym = bY(i,j,k,n);
if (apym != Real(0.0) && apym != Real(1.0) && !beta_on_centroid) {
int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j,k)));
Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k))
? std::abs(fcy(i,j,k)) : Real(0.0);
sym = (Real(1.0)-fracx)*sym;
}

Real dg, gx, gy, sx, sy;
if (std::abs(anrmx) > std::abs(anrmy)) {
dg = dx_eb / std::abs(anrmx);
} else {
dg = dx_eb / std::abs(anrmy);
}
gx = bctx - dg*anrmx;
gy = bcty - dg*anrmy;
sx = std::copysign(Real(1.0),anrmx);
sy = std::copysign(Real(1.0),anrmy);
Real syp = -bY(i,j+1,k,n);
if (apyp != Real(0.0) && apyp != Real(1.0) && !beta_on_centroid) {
int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j+1,k)));
Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k))
? std::abs(fcy(i,j+1,k)) : Real(0.0);
syp = (Real(1.0)-fracx)*syp;
}

Real phig_gamma = (Real(1.0) + gx*sx + gy*sy + gx*gy*sx*sy);
Real feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n);
gamma += vfrcinv*(-dh)*feb_gamma;
Real vfrcinv = (Real(1.0)/kappa);
Real gamma = alpha*a(i,j,k) + vfrcinv *
(dhx*(apxm*sxm-apxp*sxp) +
dhy*(apym*sym-apyp*syp));

if (is_dirichlet) {
Real dapx = (apxm-apxp)*dx[1];
Real dapy = (apym-apyp)*dx[0];
Real anorm = std::hypot(dapx,dapy);
Real anorminv = Real(1.0)/anorm;
Real anrmx = dapx * anorminv;
Real anrmy = dapy * anorminv;

Real bctx = bc(i,j,k,0);
Real bcty = bc(i,j,k,1);
Real dx_eb = get_dx_eb(vfrc(i,j,k));

Real dg, gx, gy, sx, sy;
if (std::abs(anrmx) > std::abs(anrmy)) {
dg = dx_eb / std::abs(anrmx);
} else {
dg = dx_eb / std::abs(anrmy);
}

phi(i,j,k,n) /= gamma;
gx = bctx - dg*anrmx;
gy = bcty - dg*anrmy;
sx = std::copysign(Real(1.0),anrmx);
sy = std::copysign(Real(1.0),anrmy);

Real phig_gamma = (Real(1.0) + gx*sx + gy*sy + gx*gy*sx*sy);
Real feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n);
gamma += vfrcinv*(-dh)*feb_gamma;
}
});

phi(i,j,k,n) /= gamma;
}
}

}
Expand Down
Loading
Loading