Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions Src/Base/AMReX_Arena.H
Original file line number Diff line number Diff line change
Expand Up @@ -306,19 +306,15 @@ public:

static void Finalize (); ///< Used internally by amrex

#if 0
union Word
{
void* p;
long long ll;
long double ld;
void (*f) ();
};
static const std::size_t align_size = sizeof(Word);
#endif

/// The alignment of allocated memory
static const std::size_t align_size = 16;
/// 256 bytes is the default alignment that cudaMalloc uses.
/// If the native allocation function were to give memory that
/// has a smaller alignment than align_size, memory from
/// the Arena will also only have this smaller alignment.
/// In practice, native allocation functions have >= 256 bytes
/// of alignment. In all cases, memory should be at least
/// aligned to std::max_align_t, usually 16.
static const std::size_t align_size = 256;

/**
* \brief Return the ArenaInfo object for querying
Expand Down
49 changes: 32 additions & 17 deletions Src/Base/AMReX_Arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,17 +177,27 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (arena_info.use_cpu_memory)
{
#endif
p = std::malloc(nbytes);

#ifndef _WIN32
p = std::aligned_alloc(align_size, nbytes);
#else
p = _aligned_malloc(nbytes, align_size);
#endif

if (!p) {
freeUnused_protected();
p = std::malloc(nbytes);
#ifndef _WIN32
p = std::aligned_alloc(align_size, nbytes);
#else
p = _aligned_malloc(nbytes, align_size);
#endif
}
if (!p) {
// out_of_memory_abort uses heap allocations,
// so we print an error before in case it doesn't work.
amrex::ErrorStream() <<
"Out of CPU memory: got nullptr from std::malloc, aborting...\n";
out_of_memory_abort("CPU memory", nbytes, "std::malloc returned nullptr");
"Out of CPU memory: got nullptr from std::aligned_alloc, aborting...\n";
out_of_memory_abort("CPU memory", nbytes, "std::aligned_alloc returned nullptr");
}

#ifndef _WIN32
Expand All @@ -210,7 +220,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
auto ret = cudaHostAlloc(&p, nbytes, cudaHostAllocMapped);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_host(nbytes, Gpu::Device::syclContext())
p = sycl::aligned_alloc_host(align_size, nbytes, Gpu::Device::syclContext())
);

if (!p) {
Expand All @@ -220,7 +230,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
ret = cudaHostAlloc(&p, nbytes, cudaHostAllocMapped);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_host(nbytes, Gpu::Device::syclContext())
p = sycl::aligned_alloc_host(align_size, nbytes, Gpu::Device::syclContext())
);
}

Expand All @@ -235,7 +245,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
": " + hipGetErrorString(ret),
msg = "cudaHostAlloc returned " + std::to_string(ret) +
": " + cudaGetErrorString(ret),
msg = "sycl::malloc_host returned nullptr"
msg = "sycl::aligned_alloc_host returned nullptr"
);
out_of_memory_abort("CPU pinned memory", nbytes, msg);
}
Expand All @@ -262,8 +272,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
auto ret = cudaMallocManaged(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_shared(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);

if (!p) {
Expand All @@ -273,8 +283,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
ret = cudaMallocManaged(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_shared(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);
}

Expand All @@ -285,7 +295,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
": " + hipGetErrorString(ret),
msg = "cudaMallocManaged returned " + std::to_string(ret) +
": " + cudaGetErrorString(ret),
msg = "sycl::malloc_shared returned nullptr"
msg = "sycl::aligned_alloc_shared returned nullptr"
);
out_of_memory_abort("GPU managed memory", nbytes, msg);
}
Expand All @@ -312,8 +322,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
auto ret = cudaMalloc(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_device(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_device(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);

if (!p) {
Expand All @@ -323,8 +333,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
ret = cudaMalloc(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_device(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_device(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);
}

Expand All @@ -335,7 +345,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
": " + hipGetErrorString(ret),
msg = "cudaMalloc returned " + std::to_string(ret) +
": " + cudaGetErrorString(ret),
msg = "sycl::malloc_device returned nullptr"
msg = "sycl::aligned_alloc_device returned nullptr"
);
out_of_memory_abort("GPU device memory", nbytes, msg);
}
Expand All @@ -354,7 +364,12 @@ Arena::deallocate_system (void* p, std::size_t nbytes) // NOLINT(readability-mak
{
#endif
if (p && arena_info.device_use_hostalloc) { AMREX_MUNLOCK(p, nbytes); }
#ifndef _WIN32
std::free(p);
#else
_aligned_free(p);
#endif

#ifdef AMREX_USE_GPU
}
else if (arena_info.device_use_hostalloc)
Expand Down
11 changes: 10 additions & 1 deletion Src/Base/AMReX_BArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
void*
amrex::BArena::alloc (std::size_t sz_)
{
void* pt = std::malloc(sz_);
sz_ = Arena::align(sz_);
#ifndef _WIN32
void* pt = std::aligned_alloc(align_size, sz_);
#else
void* pt = _aligned_malloc(sz_, align_size);
#endif
m_profiler.profile_alloc(pt, sz_);
return pt;
}
Expand All @@ -12,7 +17,11 @@ void
amrex::BArena::free (void* pt)
{
m_profiler.profile_free(pt);
#ifndef _WIN32
std::free(pt);
#else
_aligned_free(pt);
#endif
}

bool
Expand Down
6 changes: 4 additions & 2 deletions Src/Base/AMReX_CArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ CArena::CArena (std::size_t hunk_size, ArenaInfo info)
: m_hunk(align(hunk_size == 0 ? DefaultHunkSize : hunk_size))
{
arena_info = info;
BL_ASSERT(m_hunk >= hunk_size);
BL_ASSERT(m_hunk%Arena::align_size == 0);
AMREX_ALWAYS_ASSERT(m_hunk >= hunk_size);
AMREX_ALWAYS_ASSERT(m_hunk%Arena::align_size == 0);
}

CArena::~CArena ()
Expand Down Expand Up @@ -88,6 +88,8 @@ CArena::alloc_protected (std::size_t nbytes)
}
}

N = Arena::align(N);

vp = allocate_system(N);

m_used += N;
Expand Down
Loading