Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions Src/Base/AMReX_Arena.H
Original file line number Diff line number Diff line change
Expand Up @@ -306,19 +306,9 @@ public:

static void Finalize (); ///< Used internally by amrex

#if 0
union Word
{
void* p;
long long ll;
long double ld;
void (*f) ();
};
static const std::size_t align_size = sizeof(Word);
#endif

/// The alignment of allocated memory
static const std::size_t align_size = 16;
/// 256 bytes is the default alignment that cudaMalloc uses
static const std::size_t align_size = 256;
Comment thread
AlexanderSinn marked this conversation as resolved.
Outdated

/**
* \brief Return the ArenaInfo object for querying
Expand Down
50 changes: 33 additions & 17 deletions Src/Base/AMReX_Arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,17 +177,27 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (arena_info.use_cpu_memory)
{
#endif
p = std::malloc(nbytes);

#ifndef _WIN32
p = std::aligned_alloc(align_size, nbytes);
#else
p = _aligned_malloc(nbytes, align_size);
#endif

if (!p) {
freeUnused_protected();
p = std::malloc(nbytes);
#ifndef _WIN32
p = std::aligned_alloc(align_size, nbytes);
#else
p = _aligned_malloc(nbytes, align_size);
#endif
}
if (!p) {
// out_of_memory_abort uses heap allocations,
// so we print an error before in case it doesn't work.
amrex::ErrorStream() <<
"Out of CPU memory: got nullptr from std::malloc, aborting...\n";
out_of_memory_abort("CPU memory", nbytes, "std::malloc returned nullptr");
"Out of CPU memory: got nullptr from std::aligned_alloc, aborting...\n";
out_of_memory_abort("CPU memory", nbytes, "std::aligned_alloc returned nullptr");
}

#ifndef _WIN32
Expand All @@ -210,7 +220,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
auto ret = cudaHostAlloc(&p, nbytes, cudaHostAllocMapped);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_host(nbytes, Gpu::Device::syclContext())
p = sycl::aligned_alloc_host(align_size, nbytes, Gpu::Device::syclContext())
);

if (!p) {
Expand All @@ -220,7 +230,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
ret = cudaHostAlloc(&p, nbytes, cudaHostAllocMapped);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_host(nbytes, Gpu::Device::syclContext())
p = sycl::aligned_alloc_host(align_size, nbytes, Gpu::Device::syclContext())
);
}

Expand All @@ -235,7 +245,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
": " + hipGetErrorString(ret),
msg = "cudaHostAlloc returned " + std::to_string(ret) +
": " + cudaGetErrorString(ret),
msg = "sycl::malloc_host returned nullptr"
msg = "sycl::aligned_alloc_host returned nullptr"
);
out_of_memory_abort("CPU pinned memory", nbytes, msg);
}
Expand All @@ -262,8 +272,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
auto ret = cudaMallocManaged(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_shared(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);

if (!p) {
Expand All @@ -273,8 +283,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
ret = cudaMallocManaged(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_shared(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);
}

Expand All @@ -285,7 +295,7 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
": " + hipGetErrorString(ret),
msg = "cudaMallocManaged returned " + std::to_string(ret) +
": " + cudaGetErrorString(ret),
msg = "sycl::malloc_shared returned nullptr"
msg = "sycl::aligned_alloc_shared returned nullptr"
);
out_of_memory_abort("GPU managed memory", nbytes, msg);
}
Expand All @@ -312,8 +322,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
auto ret = cudaMalloc(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_device(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_device(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);

if (!p) {
Expand All @@ -323,8 +333,8 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
if (ret != hipSuccess) { p = nullptr; },
ret = cudaMalloc(&p, nbytes);
if (ret != cudaSuccess) { p = nullptr; },
p = sycl::malloc_device(nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
p = sycl::aligned_alloc_device(align_size, nbytes, Gpu::Device::syclDevice(),
Gpu::Device::syclContext())
);
}

Expand All @@ -335,14 +345,15 @@ Arena::allocate_system (std::size_t nbytes) // NOLINT(readability-make-member-fu
": " + hipGetErrorString(ret),
msg = "cudaMalloc returned " + std::to_string(ret) +
": " + cudaGetErrorString(ret),
msg = "sycl::malloc_device returned nullptr"
msg = "sycl::aligned_alloc_device returned nullptr"
);
out_of_memory_abort("GPU device memory", nbytes, msg);
}
}
}
#endif
AMREX_ALWAYS_ASSERT(p != nullptr);
AMREX_ALWAYS_ASSERT(is_aligned(p, align_size)); // for testing
Comment thread
AlexanderSinn marked this conversation as resolved.
Outdated
return p;
}

Expand All @@ -354,7 +365,12 @@ Arena::deallocate_system (void* p, std::size_t nbytes) // NOLINT(readability-mak
{
#endif
if (p && arena_info.device_use_hostalloc) { AMREX_MUNLOCK(p, nbytes); }
#ifndef _WIN32
std::free(p);
#else
_aligned_free(p);
#endif

#ifdef AMREX_USE_GPU
}
else if (arena_info.device_use_hostalloc)
Expand Down
11 changes: 10 additions & 1 deletion Src/Base/AMReX_BArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
void*
amrex::BArena::alloc (std::size_t sz_)
{
void* pt = std::malloc(sz_);
sz_ = Arena::align(sz_);
#ifndef _WIN32
void* pt = std::aligned_alloc(align_size, sz_);
#else
void* pt = _aligned_malloc(sz_, align_size);
#endif
m_profiler.profile_alloc(pt, sz_);
return pt;
}
Expand All @@ -12,7 +17,11 @@ void
amrex::BArena::free (void* pt)
{
m_profiler.profile_free(pt);
#ifndef _WIN32
std::free(pt);
#else
_aligned_free(pt);
#endif
}

bool
Expand Down
6 changes: 4 additions & 2 deletions Src/Base/AMReX_CArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ CArena::CArena (std::size_t hunk_size, ArenaInfo info)
: m_hunk(align(hunk_size == 0 ? DefaultHunkSize : hunk_size))
{
arena_info = info;
BL_ASSERT(m_hunk >= hunk_size);
BL_ASSERT(m_hunk%Arena::align_size == 0);
AMREX_ALWAYS_ASSERT(m_hunk >= hunk_size);
AMREX_ALWAYS_ASSERT(m_hunk%Arena::align_size == 0);
}

CArena::~CArena ()
Expand Down Expand Up @@ -88,6 +88,8 @@ CArena::alloc_protected (std::size_t nbytes)
}
}

N = Arena::align(N);

vp = allocate_system(N);

m_used += N;
Expand Down
Loading