Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions lib/hsa/mcwamp_hsa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@
// kernel dispatch speed optimization flags
/////////////////////////////////////////////////

// size of default kernarg buffer in the kernarg pool in HSAContext
#define KERNARG_BUFFER_SIZE (512)
// Size, in bytes, of the default kernarg buffer in the kernarg pool in HSAContext.
// Increased from 512 to 4096 (4 KiB) to match the CUDA default. See
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#function-parameters
// When this size is exceeded, the kernarg buffer is allocated on demand,
// and that on-demand allocation is slow.
#define KERNARG_BUFFER_SIZE (4096)

// number of pre-allocated kernarg buffers in HSAContext
// (some kernels don't allocate signals but nearly all need kernargs)
Expand All @@ -75,8 +78,10 @@
// MUST be a power of 2.
#define MAX_INFLIGHT_COMMANDS_PER_QUEUE (2*8192)

// threshold to clean up finished kernel in HSAQueue.asyncOps
int HCC_ASYNCOPS_SIZE = (2*8192);
// Threshold for cleaning up finished kernels in HSAQueue.asyncOps.
// Reduced from 16k (2*8192) to 1k (1024) at the same time as
// KERNARG_BUFFER_SIZE was increased, in order to offset the resulting
// increase in memory pressure.
int HCC_ASYNCOPS_SIZE = (1024);


//---
Expand Down