diff --git a/include/kalmar_runtime.h b/include/kalmar_runtime.h index 46766050e5e..9efb7f75995 100644 --- a/include/kalmar_runtime.h +++ b/include/kalmar_runtime.h @@ -421,13 +421,18 @@ class KalmarDevice }); return def; #else + std::shared_ptr result; std::thread::id tid = std::this_thread::get_id(); - tlsDefaultQueueMap_mutex.lock(); - if (tlsDefaultQueueMap.find(tid) == tlsDefaultQueueMap.end()) { - tlsDefaultQueueMap[tid] = createQueue(); + if (tlsDefaultQueueMap.find(tid) != tlsDefaultQueueMap.end()) { + result = tlsDefaultQueueMap[tid]; + } else { + tlsDefaultQueueMap_mutex.lock(); + if (tlsDefaultQueueMap.find(tid) == tlsDefaultQueueMap.end()) { + tlsDefaultQueueMap[tid] = createQueue(); + } + result = tlsDefaultQueueMap[tid]; + tlsDefaultQueueMap_mutex.unlock(); } - std::shared_ptr result = tlsDefaultQueueMap[tid]; - tlsDefaultQueueMap_mutex.unlock(); return result; #endif } diff --git a/lib/hsa/unpinned_copy_engine.cpp b/lib/hsa/unpinned_copy_engine.cpp index fdcd2168d82..a29b7ee14b7 100644 --- a/lib/hsa/unpinned_copy_engine.cpp +++ b/lib/hsa/unpinned_copy_engine.cpp @@ -52,6 +52,7 @@ static hsa_status_t findGlobalPool(hsa_amd_memory_pool_t pool, void* data) if ((HSA_AMD_SEGMENT_GLOBAL == segment) && (flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED)) { *((hsa_amd_memory_pool_t*)data) = pool; + return HSA_STATUS_INFO_BREAK; } return HSA_STATUS_SUCCESS; } @@ -96,6 +97,7 @@ UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgen { hsa_amd_memory_pool_t sys_pool; hsa_status_t err = hsa_amd_agent_iterate_memory_pools(_cpuAgent, findGlobalPool, &sys_pool); + ErrorCheck(err); // Generate a packed C-style array of agents, for use below with hsa_amd_agents_allow_access // TODO - should this include the CPU agents as well?