diff --git a/libkineto/CMakeLists.txt b/libkineto/CMakeLists.txt index 8e13feea3..4dea56f26 100644 --- a/libkineto/CMakeLists.txt +++ b/libkineto/CMakeLists.txt @@ -206,6 +206,12 @@ elseif(KINETO_BACKEND STREQUAL "rocm") target_compile_definitions(kineto_base PRIVATE "__HIP_PLATFORM_HCC__") target_compile_definitions(kineto_base PRIVATE "__HIP_PLATFORM_AMD__") elseif(KINETO_BACKEND STREQUAL "xpu") + if(MSVC) + # MSVC reports __cplusplus as 199711L unless /Zc:__cplusplus is set, which + # breaks SYCL headers that static_assert __cplusplus >= 201703L. + list(APPEND KINETO_COMPILE_OPTIONS "/Zc:__cplusplus") + endif() + list(APPEND KINETO_COMPILE_OPTIONS ${XPUPTI_BUILD_FLAG}) if(KINETO_BUILD_TESTS) set_target_properties(kineto_base PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/libkineto/src/plugin/xpupti/CMakeLists.txt b/libkineto/src/plugin/xpupti/CMakeLists.txt index c75748948..b0011e4ec 100644 --- a/libkineto/src/plugin/xpupti/CMakeLists.txt +++ b/libkineto/src/plugin/xpupti/CMakeLists.txt @@ -49,7 +49,8 @@ if(TARGET Pti::pti_view) list(APPEND XPUPTI_INCLUDE_DIR ${PTI_INCLUDE_DIR}) set(XPUPTI_INCLUDE_DIR ${XPUPTI_INCLUDE_DIR} PARENT_SCOPE) - set(XPUPTI_BUILD_FLAG "-DHAS_XPUPTI" PARENT_SCOPE) + set(XPUPTI_BUILD_FLAG "-DHAS_XPUPTI") + set(XPUPTI_BUILD_FLAG ${XPUPTI_BUILD_FLAG} PARENT_SCOPE) message(STATUS " XPU_xpupti_LIBRARY = ${XPU_xpupti_LIBRARY}") message(STATUS " XPUPTI_INCLUDE_DIR = ${XPUPTI_INCLUDE_DIR}") diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp index c934645b5..8f8801057 100644 --- a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp +++ b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp @@ -21,7 +21,6 @@ XpuptiActivityApi& XpuptiActivityApi::singleton() { } void XpuptiActivityApi::pushCorrelationID(int id, CorrelationFlowType type) { -#ifdef HAS_XPUPTI if (!singleton().externalCorrelationEnabled_) { return; } @@ -34,11 +33,9 @@ void XpuptiActivityApi::pushCorrelationID(int id, CorrelationFlowType type) { XPUPTI_CALL(ptiViewPushExternalCorrelationId( pti_view_external_kind::PTI_VIEW_EXTERNAL_KIND_CUSTOM_1, id)); } -#endif } void XpuptiActivityApi::popCorrelationID(CorrelationFlowType type) { -#ifdef HAS_XPUPTI if (!singleton().externalCorrelationEnabled_) { return; } @@ -51,19 +48,16 @@ void XpuptiActivityApi::popCorrelationID(CorrelationFlowType type) { XPUPTI_CALL(ptiViewPopExternalCorrelationId( pti_view_external_kind::PTI_VIEW_EXTERNAL_KIND_CUSTOM_1, nullptr)); } -#endif } static bool nextActivityRecord( uint8_t* buffer, size_t valid_size, pti_view_record_base*& record) { -#ifdef HAS_XPUPTI pti_result status = ptiViewGetNextRecord(buffer, valid_size, &record); if (status != pti_result::PTI_SUCCESS) { record = nullptr; } -#endif return record != nullptr; } @@ -91,16 +85,13 @@ std::unique_ptr XpuptiActivityApi::activityBuffers() { } } -#ifdef HAS_XPUPTI std::chrono::time_point t1; XPUPTI_CALL(ptiFlushAllViews()); -#endif std::lock_guard guard(mutex_); return std::move(readyGpuTraceBuffers_); } -#ifdef HAS_XPUPTI int XpuptiActivityApi::processActivitiesForBuffer( uint8_t* buf, size_t validSize, @@ -115,26 +106,21 @@ int XpuptiActivityApi::processActivitiesForBuffer( } return count; } -#endif const std::pair XpuptiActivityApi::processActivities( XpuptiActivityBufferMap& buffers, std::function handler) { std::pair res{0, 0}; -#ifdef HAS_XPUPTI for (auto& pair : buffers) { auto& buf = pair.second; res.first += processActivitiesForBuffer(buf->data(), buf->size(), handler); res.second += buf->size(); } -#endif return res; } void XpuptiActivityApi::flushActivities() { -#ifdef HAS_XPUPTI XPUPTI_CALL(ptiFlushAllViews()); -#endif } void XpuptiActivityApi::clearActivities() { @@ -144,14 +130,11 @@ void XpuptiActivityApi::clearActivities() { return; } } -#ifdef HAS_XPUPTI XPUPTI_CALL(ptiFlushAllViews()); -#endif std::lock_guard guard(mutex_); readyGpuTraceBuffers_ = nullptr; } -#ifdef HAS_XPUPTI void XpuptiActivityApi::bufferCompletedTrampoline( uint8_t* buffer, size_t size, @@ -173,7 +156,6 @@ void XpuptiActivityApi::bufferCompleted( (*readyGpuTraceBuffers_)[it->first] = std::move(it->second); allocatedGpuTraceBuffers_.erase(it); } -#endif #if PTI_VERSION_AT_LEAST(0, 12) #elif PTI_VERSION_AT_LEAST(0, 11) @@ -204,7 +186,6 @@ static void enableSpecifcRuntimeAPIsTracing() { void XpuptiActivityApi::enableXpuptiActivities( const std::set& selected_activities) { -#ifdef HAS_XPUPTI XPUPTI_CALL(ptiViewSetCallbacks( bufferRequestedTrampoline, bufferCompletedTrampoline)); @@ -264,12 +245,10 @@ void XpuptiActivityApi::enableXpuptiActivities( break; } } -#endif } void XpuptiActivityApi::disablePtiActivities( const std::set& selected_activities) { -#ifdef HAS_XPUPTI for (const auto& activity : selected_activities) { switch (activity) { case ActivityType::GPU_MEMCPY: @@ -309,7 +288,6 @@ void XpuptiActivityApi::disablePtiActivities( } } externalCorrelationEnabled_ = false; -#endif } } // namespace KINETO_NAMESPACE diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp index 8c2121f11..484399b52 100644 --- a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp +++ b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp @@ -183,8 +183,6 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities( traceBuffer_.span.opCount += 1; traceBuffer_.gpuOpCount += 1; - const ITraceActivity* linked = - linkedActivity(activity->_correlation_id, cpuCorrelationMap_); if constexpr (handleRuntimeActivities) { traceBuffer_.emplace_activity( @@ -209,11 +207,14 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities( trace_activity->startTime = activity->_start_timestamp; trace_activity->endTime = activity->_end_timestamp; - trace_activity->id = activity->_correlation_id; trace_activity->threadId = activity->_thread_id; trace_activity->flow.id = activity->_correlation_id; trace_activity->flow.type = libkineto::kLinkAsyncCpuGpu; - trace_activity->linked = linked; + + trace_activity->id = activity->_correlation_id; + trace_activity->linked = + linkedActivity(activity->_correlation_id, cpuCorrelationMap_); + trace_activity->addMetadata("correlation", activity->_correlation_id); if constexpr (handleRuntimeActivities) { trace_activity->device = activity->_process_id; @@ -271,8 +272,6 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities( "l0 queue", handleToHexString(activity->_queue_handle)); } - trace_activity->addMetadata("correlation", activity->_correlation_id); - if constexpr (handleKernelActivities) { if (activity->_source_file_name) { trace_activity->addMetadataQuoted( diff --git a/libkineto/test/xpupti/CMakeLists.txt b/libkineto/test/xpupti/CMakeLists.txt index faa5751b8..38e073e59 100644 --- a/libkineto/test/xpupti/CMakeLists.txt +++ b/libkineto/test/xpupti/CMakeLists.txt @@ -6,8 +6,10 @@ set(CMAKE_CXX_STANDARD 20) -set(LINK_LIBRARIES - gtest_main +# Do not link gtest into the shared library to avoid duplicate gtest globals +# (gtest is statically linked into the executable; the shared library resolves +# gtest symbols from the executable at runtime via --export-dynamic). +set(COMMON_LINK_LIBRARIES kineto_base kineto_api $ @@ -15,31 +17,35 @@ set(LINK_LIBRARIES ${PTI_LIBRARY} ) +set(LINK_LIBRARIES gtest_main ${COMMON_LINK_LIBRARIES}) + add_executable(XpuptiScopeProfilerConfigTest XpuptiScopeProfilerConfigTest.cpp) target_link_libraries(XpuptiScopeProfilerConfigTest PRIVATE ${LINK_LIBRARIES}) -gtest_discover_tests(XpuptiScopeProfilerConfigTest) +gtest_add_tests(TARGET XpuptiScopeProfilerConfigTest) include(ExternalProject) -function(make_test test_file) +function(make_sycl_test test_file) get_filename_component(test_name "${test_file}" NAME_WE) set(lib_name "${test_name}Lib") add_library(${lib_name} SHARED XpuptiTestUtilities.cpp ${test_file}) - # Do not link gtest into the shared library to avoid duplicate gtest globals - # (gtest is statically linked into the executable; the shared library resolves - # gtest symbols from the executable at runtime via --export-dynamic). - set(LIB_LINK_LIBRARIES - kineto_base - kineto_api - $ - ${SYCL_LIBRARY} - ${PTI_LIBRARY} - ) - target_link_libraries(${lib_name} PRIVATE ${LIB_LINK_LIBRARIES}) + + target_link_libraries(${lib_name} PRIVATE ${COMMON_LINK_LIBRARIES}) target_include_directories(${lib_name} PRIVATE $) - set_target_properties(${lib_name} PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(${lib_name} PROPERTIES + POSITION_INDEPENDENT_CODE ON + WINDOWS_EXPORT_ALL_SYMBOLS ON + ) + # On Windows, DLLs must resolve all symbols at link time (unlike Linux .so). + # ComputeOnXpu is defined in the compute executable (ExternalProject) which + # links back to this DLL. /FORCE:UNRESOLVED allows the DLL to build with + # this symbol unresolved. The linker will still report LNK2019 as an error + # but /FORCE overrides it (followed by LNK4088 warning) — this is expected. + if(WIN32) + target_link_options(${lib_name} PRIVATE "LINKER:/FORCE:UNRESOLVED") + endif() set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}) install(TARGETS ${lib_name} @@ -47,20 +53,63 @@ function(make_test test_file) RUNTIME DESTINATION bin ) - ExternalProject_Add(${test_name} - SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/compute - CMAKE_ARGS -DPROJECT_NAME=${test_name} - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${SYCL_COMPILER} - CMAKE_ARGS -DCMAKE_PARENT_BINARY_DIR=${CMAKE_BINARY_DIR} - CMAKE_ARGS -DLINK_LIBRARY=${CMAKE_CURRENT_BINARY_DIR}/lib${lib_name}.so - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR} - BUILD_ALWAYS TRUE + set(_link_lib_search_dir ${CMAKE_CURRENT_BINARY_DIR}) + + # Build the common cmake args for the inner ExternalProject (single-config Ninja). + set(_ep_cmake_args + -DPROJECT_NAME=${test_name} + -DCMAKE_CXX_COMPILER=${SYCL_COMPILER} + -DCMAKE_PARENT_BINARY_DIR=${CMAKE_BINARY_DIR} + -DLINK_LIBRARY_NAME=${lib_name} + -DLINK_LIBRARY_DIR=${_link_lib_search_dir} + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR} ) + if(WIN32) + get_filename_component(_sycl_compiler_dir "${SYCL_COMPILER}" DIRECTORY) + get_filename_component(_sycl_root "${_sycl_compiler_dir}" DIRECTORY) + list(APPEND _ep_cmake_args + "-DCMAKE_EXE_LINKER_FLAGS=/Qoption,link,/LIBPATH:\"${_sycl_root}/lib\"" + ) + endif() + + # - Multi-config (Visual Studio, Ninja Multi-Config): uses CONFIGURE_COMMAND + # with $ since CMAKE_ARGS doesn't support generator expressions. + # Ninja is forced as a single-config inner generator. + # - Single-config on Windows: forces Ninja for SYCL/ICX compatibility. + # - Single-config on Linux: forwards the outer generator (Ninja, Makefiles, etc.) + # via CMAKE_GENERATOR. + get_property(_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) + if(_is_multi_config) + ExternalProject_Add(${test_name} + SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/compute + CONFIGURE_COMMAND ${CMAKE_COMMAND} + -G Ninja + -S + -B + -DCMAKE_BUILD_TYPE=$ + ${_ep_cmake_args} + BUILD_ALWAYS TRUE + ) + else() + set(_selected_generator ${CMAKE_GENERATOR}) + if(WIN32) + set(_selected_generator Ninja) + endif() + ExternalProject_Add(${test_name} + SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/compute + CMAKE_GENERATOR ${_selected_generator} + CMAKE_ARGS + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + ${_ep_cmake_args} + BUILD_ALWAYS TRUE + ) + endif() add_dependencies(${test_name} ${lib_name}) add_dependencies(${test_name} gtest) add_dependencies(${test_name} gtest_main) endfunction() -make_test(XpuptiProfilerTest.cpp) -make_test(XpuptiScopeProfilerTest.cpp) +make_sycl_test(XpuptiProfilerTest.cpp) +make_sycl_test(XpuptiScopeProfilerTest.cpp) + diff --git a/libkineto/test/xpupti/XpuptiTestUtilities.cpp b/libkineto/test/xpupti/XpuptiTestUtilities.cpp index a72d0a997..f490ce305 100644 --- a/libkineto/test/xpupti/XpuptiTestUtilities.cpp +++ b/libkineto/test/xpupti/XpuptiTestUtilities.cpp @@ -8,6 +8,7 @@ #include "XpuptiTestUtilities.h" +#include "src/ActivityBuffers.h" #include "src/plugin/xpupti/XpuptiActivityProfiler.h" #include @@ -88,9 +89,9 @@ void CheckCountsInMap( } EXPECT_EQ(countsMap.size(), expMap.size()); - - for (auto itCountsMap = countsMap.begin(), itExpArray = expMap.begin(); - (itCountsMap != countsMap.end()) && (itExpArray != expMap.end()); + auto itCountsMap = countsMap.begin(); + auto itExpArray = expMap.begin(); + for (; (itCountsMap != countsMap.end()) && (itExpArray != expMap.end()); ++itCountsMap, ++itExpArray) { EXPECT_EQ(itCountsMap->first, itExpArray->first * repeatCount); EXPECT_EQ(itCountsMap->second, itExpArray->second); diff --git a/libkineto/test/xpupti/compute/CMakeLists.txt b/libkineto/test/xpupti/compute/CMakeLists.txt index 5c2ee2d72..7cc9aa83b 100644 --- a/libkineto/test/xpupti/compute/CMakeLists.txt +++ b/libkineto/test/xpupti/compute/CMakeLists.txt @@ -23,7 +23,7 @@ function(find_libraries) find_library(_FIND_LIB NAMES ${lib} PATHS ${CMAKE_PARENT_BINARY_DIR} - PATH_SUFFIXES lib + PATH_SUFFIXES lib/${CMAKE_BUILD_TYPE} lib NO_DEFAULT_PATH ) @@ -40,16 +40,28 @@ find_libraries(gtest gtest_main) find_package(Threads REQUIRED) +find_library(_LINK_LIB + NAMES ${LINK_LIBRARY_NAME} + PATHS ${LINK_LIBRARY_DIR} + PATH_SUFFIXES ${CMAKE_BUILD_TYPE} "" + NO_DEFAULT_PATH +) +if(NOT _LINK_LIB) + message(FATAL_ERROR "Library ${LINK_LIBRARY_NAME} not found in ${LINK_LIBRARY_DIR}") +endif() + add_executable(${PROJECT_NAME} XpuptiScopeProfilerCompute.cpp) target_compile_options(${PROJECT_NAME} PRIVATE -fsycl) target_link_options(${PROJECT_NAME} PRIVATE -fsycl) # Export gtest symbols so the shared library can resolve them at runtime -target_link_options(${PROJECT_NAME} PRIVATE -Wl,--export-dynamic) +if(NOT WIN32) + target_link_options(${PROJECT_NAME} PRIVATE -Wl,--export-dynamic) +endif() target_link_libraries(${PROJECT_NAME} PRIVATE ${gtest_main_FOR_XPU_PATH} ${gtest_FOR_XPU_PATH} - ${LINK_LIBRARY} + ${_LINK_LIB} Threads::Threads )