Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions libkineto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ elseif(KINETO_BACKEND STREQUAL "rocm")
target_compile_definitions(kineto_base PRIVATE "__HIP_PLATFORM_HCC__")
target_compile_definitions(kineto_base PRIVATE "__HIP_PLATFORM_AMD__")
elseif(KINETO_BACKEND STREQUAL "xpu")
# MSVC-only workaround applied when the xpu backend is selected.
if(MSVC)
# MSVC reports __cplusplus as 199711L unless /Zc:__cplusplus is set, which
# breaks SYCL headers that static_assert __cplusplus >= 201703L.
# The switch is appended to KINETO_COMPILE_OPTIONS rather than set globally.
# NOTE(review): if(MSVC) is also true for clang-cl style front ends — confirm
# the flag is accepted by every such compiler used for this backend.
list(APPEND KINETO_COMPILE_OPTIONS "/Zc:__cplusplus")
endif()

list(APPEND KINETO_COMPILE_OPTIONS ${XPUPTI_BUILD_FLAG})
if(KINETO_BUILD_TESTS)
set_target_properties(kineto_base PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
3 changes: 2 additions & 1 deletion libkineto/src/plugin/xpupti/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ if(TARGET Pti::pti_view)
list(APPEND XPUPTI_INCLUDE_DIR ${PTI_INCLUDE_DIR})
set(XPUPTI_INCLUDE_DIR ${XPUPTI_INCLUDE_DIR} PARENT_SCOPE)

set(XPUPTI_BUILD_FLAG "-DHAS_XPUPTI" PARENT_SCOPE)
set(XPUPTI_BUILD_FLAG "-DHAS_XPUPTI")
set(XPUPTI_BUILD_FLAG ${XPUPTI_BUILD_FLAG} PARENT_SCOPE)

message(STATUS " XPU_xpupti_LIBRARY = ${XPU_xpupti_LIBRARY}")
message(STATUS " XPUPTI_INCLUDE_DIR = ${XPUPTI_INCLUDE_DIR}")
Expand Down
22 changes: 0 additions & 22 deletions libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ XpuptiActivityApi& XpuptiActivityApi::singleton() {
}

void XpuptiActivityApi::pushCorrelationID(int id, CorrelationFlowType type) {
#ifdef HAS_XPUPTI
if (!singleton().externalCorrelationEnabled_) {
return;
}
Expand All @@ -34,11 +33,9 @@ void XpuptiActivityApi::pushCorrelationID(int id, CorrelationFlowType type) {
XPUPTI_CALL(ptiViewPushExternalCorrelationId(
pti_view_external_kind::PTI_VIEW_EXTERNAL_KIND_CUSTOM_1, id));
}
#endif
}

void XpuptiActivityApi::popCorrelationID(CorrelationFlowType type) {
#ifdef HAS_XPUPTI
if (!singleton().externalCorrelationEnabled_) {
return;
}
Expand All @@ -51,19 +48,16 @@ void XpuptiActivityApi::popCorrelationID(CorrelationFlowType type) {
XPUPTI_CALL(ptiViewPopExternalCorrelationId(
pti_view_external_kind::PTI_VIEW_EXTERNAL_KIND_CUSTOM_1, nullptr));
}
#endif
}

static bool nextActivityRecord(
uint8_t* buffer,
size_t valid_size,
pti_view_record_base*& record) {
#ifdef HAS_XPUPTI
pti_result status = ptiViewGetNextRecord(buffer, valid_size, &record);
if (status != pti_result::PTI_SUCCESS) {
record = nullptr;
}
#endif
return record != nullptr;
}

Expand Down Expand Up @@ -91,16 +85,13 @@ std::unique_ptr<XpuptiActivityBufferMap> XpuptiActivityApi::activityBuffers() {
}
}

#ifdef HAS_XPUPTI
std::chrono::time_point<std::chrono::system_clock> t1;
XPUPTI_CALL(ptiFlushAllViews());
#endif

std::lock_guard<std::mutex> guard(mutex_);
return std::move(readyGpuTraceBuffers_);
}

#ifdef HAS_XPUPTI
int XpuptiActivityApi::processActivitiesForBuffer(
uint8_t* buf,
size_t validSize,
Expand All @@ -115,26 +106,21 @@ int XpuptiActivityApi::processActivitiesForBuffer(
}
return count;
}
#endif

const std::pair<int, int> XpuptiActivityApi::processActivities(
XpuptiActivityBufferMap& buffers,
std::function<void(const pti_view_record_base*)> handler) {
std::pair<int, int> res{0, 0};
#ifdef HAS_XPUPTI
for (auto& pair : buffers) {
auto& buf = pair.second;
res.first += processActivitiesForBuffer(buf->data(), buf->size(), handler);
res.second += buf->size();
}
#endif
return res;
}

void XpuptiActivityApi::flushActivities() {
#ifdef HAS_XPUPTI
XPUPTI_CALL(ptiFlushAllViews());
#endif
}

void XpuptiActivityApi::clearActivities() {
Expand All @@ -144,14 +130,11 @@ void XpuptiActivityApi::clearActivities() {
return;
}
}
#ifdef HAS_XPUPTI
XPUPTI_CALL(ptiFlushAllViews());
#endif
std::lock_guard<std::mutex> guard(mutex_);
readyGpuTraceBuffers_ = nullptr;
}

#ifdef HAS_XPUPTI
void XpuptiActivityApi::bufferCompletedTrampoline(
uint8_t* buffer,
size_t size,
Expand All @@ -173,7 +156,6 @@ void XpuptiActivityApi::bufferCompleted(
(*readyGpuTraceBuffers_)[it->first] = std::move(it->second);
allocatedGpuTraceBuffers_.erase(it);
}
#endif

#if PTI_VERSION_AT_LEAST(0, 12)
#elif PTI_VERSION_AT_LEAST(0, 11)
Expand Down Expand Up @@ -204,7 +186,6 @@ static void enableSpecifcRuntimeAPIsTracing() {

void XpuptiActivityApi::enableXpuptiActivities(
const std::set<ActivityType>& selected_activities) {
#ifdef HAS_XPUPTI
XPUPTI_CALL(ptiViewSetCallbacks(
bufferRequestedTrampoline, bufferCompletedTrampoline));

Expand Down Expand Up @@ -264,12 +245,10 @@ void XpuptiActivityApi::enableXpuptiActivities(
break;
}
}
#endif
}

void XpuptiActivityApi::disablePtiActivities(
const std::set<ActivityType>& selected_activities) {
#ifdef HAS_XPUPTI
for (const auto& activity : selected_activities) {
switch (activity) {
case ActivityType::GPU_MEMCPY:
Expand Down Expand Up @@ -309,7 +288,6 @@ void XpuptiActivityApi::disablePtiActivities(
}
}
externalCorrelationEnabled_ = false;
#endif
}

} // namespace KINETO_NAMESPACE
11 changes: 5 additions & 6 deletions libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,6 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(

traceBuffer_.span.opCount += 1;
traceBuffer_.gpuOpCount += 1;
const ITraceActivity* linked =
linkedActivity(activity->_correlation_id, cpuCorrelationMap_);

if constexpr (handleRuntimeActivities) {
traceBuffer_.emplace_activity(
Expand All @@ -209,11 +207,14 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(

trace_activity->startTime = activity->_start_timestamp;
trace_activity->endTime = activity->_end_timestamp;
trace_activity->id = activity->_correlation_id;
trace_activity->threadId = activity->_thread_id;
trace_activity->flow.id = activity->_correlation_id;
trace_activity->flow.type = libkineto::kLinkAsyncCpuGpu;
trace_activity->linked = linked;

trace_activity->id = activity->_correlation_id;
trace_activity->linked =
linkedActivity(activity->_correlation_id, cpuCorrelationMap_);
trace_activity->addMetadata("correlation", activity->_correlation_id);

if constexpr (handleRuntimeActivities) {
trace_activity->device = activity->_process_id;
Expand Down Expand Up @@ -271,8 +272,6 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(
"l0 queue", handleToHexString(activity->_queue_handle));
}

trace_activity->addMetadata("correlation", activity->_correlation_id);

if constexpr (handleKernelActivities) {
if (activity->_source_file_name) {
trace_activity->addMetadataQuoted(
Expand Down
101 changes: 75 additions & 26 deletions libkineto/test/xpupti/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,61 +6,110 @@

set(CMAKE_CXX_STANDARD 20)

set(LINK_LIBRARIES
gtest_main
# Libraries shared by every target in this directory. gtest is deliberately
# NOT part of this list:
# Do not link gtest into the shared library to avoid duplicate gtest globals
# (gtest is statically linked into the executable; the shared library resolves
# gtest symbols from the executable at runtime via --export-dynamic).
set(COMMON_LINK_LIBRARIES
kineto_base
kineto_api
$<BUILD_INTERFACE:fmt::fmt-header-only>
${SYCL_LIBRARY}
${PTI_LIBRARY}
)

# Link set for stand-alone test executables: the common libraries plus
# gtest_main.
set(LINK_LIBRARIES gtest_main ${COMMON_LINK_LIBRARIES})

add_executable(XpuptiScopeProfilerConfigTest XpuptiScopeProfilerConfigTest.cpp)
target_link_libraries(XpuptiScopeProfilerConfigTest PRIVATE ${LINK_LIBRARIES})
gtest_discover_tests(XpuptiScopeProfilerConfigTest)
gtest_add_tests(TARGET XpuptiScopeProfilerConfigTest)

include(ExternalProject)

function(make_test test_file)
function(make_sycl_test test_file)
get_filename_component(test_name "${test_file}" NAME_WE)
set(lib_name "${test_name}Lib")
add_library(${lib_name} SHARED XpuptiTestUtilities.cpp ${test_file})
# Do not link gtest into the shared library to avoid duplicate gtest globals
# (gtest is statically linked into the executable; the shared library resolves
# gtest symbols from the executable at runtime via --export-dynamic).
set(LIB_LINK_LIBRARIES
kineto_base
kineto_api
$<BUILD_INTERFACE:fmt::fmt-header-only>
${SYCL_LIBRARY}
${PTI_LIBRARY}
)
target_link_libraries(${lib_name} PRIVATE ${LIB_LINK_LIBRARIES})

target_link_libraries(${lib_name} PRIVATE ${COMMON_LINK_LIBRARIES})
target_include_directories(${lib_name} PRIVATE
$<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES>)

set_target_properties(${lib_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${lib_name} PROPERTIES
POSITION_INDEPENDENT_CODE ON
WINDOWS_EXPORT_ALL_SYMBOLS ON
)
# On Windows, DLLs must resolve all symbols at link time (unlike Linux .so).
# ComputeOnXpu is defined in the compute executable (ExternalProject) which
# links back to this DLL. /FORCE:UNRESOLVED allows the DLL to build with
# this symbol unresolved. The linker will still report LNK2019 as an error
# but /FORCE overrides it (followed by LNK4088 warning) — this is expected.
# The "LINKER:" prefix asks CMake to forward the option to the linker in the
# form required by the compiler driver in use.
if(WIN32)
target_link_options(${lib_name} PRIVATE "LINKER:/FORCE:UNRESOLVED")
endif()

set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR})
install(TARGETS ${lib_name}
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)

ExternalProject_Add(${test_name}
SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/compute
CMAKE_ARGS -DPROJECT_NAME=${test_name}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${SYCL_COMPILER}
CMAKE_ARGS -DCMAKE_PARENT_BINARY_DIR=${CMAKE_BINARY_DIR}
CMAKE_ARGS -DLINK_LIBRARY=${CMAKE_CURRENT_BINARY_DIR}/lib${lib_name}.so
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}
BUILD_ALWAYS TRUE
# Directory handed to the inner project as LINK_LIBRARY_DIR, i.e. where it
# should look for the ${lib_name} shared library.
set(_link_lib_search_dir ${CMAKE_CURRENT_BINARY_DIR})

# Build the common cmake args for the inner ExternalProject. The inner build is
# always configured with an explicit CMAKE_BUILD_TYPE; these are the -D
# arguments that do not depend on the generator choice.
set(_ep_cmake_args
-DPROJECT_NAME=${test_name}
-DCMAKE_CXX_COMPILER=${SYCL_COMPILER}
-DCMAKE_PARENT_BINARY_DIR=${CMAKE_BINARY_DIR}
-DLINK_LIBRARY_NAME=${lib_name}
-DLINK_LIBRARY_DIR=${_link_lib_search_dir}
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}
)
if(WIN32)
# Take the grandparent directory of the SYCL compiler executable as the SYCL
# root (presumably <root>/bin/<compiler> — TODO confirm layout) and add
# <root>/lib to the inner project's linker search path.
get_filename_component(_sycl_compiler_dir "${SYCL_COMPILER}" DIRECTORY)
get_filename_component(_sycl_root "${_sycl_compiler_dir}" DIRECTORY)
list(APPEND _ep_cmake_args
"-DCMAKE_EXE_LINKER_FLAGS=/Qoption,link,/LIBPATH:\"${_sycl_root}/lib\""
)
endif()

# Register the compute sources as an external project named ${test_name}. How
# the inner project is configured depends on the outer generator:
# - Multi-config (Visual Studio, Ninja Multi-Config): uses CONFIGURE_COMMAND
# with $<CONFIG> since CMAKE_ARGS doesn't support generator expressions.
# Ninja is forced as a single-config inner generator.
# - Single-config on Windows: forces Ninja for SYCL/ICX compatibility.
# - Single-config on other platforms (e.g. Linux): forwards the outer generator
# (Ninja, Makefiles, etc.) via CMAKE_GENERATOR.
get_property(_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(_is_multi_config)
# Hand-written configure step: the inner CMAKE_BUILD_TYPE follows the outer
# configuration through $<CONFIG>.
ExternalProject_Add(${test_name}
SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/compute
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-G Ninja
-S <SOURCE_DIR>
-B <BINARY_DIR>
-DCMAKE_BUILD_TYPE=$<CONFIG>
${_ep_cmake_args}
BUILD_ALWAYS TRUE
)
else()
# Default to the outer generator; override with Ninja on Windows only.
set(_selected_generator ${CMAKE_GENERATOR})
if(WIN32)
set(_selected_generator Ninja)
endif()
ExternalProject_Add(${test_name}
SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/compute
CMAKE_GENERATOR ${_selected_generator}
CMAKE_ARGS
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
${_ep_cmake_args}
BUILD_ALWAYS TRUE
)
endif()

add_dependencies(${test_name} ${lib_name})
add_dependencies(${test_name} gtest)
add_dependencies(${test_name} gtest_main)
endfunction()

make_test(XpuptiProfilerTest.cpp)
make_test(XpuptiScopeProfilerTest.cpp)
make_sycl_test(XpuptiProfilerTest.cpp)
make_sycl_test(XpuptiScopeProfilerTest.cpp)

7 changes: 4 additions & 3 deletions libkineto/test/xpupti/XpuptiTestUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "XpuptiTestUtilities.h"

#include "src/ActivityBuffers.h"
#include "src/plugin/xpupti/XpuptiActivityProfiler.h"

#include <libkineto.h>
Expand Down Expand Up @@ -88,9 +89,9 @@ void CheckCountsInMap(
}

EXPECT_EQ(countsMap.size(), expMap.size());

for (auto itCountsMap = countsMap.begin(), itExpArray = expMap.begin();
(itCountsMap != countsMap.end()) && (itExpArray != expMap.end());
auto itCountsMap = countsMap.begin();
auto itExpArray = expMap.begin();
for (; (itCountsMap != countsMap.end()) && (itExpArray != expMap.end());
++itCountsMap, ++itExpArray) {
EXPECT_EQ(itCountsMap->first, itExpArray->first * repeatCount);
EXPECT_EQ(itCountsMap->second, itExpArray->second);
Expand Down
18 changes: 15 additions & 3 deletions libkineto/test/xpupti/compute/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function(find_libraries)
find_library(_FIND_LIB
NAMES ${lib}
PATHS ${CMAKE_PARENT_BINARY_DIR}
PATH_SUFFIXES lib
PATH_SUFFIXES lib/${CMAKE_BUILD_TYPE} lib
NO_DEFAULT_PATH
)

Expand All @@ -40,16 +40,28 @@ find_libraries(gtest gtest_main)

find_package(Threads REQUIRED)

# Locate the library named by LINK_LIBRARY_NAME. Only LINK_LIBRARY_DIR and its
# ${CMAKE_BUILD_TYPE} subdirectory are searched (NO_DEFAULT_PATH disables the
# default search locations). Configuration aborts if nothing is found.
find_library(_LINK_LIB
NAMES ${LINK_LIBRARY_NAME}
PATHS ${LINK_LIBRARY_DIR}
PATH_SUFFIXES ${CMAKE_BUILD_TYPE} ""
NO_DEFAULT_PATH
)
if(NOT _LINK_LIB)
message(FATAL_ERROR "Library ${LINK_LIBRARY_NAME} not found in ${LINK_LIBRARY_DIR}")
endif()

add_executable(${PROJECT_NAME} XpuptiScopeProfilerCompute.cpp)
target_compile_options(${PROJECT_NAME} PRIVATE -fsycl)
target_link_options(${PROJECT_NAME} PRIVATE -fsycl)
# Export gtest symbols so the shared library can resolve them at runtime
target_link_options(${PROJECT_NAME} PRIVATE -Wl,--export-dynamic)
if(NOT WIN32)
# -Wl,--export-dynamic is only passed on non-Windows platforms; the flag is
# not added to the link line when WIN32 is set.
target_link_options(${PROJECT_NAME} PRIVATE -Wl,--export-dynamic)
endif()

target_link_libraries(${PROJECT_NAME} PRIVATE
${gtest_main_FOR_XPU_PATH}
${gtest_FOR_XPU_PATH}
${LINK_LIBRARY}
${_LINK_LIB}
Threads::Threads
)

Expand Down