Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion libkineto/include/ActivityType.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ enum class ActivityType {
PRIVATEUSE1_DRIVER = 25, // host side privateUse1 driver events

XPU_SCOPE_PROFILER = 26, // XPUPTI Profiler scope for performance metrics
XPU_SYNC = 27, // XPU synchronization events

ENUM_COUNT = 27, // This is to add buffer and not used for any profiling logic. Add
ENUM_COUNT = 28, // This is to add buffer and not used for any profiling logic. Add
// your new type before it.
OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME,
};
Expand Down Expand Up @@ -97,6 +98,7 @@ inline constexpr std::array<_ActivityTypeName, activityTypeCount + 1> _activityT
{"privateuse1_runtime", ActivityType::PRIVATEUSE1_RUNTIME},
{"privateuse1_driver", ActivityType::PRIVATEUSE1_DRIVER},
{"xpu_scope_profiler", ActivityType::XPU_SCOPE_PROFILER},
{"xpu_sync", ActivityType::XPU_SYNC},
{"ENUM_COUNT", ActivityType::ENUM_COUNT},
}};

Expand Down
8 changes: 8 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,10 @@ void XpuptiActivityApi::enableXpuptiActivities(
XPUPTI_CALL(ptiViewEnable(PTI_VIEW_COLLECTION_OVERHEAD));
break;

case ActivityType::XPU_SYNC:
XPUPTI_CALL(ptiViewEnable(PTI_VIEW_DEVICE_SYNCHRONIZATION));
break;

default:
break;
}
Expand Down Expand Up @@ -304,6 +308,10 @@ void XpuptiActivityApi::disablePtiActivities(
XPUPTI_CALL(ptiViewDisable(PTI_VIEW_COLLECTION_OVERHEAD));
break;

case ActivityType::XPU_SYNC:
XPUPTI_CALL(ptiViewDisable(PTI_VIEW_DEVICE_SYNCHRONIZATION));
break;

default:
break;
}
Expand Down
96 changes: 84 additions & 12 deletions libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,6 @@ inline void XpuptiActivityProfilerSession::handleCorrelationActivity(
}
}

// Returns the name of the API call described by `activity`.
//
// With PTI >= 0.11 the name is resolved through ptiViewGetApiIdName() from the
// record's `_api_group` / `_api_id`; with older PTI versions it is read from
// the record's `_name` field.
//
// If no name pointer is available the result is an empty string: constructing
// std::string from a null `const char*` is undefined behavior, so the pointer
// is checked before it is converted.
std::string XpuptiActivityProfilerSession::getApiName(
    const pti_view_record_api_t* activity) {
  const char* api_name = nullptr;
#if PTI_VERSION_AT_LEAST(0, 11)
  XPUPTI_CALL(
      ptiViewGetApiIdName(activity->_api_group, activity->_api_id, &api_name));
#else
  api_name = activity->_name;
#endif
  return api_name != nullptr ? std::string(api_name) : std::string();
}

inline std::string memcpyName(
pti_view_memcpy_type kind,
pti_view_memory_type src,
Expand Down Expand Up @@ -301,6 +289,85 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(
trace_activity->log(logger);
}

namespace {
std::string getStringFromSynchronizationType(
const pti_view_synchronization_type& synchronization_type) {
using pv_st = pti_view_synchronization_type;
static const std::unordered_map<pv_st, std::string> name_map{
{pv_st::PTI_VIEW_SYNCHRONIZATION_TYPE_UNKNOWN, "UNKNOWN"},
{pv_st::PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_EXECUTION,
"GPU_BARRIER_EXECUTION"},
{pv_st::PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_MEMORY,
"GPU_BARRIER_MEMORY"},
{pv_st::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_FENCE, "HOST_FENCE"},
{pv_st::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_EVENT, "HOST_EVENT"},
{pv_st::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_LIST,
"HOST_COMMAND_LIST"},
{pv_st::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_QUEUE,
"HOST_COMMAND_QUEUE"},
};

const auto& name_string = name_map.find(synchronization_type);
if (name_string == name_map.end()) {
const std::string error_message =
"404: Not found string literal for this synchronization type: " +
std::to_string(synchronization_type);
return error_message;
}
return name_string->second;
}
} // namespace

void XpuptiActivityProfilerSession::handleSynchronizationActivity(
const pti_view_record_synchronization* activity,
ActivityLogger& logger) {
const auto& activity_record = *activity;
const auto record_name = getApiName(activity);

const bool isGpuSync =
activity_record._synch_type == PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_EXECUTION ||
activity_record._synch_type == PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_MEMORY;

traceBuffer_.span.opCount += 1;
if (isGpuSync) {
traceBuffer_.gpuOpCount += 1;
}
traceBuffer_.emplace_activity(traceBuffer_.span, ActivityType::XPU_SYNC, record_name);
auto& synchronization_activity = *(traceBuffer_.activities.back());

synchronization_activity.startTime = activity_record._start_timestamp;
synchronization_activity.endTime = activity_record._end_timestamp;
synchronization_activity.device = -1;
synchronization_activity.resource = activity_record._thread_id;
synchronization_activity.threadId = activity_record._thread_id;

synchronization_activity.id = activity->_correlation_id;
synchronization_activity.linked =
linkedActivity(activity->_correlation_id, cpuCorrelationMap_);
synchronization_activity.addMetadata(
"correlation", activity_record._correlation_id);

synchronization_activity.addMetadataQuoted(
"Type", getStringFromSynchronizationType(activity_record._synch_type));
synchronization_activity.addMetadataQuoted("Context_handle", handleToHexString(activity_record._context_handle));
synchronization_activity.addMetadataQuoted("Queue_handle", handleToHexString(activity_record._queue_handle));
synchronization_activity.addMetadataQuoted("Event_handle", handleToHexString(activity_record._event_handle));
synchronization_activity.addMetadata("Number_wait_events", activity_record._number_wait_events);
synchronization_activity.addMetadata("Return_code", activity_record._return_code);

if (outOfRange(&synchronization_activity)) {
traceBuffer_.span.opCount -= 1;
if (isGpuSync) {
traceBuffer_.gpuOpCount -= 1;
}
removeCorrelatedPtiActivities(&synchronization_activity);
traceBuffer_.activities.pop_back();
return;
}

synchronization_activity.log(logger);
}

void XpuptiActivityProfilerSession::handleOverheadActivity(
const pti_view_record_overhead* activity,
ActivityLogger& logger) {
Expand Down Expand Up @@ -375,6 +442,11 @@ void XpuptiActivityProfilerSession::handlePtiActivity(
handleOverheadActivity(
reinterpret_cast<const pti_view_record_overhead*>(record), logger);
break;
case PTI_VIEW_DEVICE_SYNCHRONIZATION:
handleSynchronizationActivity(
reinterpret_cast<const pti_view_record_synchronization*>(record),
logger);
break;
default:
errors_.push_back(
"Unexpected activity type: " + std::to_string(record->_view_kind));
Expand Down
13 changes: 12 additions & 1 deletion libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,24 @@ class XpuptiActivityProfilerSession : public libkineto::IActivityProfilerSession
using pti_view_record_api_t = pti_view_record_sycl_runtime;
#endif

std::string getApiName(const pti_view_record_api_t* activity);
// Returns the name of the API call described by `activity`.
//
// Templated so that it accepts any PTI record type that exposes the fields
// used below (`_api_group` and `_api_id` with PTI >= 0.11, `_name` with older
// PTI versions).
//
// If no name pointer is available the result is an empty string: constructing
// std::string from a null `const char*` is undefined behavior, so the pointer
// is checked before it is converted.
template <typename PTI_VIEW>
std::string getApiName(const PTI_VIEW* activity) {
  const char* api_name = nullptr;
#if PTI_VERSION_AT_LEAST(0, 11)
  XPUPTI_CALL(ptiViewGetApiIdName(
      activity->_api_group, activity->_api_id, &api_name));
#else
  api_name = activity->_name;
#endif
  return api_name != nullptr ? std::string(api_name) : std::string();
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this change a MUST?

Copy link
Copy Markdown
Author

@tsocha tsocha May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is now used with:

  • pti_view_record_api_t aka pti_view_record_api
  • pti_view_record_synchronization

Unfortunately these structs have no common base class in PTI.
I wanted to use the same logic with no code duplication.


template <class pti_view_memory_record_type>
void handleRuntimeKernelMemcpyMemsetActivities(ActivityType activityType,
const pti_view_memory_record_type* activity,
ActivityLogger& logger);

void handleSynchronizationActivity(const pti_view_record_synchronization* activity, ActivityLogger& logger);
void handleOverheadActivity(const pti_view_record_overhead* activity, ActivityLogger& logger);
void handlePtiActivity(const pti_view_record_base* record, ActivityLogger& logger);

Expand Down
5 changes: 4 additions & 1 deletion libkineto/test/xpupti/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ add_executable(XpuptiScopeProfilerConfigTest XpuptiScopeProfilerConfigTest.cpp)
target_link_libraries(XpuptiScopeProfilerConfigTest PRIVATE ${LINK_LIBRARIES})
gtest_discover_tests(XpuptiScopeProfilerConfigTest)

include(ExternalProject)
# Build the XpuptiActivityHandlersTest executable from its single source file
# and link it against the libraries listed in LINK_LIBRARIES.
add_executable(XpuptiActivityHandlersTest XpuptiActivityHandlersTest.cpp)
target_link_libraries(XpuptiActivityHandlersTest PRIVATE ${LINK_LIBRARIES})
# NOTE(review): XpuptiScopeProfilerConfigTest above is registered with
# gtest_discover_tests(), while this target uses gtest_add_tests() - confirm
# the difference is intentional.
gtest_add_tests(TARGET XpuptiActivityHandlersTest)

include(ExternalProject)
function(make_test test_file)
get_filename_component(test_name "${test_file}" NAME_WE)
set(lib_name "${test_name}Lib")
Expand Down
Loading
Loading