diff --git a/libkineto/include/ActivityType.h b/libkineto/include/ActivityType.h
index 3008425a3..bdd902f13 100644
--- a/libkineto/include/ActivityType.h
+++ b/libkineto/include/ActivityType.h
@@ -51,8 +51,9 @@ enum class ActivityType {
   PRIVATEUSE1_DRIVER = 25, // host side privateUse1 driver events
   XPU_SCOPE_PROFILER = 26, // XPUPTI Profiler scope for performance metrics
+  XPU_SYNC = 27, // XPU synchronization events (PTI device synchronization view)
 
-  ENUM_COUNT = 27, // This is to add buffer and not used for any profiling logic. Add
+  ENUM_COUNT = 28, // This is to add buffer and not used for any profiling logic. Add
   // your new type before it.
   OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME,
 };
 
@@ -97,6 +98,7 @@ inline constexpr std::array<_ActivityTypeName, activityTypeCount + 1> _activityT
     {"privateuse1_runtime", ActivityType::PRIVATEUSE1_RUNTIME},
     {"privateuse1_driver", ActivityType::PRIVATEUSE1_DRIVER},
     {"xpu_scope_profiler", ActivityType::XPU_SCOPE_PROFILER},
+    {"xpu_sync", ActivityType::XPU_SYNC}, // name paired with XPU_SYNC
     {"ENUM_COUNT", ActivityType::ENUM_COUNT},
 }};
 
diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
index c934645b5..ba16be814 100644
--- a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
+++ b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
@@ -260,6 +260,10 @@ void XpuptiActivityApi::enableXpuptiActivities(
         XPUPTI_CALL(ptiViewEnable(PTI_VIEW_COLLECTION_OVERHEAD));
         break;
 
+      case ActivityType::XPU_SYNC: // enables the PTI device synchronization view
+        XPUPTI_CALL(ptiViewEnable(PTI_VIEW_DEVICE_SYNCHRONIZATION));
+        break;
+
       default:
         break;
     }
@@ -304,6 +308,10 @@ void XpuptiActivityApi::disablePtiActivities(
         XPUPTI_CALL(ptiViewDisable(PTI_VIEW_COLLECTION_OVERHEAD));
         break;
 
+      case ActivityType::XPU_SYNC: // mirrors the enable path above
+        XPUPTI_CALL(ptiViewDisable(PTI_VIEW_DEVICE_SYNCHRONIZATION));
+        break;
+
       default:
         break;
     }
diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
index 8c2121f11..cbd294d16 100644
---
a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
+++ b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
@@ -95,18 +95,6 @@ inline void XpuptiActivityProfilerSession::handleCorrelationActivity(
   }
 }
 
-std::string XpuptiActivityProfilerSession::getApiName(
-    const pti_view_record_api_t* activity) {
-#if PTI_VERSION_AT_LEAST(0, 11)
-  const char* api_name = nullptr;
-  XPUPTI_CALL(
-      ptiViewGetApiIdName(activity->_api_group, activity->_api_id, &api_name));
-  return std::string(api_name);
-#else
-  return std::string(activity->_name);
-#endif
-}
-
 inline std::string memcpyName(
     pti_view_memcpy_type kind,
     pti_view_memory_type src,
@@ -301,6 +289,99 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(
   trace_activity->log(logger);
 }
 
+namespace {
+// Returns the label stored in the "Type" metadata field for a PTI
+// synchronization type. Values without a case below produce a diagnostic
+// string that embeds the numeric value.
+std::string getStringFromSynchronizationType(
+    pti_view_synchronization_type synchronization_type) {
+  switch (synchronization_type) {
+    case PTI_VIEW_SYNCHRONIZATION_TYPE_UNKNOWN:
+      return "UNKNOWN";
+    case PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_EXECUTION:
+      return "GPU_BARRIER_EXECUTION";
+    case PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_MEMORY:
+      return "GPU_BARRIER_MEMORY";
+    case PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_FENCE:
+      return "HOST_FENCE";
+    case PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_EVENT:
+      return "HOST_EVENT";
+    case PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_LIST:
+      return "HOST_COMMAND_LIST";
+    case PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_QUEUE:
+      return "HOST_COMMAND_QUEUE";
+    default: // fall through to the diagnostic string below
+      break;
+  }
+  return "404: Not found string literal for this synchronization type: " +
+      std::to_string(synchronization_type);
+}
+} // namespace
+
+// Converts one PTI synchronization record into an XPU_SYNC trace activity:
+// copies timestamps, thread id and correlation id, attaches the record's
+// handles and counters as metadata, and logs it. An activity rejected by
+// outOfRange() is removed again and the op counters are rolled back.
+void XpuptiActivityProfilerSession::handleSynchronizationActivity(
+    const pti_view_record_synchronization* activity,
+    ActivityLogger& logger) {
+  const auto& activity_record = *activity;
+  const auto record_name = getApiName(activity);
+
+  // Only the two GPU barrier types are also counted in gpuOpCount.
+  const auto sync_type = activity_record._synch_type;
+  const bool isGpuSync =
+      sync_type == PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_EXECUTION ||
+      sync_type == PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_MEMORY;
+
+  traceBuffer_.span.opCount += 1;
+  if (isGpuSync) {
+    traceBuffer_.gpuOpCount += 1;
+  }
+  traceBuffer_.emplace_activity(
+      traceBuffer_.span, ActivityType::XPU_SYNC, record_name);
+  auto& synchronization_activity = *(traceBuffer_.activities.back());
+
+  synchronization_activity.startTime = activity_record._start_timestamp;
+  synchronization_activity.endTime = activity_record._end_timestamp;
+  // device is fixed to -1; the record's thread id doubles as the resource id.
+  synchronization_activity.device = -1;
+  synchronization_activity.resource = activity_record._thread_id;
+  synchronization_activity.threadId = activity_record._thread_id;
+
+  synchronization_activity.id = activity_record._correlation_id;
+  synchronization_activity.linked =
+      linkedActivity(activity_record._correlation_id, cpuCorrelationMap_);
+  synchronization_activity.addMetadata(
+      "correlation", activity_record._correlation_id);
+
+  synchronization_activity.addMetadataQuoted(
+      "Type", getStringFromSynchronizationType(sync_type));
+  synchronization_activity.addMetadataQuoted(
+      "Context_handle", handleToHexString(activity_record._context_handle));
+  synchronization_activity.addMetadataQuoted(
+      "Queue_handle", handleToHexString(activity_record._queue_handle));
+  synchronization_activity.addMetadataQuoted(
+      "Event_handle", handleToHexString(activity_record._event_handle));
+  synchronization_activity.addMetadata(
+      "Number_wait_events", activity_record._number_wait_events);
+  synchronization_activity.addMetadata(
+      "Return_code", activity_record._return_code);
+
+  if (outOfRange(&synchronization_activity)) {
+    // Undo the bookkeeping done above before dropping the activity.
+    traceBuffer_.span.opCount -= 1;
+    if (isGpuSync) {
+      traceBuffer_.gpuOpCount -= 1;
+    }
+    removeCorrelatedPtiActivities(&synchronization_activity);
+    traceBuffer_.activities.pop_back();
+    return;
+  }
+
+  synchronization_activity.log(logger);
+}
+
 void XpuptiActivityProfilerSession::handleOverheadActivity(
     const pti_view_record_overhead* activity,
     ActivityLogger& logger) {
@@ -375,6 +456,11 @@ void XpuptiActivityProfilerSession::handlePtiActivity(
       handleOverheadActivity(
           reinterpret_cast(record), logger);
       break;
+    case PTI_VIEW_DEVICE_SYNCHRONIZATION:
+      handleSynchronizationActivity(
+          reinterpret_cast<const pti_view_record_synchronization*>(record),
+          logger);
+      break;
     default:
       errors_.push_back(
           "Unexpected activity type: " + std::to_string(record->_view_kind));
diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
index 0cca9df87..fbe4f17dd 100644
--- a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
+++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
@@ -76,13 +76,27 @@ class XpuptiActivityProfilerSession : public libkineto::IActivityProfilerSession
   using pti_view_record_api_t = pti_view_record_sycl_runtime;
 #endif
 
-  std::string getApiName(const pti_view_record_api_t* activity);
+  // Returns the API name for a PTI record. From PTI 0.11 on the name is looked
+  // up from the record's _api_group/_api_id; older versions read _name.
+  template <typename PTI_VIEW>
+  std::string getApiName(const PTI_VIEW* activity) {
+#if PTI_VERSION_AT_LEAST(0, 11)
+    const char* api_name = nullptr;
+    XPUPTI_CALL(ptiViewGetApiIdName(
+        activity->_api_group, activity->_api_id, &api_name));
+    // api_name stays null if the lookup did not set it; std::string(nullptr) is UB.
+    return api_name != nullptr ? std::string(api_name) : std::string();
+#else
+    return std::string(activity->_name);
+#endif
+  }
 
   template
   void handleRuntimeKernelMemcpyMemsetActivities(ActivityType activityType,
       const pti_view_memory_record_type* activity,
       ActivityLogger& logger);
+  void handleSynchronizationActivity(const pti_view_record_synchronization* activity, ActivityLogger& logger);
   void handleOverheadActivity(const pti_view_record_overhead* activity, ActivityLogger& logger);
   void handlePtiActivity(const pti_view_record_base* record, ActivityLogger& logger);
 
diff --git a/libkineto/test/xpupti/CMakeLists.txt b/libkineto/test/xpupti/CMakeLists.txt
index faa5751b8..a0d7ff5a5 100644
---
a/libkineto/test/xpupti/CMakeLists.txt
+++ b/libkineto/test/xpupti/CMakeLists.txt
@@ -19,8 +19,11 @@ add_executable(XpuptiScopeProfilerConfigTest XpuptiScopeProfilerConfigTest.cpp)
 target_link_libraries(XpuptiScopeProfilerConfigTest PRIVATE ${LINK_LIBRARIES})
 gtest_discover_tests(XpuptiScopeProfilerConfigTest)
 
-include(ExternalProject)
+add_executable(XpuptiActivityHandlersTest XpuptiActivityHandlersTest.cpp)
+target_link_libraries(XpuptiActivityHandlersTest PRIVATE ${LINK_LIBRARIES})
+gtest_add_tests(TARGET XpuptiActivityHandlersTest)
 
+include(ExternalProject)
 function(make_test test_file)
   get_filename_component(test_name "${test_file}" NAME_WE)
   set(lib_name "${test_name}Lib")
diff --git a/libkineto/test/xpupti/XpuptiActivityHandlersTest.cpp b/libkineto/test/xpupti/XpuptiActivityHandlersTest.cpp
new file mode 100644
index 000000000..1b64cefdc
--- /dev/null
+++ b/libkineto/test/xpupti/XpuptiActivityHandlersTest.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "src/plugin/xpupti/XpuptiActivityApi.h"
+#include "src/plugin/xpupti/XpuptiActivityProfilerSession.h"
+#include "src/ActivityBuffers.h"
+#include "include/output_base.h"
+
+#include "src/plugin/xpupti/XpuptiProfilerMacros.h"
+
+#include
+
+namespace KN = KINETO_NAMESPACE;
+using namespace libkineto;
+
+// Test double for XpuptiActivityApi: processActivities() hands each entry of
+// `records` to the handler, so no PTI runtime is needed to produce input.
+class MockXpuptiActivityApi : public KN::XpuptiActivityApi {
+ public:
+  std::vector records; // replayed by processActivities() in insertion order
+
+  std::unique_ptr activityBuffers() override {
+    // Return a non-null map so processTrace enters the processing path.
+    return std::make_unique();
+  }
+
+  const std::pair processActivities(
+      KN::XpuptiActivityBufferMap&,
+      std::function handler) override {
+    for (auto* record : records) {
+      handler(record);
+    }
+    return {static_cast(records.size()), 0}; // {number of records replayed, 0}
+  }
+};
+
+// ActivityLogger stub: all callbacks are no-ops except handleGenericActivity.
+class MockActivityLogger : public ActivityLogger {
+ public:
+  std::vector logged_activities; // addresses only; activities are not copied
+
+  void handleDeviceInfo(const DeviceInfo&, uint64_t) override {}
+  void handleResourceInfo(const ResourceInfo&, int64_t) override {}
+  void handleOverheadInfo(const OverheadInfo&, int64_t) override {}
+  void handleTraceSpan(const TraceSpan&) override {}
+
+  void handleActivity(const ITraceActivity&) override {}
+
+  void handleGenericActivity(const GenericTraceActivity& activity) override {
+    logged_activities.push_back(&activity);
+  }
+
+  void handleTraceStart(
+      const std::unordered_map&,
+      const std::string&) override {}
+
+  void finalizeMemoryTrace(const std::string&, const Config&) override {}
+
+  void finalizeTrace(
+      const Config&,
+      std::unique_ptr,
+      int64_t,
+      std::unordered_map>&) override {}
+};
+
+class XpuptiActivityHandlersTest : public ::testing::Test {
+ protected:
+  MockXpuptiActivityApi mockApi_;
+  MockActivityLogger logger_;
+
+  // Runs every record in mockApi_ through a fresh profiler session and returns
+  // its trace buffer; windowStart/windowEnd are forwarded to processTrace().
+  std::unique_ptr processAndGetTrace(
+      int64_t windowStart = 0,
+      int64_t windowEnd = 1000) {
+    Config config;
+    std::set activity_types = {ActivityType::COLLECTIVE_COMM, ActivityType::XPU_SYNC};
+    auto session = std::make_unique(
+        mockApi_, "__test_profiler__", config, activity_types);
+    session->processTrace(
+        logger_,
+        [](int64_t) -> const ITraceActivity* { return nullptr; },
+        windowStart,
+        windowEnd);
+    return session->getTraceBuffer();
+  }
+};
+
+// --- Synchronization Activity Tests ---
+
+TEST_F(XpuptiActivityHandlersTest, SynchronizationActivityDeviceIsNegativeOne) {
+  pti_view_record_synchronization sync_record{};
+  sync_record._view_kind._view_kind = PTI_VIEW_DEVICE_SYNCHRONIZATION;
+  sync_record._synch_type = PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_EVENT;
+  sync_record._start_timestamp = 100;
+  sync_record._end_timestamp = 200;
+  sync_record._thread_id = 55;
+  sync_record._correlation_id = 1;
+  sync_record._api_id = 84; // zeEventHostSynchronize_id
+  sync_record._api_group = static_cast(1); // PTI_API_GROUP_LEVELZERO
+
+  mockApi_.records.push_back(
+      reinterpret_cast(&sync_record));
+
+  auto traceBuffer = processAndGetTrace();
+  ASSERT_EQ(traceBuffer->activities.size(), 1); // 100..200 is within 0..1000
+
+  auto& activity = *traceBuffer->activities[0];
+  EXPECT_EQ(activity.deviceId(), -1);
+  EXPECT_EQ(activity.type(), ActivityType::XPU_SYNC);
+}
+
+TEST_F(XpuptiActivityHandlersTest, SynchronizationActivityMetadata) {
+  pti_view_record_synchronization sync_record{};
+  sync_record._view_kind._view_kind = PTI_VIEW_DEVICE_SYNCHRONIZATION;
+  sync_record._synch_type = PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_FENCE;
+  sync_record._context_handle = nullptr;
+  sync_record._queue_handle = nullptr;
+  sync_record._event_handle = nullptr;
+  sync_record._start_timestamp = 400;
+  sync_record._end_timestamp = 600;
+  sync_record._thread_id = 88;
+  sync_record._correlation_id = 5;
+  sync_record._number_wait_events = 3;
+  sync_record._return_code = 0;
+  sync_record._api_id = 84; // zeEventHostSynchronize_id
+  sync_record._api_group = static_cast(1); // PTI_API_GROUP_LEVELZERO
+
+  mockApi_.records.push_back(
+      reinterpret_cast(&sync_record));
+
+  auto traceBuffer = processAndGetTrace();
+  ASSERT_EQ(traceBuffer->activities.size(), 1);
+
+  auto& activity = *traceBuffer->activities[0];
+  EXPECT_EQ(activity.timestamp(), 400);
+  EXPECT_EQ(activity.duration(), 200); // 600 - 400
+  EXPECT_EQ(activity.resourceId(), 88); // matches _thread_id set above
+  EXPECT_EQ(activity.getMetadataValue("Type"), "HOST_FENCE");
+  EXPECT_EQ(activity.getMetadataValue("Number_wait_events"), "3");
+  EXPECT_EQ(activity.getMetadataValue("Return_code"), "0");
+  EXPECT_EQ(activity.getMetadataValue("correlation"), "5");
+}
+
+TEST_F(XpuptiActivityHandlersTest, SynchronizationAllTypes) {
+  struct SyncTypeTestCase {
+    pti_view_synchronization_type type;
+    std::string expected_name;
+  };
+  std::vector cases = {
+      {PTI_VIEW_SYNCHRONIZATION_TYPE_UNKNOWN, "UNKNOWN"},
+      {PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_EXECUTION,
+       "GPU_BARRIER_EXECUTION"},
+      {PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_MEMORY,
+       "GPU_BARRIER_MEMORY"},
+      {PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_FENCE, "HOST_FENCE"},
+      {PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_EVENT, "HOST_EVENT"},
+      {PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_LIST, "HOST_COMMAND_LIST"},
+      {PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_QUEUE,
+       "HOST_COMMAND_QUEUE"},
+  };
+
+  for (const auto& tc : cases) {
+    mockApi_.records.clear(); // exactly one record per iteration
+
+    pti_view_record_synchronization sync_record{};
+    sync_record._view_kind._view_kind = PTI_VIEW_DEVICE_SYNCHRONIZATION;
+    sync_record._synch_type = tc.type;
+    sync_record._start_timestamp = 100;
+    sync_record._end_timestamp = 200;
+    sync_record._thread_id = 1;
+    sync_record._correlation_id = 1;
+    sync_record._api_id = 84; // zeEventHostSynchronize_id
+    sync_record._api_group = static_cast(1); // PTI_API_GROUP_LEVELZERO
+
+    mockApi_.records.push_back(
+        reinterpret_cast(&sync_record));
+
+    auto traceBuffer = processAndGetTrace();
+    ASSERT_EQ(traceBuffer->activities.size(), 1)
+        << "Failed for type: " << tc.expected_name;
+
+    auto& activity = *traceBuffer->activities[0];
+    EXPECT_EQ(activity.getMetadataValue("Type"), tc.expected_name)
+        << "Wrong string for synchronization type " << tc.type;
+  }
+}
+
+TEST_F(XpuptiActivityHandlersTest, SynchronizationActivityOutOfRange) {
+  pti_view_record_synchronization sync_record{};
+  sync_record._view_kind._view_kind = PTI_VIEW_DEVICE_SYNCHRONIZATION;
+  sync_record._synch_type = PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_FENCE;
+  sync_record._start_timestamp = 50;
+  sync_record._end_timestamp = 80;
+  sync_record._thread_id = 1;
+  sync_record._correlation_id = 1;
+  sync_record._api_id = 84; // zeEventHostSynchronize_id
+  sync_record._api_group = static_cast(1); // PTI_API_GROUP_LEVELZERO
+
+  mockApi_.records.push_back(
+      reinterpret_cast(&sync_record));
+
+  auto traceBuffer = processAndGetTrace(100, 500);
+  EXPECT_EQ(traceBuffer->activities.size(), 0); // 50..80 is outside 100..500
+}