diff --git a/libkineto/include/ActivityType.h b/libkineto/include/ActivityType.h
index 3bf551645..3008425a3 100644
--- a/libkineto/include/ActivityType.h
+++ b/libkineto/include/ActivityType.h
@@ -50,7 +50,9 @@ enum class ActivityType {
   PRIVATEUSE1_RUNTIME = 24, // host side privateUse1 runtime events
   PRIVATEUSE1_DRIVER = 25, // host side privateUse1 driver events
 
-  ENUM_COUNT = 26, // This is to add buffer and not used for any profiling logic. Add
+  XPU_SCOPE_PROFILER = 26, // XPUPTI scope profiler: per-kernel performance metrics, logged as counter events
+
+  ENUM_COUNT = 27, // This is to add buffer and not used for any profiling logic. Add
                    // your new type before it.
   OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME,
 };
@@ -94,6 +96,7 @@ inline constexpr std::array<_ActivityTypeName, activityTypeCount + 1> _activityT
     {"collective_comm", ActivityType::COLLECTIVE_COMM},
     {"privateuse1_runtime", ActivityType::PRIVATEUSE1_RUNTIME},
     {"privateuse1_driver", ActivityType::PRIVATEUSE1_DRIVER},
+    {"xpu_scope_profiler", ActivityType::XPU_SCOPE_PROFILER},
     {"ENUM_COUNT", ActivityType::ENUM_COUNT},
 }};
 
diff --git a/libkineto/libkineto_defs.bzl b/libkineto/libkineto_defs.bzl
index c96240af2..ce35f1c7a 100644
--- a/libkineto/libkineto_defs.bzl
+++ b/libkineto/libkineto_defs.bzl
@@ -52,7 +52,10 @@ def get_libkineto_xpupti_srcs(with_api = True):
         "src/plugin/xpupti/XpuptiActivityProfiler.cpp",
         "src/plugin/xpupti/XpuptiActivityProfilerSession.cpp",
         "src/plugin/xpupti/XpuptiProfilerMacros.cpp",
+        "src/plugin/xpupti/XpuptiScopeProfilerApi.cpp",
         "src/plugin/xpupti/XpuptiScopeProfilerConfig.cpp",
+        "src/plugin/xpupti/XpuptiScopeProfilerHandlers.cpp",
+        "src/plugin/xpupti/XpuptiScopeProfilerSession.cpp",
     ] + (get_libkineto_cpu_only_srcs(with_api))
 
 def get_libkineto_aiupti_srcs(with_api = True):
diff --git a/libkineto/src/output_json.cpp b/libkineto/src/output_json.cpp
index fe0115550..e70e14826 100644
--- a/libkineto/src/output_json.cpp
+++ b/libkineto/src/output_json.cpp
@@ -760,9 +760,13 @@ void
ChromeTraceLogger::handleActivity(const libkineto::ITraceActivity& op) {
     return;
   }
 
-  if (op.type() == ActivityType::MTIA_COUNTERS) {
-    handleCounterEvent(op);
-    return;
+  switch (op.type()) {
+    case ActivityType::MTIA_COUNTERS:
+    case ActivityType::XPU_SCOPE_PROFILER:
+      handleCounterEvent(op); // both activity types are written as counter events
+      return;
+    default:
+      break;
   }
 
   int64_t ts = op.timestamp();
diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
index 04d949afe..c934645b5 100644
--- a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
+++ b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
@@ -245,6 +245,17 @@ void XpuptiActivityApi::enableXpuptiActivities(
         XPUPTI_CALL(ptiViewEnable(PTI_VIEW_DRIVER_API));
         break;
 
+      case ActivityType::XPU_SCOPE_PROFILER:
+#if PTI_VERSION_AT_LEAST(0, 15)
+        // Nothing to enable here: the scope profiler is set up by the
+        // constructor of XpuptiScopeProfilerSession.
+#else
+        throw std::runtime_error(
+            "Intel(R) PTI version required to use scope profiler is at least 0.15 "
+            "(available with Intel(R) oneAPI in version at least 2025.3.1).");
+#endif
+        break;
+
       case ActivityType::OVERHEAD:
         XPUPTI_CALL(ptiViewEnable(PTI_VIEW_COLLECTION_OVERHEAD));
         break;
diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
index 0dee2d68f..8c2121f11 100644
--- a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
+++ b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
@@ -225,6 +225,16 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(
   trace_activity->resource = activity->_sycl_queue_id;
   trace_activity->flow.start = 0;
 
+  // Remember the time span and placement of every kernel, keyed by kernel id,
+  // so that scope-profiler metric records can be aligned with it later.
+  if constexpr (handleKernelActivities) {
+    kernelActivities_[activity->_kernel_id].emplace(
+        trace_activity->startTime,
+        trace_activity->endTime,
+        trace_activity->device,
+        trace_activity->resource);
+  }
+
   addResouceInfo(trace_activity->device, trace_activity->resource);
 }
 
diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp
b/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp
index 6d8b8f9b7..bd140dc8e 100644
--- a/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp
+++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp
@@ -7,8 +7,8 @@
  */
 
 #include "XpuptiActivityProfiler.h"
-#include "XpuptiActivityApi.h"
-#include "XpuptiActivityProfilerSession.h"
+#include "XpuptiScopeProfilerApi.h"
+#include "XpuptiScopeProfilerSession.h"
 
 #include
 #include
@@ -66,8 +66,13 @@ std::unique_ptr XPUActivityProfiler::
 configure(
     const std::set& activity_types,
     const libkineto::Config& config) {
+#if PTI_VERSION_AT_LEAST(0, 15)
+  return std::make_unique(
+      XpuptiActivityApi::singleton(), name(), config, activity_types);
+#else
   return std::make_unique(
       XpuptiActivityApi::singleton(), name(), config, activity_types);
+#endif
 }
 
 std::unique_ptr XPUActivityProfiler::
diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
index f7237ce84..2b3edeb8e 100644
--- a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
+++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
@@ -95,7 +95,7 @@ class XpuptiActivityProfilerSession : public libkineto::IActivityProfilerSession
 
   void addResouceInfo(int32_t device_id, int32_t sycl_queue_id);
 
- private:
+ protected:
   static uint32_t iterationCount_;
   static std::vector deviceUUIDs_;
   static std::unordered_set correlateRuntimeOps_;
@@ -117,6 +117,27 @@ class XpuptiActivityProfilerSession : public libkineto::IActivityProfilerSession
   std::unique_ptr config_{nullptr};
   const std::set& activity_types_;
   std::string name_;
+
+  struct KernelActivity { // time span and placement of one recorded kernel
+    void emplace(
+        int64_t startTime,
+        int64_t endTime,
+        int32_t device,
+        int32_t resource) {
+      startTime_ = startTime;
+      endTime_ = endTime;
+      device_ = device;
+      resource_ = resource;
+    }
+
+    int64_t startTime_{0};
+    int64_t endTime_{0};
+    int32_t device_{0};
+    int32_t resource_{0};
+  };
+
+  std::unordered_map kernelActivities_;
+  int64_t lastKernelActivityEndTime_{0}; // signed, like the timestamps it is combined with
 };
 
 } // namespace KINETO_NAMESPACE
diff --git a/libkineto/src/plugin/xpupti/XpuptiScopeProfilerApi.cpp b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerApi.cpp
new file mode 100644
index 000000000..d942172a2
--- /dev/null
+++ b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerApi.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "XpuptiScopeProfilerApi.h"
+
+#if PTI_VERSION_AT_LEAST(0, 15)
+
+#include "XpuptiScopeProfilerConfig.h"
+
+#include
+#include
+#include
+
+#endif
+
+namespace KINETO_NAMESPACE {
+
+#if PTI_VERSION_AT_LEAST(0, 15)
+
+XpuptiScopeProfilerApi::safe_pti_scope_collection_handle_t::
+    safe_pti_scope_collection_handle_t(std::exception_ptr& exceptFromDestructor)
+    : exceptFromDestructor_(exceptFromDestructor) {
+  XPUPTI_CALL(ptiMetricsScopeEnable(&handle_));
+}
+
+XpuptiScopeProfilerApi::safe_pti_scope_collection_handle_t::
+    ~safe_pti_scope_collection_handle_t() noexcept {
+  try {
+    XPUPTI_CALL(ptiMetricsScopeDisable(handle_));
+  } catch (...) {
+    exceptFromDestructor_ = std::current_exception();
+  }
+}
+
+// Enables PTI metrics-scope collection for the metrics listed in the config.
+// Only per-kernel collection on a single device is supported; anything else
+// (or a system without XPU devices) is reported with std::runtime_error.
+void XpuptiScopeProfilerApi::enableScopeProfiler(const Config& cfg) {
+  uint32_t deviceCount = 0;
+  XPUPTI_CALL(ptiMetricsGetDevices(nullptr, &deviceCount));
+
+  if (deviceCount == 0) {
+    throw std::runtime_error("No XPU devices available");
+  }
+
+  auto devices = std::make_unique(deviceCount);
+  XPUPTI_CALL(ptiMetricsGetDevices(devices.get(), &deviceCount));
+
+  auto devicesHandles = std::make_unique(deviceCount);
+  for (uint32_t i = 0; i < deviceCount; ++i) {
+    devicesHandles[i] = devices[i]._handle;
+  }
+
+  const auto& spcfg = XpuptiScopeProfilerConfig::get(cfg);
+  const auto& activitiesXpuptiMetrics = spcfg.activitiesXpuptiMetrics();
+
+  std::vector metricNames;
+  metricNames.reserve(activitiesXpuptiMetrics.size());
+  std::transform(
+      activitiesXpuptiMetrics.begin(),
+      activitiesXpuptiMetrics.end(),
+      std::back_inserter(metricNames),
+      [](const std::string& s) { return s.c_str(); });
+
+  pti_metrics_scope_mode_t collectionMode = spcfg.xpuptiProfilerPerKernel()
+      ? PTI_METRICS_SCOPE_AUTO_KERNEL
+      : PTI_METRICS_SCOPE_USER;
+
+  if (collectionMode == PTI_METRICS_SCOPE_USER) {
+    throw std::runtime_error(
+        "XPUPTI_PROFILER_ENABLE_PER_KERNEL has to be set to 1. Other variants are currently not supported.");
+  }
+
+  scopeHandleOpt_.emplace(exceptFromScopeHandleDestructor_);
+  XPUPTI_CALL(ptiMetricsScopeConfigure(
+      *scopeHandleOpt_,
+      collectionMode,
+      devicesHandles.get(),
+      ((void)deviceCount, 1), // Only 1 device is currently supported
+      metricNames.data(),
+      metricNames.size()));
+
+  uint64_t expectedKernels = spcfg.xpuptiProfilerMaxScopes();
+  size_t estimatedCollectionBufferSize = 0;
+  XPUPTI_CALL(ptiMetricsScopeQueryCollectionBufferSize(
+      *scopeHandleOpt_, expectedKernels, &estimatedCollectionBufferSize));
+
+  XPUPTI_CALL(ptiMetricsScopeSetCollectionBufferSize(
+      *scopeHandleOpt_, estimatedCollectionBufferSize));
+}
+
+void XpuptiScopeProfilerApi::disableScopeProfiler() {
+  scopeHandleOpt_.reset();
+  // Clear the captured error before rethrowing so that a later call does not
+  // report the same failure a second time.
+  if (std::exception_ptr pending = exceptFromScopeHandleDestructor_) {
+    exceptFromScopeHandleDestructor_ = nullptr;
+    std::rethrow_exception(pending);
+  }
+}
+
+void XpuptiScopeProfilerApi::startScopeActivity() {
+  if (scopeHandleOpt_) {
+    XPUPTI_CALL(ptiMetricsScopeStartCollection(*scopeHandleOpt_));
+  }
+}
+
+void XpuptiScopeProfilerApi::stopScopeActivity() {
+  if (scopeHandleOpt_) {
+    XPUPTI_CALL(ptiMetricsScopeStopCollection(*scopeHandleOpt_));
+  }
+}
+
+static size_t IntDivRoundUp(size_t a, size_t b) {
+  return (a + b - 1) / b;
+}
+
+// Walks every collection buffer, lets PTI compute the metric records stored
+// in it and passes each record, together with the metadata, to `handler`.
+void XpuptiScopeProfilerApi::processScopeTrace(
+    std::function handler) {
+  if (scopeHandleOpt_) {
+    pti_metrics_scope_record_metadata_t metadata{}; // zeroed before it is handed to the C API
+    metadata._struct_size = sizeof(pti_metrics_scope_record_metadata_t);
+
+    XPUPTI_CALL(ptiMetricsScopeGetMetricsMetadata(*scopeHandleOpt_, &metadata));
+
+    uint64_t collectionBuffersCount = 0;
+    XPUPTI_CALL(ptiMetricsScopeGetCollectionBuffersCount(
+        *scopeHandleOpt_, &collectionBuffersCount));
+
+    for (uint64_t bufferId = 0; bufferId < collectionBuffersCount; ++bufferId) {
+      void* collectionBuffer = nullptr;
+      size_t actualCollectionBufferSize = 0;
+      XPUPTI_CALL(ptiMetricsScopeGetCollectionBuffer(
+          *scopeHandleOpt_,
+          bufferId,
+          &collectionBuffer,
+          &actualCollectionBufferSize));
+
+      pti_metrics_scope_collection_buffer_properties_t metricsBufferProps{};
+      metricsBufferProps._struct_size =
+          sizeof(pti_metrics_scope_collection_buffer_properties_t);
+      XPUPTI_CALL(ptiMetricsScopeGetCollectionBufferProperties(
+          *scopeHandleOpt_, collectionBuffer, &metricsBufferProps));
+
+      size_t requiredMetricsBufferSize = 0;
+      size_t recordsCount = 0;
+      XPUPTI_CALL(ptiMetricsScopeQueryMetricsBufferSize(
+          *scopeHandleOpt_,
+          collectionBuffer,
+          &requiredMetricsBufferSize,
+          &recordsCount));
+
+      if (recordsCount > 0) {
+        auto metricsBuffer =
+            std::make_unique(IntDivRoundUp(
+                requiredMetricsBufferSize, sizeof(pti_metrics_scope_record_t)));
+
+        size_t actualRecordsCount = 0;
+        XPUPTI_CALL(ptiMetricsScopeCalculateMetrics(
+            *scopeHandleOpt_,
+            collectionBuffer,
+            metricsBuffer.get(),
+            requiredMetricsBufferSize,
+            &actualRecordsCount));
+
+        for (size_t recordId = 0; recordId < actualRecordsCount; ++recordId) {
+          auto record = metricsBuffer.get() + recordId;
+          handler(record, metadata);
+        }
+      }
+    }
+  }
+}
+
+#endif
+
+} // namespace KINETO_NAMESPACE
diff --git a/libkineto/src/plugin/xpupti/XpuptiScopeProfilerApi.h b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerApi.h
new file mode 100644
index 000000000..ed7e97932
--- /dev/null
+++ b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerApi.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include "XpuptiActivityApi.h"
+
+#if PTI_VERSION_AT_LEAST(0, 15)
+
+#include
+
+#include
+
+namespace KINETO_NAMESPACE {
+
+class Config;
+
+class XpuptiScopeProfilerApi { // wrapper around the PTI metrics-scope calls
+ public:
+  XpuptiScopeProfilerApi() = default;
+  XpuptiScopeProfilerApi(const XpuptiScopeProfilerApi&) = delete;
+  XpuptiScopeProfilerApi& operator=(const XpuptiScopeProfilerApi&) = delete;
+
+  ~XpuptiScopeProfilerApi() = default;
+
+  void enableScopeProfiler(const Config&); // configures collection; throws if unsupported
+  void disableScopeProfiler(); // releases the handle; rethrows a failed disable
+  void startScopeActivity(); // no-op unless the profiler is enabled
+  void stopScopeActivity(); // no-op unless the profiler is enabled
+
+  void processScopeTrace(
+      std::function handler); // handler is called once per metrics record
+
+ private:
+  struct safe_pti_scope_collection_handle_t { // enables in ctor, disables in dtor
+    safe_pti_scope_collection_handle_t(
+        std::exception_ptr& exceptFromDestructor);
+    ~safe_pti_scope_collection_handle_t() noexcept;
+
+    operator pti_scope_collection_handle_t() {
+      return handle_;
+    }
+
+    pti_scope_collection_handle_t handle_{};
+    std::exception_ptr& exceptFromDestructor_; // receives the error of a failed disable
+  };
+
+  std::optional scopeHandleOpt_; // engaged by enableScopeProfiler, reset by disableScopeProfiler
+  std::exception_ptr exceptFromScopeHandleDestructor_;
+};
+
+} // namespace KINETO_NAMESPACE
+
+#endif
diff --git a/libkineto/src/plugin/xpupti/XpuptiScopeProfilerHandlers.cpp b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerHandlers.cpp
new file mode 100644
index 000000000..dc27fd5d4
--- /dev/null
+++ b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerHandlers.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "XpuptiProfilerMacros.h"
+
+#if PTI_VERSION_AT_LEAST(0, 15)
+
+#include "XpuptiScopeProfilerSession.h"
+
+namespace KINETO_NAMESPACE {
+
+enum class MetadataOrCounterValue { // selects where a metric value is stored
+  Metadata = 0,
+  CounterValue = 1,
+};
+
+static void AddPtiValueToMetadataOrCounterValue( // unknown value types are ignored
+    GenericTraceActivity* scopeActivity,
+    MetadataOrCounterValue metadataOrCounterValue,
+    const std::string& metricName,
+    pti_metric_value_type valueType,
+    const pti_value_t& value) {
+  switch (valueType) {
+#define CASE(T, FIELD)                                                \
+  case PTI_METRIC_VALUE_TYPE_##T:                                     \
+    if (metadataOrCounterValue == MetadataOrCounterValue::Metadata) { \
+      scopeActivity->addMetadata(metricName, value.FIELD);            \
+    } else {                                                          \
+      scopeActivity->addCounterValue(metricName, value.FIELD);        \
+    }                                                                 \
+    return;
+
+    CASE(UINT32, ui32);
+    CASE(UINT64, ui64);
+    CASE(FLOAT32, fp32);
+    CASE(FLOAT64, fp64);
+
+#undef CASE
+
+    case PTI_METRIC_VALUE_TYPE_BOOL8:
+      if (metadataOrCounterValue == MetadataOrCounterValue::Metadata) {
+        scopeActivity->addMetadata(metricName, value.b8 ? "true" : "false");
+      } else {
+        scopeActivity->addCounterValue(metricName, value.b8);
+      }
+      return;
+
+    default:
+      break;
+  }
+}
+
+void XpuptiScopeProfilerSession::handleScopeRecord(
+    const pti_metrics_scope_record_t* record,
+    const pti_metrics_scope_record_metadata_t& metadata,
+    ActivityLogger& logger) {
+  std::array scopeActivities{}; // [0] kernel summary, [1] counter values, [2] zeroed counters
+
+  traceBuffer_.emplace_activity(
+      traceBuffer_.span,
+      ActivityType::CONCURRENT_KERNEL,
+      record->_kernel_name
+          ? fmt::format("metrics: {}", record->_kernel_name)
+          : fmt::format("metrics: kernel_{}", record->_kernel_id));
+
+  scopeActivities[0] = traceBuffer_.activities.back().get();
+
+  for (auto itSa = scopeActivities.begin() + 1; itSa != scopeActivities.end();
+       ++itSa) {
+    traceBuffer_.emplace_activity(
+        traceBuffer_.span, ActivityType::XPU_SCOPE_PROFILER, "xpu");
+
+    *itSa = traceBuffer_.activities.back().get();
+  }
+
+  std::function FillActivityRecord{};
+  auto it = kernelActivities_.find(record->_kernel_id);
+  if (it != kernelActivities_.end()) { // kernel was traced: wrap its time span
+    FillActivityRecord = [it](GenericTraceActivity* act) {
+      act->startTime = it->second.startTime_ - 1;
+      act->endTime = it->second.endTime_ + 1;
+      act->device = it->second.device_;
+      act->resource = it->second.resource_;
+    };
+  } else { // unknown kernel: place it right after the previous record
+    FillActivityRecord = [this](GenericTraceActivity* act) {
+      act->startTime = lastKernelActivityEndTime_ + 1;
+      act->endTime = act->startTime + 1;
+      act->device = 0;
+      act->resource = 0;
+    };
+  }
+  for (auto sa : scopeActivities) {
+    FillActivityRecord(sa);
+  }
+  scopeActivities[2]->startTime = scopeActivities[2]->endTime; // [2] sits at the end timestamp
+
+  if (it != kernelActivities_.end()) {
+    kernelActivities_.erase(it);
+  }
+  lastKernelActivityEndTime_ = scopeActivities[0]->endTime;
+
+  scopeActivities[0]->addMetadata("kernel_id", record->_kernel_id);
+  scopeActivities[0]->addMetadataQuoted(
+      "queue", fmt::format("{}", record->_queue));
+
+  for (uint32_t m = 0; m < metadata._metrics_count; ++m) {
+    const auto& unit = metadata._metric_units[m];
+    std::string unitSuffix = unit ? fmt::format("::{}", unit) : "";
+    std::string metricNameWithUnit =
+        fmt::format("{}{}", metadata._metric_names[m], unitSuffix);
+
+    AddPtiValueToMetadataOrCounterValue(
+        scopeActivities[0],
+        MetadataOrCounterValue::Metadata,
+        metricNameWithUnit,
+        metadata._value_types[m],
+        record->_metrics_values[m]);
+
+    AddPtiValueToMetadataOrCounterValue(
+        scopeActivities[1],
+        MetadataOrCounterValue::CounterValue,
+        metadata._metric_names[m],
+        metadata._value_types[m],
+        record->_metrics_values[m]);
+
+    scopeActivities[2]->addCounterValue(metadata._metric_names[m], 0);
+  }
+
+  for (auto sa : scopeActivities) {
+    sa->log(logger);
+  }
+}
+
+} // namespace KINETO_NAMESPACE
+
+#endif
diff --git a/libkineto/src/plugin/xpupti/XpuptiScopeProfilerSession.cpp b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerSession.cpp
new file mode 100644
index 000000000..cf491f317
--- /dev/null
+++ b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerSession.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "XpuptiScopeProfilerSession.h"
+
+#if PTI_VERSION_AT_LEAST(0, 15)
+
+namespace KINETO_NAMESPACE {
+
+XpuptiScopeProfilerSession::XpuptiScopeProfilerSession(
+    XpuptiActivityApi& xpti,
+    const std::string& name,
+    const libkineto::Config& config,
+    const std::set& activity_types)
+    : XpuptiActivityProfilerSession(xpti, name, config, activity_types) {
+  scopeProfilerEnabled_ =
+      activity_types.count(ActivityType::XPU_SCOPE_PROFILER) > 0;
+  if (scopeProfilerEnabled_) {
+    xptiScopeProf_.enableScopeProfiler(*config_);
+  }
+}
+
+XpuptiScopeProfilerSession::~XpuptiScopeProfilerSession() {
+  try { // implicitly noexcept: an exception escaping here calls std::terminate
+    if (scopeProfilerEnabled_) { xptiScopeProf_.disableScopeProfiler(); }
+  } catch (...) { /* best effort: teardown errors cannot be reported here */ }
+}
+
+void XpuptiScopeProfilerSession::start() { // base session first, then metrics
+  XpuptiActivityProfilerSession::start();
+  if (scopeProfilerEnabled_) {
+    xptiScopeProf_.startScopeActivity();
+  }
+}
+
+void XpuptiScopeProfilerSession::stop() { // reverse order of start()
+  if (scopeProfilerEnabled_) {
+    xptiScopeProf_.stopScopeActivity();
+  }
+  XpuptiActivityProfilerSession::stop();
+}
+
+void XpuptiScopeProfilerSession::toggleCollectionDynamic(const bool enable) {
+  XpuptiActivityProfilerSession::toggleCollectionDynamic(enable);
+  if (scopeProfilerEnabled_) {
+    if (enable) {
+      xptiScopeProf_.startScopeActivity();
+    } else {
+      xptiScopeProf_.stopScopeActivity();
+    }
+  }
+}
+
+void XpuptiScopeProfilerSession::processTrace(ActivityLogger& logger) {
+  XpuptiActivityProfilerSession::processTrace(logger); // fills kernelActivities_ first
+  if (scopeProfilerEnabled_) {
+    xptiScopeProf_.processScopeTrace(
+        [this, &logger](
+            const pti_metrics_scope_record_t* record,
+            const pti_metrics_scope_record_metadata_t& metadata) -> void {
+          handleScopeRecord(record, metadata, logger);
+        });
+  }
+}
+
+} // namespace KINETO_NAMESPACE
+
+#endif
diff --git a/libkineto/src/plugin/xpupti/XpuptiScopeProfilerSession.h b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerSession.h
new file mode 100644
index 000000000..0e19376d3
--- /dev/null
+++ b/libkineto/src/plugin/xpupti/XpuptiScopeProfilerSession.h
@@ -0,0
+1,57 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include "XpuptiActivityProfilerSession.h"
+#include "XpuptiScopeProfilerApi.h"
+
+#if PTI_VERSION_AT_LEAST(0, 15)
+
+#include
+
+namespace KINETO_NAMESPACE {
+
+class XpuptiScopeProfilerApi;
+
+// Activity profiler session that additionally drives the XPUPTI scope
+// profiler when ActivityType::XPU_SCOPE_PROFILER is among the requested
+// activity types; otherwise it behaves like the base session.
+class XpuptiScopeProfilerSession : public XpuptiActivityProfilerSession {
+ public:
+  XpuptiScopeProfilerSession(
+      XpuptiActivityApi& xpti,
+      const std::string& name,
+      const libkineto::Config& config,
+      const std::set& activity_types);
+
+  XpuptiScopeProfilerSession(const XpuptiScopeProfilerSession&) = delete;
+  XpuptiScopeProfilerSession& operator=(const XpuptiScopeProfilerSession&) =
+      delete;
+
+  ~XpuptiScopeProfilerSession() override;
+  void start() override;
+  void stop() override;
+  // NOTE(review): add `override` here too if the base declares this virtual.
+  void toggleCollectionDynamic(const bool enable);
+
+  void processTrace(ActivityLogger& logger) override;
+
+  void handleScopeRecord(
+      const pti_metrics_scope_record_t* record,
+      const pti_metrics_scope_record_metadata_t& metadata,
+      ActivityLogger& logger);
+
+ private:
+  XpuptiScopeProfilerApi xptiScopeProf_;
+  bool scopeProfilerEnabled_{false};
+};
+
+} // namespace KINETO_NAMESPACE
+
+#endif
diff --git a/libkineto/test/ConfigTest.cpp b/libkineto/test/ConfigTest.cpp
index 6cdfece8b..2c4934aad 100644
--- a/libkineto/test/ConfigTest.cpp
+++ b/libkineto/test/ConfigTest.cpp
@@ -135,6 +135,11 @@ TEST(ParseTest, ActivityTypes) {
       cfg2.selectedActivityTypes(),
       std::set({ActivityType::XPU_RUNTIME}));
 
+  EXPECT_TRUE(cfg2.parse("ACTIVITY_TYPES = xpu_scope_profiler"));
+  EXPECT_EQ(
+      cfg2.selectedActivityTypes(),
+      std::set({ActivityType::XPU_SCOPE_PROFILER}));
+
   EXPECT_TRUE(
       cfg2.parse("ACTIVITY_TYPES=privateuse1_Runtime,privateuse1_driver"));
   EXPECT_EQ(
diff --git a/libkineto/test/xpupti/CMakeLists.txt b/libkineto/test/xpupti/CMakeLists.txt
index 0ac807332..1bc5595e7
100644
--- a/libkineto/test/xpupti/CMakeLists.txt
+++ b/libkineto/test/xpupti/CMakeLists.txt
@@ -63,3 +63,4 @@ function(make_test test_file)
 endfunction()
 
 make_test(XpuptiProfilerTest.cpp)
+make_test(XpuptiScopeProfilerTest.cpp)
diff --git a/libkineto/test/xpupti/XpuptiScopeProfilerTest.cpp b/libkineto/test/xpupti/XpuptiScopeProfilerTest.cpp
new file mode 100644
index 000000000..8311792bb
--- /dev/null
+++ b/libkineto/test/xpupti/XpuptiScopeProfilerTest.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "XpuptiTestUtilities.h"
+
+#include "src/plugin/xpupti/XpuptiActivityProfiler.h"
+#include "src/plugin/xpupti/XpuptiProfilerMacros.h"
+#include "src/plugin/xpupti/XpuptiScopeProfilerConfig.h"
+
+#include
+
+#include
+#include
+
+#include
+
+namespace KN = KINETO_NAMESPACE;
+
+class XpuptiScopeProfilerTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    KN::XpuptiScopeProfilerConfig::registerFactory();
+  }
+};
+
+// Runs RunProfilerTest with the scope profiler requested. A std::runtime_error
+// is expected when per-kernel mode is off or PTI is older than 0.15; any
+// other outcome of the profiler run is propagated unchanged.
+// NOTE(review): handleScopeRecord names XPU_SCOPE_PROFILER activities "xpu";
+// confirm the trailing "metrics" entries below are what is compared against.
+void RunTest(std::string_view perKernel, unsigned maxScopes) {
+  KN::Config cfg;
+
+  std::vector metrics = {
+      "GpuTime",
+      "GpuCoreClocks",
+      "AvgGpuCoreFrequencyMHz",
+      "XVE_INST_EXECUTED_ALU0_ALL_UTILIZATION",
+      "XVE_ACTIVE",
+      "XVE_STALL"};
+
+  EXPECT_TRUE(cfg.parse(
+      fmt::format("XPUPTI_PROFILER_METRICS = {}", fmt::join(metrics, ","))));
+  EXPECT_TRUE(cfg.parse(
+      fmt::format("XPUPTI_PROFILER_ENABLE_PER_KERNEL = {}", perKernel)));
+  EXPECT_TRUE(
+      cfg.parse(fmt::format("XPUPTI_PROFILER_MAX_SCOPES = {}", maxScopes)));
+
+  std::set activities{
+      KN::ActivityType::GPU_MEMCPY,
+      KN::ActivityType::GPU_MEMSET,
+      KN::ActivityType::CONCURRENT_KERNEL,
+      KN::ActivityType::EXTERNAL_CORRELATION,
+      KN::ActivityType::XPU_RUNTIME,
+      KN::ActivityType::XPU_SCOPE_PROFILER};
+
+  std::vector expectedActivities = {
+      "urEnqueueMemBufferWrite",
+      "urEnqueueMemBufferWrite",
+      "urEnqueueMemBufferWrite",
+      "Memcpy M2D",
+      "Memcpy M2D",
+      "Memcpy M2D",
+      "urEnqueueKernelLaunch",
+      "Run(sycl::_V1::queue, ...)",
+      "urEnqueueMemBufferRead",
+      "Memcpy D2M",
+      "metrics: Run(sycl::_V1::queue, ...)",
+      "metrics",
+      "metrics"};
+
+  std::vector expectedTypes = {
+      "xpu_runtime",
+      "xpu_runtime",
+      "xpu_runtime",
+      "gpu_memcpy",
+      "gpu_memcpy",
+      "gpu_memcpy",
+      "xpu_runtime",
+      "kernel",
+      "xpu_runtime",
+      "gpu_memcpy",
+      "kernel",
+      "xpu_scope_profiler",
+      "xpu_scope_profiler"};
+
+  std::exception_ptr eptr;
+
+  try {
+    constexpr unsigned repeatCount = 5;
+    RunProfilerTest(
+        metrics,
+        activities,
+        cfg,
+        repeatCount,
+        std::move(expectedActivities),
+        std::move(expectedTypes));
+  } catch (...) {
+    eptr = std::current_exception();
+  }
+
+#if PTI_VERSION_AT_LEAST(0, 15)
+  bool expectThrow = (perKernel == "false");
+#else
+  constexpr bool expectThrow = true;
+#endif
+
+  if (expectThrow) {
+    EXPECT_THROW(
+        try {
+          if (eptr) {
+            std::rethrow_exception(eptr);
+          }
+        } catch (const std::runtime_error& e) {
+          static bool isVerbose = IsEnvVerbose();
+          if (isVerbose) {
+            std::cout << "std::runtime_error = " << e.what() << std::endl;
+          }
+          throw;
+        },
+        std::runtime_error);
+  } else {
+    if (eptr) {
+      std::rethrow_exception(eptr);
+    }
+  }
+}
+
+/////////////////////////////////////////////////////////////////////
+
+TEST_F(XpuptiScopeProfilerTest, PerKernelScope) {
+  RunTest("true", 314);
+}
+
+TEST_F(XpuptiScopeProfilerTest, UserScope) {
+  RunTest("false", 159);
+}
diff --git a/libkineto/test/xpupti/XpuptiTestUtilities.cpp b/libkineto/test/xpupti/XpuptiTestUtilities.cpp
index 739553528..a72d0a997 100644
--- a/libkineto/test/xpupti/XpuptiTestUtilities.cpp
+++ b/libkineto/test/xpupti/XpuptiTestUtilities.cpp
@@ -41,6 +41,24 @@ std::ostream& operator<<(std::ostream& os, KN::ActivityType actType) {
   return os;
 }
 
+static std::pair CountMetricsInVector( // {match count, bit mask of matched metric indices}
+    const std::vector& metrics,
+    const decltype(std::declval()
+                       .counterValues())& vec) {
+  unsigned metricsCount = 0;
+  unsigned metricsMask = 0;
+  for (unsigned i = 0; i < metrics.size(); ++i) {
+    const auto metricSv = metrics[i];
+    if (std::find_if(vec.begin(), vec.end(), [metricSv](const auto& pair) {
+          return pair.first == metricSv;
+        }) != vec.end()) {
+      ++metricsCount;
+      metricsMask |= (1u << i); // unsigned shift, matching the mask check in RunProfilerTest
+    }
+  }
+  return std::pair{metricsCount, metricsMask};
+}
+
 static std::pair CountMetricsInString(
     const std::vector& metrics,
     const std::string_view sv) {
@@ -226,8 +244,58 @@ RunProfilerTest(
       activitiesCount[*insertResult.first]++;
       typesCount[pActivity->type()]++;
 
-      [[maybe_unused]] auto [metricsCount, metricsMask] =
-          CountMetricsInString(metrics, pActivity->metadataJson());
+      bool isNameXpu = pActivity->name() == "xpu";
+      bool nameStartsWithMetrics = pActivity->name().find("metrics:") == 0;
+
+      unsigned metricsCount = 0;
+      unsigned metricsMask = 0;
+      if (isNameXpu) {
+        std::tie(metricsCount, metricsMask) =
+            CountMetricsInVector(metrics, pActivity->counterValues());
+      } else if (nameStartsWithMetrics) {
+        std::tie(metricsCount, metricsMask) =
+            CountMetricsInString(metrics, pActivity->metadataJson());
+      }
+
+      enum class TestScenario {
+        defaultScenario,
+        scopeProfiler,
+        nameStartsWithMetrics
+      };
+      TestScenario testScenario = TestScenario::defaultScenario;
+
+      switch (pActivity->type()) {
+        case KN::ActivityType::CONCURRENT_KERNEL:
+          if (nameStartsWithMetrics) {
+            testScenario = TestScenario::nameStartsWithMetrics;
+          } else {
+            testScenario = TestScenario::defaultScenario;
+          }
+          break;
+
+        case KN::ActivityType::XPU_SCOPE_PROFILER:
+          testScenario = TestScenario::scopeProfiler;
+          break;
+
+        default:
+          testScenario = TestScenario::defaultScenario;
+      }
+
+      switch (testScenario) {
+        case TestScenario::scopeProfiler:
+          EXPECT_TRUE(isNameXpu);
+          [[fallthrough]];
+        case TestScenario::nameStartsWithMetrics:
+          EXPECT_EQ(metricsCount, metrics.size());
+          EXPECT_EQ(metricsMask, (1u << metrics.size()) - 1);
+          ++scopeProfilerActCount;
+          break;
+
+        case TestScenario::defaultScenario:
+          EXPECT_FALSE(isNameXpu);
+          EXPECT_EQ(metricsCount, 0u); // unsigned literals: no signed/unsigned comparison
+          EXPECT_EQ(metricsMask, 0u);
+      }
 
       if (isVerbose) {
 #define PRINT(A) std::cout << #A " = " << pActivity->A() << std::endl;