Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "XpuptiActivityProfilerSession.h"
#include "output_json.h"

#include <algorithm>
#include <iterator>
#include <type_traits>

Expand Down Expand Up @@ -299,6 +300,39 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(
return;
}
trace_activity->log(logger);

if constexpr (!handleRuntimeActivities) {
if (activity_types_.count(ActivityType::GPU_USER_ANNOTATION)) {
auto userIt = userCorrelationMap_.find(activity->_correlation_id);
if (userIt != userCorrelationMap_.end() && cpuActivity_) {
const int64_t user_external_id = userIt->second;
const int32_t dev = trace_activity->device;
const int32_t res = trace_activity->resource;
auto key = std::make_tuple(dev, res, user_external_id);
auto annIt = userAnnotationsByStream_.find(key);
if (annIt != userAnnotationsByStream_.end()) {
GenericTraceActivity* ua = annIt->second;
ua->startTime = std::min(ua->startTime, trace_activity->startTime);
ua->endTime = std::max(ua->endTime, trace_activity->endTime);
} else if (const ITraceActivity* cpu_act =
cpuActivity_(user_external_id)) {
traceBuffer_.emplace_activity(
traceBuffer_.span,
ActivityType::GPU_USER_ANNOTATION,
cpu_act->name());
auto& ua = traceBuffer_.activities.back();
ua->startTime = trace_activity->startTime;
ua->endTime = trace_activity->endTime;
ua->device = dev;
ua->resource = res;
ua->id = user_external_id;
ua->threadId = trace_activity->threadId;
ua->linked = cpu_act;
userAnnotationsByStream_.emplace(key, ua.get());
}
}
}
}
}

void XpuptiActivityProfilerSession::handleOverheadActivity(
Expand Down
4 changes: 4 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ void XpuptiActivityProfilerSession::processTrace(ActivityLogger& logger) {
handlePtiActivity(record, logger);
});
}
for (auto& kv : userAnnotationsByStream_) {
kv.second->log(logger);
}
userAnnotationsByStream_.clear();
}

void XpuptiActivityProfilerSession::processTrace(
Expand Down
4 changes: 4 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@

#include <pti/pti_view.h>

#include <map>
#include <memory>
#include <set>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
Expand Down Expand Up @@ -107,6 +109,8 @@ class XpuptiActivityProfilerSession : public libkineto::IActivityProfilerSession
std::unordered_map<int64_t, int64_t> cpuCorrelationMap_;
std::unordered_map<int64_t, int64_t> userCorrelationMap_;
std::unordered_map<int64_t, const ITraceActivity*> correlatedPtiActivities_;
std::map<std::tuple<int32_t, int32_t, int64_t>,
libkineto::GenericTraceActivity*> userAnnotationsByStream_;
std::vector<std::string> errors_;

libkineto::getLinkedActivityCallback cpuActivity_;
Expand Down
125 changes: 125 additions & 0 deletions libkineto/test/xpupti/XpuptiProfilerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,125 @@
#include "XpuptiTestUtilities.h"

#include "include/libkineto.h"
#include "include/GenericTraceActivity.h"

#include <gtest/gtest.h>

#include <set>

namespace KN = KINETO_NAMESPACE;

namespace {

constexpr int64_t kUserCorrId = 0xC0FFEE;

enum class LinkedActivityMode {
AlwaysLinked,
MissFirstThenLinked,
};

std::vector<std::string_view> expectedGpuUserAnnotationActivities() {
return {
"urEnqueueMemBufferWrite",
"urEnqueueMemBufferWrite",
"urEnqueueMemBufferWrite",
"urEnqueueKernelLaunch",
"urEnqueueMemBufferRead",
"Memcpy M2D",
"Memcpy M2D",
"Memcpy M2D",
"Run(sycl::_V1::queue, ...)",
"Memcpy D2M",
"user_function",
"user_function"};
}

std::vector<std::string_view> expectedGpuUserAnnotationTypes() {
return {
"xpu_runtime",
"xpu_runtime",
"xpu_runtime",
"xpu_runtime",
"xpu_runtime",
"gpu_memcpy",
"gpu_memcpy",
"gpu_memcpy",
"kernel",
"gpu_memcpy",
"gpu_user_annotation",
"gpu_user_annotation"};
}

std::unique_ptr<KN::CpuTraceBuffer> runGpuUserAnnotationCase(
LinkedActivityMode mode) {
KN::Config cfg;
std::vector<std::string_view> metrics;

std::set<KN::ActivityType> activities{
KN::ActivityType::GPU_MEMCPY,
KN::ActivityType::CONCURRENT_KERNEL,
KN::ActivityType::XPU_RUNTIME,
KN::ActivityType::EXTERNAL_CORRELATION,
KN::ActivityType::GPU_USER_ANNOTATION,
};

auto expectedActivities = expectedGpuUserAnnotationActivities();
auto expectedTypes = expectedGpuUserAnnotationTypes();

static const KN::TraceSpan kCpuSpan(0, 0, "cpu_span", "");
KN::GenericTraceActivity cpuAct(
kCpuSpan, KN::ActivityType::CPU_OP, "user_function");
cpuAct.id = kUserCorrId;

bool firstLookup = true;
auto linkedActivityCallback =
[&cpuAct, &firstLookup, mode](int32_t corr)
-> const KN::ITraceActivity* {
if (corr != kUserCorrId) {
return nullptr;
}
if (mode == LinkedActivityMode::MissFirstThenLinked && firstLookup) {
firstLookup = false;
return nullptr;
}
return &cpuAct;
};

constexpr unsigned repeatCount = 1;
[[maybe_unused]] auto [pSession, pBuffer] = RunProfilerTest(
metrics,
activities,
cfg,
repeatCount,
std::move(expectedActivities),
std::move(expectedTypes),
kUserCorrId,
&cpuAct,
linkedActivityCallback);

return std::move(pBuffer);
}

void expectTwoUserAnnotations(const KN::CpuTraceBuffer& buffer) {
std::set<std::pair<int64_t, int64_t>> streamKeys;
unsigned annotationCount = 0;
for (const auto& activity : buffer.activities) {
if (activity->type() != KN::ActivityType::GPU_USER_ANNOTATION) {
continue;
}
++annotationCount;
EXPECT_EQ(activity->correlationId(), kUserCorrId);
ASSERT_NE(activity->linkedActivity(), nullptr);
EXPECT_EQ(activity->linkedActivity()->name(), "user_function");
streamKeys.insert({activity->deviceId(), activity->resourceId()});
}

EXPECT_EQ(annotationCount, 2);
EXPECT_EQ(streamKeys.size(), 2);
}

} // namespace

TEST(XpuptiProfilerTest, XpuDriverEvents) {
KN::Config cfg;

Expand Down Expand Up @@ -117,3 +231,14 @@ TEST(XpuptiProfilerTest, TestEvents) {
<< ", resourceId=" << resourceInfos[i].id << " never used.";
}
}

TEST(XpuptiProfilerTest, GpuUserAnnotation) {
auto pBuffer = runGpuUserAnnotationCase(LinkedActivityMode::AlwaysLinked);
expectTwoUserAnnotations(*pBuffer);
}

TEST(XpuptiProfilerTest, GpuUserAnnotationLinkedActivityRetry) {
auto pBuffer =
runGpuUserAnnotationCase(LinkedActivityMode::MissFirstThenLinked);
expectTwoUserAnnotations(*pBuffer);
}
28 changes: 24 additions & 4 deletions libkineto/test/xpupti/XpuptiTestUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ RunProfilerTest(
const KN::Config& cfg,
unsigned repeatCount,
std::vector<std::string_view>&& expectedActivities,
std::vector<std::string_view>&& expectedTypes) {
std::vector<std::string_view>&& expectedTypes,
int64_t userCorrelationId,
const KN::ITraceActivity* linkedCpuActivity,
std::function<const KN::ITraceActivity*(int32_t)> linkedActivityCallback) {
KN::XPUActivityProfiler profiler;
EXPECT_TRUE(profiler.name() == "__xpu_profiler__");

Expand All @@ -183,17 +186,34 @@ RunProfilerTest(
.count();
pSession->start();

if (userCorrelationId != 0) {
pSession->pushUserCorrelationId(
static_cast<uint64_t>(userCorrelationId));
}

void ComputeOnXpu(unsigned size, unsigned repeatCount);
ComputeOnXpu(1024, repeatCount);

if (userCorrelationId != 0) {
pSession->popUserCorrelationId();
}

pSession->stop();
int64_t endTime = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count();

auto getLinkedActivity = [](int32_t) -> const KN::ITraceActivity* {
return nullptr;
};
auto getLinkedActivity = std::move(linkedActivityCallback);
if (!getLinkedActivity) {
getLinkedActivity =
[userCorrelationId, linkedCpuActivity](
int32_t corr) -> const KN::ITraceActivity* {
if (userCorrelationId != 0 && corr == userCorrelationId) {
return linkedCpuActivity;
}
return nullptr;
};
}

TestActivityLogger logger;
pSession->processTrace(logger, getLinkedActivity, startTime, endTime);
Expand Down
7 changes: 6 additions & 1 deletion libkineto/test/xpupti/XpuptiTestUtilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include "include/output_base.h"

#include <functional>

namespace KN = KINETO_NAMESPACE;

bool IsEnvVerbose();
Expand All @@ -18,4 +20,7 @@ std::pair<std::unique_ptr<KN::IActivityProfilerSession>, std::unique_ptr<KN::Cpu
const KN::Config& cfg,
unsigned repeatCount,
std::vector<std::string_view>&& expectedActivities,
std::vector<std::string_view>&& expectedTypes);
std::vector<std::string_view>&& expectedTypes,
int64_t userCorrelationId = 0,
const KN::ITraceActivity* linkedCpuActivity = nullptr,
std::function<const KN::ITraceActivity*(int32_t)> linkedActivityCallback = nullptr);