Skip to content
Draft
8 changes: 8 additions & 0 deletions libkineto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,14 @@ elseif(KINETO_BACKEND STREQUAL "rocm")
endif()
target_compile_definitions(kineto_base PRIVATE "__HIP_PLATFORM_HCC__")
target_compile_definitions(kineto_base PRIVATE "__HIP_PLATFORM_AMD__")
# The rocprofiler-sdk and HSA system headers (anonymous structs, flexible
# array members, etc.) trip -Werror=pedantic when callers (e.g. PyTorch's
# cmake/Dependencies.cmake) propagate -Wpedantic to us. Silence pedantic for
# the kineto sources only; the headers themselves come from /opt/rocm and
# we can't change them.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
list(APPEND KINETO_COMPILE_OPTIONS "-Wno-pedantic")
endif()
elseif(KINETO_BACKEND STREQUAL "xpu")
list(APPEND KINETO_COMPILE_OPTIONS ${XPUPTI_BUILD_FLAG})
if(KINETO_BUILD_TESTS)
Expand Down
54 changes: 54 additions & 0 deletions libkineto/src/RocLogger.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ typedef enum {
ROCTRACER_ACTIVITY_COPY,
ROCTRACER_ACTIVITY_MALLOC,
ROCTRACER_ACTIVITY_ASYNC,
ROCTRACER_ACTIVITY_EVENT_RECORD,
ROCTRACER_ACTIVITY_SYNC,
ROCTRACER_ACTIVITY_NONE
} rocprof_activity_types;

Expand Down Expand Up @@ -213,3 +215,55 @@ struct rocprofAsyncRow : public rocprofBase {
uint64_t queue;
std::string kernelName;
};

// Kind of synchronization captured by a rocprofSyncRow. The numeric values
// are spelled out because they are also used as indices into name tables.
enum rocprofSyncType {
  ROCPROF_SYNC_STREAM_WAIT_EVENT = 0,
  ROCPROF_SYNC_EVENT_SYNCHRONIZE = 1,
  ROCPROF_SYNC_STREAM_SYNCHRONIZE = 2,
  ROCPROF_SYNC_DEVICE_SYNCHRONIZE = 3,
};

// Row tagged ROCTRACER_ACTIVITY_EVENT_RECORD. On top of the common rocprofRow
// fields it keeps the HIP event and stream handles given to the constructor.
struct rocprofEventRecordRow : public rocprofRow {
  hipEvent_t event; // event handle supplied by the caller
  hipStream_t stream; // stream handle supplied by the caller

  // Forwards the common fields to rocprofRow and stores the two handles.
  rocprofEventRecordRow(
      uint64_t id,
      uint32_t domain,
      uint32_t cid,
      uint32_t pid,
      uint32_t tid,
      uint64_t begin,
      uint64_t end,
      hipEvent_t event,
      hipStream_t stream)
      : rocprofRow(
            id,
            domain,
            cid,
            pid,
            tid,
            begin,
            end,
            ROCTRACER_ACTIVITY_EVENT_RECORD),
        event{event},
        stream{stream} {}
};

// Row tagged ROCTRACER_ACTIVITY_SYNC. In addition to the common rocprofRow
// fields it records which kind of synchronization was observed, the stream
// and event handles involved, and the source stream / correlation id passed
// in by the caller.
struct rocprofSyncRow : public rocprofRow {
  rocprofSyncType syncType; // which synchronization kind this row describes
  hipStream_t stream; // stream handle supplied by the caller
  hipEvent_t event; // event handle supplied by the caller (may be null)
  hipStream_t srcStream; // source stream supplied by the caller
  uint64_t srcCorrId; // source correlation id; consumers treat 0 as "unset"

  // Forwards the common fields to rocprofRow and stores the sync details.
  rocprofSyncRow(
      uint64_t id,
      uint32_t domain,
      uint32_t cid,
      uint32_t pid,
      uint32_t tid,
      uint64_t begin,
      uint64_t end,
      rocprofSyncType syncType,
      hipStream_t stream,
      hipEvent_t event,
      hipStream_t srcStream,
      uint64_t srcCorrId)
      : rocprofRow(
            id,
            domain,
            cid,
            pid,
            tid,
            begin,
            end,
            ROCTRACER_ACTIVITY_SYNC),
        syncType{syncType},
        stream{stream},
        event{event},
        srcStream{srcStream},
        srcCorrId{srcCorrId} {}
};
14 changes: 14 additions & 0 deletions libkineto/src/RocmActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ void RocmActivityProfiler::popCorrelationIdImpl(CorrelationFlowType type) {

// Reset hook: tears down the context held by roc_ and, unless the build
// defines ROCTRACER_FALLBACK, also empties RocprofLogger's event map.
void RocmActivityProfiler::onResetTraceData() {
  roc_.teardownContext();
#if !defined(ROCTRACER_FALLBACK)
  // hipEvent_t -> {stream, corrId} entries from the previous profiling
  // session must not survive into the next one, otherwise a stale entry could
  // be reported as the producer of a wait recorded later. This matches what
  // CuptiActivityProfiler::onResetTraceData does.
  RocprofLogger::clearEventMap();
#endif
}

void RocmActivityProfiler::onFinalizeTrace(
Expand Down Expand Up @@ -273,6 +279,14 @@ void RocmActivityProfiler::handleRocprofActivity(
handleGpuActivity(
reinterpret_cast<const rocprofAsyncRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_EVENT_RECORD:
handleRuntimeActivity(
reinterpret_cast<const rocprofEventRecordRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_SYNC:
handleRuntimeActivity(
reinterpret_cast<const rocprofSyncRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_NONE:
default:
LOG(WARNING) << "Unexpected activity type: " << record->type;
Expand Down
73 changes: 73 additions & 0 deletions libkineto/src/RocprofActivity_inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,79 @@ inline const std::string RuntimeActivity<rocprofMallocRow>::metadataJson() const
raw().ptr);
}

// Metadata for an event-record row: the API call id, the correlation id, and
// the event / stream handles rendered as pointer strings.
template <>
inline const std::string RuntimeActivity<rocprofEventRecordRow>::metadataJson()
    const {
  const auto& row = raw();
  return fmt::format(
      R"JSON(
"cid": {}, "correlation": {},
"hip_event": "{}", "hip_stream": "{}")JSON",
      row.cid,
      row.id,
      fmt::ptr(row.event),
      fmt::ptr(row.stream));
}

// Builds the JSON metadata fragment for a synchronization row.
//
// Always emitted: "cid", "correlation", "sync_type" and "hip_stream".
// Emitted when the row has a non-null event: "hip_event".
// Emitted for stream-wait-event / event-synchronize rows with a non-null
// event: "wait_on_hip_event_id", plus "wait_on_stream" and
// "wait_on_hip_event_record_corr_id" when srcCorrId is non-zero.
//
// A syncType value outside the name table is reported as "unknown".
template <>
inline const std::string RuntimeActivity<rocprofSyncRow>::metadataJson() const {
  // Indexed by rocprofSyncType; entries must stay in enum order.
  static constexpr const char* kSyncTypeNames[] = {
      "stream_wait_event",
      "event_synchronize",
      "stream_synchronize",
      "device_synchronize",
  };
  // Derive the bound from the table itself so the range check cannot drift
  // out of sync with it when a new sync type is added.
  constexpr unsigned kSyncTypeCount =
      sizeof(kSyncTypeNames) / sizeof(kSyncTypeNames[0]);

  const auto& row = raw();

  // One unsigned comparison rejects both negative and too-large values, and
  // avoids a tautological ">= 0" test when the enum's underlying type is
  // unsigned.
  const auto typeIndex = static_cast<unsigned>(row.syncType);
  const char* syncName =
      typeIndex < kSyncTypeCount ? kSyncTypeNames[typeIndex] : "unknown";

  // Fields that are present for every sync row.
  std::string meta = fmt::format(
      R"JSON(
"cid": {}, "correlation": {},
"sync_type": "{}",
"hip_stream": "{}")JSON",
      row.cid,
      row.id,
      syncName,
      fmt::ptr(row.stream));

  if (row.event) {
    meta += fmt::format(
        R"JSON(,
"hip_event": "{}")JSON",
        fmt::ptr(row.event));
  }

  // Inter-stream dependency metadata: emitted for sync types that wait on a
  // specific hipEvent_t (stream wait event, event synchronize) whenever the
  // event was resolved against a prior hipEventRecord in g_eventMap. Field
  // names mirror CUPTI's `wait_on_*` keys for CuptiActivityProfiler parity:
  //
  //   wait_on_stream                   <=> CUPTI wait_on_stream
  //   wait_on_hip_event_record_corr_id <=> CUPTI wait_on_cuda_event_record_corr_id
  //   wait_on_hip_event_id             <=> CUPTI wait_on_cuda_event_id
  //
  // The last field reports the hipEvent_t handle the wait was issued against,
  // independent of whether a producer record was found.
  const bool waitsOnEvent =
      row.syncType == ROCPROF_SYNC_STREAM_WAIT_EVENT ||
      row.syncType == ROCPROF_SYNC_EVENT_SYNCHRONIZE;
  if (waitsOnEvent && row.event) {
    meta += fmt::format(
        R"JSON(,
"wait_on_hip_event_id": "{}")JSON",
        fmt::ptr(row.event));
    // A zero srcCorrId is treated as "no producer record"; skip those keys.
    if (row.srcCorrId) {
      meta += fmt::format(
          R"JSON(,
"wait_on_stream": "{}",
"wait_on_hip_event_record_corr_id": {})JSON",
          fmt::ptr(row.srcStream),
          row.srcCorrId);
    }
  }
  return meta;
}

template <class T>
inline const std::string RuntimeActivity<T>::metadataJson() const {
return fmt::format(
Expand Down
Loading
Loading