diff --git a/examples/seq/Makefile b/examples/seq/Makefile new file mode 100644 index 0000000000..f99cacbb41 --- /dev/null +++ b/examples/seq/Makefile @@ -0,0 +1,21 @@ +TRACY_PUBLIC := ../../public +CXX := g++ + +TRACY_SRCS := $(TRACY_PUBLIC)/TracyClient.cpp +INCLUDES := -I$(TRACY_PUBLIC) -I$(TRACY_PUBLIC)/tracy +LIBS := -lpthread -ldl + +CXXFLAGS := -std=c++17 -O2 -DTRACY_ENABLE + +.PHONY: all clean + +all: seq + +seq: seq.cpp tracy_client.o + $(CXX) $(CXXFLAGS) $(INCLUDES) -o $@ $< tracy_client.o $(LIBS) + +tracy_client.o: $(TRACY_SRCS) + $(CXX) $(CXXFLAGS) $(INCLUDES) -c -o $@ $< + +clean: + rm -f seq tracy_client.o diff --git a/examples/seq/seq.cpp b/examples/seq/seq.cpp new file mode 100644 index 0000000000..b26a1b4dde --- /dev/null +++ b/examples/seq/seq.cpp @@ -0,0 +1,349 @@ +// Demonstrates Tracy's sequence (async-continuation) feature. +// +// Submits a mix of four "recipes" — different async pipelines with varying +// chain lengths and per-stage work — onto a thread pool. Each pipeline kicks +// off on the main thread (via TracySeqCreate + a tiny setup stage) and then +// migrates through 2-7 continuations on worker threads. The profiler renders +// arrows between suspend/resume points so the cross-thread chain of any one +// pipeline is visible by hovering its zones. 
+// +// Build: +// make +// +// Run: +// ./seq # then connect tracy-profiler + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Tracy.hpp" + +using namespace std::chrono_literals; + +class ThreadPool +{ +public: + explicit ThreadPool( int n ) + { + for( int i = 0; i < n; ++i ) + { + m_workers.emplace_back( [this, i] + { + char name[16]; + std::snprintf( name, sizeof( name ), "worker-%d", i ); + tracy::SetThreadName( name ); + Run(); + } ); + } + } + + ~ThreadPool() + { + { + std::lock_guard lk( m_mu ); + m_stop = true; + } + m_cv.notify_all(); + for( auto& w : m_workers ) w.join(); + } + + void Submit( std::function task ) + { + { + std::lock_guard lk( m_mu ); + m_queue.push( std::move( task ) ); + } + m_cv.notify_one(); + } + +private: + void Run() + { + for(;;) + { + std::function task; + { + std::unique_lock lk( m_mu ); + m_cv.wait( lk, [this]{ return m_stop || !m_queue.empty(); } ); + if( m_stop && m_queue.empty() ) return; + task = std::move( m_queue.front() ); + m_queue.pop(); + } + task(); + } + } + + std::mutex m_mu; + std::condition_variable m_cv; + std::queue> m_queue; + std::vector m_workers; + bool m_stop = false; +}; + +struct Chain +{ + uint64_t seq; + int value; + ThreadPool* pool; + std::atomic* remaining; +}; + +// Each stage opens its own ZoneScoped, brackets simulated work with +// TracySeqResume/Suspend, then either submits the next stage to the pool or +// (for terminal stages) calls TracySeqRetire and bumps the completion counter. 
+ +// ============================================================================ +// query: 2 stages — short pipeline, tight chain +// ============================================================================ + +static void query_exec( Chain c ) +{ + ZoneScopedN( "query/exec" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 35ms ); + c.value *= 3; + TracySeqSuspend( c.seq ); + TracySeqRetire( c.seq ); + c.remaining->fetch_sub( 1, std::memory_order_release ); +} + +static void query_parse( Chain c ) +{ + ZoneScopedN( "query/parse" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 8ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ query_exec( c ); } ); +} + +// ============================================================================ +// ingest: 4 stages — IO-heavy +// ============================================================================ + +static void ingest_store( Chain c ) +{ + ZoneScopedN( "ingest/store" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 18ms ); + TracySeqSuspend( c.seq ); + TracySeqRetire( c.seq ); + c.remaining->fetch_sub( 1, std::memory_order_release ); +} + +static void ingest_validate( Chain c ) +{ + ZoneScopedN( "ingest/validate" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 22ms ); + c.value += 1; + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ ingest_store( c ); } ); +} + +static void ingest_parse( Chain c ) +{ + ZoneScopedN( "ingest/parse" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 28ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ ingest_validate( c ); } ); +} + +static void ingest_fetch( Chain c ) +{ + ZoneScopedN( "ingest/fetch" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 60ms ); // slow IO + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ ingest_parse( c ); } ); +} + +// ============================================================================ +// render: 5 stages — frame-like workload +// 
============================================================================ + +static void render_present( Chain c ) +{ + ZoneScopedN( "render/present" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 10ms ); + TracySeqSuspend( c.seq ); + TracySeqRetire( c.seq ); + c.remaining->fetch_sub( 1, std::memory_order_release ); +} + +static void render_compose( Chain c ) +{ + ZoneScopedN( "render/compose" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 35ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ render_present( c ); } ); +} + +static void render_shadows( Chain c ) +{ + ZoneScopedN( "render/shadows" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 45ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ render_compose( c ); } ); +} + +static void render_geometry( Chain c ) +{ + ZoneScopedN( "render/geometry" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 55ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ render_shadows( c ); } ); +} + +static void render_setup( Chain c ) +{ + ZoneScopedN( "render/setup" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 12ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ render_geometry( c ); } ); +} + +// ============================================================================ +// compile: 7 stages — long pipeline +// ============================================================================ + +static void compile_emit( Chain c ) +{ + ZoneScopedN( "compile/emit" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 12ms ); + TracySeqSuspend( c.seq ); + TracySeqRetire( c.seq ); + c.remaining->fetch_sub( 1, std::memory_order_release ); +} + +static void compile_codegen( Chain c ) +{ + ZoneScopedN( "compile/codegen" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 40ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ compile_emit( c ); } ); +} + +static void compile_optimize( Chain c ) +{ + ZoneScopedN( 
"compile/optimize" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 70ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ compile_codegen( c ); } ); +} + +static void compile_ir( Chain c ) +{ + ZoneScopedN( "compile/ir" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 25ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ compile_optimize( c ); } ); +} + +static void compile_typecheck( Chain c ) +{ + ZoneScopedN( "compile/typecheck" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 30ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ compile_ir( c ); } ); +} + +static void compile_parse( Chain c ) +{ + ZoneScopedN( "compile/parse" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 18ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ compile_typecheck( c ); } ); +} + +static void compile_lex( Chain c ) +{ + ZoneScopedN( "compile/lex" ); + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 8ms ); + TracySeqSuspend( c.seq ); + c.pool->Submit( [c]{ compile_parse( c ); } ); +} + +// ============================================================================ +// Kick off a chain from the main thread. The "kickoff" zone holds the +// TracySeqCreate plus a small setup window on main; the first worker stage +// fires when its Submit lands on a worker. 
+// ============================================================================ + +enum class Recipe { Query, Ingest, Render, Compile }; + +static void Kickoff( Recipe r, int value, ThreadPool& pool, std::atomic& remaining ) +{ + ZoneScopedN( "kickoff" ); + Chain c { + .seq = TracySeqCreate(), + .value = value, + .pool = &pool, + .remaining = &remaining, + }; + TracySeqResume( c.seq ); + std::this_thread::sleep_for( 4ms ); + TracySeqSuspend( c.seq ); + + switch( r ) + { + case Recipe::Query: pool.Submit( [c]{ query_parse( c ); } ); break; + case Recipe::Ingest: pool.Submit( [c]{ ingest_fetch( c ); } ); break; + case Recipe::Render: pool.Submit( [c]{ render_setup( c ); } ); break; + case Recipe::Compile: pool.Submit( [c]{ compile_lex( c ); } ); break; + } +} + +int main() +{ + tracy::SetThreadName( "main" ); + + ThreadPool pool( 6 ); + + // Mixed schedule: short and long pipelines interleaved so workers stay + // saturated and continuations naturally bounce across threads. + static constexpr Recipe schedule[] = { + Recipe::Compile, Recipe::Query, Recipe::Ingest, Recipe::Render, + Recipe::Query, Recipe::Compile, Recipe::Render, Recipe::Ingest, + Recipe::Render, Recipe::Query, Recipe::Compile, Recipe::Ingest, + Recipe::Query, Recipe::Render, Recipe::Ingest, Recipe::Compile, + Recipe::Query, Recipe::Compile, Recipe::Render, Recipe::Query, + }; + constexpr int kChains = sizeof( schedule ) / sizeof( schedule[0] ); + std::atomic remaining{ kChains }; + + for( int i = 0; i < kChains; ++i ) + { + FrameMarkNamed( "submit" ); + Kickoff( schedule[i], i * 100, pool, remaining ); + std::this_thread::sleep_for( 6ms ); + } + + while( remaining.load( std::memory_order_acquire ) > 0 ) + { + std::this_thread::sleep_for( 10ms ); + } + + std::printf( "%d chains done\n", kChains ); + return 0; +} diff --git a/profiler/src/profiler/TracyTimelineItemThread.cpp b/profiler/src/profiler/TracyTimelineItemThread.cpp index 72c1412aa8..dfd2c3bd66 100644 --- 
a/profiler/src/profiler/TracyTimelineItemThread.cpp +++ b/profiler/src/profiler/TracyTimelineItemThread.cpp @@ -44,6 +44,7 @@ uint32_t TimelineItemThread::HeaderColor() const auto& crash = m_worker.GetCrashEvent(); if( crash.thread == m_thread->id ) return 0xFF2222FF; if( m_thread->isFiber ) return 0xFF88FF88; + if( m_thread->isFlatView ) return 0xFFFFCC88; // light blue (ABGR) return 0xFFFFFFFF; } @@ -52,6 +53,7 @@ uint32_t TimelineItemThread::HeaderColorInactive() const auto& crash = m_worker.GetCrashEvent(); if( crash.thread == m_thread->id ) return 0xFF111188; if( m_thread->isFiber ) return 0xFF448844; + if( m_thread->isFlatView ) return 0xFF886644; // darker light blue return 0xFF888888; } @@ -235,6 +237,8 @@ void TimelineItemThread::HeaderTooltip( const char* label ) const void TimelineItemThread::HeaderExtraContents( const TimelineContext& ctx, int offset, float labelWidth ) { + if( m_thread->isFlatView ) return; + m_view.DrawThreadMessagesList( ctx, m_msgDraw, offset, m_thread->id ); const bool hasGhostZones = m_worker.AreGhostZonesReady() && !m_thread->ghostZones.empty(); @@ -275,6 +279,16 @@ void TimelineItemThread::DrawOverlay( const ImVec2& ul, const ImVec2& dr ) void TimelineItemThread::DrawExtraPopupItems() { + if( m_thread->isFlatView ) + { + if( ImGui::MenuItem( ICON_FA_TRASH_CAN " Destroy flatten view" ) ) + { + m_view.QueueDestroyFlattenViewByTid( m_thread->id ); + ImGui::CloseCurrentPopup(); + } + return; + } + if( m_view.GetSelectThread() == m_thread->id ) { if( ImGui::MenuItem( ICON_FA_TIMELINE " Unselect in CPU timeline" ) ) @@ -493,7 +507,7 @@ int TimelineItemThread::PreprocessZoneLevel( const TimelineContext& ctx, const V if( hasChildren ) childrenInherited = DarkenColorSlightly( color ); } } - if( hasChildren ) + if( hasChildren && !m_thread->isFlatView ) { const auto d = PreprocessZoneLevel( ctx, m_worker.GetZoneChildren( ev.Child() ), depth + 1, visible, childrenInherited ); if( d > maxdepth ) maxdepth = d; diff --git 
a/profiler/src/profiler/TracyView.cpp b/profiler/src/profiler/TracyView.cpp index da2ceafb46..e7b4215137 100644 --- a/profiler/src/profiler/TracyView.cpp +++ b/profiler/src/profiler/TracyView.cpp @@ -1230,9 +1230,17 @@ bool View::DrawImpl() { AddAnnotation( s, e ); } + if( m_seqFlattenPopupSeqId != 0 ) + { + if( ImGui::Selectable( ICON_FA_DIAGRAM_PROJECT " Flatten sequence timeline" ) ) + { + MakeFlattenView( m_seqFlattenPopupSeqId ); + } + } ImGui::EndPopup(); } m_setRangePopupOpen = ImGui::IsPopupOpen( "SetZoneRange" ); + if( !m_setRangePopupOpen ) m_seqFlattenPopupSeqId = 0; if( m_zoomAnim.active ) { diff --git a/profiler/src/profiler/TracyView.hpp b/profiler/src/profiler/TracyView.hpp index cb33beac16..f66e319a89 100644 --- a/profiler/src/profiler/TracyView.hpp +++ b/profiler/src/profiler/TracyView.hpp @@ -171,6 +171,7 @@ class View } void HighlightThread( uint64_t thread ); + void QueueDestroyFlattenViewByTid( uint64_t tid ); void SelectThread( uint64_t thread ); uint64_t GetSelectThread() const { return m_selectedThread; } void ZoomToRange( int64_t start, int64_t end, bool pause = true ); @@ -279,7 +280,10 @@ class View void DrawTimelineFrames( const FrameData& frames ); void DrawTimeline(); void DrawSampleList( const TimelineContext& ctx, const std::vector& drawList, const Vector& vec, int offset, uint64_t tid ); - void DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int offset, uint64_t tid, int maxDepth, double margin ); + void DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int offset, uint64_t tid, int maxDepth, double margin, bool isFlatView ); + void DrawSeqArrows( double pxns, const ImVec2& wpos ); + void MakeFlattenView( uint64_t seqId ); + void DestroyFlattenView( uint64_t seqId ); void DrawThreadCropper( const int depth, const uint64_t tid, const float xPos, const float yPos, const float ostep, const float cropperWidth, const bool hasCtxSwitches ); void DrawContextSwitchList( const TimelineContext& ctx, 
const std::vector& drawList, const Vector& ctxSwitch, int offset, int endOffset, bool isFiber ); int DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); @@ -526,6 +530,26 @@ class View uint32_t m_lockInfoWindow = InvalidId; const ZoneEvent* m_zoneHover = nullptr; DecayValue m_zoneHover2 = nullptr; + + struct SeqArrowDraw + { + int64_t fromTime; + const ZoneEvent* fromZone; + int64_t toTime; + const ZoneEvent* toZone; + }; + std::vector m_seqArrowDraw; + unordered_flat_map m_seqZoneYPos; + + struct FlattenView + { + std::unique_ptr td; + uint64_t seqId; + }; + std::vector m_flattenViews; + std::vector m_flattenViewDestroyQueue; + uint32_t m_nextFlattenTid = 1; + uint64_t m_seqFlattenPopupSeqId = 0; int m_frameHover = -1; bool m_messagesScrollBottom; diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index 105a41c690..dcce7464b0 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -259,6 +259,14 @@ void View::DrawTimeline() m_cpuDataThread.Decay( 0 ); m_zoneHover = nullptr; m_zoneHover2.Decay( nullptr ); + m_seqArrowDraw.clear(); + m_seqZoneYPos.clear(); + + for( auto seqId : m_flattenViewDestroyQueue ) + { + DestroyFlattenView( seqId ); + } + m_flattenViewDestroyQueue.clear(); m_findZone.range.StartFrame(); m_statRange.StartFrame(); m_flameRange.StartFrame(); @@ -396,6 +404,10 @@ void View::DrawTimeline() } m_threadReinsert.clear(); } + for( const auto& fv : m_flattenViews ) + { + m_tc.AddItem( fv.td.get() ); + } for( const auto& v : m_threadOrder ) { m_tc.AddItem( v ); @@ -411,6 +423,7 @@ void View::DrawTimeline() const auto vcenter = verticallyCenterTimeline && drawMouseLine && m_viewMode == ViewMode::Paused; m_tc.End( pxns, wpos, hover, vcenter, yMin, yMax ); + DrawSeqArrows( pxns, wpos ); ImGui::EndChild(); 
m_lockHighlight = m_nextLockHighlight; diff --git a/profiler/src/profiler/TracyView_ZoneTimeline.cpp b/profiler/src/profiler/TracyView_ZoneTimeline.cpp index 9ed11b7130..b4138990fd 100644 --- a/profiler/src/profiler/TracyView_ZoneTimeline.cpp +++ b/profiler/src/profiler/TracyView_ZoneTimeline.cpp @@ -76,7 +76,7 @@ void View::DrawThread( const TimelineContext& ctx, const ThreadData& thread, con if( !draw.empty() && yPos <= yMax && yPos + ostep * croppedDepth >= yMin ) { // Only apply margin when croppingActive to avoid text moving around when mouse is getting close to the cropper widget - DrawZoneList( ctx, draw, offset, thread.id, croppedDepth, croppingActive ? cropperAdditionalMargin + GetScale() /* Ensure text has a bit of space for text */ : 0.f ); + DrawZoneList( ctx, draw, offset, thread.id, croppedDepth, croppingActive ? cropperAdditionalMargin + GetScale() /* Ensure text has a bit of space for text */ : 0.f, thread.isFlatView != 0 ); } offset += ostep * croppedDepth; @@ -230,7 +230,7 @@ void View::DrawThreadOverlays( const ThreadData& thread, const ImVec2& ul, const } -void View::DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int _offset, uint64_t tid, int maxDepth, double margin ) +void View::DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int _offset, uint64_t tid, int maxDepth, double margin, bool isFlatView ) { auto draw = ImGui::GetWindowDrawList(); const auto w = ctx.w; @@ -354,7 +354,25 @@ void View::DrawZoneList( const TimelineContext& ctx, const std::vectorsecond.seqId ); + if( sit != seqMap.end() && seqIt->second.continuationIdx < sit->second->continuations.size() ) + { + colorTid = sit->second->continuations[seqIt->second.continuationIdx].tid; + } + } + const auto zoneColor = GetZoneColorData( ev, colorTid, v.depth, v.inheritedColor ); const char* zoneName = m_worker.GetZoneName( ev ); auto tsz = ImGui::CalcTextSize( zoneName ); @@ -388,7 +406,11 @@ void View::DrawZoneList( const TimelineContext& ctx, 
const std::vectorsecond.seqId : 0; + } if( !m_zoomAnim.active && IsMouseClicked( 2 ) ) { @@ -409,6 +431,30 @@ void View::DrawZoneList( const TimelineContext& ctx, const std::vectorsecond.seqId ); + if( sit != seqMap.end() ) + { + const auto& conts = sit->second->continuations; + const auto idx = seqIt->second.continuationIdx; + if( idx > 0 ) + { + m_seqArrowDraw.push_back( { + conts[idx-1].suspendTime, (const ZoneEvent*)conts[idx-1].zone, + conts[idx].resumeTime, (const ZoneEvent*)conts[idx].zone } ); + } + if( idx + 1 < conts.size() ) + { + m_seqArrowDraw.push_back( { + conts[idx].suspendTime, (const ZoneEvent*)conts[idx].zone, + conts[idx+1].resumeTime, (const ZoneEvent*)conts[idx+1].zone } ); + } + } + + } } break; } @@ -671,4 +717,112 @@ void View::DrawThreadCropper( const int depth, const uint64_t tid, const float x } } +void View::MakeFlattenView( uint64_t seqId ) +{ + for( const auto& fv : m_flattenViews ) + { + if( fv.seqId == seqId ) return; // idempotent + } + const auto& seqMap = m_worker.GetSequences(); + auto sit = seqMap.find( seqId ); + if( sit == seqMap.end() || sit->second->continuations.empty() ) return; + const auto& conts = sit->second->continuations; + + const uint64_t synthTid = ( uint64_t(2) << 32 ) | m_nextFlattenTid++; + + auto td = std::make_unique(); + td->id = synthTid; + td->count = 0; + td->nextZoneId = 0; + td->kernelSampleCnt = 0; + td->isFiber = 0; + td->isFlatView = 1; + td->fiber = nullptr; + td->stackCount = nullptr; + td->groupHint = 0; +#ifndef TRACY_NO_STATISTICS + td->ghostIdx = 0; +#endif + + for( const auto& c : conts ) + { + td->timeline.push_back( c.zone ); + } + + char name[128]; + const auto* firstZone = (const ZoneEvent*)conts[0].zone; + const char* zName = m_worker.GetZoneName( *firstZone ); + std::snprintf( name, sizeof( name ), "flat: %s", zName ? zName : "?" 
); + m_worker.RegisterFlattenThreadName( synthTid, name, strlen( name ) ); + + m_flattenViews.push_back( FlattenView{ std::move( td ), seqId } ); +} + +void View::DestroyFlattenView( uint64_t seqId ) +{ + for( auto it = m_flattenViews.begin(); it != m_flattenViews.end(); ++it ) + { + if( it->seqId == seqId ) + { + m_worker.UnregisterFlattenThreadName( it->td->id ); + m_flattenViews.erase( it ); + return; + } + } +} + +void View::QueueDestroyFlattenViewByTid( uint64_t tid ) +{ + for( const auto& fv : m_flattenViews ) + { + if( fv.td->id == tid ) + { + m_flattenViewDestroyQueue.push_back( fv.seqId ); + return; + } + } +} + +void View::DrawSeqArrows( double pxns, const ImVec2& wpos ) +{ + if( m_seqArrowDraw.empty() ) return; + + auto draw = ImGui::GetWindowDrawList(); + const auto vStart = m_vd.zvStart; + const auto scale = GetScale(); + const auto lineThickness = std::max( 1.5f * scale, 1.0f ); + const auto dotRadius = 2.5f * scale; + const auto arrowSize = 5.0f * scale; + constexpr ImU32 color = 0xFFFFFFFF; + constexpr ImU32 shadow = 0xFF000000; + + for( const auto& a : m_seqArrowDraw ) + { + const auto fromIt = m_seqZoneYPos.find( a.fromZone ); + const auto toIt = m_seqZoneYPos.find( a.toZone ); + if( fromIt == m_seqZoneYPos.end() || toIt == m_seqZoneYPos.end() ) continue; + + const ImVec2 p0( wpos.x + ( a.fromTime - vStart ) * pxns, fromIt->second ); + const ImVec2 p1( wpos.x + ( a.toTime - vStart ) * pxns, toIt->second ); + + draw->AddLine( p0 + ImVec2( 1, 1 ), p1 + ImVec2( 1, 1 ), shadow, lineThickness ); + draw->AddLine( p0, p1, color, lineThickness ); + draw->AddCircleFilled( p0, dotRadius, color ); + + // Arrowhead at p1 + const auto dx = p1.x - p0.x; + const auto dy = p1.y - p0.y; + const auto len = std::sqrt( dx * dx + dy * dy ); + if( len > 1.0f ) + { + const auto nx = dx / len; + const auto ny = dy / len; + const ImVec2 base( p1.x - nx * arrowSize, p1.y - ny * arrowSize ); + const ImVec2 left ( base.x - ny * arrowSize * 0.5f, base.y + nx * arrowSize * 
0.5f ); + const ImVec2 right( base.x + ny * arrowSize * 0.5f, base.y - nx * arrowSize * 0.5f ); + draw->AddTriangleFilled( p1, left, right, color ); + } + } +} + } diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 0e19d6e77e..c7a7f2579c 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -2834,6 +2834,17 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) ++item; continue; } + case QueueType::SeqCreate: + case QueueType::SeqResume: + case QueueType::SeqSuspend: + case QueueType::SeqRetire: + { + int64_t t = MemRead( &item->seqEvent.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->seqEvent.time, dt ); + break; + } default: assert( false ); break; @@ -3264,6 +3275,18 @@ Profiler::DequeueStatus Profiler::DequeueSerial() MemWrite( &item->fiberLeave.time, dt ); break; } + case QueueType::SeqCreate: + case QueueType::SeqResume: + case QueueType::SeqSuspend: + case QueueType::SeqRetire: + { + ThreadCtxCheckSerial( seqEvent ); + int64_t t = MemRead( &item->seqEvent.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->seqEvent.time, dt ); + break; + } #endif default: assert( false ); @@ -5209,6 +5232,11 @@ TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::Enter TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } #endif +TRACY_API uint64_t ___tracy_seq_create( void ){ return tracy::Profiler::CreateSequence(); } +TRACY_API void ___tracy_seq_resume( uint64_t id ){ tracy::Profiler::ResumeSequence( id ); } +TRACY_API void ___tracy_seq_suspend( uint64_t id ){ tracy::Profiler::SuspendSequence( id ); } +TRACY_API void ___tracy_seq_retire( uint64_t id ){ tracy::Profiler::RetireSequence( id ); } + # if defined TRACY_MANUAL_LIFETIME && defined TRACY_DELAYED_INIT TRACY_API void ___tracy_startup_profiler( void ) { diff --git a/public/client/TracyProfiler.hpp b/public/client/TracyProfiler.hpp 
index f1a221a8d5..4cb8c05c11 100644 --- a/public/client/TracyProfiler.hpp +++ b/public/client/TracyProfiler.hpp @@ -792,6 +792,56 @@ class Profiler } #endif + static tracy_force_inline uint64_t CreateSequence() + { + const auto id = GetProfiler().m_seqCounter.fetch_add( 1, std::memory_order_relaxed ); +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return id; +#endif + TracyQueuePrepare( QueueType::SeqCreate ); + MemWrite( &item->seqEvent.time, GetTime() ); + MemWrite( &item->seqEvent.id, id ); + MemWrite( &item->seqEvent.thread, GetThreadHandle() ); + TracyQueueCommit( seqEvent ); + return id; + } + + static tracy_force_inline void ResumeSequence( uint64_t id ) + { +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +#endif + TracyQueuePrepare( QueueType::SeqResume ); + MemWrite( &item->seqEvent.time, GetTime() ); + MemWrite( &item->seqEvent.id, id ); + MemWrite( &item->seqEvent.thread, GetThreadHandle() ); + TracyQueueCommit( seqEvent ); + } + + static tracy_force_inline void SuspendSequence( uint64_t id ) + { +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +#endif + TracyQueuePrepare( QueueType::SeqSuspend ); + MemWrite( &item->seqEvent.time, GetTime() ); + MemWrite( &item->seqEvent.id, id ); + MemWrite( &item->seqEvent.thread, GetThreadHandle() ); + TracyQueueCommit( seqEvent ); + } + + static tracy_force_inline void RetireSequence( uint64_t id ) + { +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +#endif + TracyQueuePrepare( QueueType::SeqRetire ); + MemWrite( &item->seqEvent.time, GetTime() ); + MemWrite( &item->seqEvent.id, id ); + MemWrite( &item->seqEvent.thread, GetThreadHandle() ); + TracyQueueCommit( seqEvent ); + } + void SendCallstack( int32_t depth, const char** skipBefore ); static void CutCallstack( void* callstack, const char** skipBefore ); @@ -1087,6 +1137,7 @@ class Profiler std::mutex m_symbolQueueMutex; std::atomic m_frameCount; + std::atomic m_seqCounter{ 1 }; 
std::atomic m_isConnected; #ifdef TRACY_ON_DEMAND std::atomic m_connectionId; diff --git a/public/common/TracyProtocol.hpp b/public/common/TracyProtocol.hpp index 5c799138c5..495af500a0 100644 --- a/public/common/TracyProtocol.hpp +++ b/public/common/TracyProtocol.hpp @@ -10,7 +10,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -constexpr uint32_t ProtocolVersion = 78; +constexpr uint32_t ProtocolVersion = 79; constexpr uint16_t BroadcastVersion = 3; using lz4sz_t = uint32_t; diff --git a/public/common/TracyQueue.hpp b/public/common/TracyQueue.hpp index 995769878b..622d860508 100644 --- a/public/common/TracyQueue.hpp +++ b/public/common/TracyQueue.hpp @@ -81,6 +81,10 @@ enum class QueueType : uint8_t SourceCodeMetadata, FiberEnter, FiberLeave, + SeqCreate, + SeqResume, + SeqSuspend, + SeqRetire, Terminate, KeepAlive, ThreadContext, @@ -307,6 +311,13 @@ struct QueueFiberLeave uint32_t thread; }; +struct QueueSeqEvent +{ + int64_t time; + uint64_t id; + uint32_t thread; +}; + struct QueueLockTerminate { uint32_t id; @@ -918,6 +929,7 @@ struct QueueItem QueueSourceCodeNotAvailable sourceCodeNotAvailable; QueueFiberEnter fiberEnter; QueueFiberLeave fiberLeave; + QueueSeqEvent seqEvent; QueueGpuZoneAnnotation zoneAnnotation; }; }; @@ -997,6 +1009,10 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // SourceCodeMetadata - not for wire transfer sizeof( QueueHeader ) + sizeof( QueueFiberEnter ), sizeof( QueueHeader ) + sizeof( QueueFiberLeave ), + sizeof( QueueHeader ) + sizeof( QueueSeqEvent ), // SeqCreate + sizeof( QueueHeader ) + sizeof( QueueSeqEvent ), // SeqResume + sizeof( QueueHeader ) + sizeof( QueueSeqEvent ), // SeqSuspend + sizeof( QueueHeader ) + sizeof( QueueSeqEvent ), // SeqRetire // above items must be first sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive diff --git a/public/tracy/Tracy.hpp b/public/tracy/Tracy.hpp index 4dcd3a55b7..bb2eb7ff1f 
100644 --- a/public/tracy/Tracy.hpp +++ b/public/tracy/Tracy.hpp @@ -126,6 +126,11 @@ #define TracyFiberEnterHint(x,y) #define TracyFiberLeave +#define TracySeqCreate() ((uint64_t)0) +#define TracySeqResume(id) ((void)(id)) +#define TracySeqSuspend(id) ((void)(id)) +#define TracySeqRetire(id) ((void)(id)) + #else #include @@ -275,6 +280,11 @@ # define TracyFiberLeave tracy::Profiler::LeaveFiber() #endif +#define TracySeqCreate() tracy::Profiler::CreateSequence() +#define TracySeqResume( id ) tracy::Profiler::ResumeSequence( id ) +#define TracySeqSuspend( id ) tracy::Profiler::SuspendSequence( id ) +#define TracySeqRetire( id ) tracy::Profiler::RetireSequence( id ) + #endif #endif diff --git a/public/tracy/TracyC.h b/public/tracy/TracyC.h index decbbab915..961afb9b17 100644 --- a/public/tracy/TracyC.h +++ b/public/tracy/TracyC.h @@ -132,6 +132,11 @@ typedef const void* TracyCLockCtx; # define TracyCFiberLeave #endif +#define TracyCSeqCreate() ((uint64_t)0) +#define TracyCSeqResume(id) ((void)(id)) +#define TracyCSeqSuspend(id) ((void)(id)) +#define TracyCSeqRetire(id) ((void)(id)) + #else #ifndef TracyConcat @@ -392,6 +397,16 @@ TRACY_API void ___tracy_fiber_leave( void ); # define TracyCFiberLeave ___tracy_fiber_leave(); #endif +TRACY_API uint64_t ___tracy_seq_create( void ); +TRACY_API void ___tracy_seq_resume( uint64_t id ); +TRACY_API void ___tracy_seq_suspend( uint64_t id ); +TRACY_API void ___tracy_seq_retire( uint64_t id ); + +#define TracyCSeqCreate() ___tracy_seq_create() +#define TracyCSeqResume( id ) ___tracy_seq_resume( id ) +#define TracyCSeqSuspend( id ) ___tracy_seq_suspend( id ) +#define TracyCSeqRetire( id ) ___tracy_seq_retire( id ) + TRACY_API int64_t ___tracy_get_time(); #endif diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index 485d52749d..69879bd3dc 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -706,6 +706,7 @@ struct ThreadData Vector ctxSwitchSamples; uint64_t kernelSampleCnt; uint8_t isFiber; + uint8_t 
isFlatView = 0; ThreadData* fiber; uint8_t* stackCount; int32_t groupHint; @@ -875,6 +876,28 @@ struct FlameGraphItem std::vector children; }; + +/* Address of one continuation: the sequence id plus an index. ProcessSeqResume stores these in zoneSeqRef, using the entry's position in that sequence's continuations vector as continuationIdx. */ struct SeqRef +{ + uint64_t seqId; + uint32_t continuationIdx; +}; + +/* One resumed stretch of a sequence, as filled in by ProcessSeqResume: the zone that was on top of the thread's stack, the thread id taken from the event, and the resume time. ProcessSeqSuspend later sets suspendTime. */ struct SeqContinuation +{ + short_ptr zone; + uint64_t tid; + int64_t resumeTime; + int64_t suspendTime; // 0 ⇒ still open +}; + +/* Server-side record of one sequence: create and retire timestamps and its continuations, which ProcessSeqResume keeps ordered by resumeTime. */ struct SequenceData +{ + int64_t createTime; + int64_t retireTime; // 0 ⇒ not retired + Vector continuations; +}; + } #endif diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index a2400fffb9..cf30aa08ca 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -3695,6 +3695,18 @@ void Worker::CheckFiberName( uint64_t id, uint64_t tid ) if( m_sock.IsValid() ) Query( ServerQueryFiberName, id ); } +/* Maps tid to the string returned by StoreString( name, sz ) in m_data.threadNames. Does nothing when tid already has an entry. */ void Worker::RegisterFlattenThreadName( uint64_t tid, const char* name, size_t sz ) +{ + if( m_data.threadNames.find( tid ) != m_data.threadNames.end() ) return; + const auto sl = StoreString( name, sz ); + m_data.threadNames.emplace( tid, sl.ptr ); +} + +/* Erases the m_data.threadNames entry for tid. NOTE(review): the erase is unconditional, so it also removes an entry that RegisterFlattenThreadName found already present and left alone; confirm callers only pass tids they registered themselves. */ void Worker::UnregisterFlattenThreadName( uint64_t tid ) +{ + m_data.threadNames.erase( tid ); +} + void Worker::CheckExternalName( uint64_t id ) { if( m_data.externalNames.find( id ) != m_data.externalNames.end() ) return; @@ -4823,6 +4835,18 @@ bool Worker::Process( const QueueItem& ev ) case QueueType::FiberLeave: ProcessFiberLeave( ev.fiberLeave ); break; + /* The four sequence events are all dispatched with the same payload member, ev.seqEvent. */ case QueueType::SeqCreate: + ProcessSeqCreate( ev.seqEvent ); + break; + case QueueType::SeqResume: + ProcessSeqResume( ev.seqEvent ); + break; + case QueueType::SeqSuspend: + ProcessSeqSuspend( ev.seqEvent ); + break; + case QueueType::SeqRetire: + ProcessSeqRetire( ev.seqEvent ); + break; default: assert( false ); break; @@ -7336,6 +7360,95 @@ void Worker::ProcessFiberLeave( const QueueFiberLeave& ev ) td->fiber = nullptr; } +/* Returns the SequenceData stored under id in m_data.sequences. When none exists, one is allocated from m_slab with createTime set to t and retireTime set to 0, inserted under id, and returned. */ SequenceData* Worker::GetOrCreateSequence( uint64_t id, int64_t t ) +{ + auto sit = m_data.sequences.find( id ); + if( sit != m_data.sequences.end() ) return sit->second; + auto 
seq = m_slab.AllocInit(); + seq->createTime = t; + seq->retireTime = 0; + return m_data.sequences.emplace( id, seq ).first->second; +} + +/* Handles SeqCreate: converts the event time, advances m_data.lastTime when the event is newer, and ensures a sequence entry exists for ev.id. */ void Worker::ProcessSeqCreate( const QueueSeqEvent& ev ) +{ + const auto t = TscTime( RefTime( m_refTimeThread, ev.time ) ); + if( m_data.lastTime < t ) m_data.lastTime = t; + (void)GetOrCreateSequence( ev.id, t ); +} + +/* Handles SeqResume: adds a continuation to sequence ev.id for the zone at the back of td->stack of thread ev.thread, keeps the continuations ordered by resumeTime, and refreshes zoneSeqRef for every entry whose index moved. Returns without recording anything when NoticeThread yields no thread data or the stack is empty; NOTE(review): the sequence entry has already been created by then. */ void Worker::ProcessSeqResume( const QueueSeqEvent& ev ) +{ + const auto t = TscTime( RefTime( m_refTimeThread, ev.time ) ); + if( m_data.lastTime < t ) m_data.lastTime = t; + + auto seq = GetOrCreateSequence( ev.id, t ); + + auto td = NoticeThread( ev.thread ); + if( !td || td->stack.empty() ) return; + + auto zone = td->stack.back(); + SeqContinuation cont; + cont.zone = zone; + cont.tid = ev.thread; + cont.resumeTime = t; + cont.suspendTime = 0; + + // Insert in resumeTime-sorted order. The server processes events from multiple + // per-thread queues in interleaved batches, so a later thread's Resume can be + // processed before an earlier thread's Resume for the same sequence. Without + // sorting, conts[idx] would not reflect the logical chain order and neighbor + // lookups (idx-1, idx+1) would jump to wrong continuations. + auto& conts = seq->continuations; + size_t insertAt = conts.size(); + /* The comparison is strict, so a continuation whose resumeTime equals t is placed after the existing ones. */ while( insertAt > 0 && conts[insertAt - 1].resumeTime > t ) --insertAt; + if( insertAt == conts.size() ) + { + conts.push_back( cont ); + } + else + { + conts.insert( conts.data() + insertAt, cont ); + } + + // Rewrite zoneSeqRef for every entry from insertAt onwards — their indices shifted. 
+ /* NOTE(review): operator[] overwrites any earlier zoneSeqRef entry for the same zone, so a zone can be attributed to only one continuation at a time; confirm a zone is never the resume point of two continuations. */ for( size_t i = insertAt; i < conts.size(); ++i ) + { + const auto* z = (const ZoneEvent*)conts[i].zone; + m_data.zoneSeqRef[z] = SeqRef{ ev.id, uint32_t( i ) }; + } +} + +/* Handles SeqSuspend: stamps suspendTime on an open continuation of sequence ev.id that was resumed on thread ev.thread. NOTE(review): when no such continuation is found the event is dropped without any diagnostic, and because 0 means still open, a converted time of exactly 0 would leave the continuation looking open. */ void Worker::ProcessSeqSuspend( const QueueSeqEvent& ev ) +{ + const auto t = TscTime( RefTime( m_refTimeThread, ev.time ) ); + if( m_data.lastTime < t ) m_data.lastTime = t; + + auto seq = GetOrCreateSequence( ev.id, t ); + auto& conts = seq->continuations; + + // Match Suspend to the most recent unsuspended continuation on the same originating thread. + // Use ev.thread directly because the server's m_threadCtx may have drifted across the + // interleaved processing of multiple per-thread queues by the time this event runs. + for( size_t i = conts.size(); i-- > 0; ) + { + if( conts[i].tid == ev.thread && conts[i].suspendTime == 0 ) + { + conts[i].suspendTime = t; + return; + } + } +} + +/* Handles SeqRetire: records t as the retire time of sequence ev.id. NOTE(review): if the id was not seen before, GetOrCreateSequence creates the entry here, so its createTime equals the retire time. */ void Worker::ProcessSeqRetire( const QueueSeqEvent& ev ) +{ + const auto t = TscTime( RefTime( m_refTimeThread, ev.time ) ); + if( m_data.lastTime < t ) m_data.lastTime = t; + + auto seq = GetOrCreateSequence( ev.id, t ); + seq->retireTime = t; +} + void Worker::MemAllocChanged( MemData& memdata, int64_t time ) { const auto val = (double)memdata.usage; diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 043709e4b0..bfe2ddd886 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -373,6 +373,9 @@ class Worker unordered_flat_map ctxSwitch; + /* Looked up by sequence id in GetOrCreateSequence; values are SequenceData pointers. */ unordered_flat_map sequences; + /* Indexed by const ZoneEvent pointer in ProcessSeqResume; values are SeqRef. */ unordered_flat_map zoneSeqRef; + CpuData cpuData[256]; int cpuDataCount = 0; unordered_flat_map tidToPid; @@ -565,6 +568,8 @@ class Worker const Vector& GetPlots() const { return m_data.plots.Data(); } const Vector& GetThreadData() const { return m_data.threads; } const ThreadData* GetThreadData( uint64_t tid ) const; + /* Returns m_data.sequences by const reference. */ const unordered_flat_map& GetSequences() const { return m_data.sequences; } + /* Returns m_data.zoneSeqRef by const reference. */ const unordered_flat_map& GetZoneSeqRef() const { return m_data.zoneSeqRef; } const MemData& GetMemoryNamed( uint64_t 
name ) const; const unordered_flat_map& GetMemNameMap() const { return m_data.memNameMap; } const Vector>& GetFrameImages() const { return m_data.frameImage; } @@ -609,6 +614,8 @@ class Worker const char* GetString( const StringRef& ref ) const; const char* GetString( const StringIdx& idx ) const; const char* GetThreadName( uint64_t id ) const; + /* Adds a threadNames entry for tid unless one already exists. */ void RegisterFlattenThreadName( uint64_t tid, const char* name, size_t sz ); + /* Erases the threadNames entry for tid. */ void UnregisterFlattenThreadName( uint64_t tid ); bool IsThreadLocal( uint64_t id ) { return IsThreadLocal( id, m_data.threadDataLast ); } bool IsThreadLocal( uint64_t id, ThreadCache& cache ); bool IsThreadFiber( uint64_t id ); @@ -846,6 +853,11 @@ class Worker tracy_force_inline void ProcessThreadGroupHint( const QueueThreadGroupHint& ev ); tracy_force_inline void ProcessFiberEnter( const QueueFiberEnter& ev ); tracy_force_inline void ProcessFiberLeave( const QueueFiberLeave& ev ); + /* Handlers for the SeqCreate, SeqResume, SeqSuspend and SeqRetire queue events. */ tracy_force_inline void ProcessSeqCreate( const QueueSeqEvent& ev ); + tracy_force_inline void ProcessSeqResume( const QueueSeqEvent& ev ); + tracy_force_inline void ProcessSeqSuspend( const QueueSeqEvent& ev ); + tracy_force_inline void ProcessSeqRetire( const QueueSeqEvent& ev ); + /* Lookup-or-create helper called by all four sequence event handlers. */ SequenceData* GetOrCreateSequence( uint64_t id, int64_t t ); tracy_force_inline ZoneEvent* AllocZoneEvent(); tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev );