diff --git a/llvm/test/tools/llvm-profgen/Inputs/buildid-cs-noprobe.aggperfscript b/llvm/test/tools/llvm-profgen/Inputs/buildid-cs-noprobe.aggperfscript new file mode 100644 index 0000000000000..9dbd2725c8e4d --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/buildid-cs-noprobe.aggperfscript @@ -0,0 +1,11 @@ +2 + aabb1122:4005dc + aabb1122:400634 + aabb1122:400684 + 7f68c5788793 + aabb1122:0x4005c8/aabb1122:0x4005dc aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005c8/aabb1122:0x4005dc +2 + aabb1122:4005b0 + aabb1122:400684 + 7f68c5788793 + aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005c8/aabb1122:0x4005dc aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 diff --git a/llvm/test/tools/llvm-profgen/filter-build-id.test b/llvm/test/tools/llvm-profgen/filter-build-id.test new file mode 100644 index 0000000000000..7f03db99e68de --- /dev/null +++ b/llvm/test/tools/llvm-profgen/filter-build-id.test @@ -0,0 +1,33 @@ +; REQUIRES: x86_64-linux +; Test that [buildid:]0xaddr format is correctly parsed in hybrid perfscript +; input. 
Both callstack frames and LBR entries may carry buildid prefixes. + +;; Test 1: Hybrid perfscript with buildid prefix on callstack frames and +;; LBR entries, using --filter-build-id to match "aabb1122". +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/buildid-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0 --filter-build-id=aabb1122 +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-HYBRID + +; CHECK-HYBRID: [foo] +; CHECK-HYBRID-NEXT: 3 +; CHECK-HYBRID-NEXT: 5ff-62f:6 +; CHECK-HYBRID-NEXT: 634-637:6 +; CHECK-HYBRID-NEXT: 645-645:6 +; CHECK-HYBRID-NEXT: 3 +; CHECK-HYBRID-NEXT: 62f->5b0:6 +; CHECK-HYBRID-NEXT: 637->645:6 +; CHECK-HYBRID-NEXT: 645->5ff:6 +; CHECK-HYBRID-NEXT: [foo:3 @ bar] +; CHECK-HYBRID-NEXT: 4 +; CHECK-HYBRID-NEXT: 5b0-5c8:2 +; CHECK-HYBRID-NEXT: 5b0-5d7:4 +; CHECK-HYBRID-NEXT: 5dc-5e9:2 +; CHECK-HYBRID-NEXT: 5e5-5e9:4 +; CHECK-HYBRID-NEXT: 3 +; CHECK-HYBRID-NEXT: 5c8->5dc:4 +; CHECK-HYBRID-NEXT: 5d7->5e5:4 +; CHECK-HYBRID-NEXT: 5e9->634:6 + +;; Test 2: With non-matching filter, callstack frames are filtered out, +;; resulting in no samples. +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/buildid-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t2 --skip-symbolization --filter-build-id=ccdd3344 2>&1 | FileCheck %s --check-prefix=CHECK-NOMATCH +; CHECK-NOMATCH: warning: No samples in perf script! diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 5180867211127..1a8b15f23ca9c 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -60,6 +60,13 @@ static cl::opt CSProfMaxUnsymbolizedCtxDepth( "means no depth limit."), cl::cat(ProfGenCategory)); +static cl::opt FilterBuildID( + "filter-build-id", + cl::desc("Override auto-detected build ID for filtering perfscript " + "addresses in [buildid:]addr format. 
When set, only addresses " + "with a matching build ID prefix are kept."), + cl::cat(ProfGenCategory)); + namespace sampleprof { void VirtualUnwinder::unwindCall(UnwindState &State) { @@ -656,13 +663,32 @@ void HybridPerfReader::unwindSamples() { "frame to match."); } -/// Parse a hex address from \p Str. -static bool parseAddress(StringRef Str, uint64_t &Addr, bool HasPrefix) { +/// Parse hex \p Str (carrying "0x" iff \p HasPrefix) into \p Addr; a non-null +/// \p BuildID receives the optional "buildid:" prefix. Returns true on failure. +static bool parseAddress(StringRef Str, uint64_t &Addr, bool HasPrefix, + StringRef *BuildID = nullptr) { + if (BuildID) { + *BuildID = StringRef(); + size_t ColonPos = Str.find(':'); + if (ColonPos != StringRef::npos) { + *BuildID = Str.substr(0, ColonPos); + Str = Str.substr(ColonPos + 1); + } + } if (Str.consume_front("0x") != HasPrefix) return true; return Str.getAsInteger(16, Addr); } +/// Return the build ID to use for filtering perfscript addresses. +/// If --filter-build-id is specified, use it as an override. +/// Otherwise, use the auto-detected value from the binary. +static StringRef getFilterBuildID(const ProfiledBinary *Binary) { + if (FilterBuildID.getNumOccurrences() > 0) + return FilterBuildID; + return Binary->getFilterBuildID(); +} + bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, SmallVectorImpl &LBRStack) { // The raw format of LBR stack is like: @@ -701,10 +727,14 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, Token.split(Addresses, "/"); uint64_t Src; uint64_t Dst; + StringRef SrcBuildID, DstBuildID; // Stop at broken LBR records. - if (Addresses.size() < 2 || parseAddress(Addresses[0], Src, true) || - parseAddress(Addresses[1], Dst, true)) { + if (Addresses.size() < 2 || + parseAddress(Addresses[0], Src, true, + IsPreAggregated ? 
&DstBuildID : nullptr)) { WarnInvalidLBR(TraceIt); break; } @@ -714,6 +744,14 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, Dst = Binary->canonicalizeVirtualAddress(Dst); bool SrcIsInternal = Binary->addressIsCode(Src); bool DstIsInternal = Binary->addressIsCode(Dst); + // Pre-aggregated input: an address with a mismatched build ID is external. + if (IsPreAggregated) { + StringRef BinaryBuildID = getFilterBuildID(Binary); + if (!SrcBuildID.empty() && SrcBuildID != BinaryBuildID) + SrcIsInternal = false; + if (!DstBuildID.empty() && DstBuildID != BinaryBuildID) + DstIsInternal = false; + } if (!SrcIsInternal) Src = ExternalAddr; if (!DstIsInternal) @@ -731,16 +769,19 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, SmallVectorImpl &CallStack) { // The raw format of call stack is like: - // 4005dc # leaf frame + // 4005dc # leaf frame (no buildid) // 400634 - // 400684 # root frame + // deadbeef:400684 # root frame (with buildid prefix) // It's in bottom-up order with each frame in one line. // Extract stack frames from sample - while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true)) { + while (!TraceIt.isAtEoF() && + !isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) { StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); uint64_t FrameAddr = 0; - if (parseAddress(FrameStr, FrameAddr, false)) { + StringRef FrameBuildID; + if (parseAddress(FrameStr, FrameAddr, false, + IsPreAggregated ? &FrameBuildID : nullptr)) { // We might parse a non-perf sample line like empty line and comments, // skip it TraceIt.advance(); @@ -750,7 +791,11 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, FrameAddr = Binary->canonicalizeVirtualAddress(FrameAddr); // Currently intermixed frame from different binaries is not supported. 
- if (!Binary->addressIsCode(FrameAddr)) { + bool IsExternal = + !Binary->addressIsCode(FrameAddr) || + // Pre-aggregated: build ID (even if empty) must equal the filter. + (IsPreAggregated && FrameBuildID != getFilterBuildID(Binary)); + if (IsExternal) { if (CallStack.empty()) NumLeafExternalFrame++; // Push a special value(ExternalAddr) for the external frames so that @@ -785,7 +830,8 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, // Skip other unrelated line, find the next valid LBR line // Note that even for empty call stack, we should skip the address at the // bottom, otherwise the following pass may generate a truncated callstack - while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true)) { + while (!TraceIt.isAtEoF() && + !isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) { TraceIt.advance(); } // Filter out broken stack sample. We may not have complete frame info @@ -830,14 +876,16 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { // Parsing call stack and populate into PerfSample.CallStack if (!extractCallstack(TraceIt, Sample->CallStack)) { // Skip the next LBR line matched current call stack - if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true)) + if (!TraceIt.isAtEoF() && + isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) TraceIt.advance(); return; } warnIfMissingMMap(); - if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true)) { + if (!TraceIt.isAtEoF() && + isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) { // Parsing LBR stack and populate into PerfSample.LBRStack if (extractLBRStack(TraceIt, Sample->LBRStack)) { if (IgnoreStackSamples) { @@ -1161,8 +1209,9 @@ void PerfScriptReader::parseAndAggregateTrace() { // A LBR sample is like: // 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... // A heuristic for fast detection by checking whether a -// leading " 0x" and the '/' exist. 
-bool PerfScriptReader::isLBRSample(StringRef Line, bool CheckLineStart) { +// leading " 0x" (or ":0x" if pre-aggregated) and the '/' exist. +bool PerfScriptReader::isLBRSample(StringRef Line, bool CheckLineStart, + bool IsPreAggregated) { // Skip the leading instruction pointer SmallVector Records; if (!CheckLineStart) @@ -1175,7 +1224,7 @@ bool PerfScriptReader::isLBRSample(StringRef Line, bool CheckLineStart) { return false; if (Token.starts_with("0x")) return true; - return false; + return IsPreAggregated && Token.contains(":0x"); } bool PerfScriptReader::isMMapEvent(StringRef Line) { @@ -1212,13 +1261,15 @@ PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) { // Detect sample with call stack int32_t Count = 0; + StringRef FrameBuildID; while (!TraceIt.isAtEoF() && - !parseAddress(TraceIt.getCurrentLine().ltrim(), FrameAddr, false)) { + !parseAddress(TraceIt.getCurrentLine().ltrim(), FrameAddr, false, + HasAggCount ? &FrameBuildID : nullptr)) { Count++; TraceIt.advance(); } if (!TraceIt.isAtEoF()) { - if (isLBRSample(TraceIt.getCurrentLine(), false)) { + if (isLBRSample(TraceIt.getCurrentLine(), false, HasAggCount)) { if (Count > 0) return HasAggCount ? 
PerfContent::AggLBRStack : PerfContent::LBRStack; else diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h index 358d61067a4ef..f06c06d038a7b 100644 --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -636,7 +636,8 @@ class PerfScriptReader : public PerfReaderBase { protected: // Check whether a given line is LBR sample - static bool isLBRSample(StringRef Line, bool CheckLineStart); + static bool isLBRSample(StringRef Line, bool CheckLineStart, + bool IsPreAggregated); // Check whether a given line is MMAP event static bool isMMapEvent(StringRef Line); // Update base address based on mmap events @@ -679,7 +680,7 @@ class PerfScriptReader : public PerfReaderBase { std::set InvalidReturnAddresses; // PID for the process of interest std::optional PIDFilter; - // Whether the input is pre-aggregated + // Whether the input is pre-aggregated; enables [buildid:]addr parsing. bool IsPreAggregated = false; };