1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "ProfiledBinary.h" 11 #include "llvm/ProfileData/ProfileCommon.h" 12 #include <unordered_set> 13 14 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 15 cl::Required, 16 cl::desc("Output profile file")); 17 static cl::alias OutputA("o", cl::desc("Alias for --output"), 18 cl::aliasopt(OutputFilename)); 19 20 static cl::opt<SampleProfileFormat> OutputFormat( 21 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 22 cl::values( 23 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 24 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 25 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 26 clEnumValN(SPF_Text, "text", "Text encoding"), 27 clEnumValN(SPF_GCC, "gcc", 28 "GCC encoding (only meaningful for -sample)"))); 29 30 static cl::opt<int32_t, true> RecursionCompression( 31 "compress-recursion", 32 cl::desc("Compressing recursion by deduplicating adjacent frame " 33 "sequences up to the specified size. -1 means no size limit."), 34 cl::Hidden, 35 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 36 37 static cl::opt<bool> CSProfMergeColdContext( 38 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 39 cl::desc("If the total count of context profile is smaller than " 40 "the threshold, it will be merged into context-less base " 41 "profile.")); 42 43 static cl::opt<bool> CSProfTrimColdContext( 44 "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore, 45 cl::desc("If the total count of the profile after all merge is done " 46 "is still smaller than threshold, it will be trimmed.")); 47 48 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 49 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 50 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 51 "context-less base profile")); 52 53 static cl::opt<int, true> CSProfMaxContextDepth( 54 "csprof-max-context-depth", cl::ZeroOrMore, 55 cl::desc("Keep the last K contexts while merging profile. -1 means no " 56 "depth limit."), 57 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 58 59 extern cl::opt<int> ProfileSummaryCutoffCold; 60 61 using namespace llvm; 62 using namespace sampleprof; 63 64 namespace llvm { 65 namespace sampleprof { 66 67 // Initialize the MaxCompressionSize to -1 which means no size limit 68 int32_t CSProfileGenerator::MaxCompressionSize = -1; 69 70 int CSProfileGenerator::MaxContextDepth = -1; 71 72 std::unique_ptr<ProfileGenerator> 73 ProfileGenerator::create(ProfiledBinary *Binary, 74 const ContextSampleCounterMap &SampleCounters, 75 enum PerfScriptType SampleType) { 76 std::unique_ptr<ProfileGenerator> ProfileGenerator; 77 if (SampleType == PERF_LBR_STACK) { 78 if (Binary->usePseudoProbes()) { 79 ProfileGenerator.reset( 80 new PseudoProbeCSProfileGenerator(Binary, SampleCounters)); 81 } else { 82 ProfileGenerator.reset(new CSProfileGenerator(Binary, SampleCounters)); 83 } 84 } else { 85 // TODO: 86 llvm_unreachable("Unsupported perfscript!"); 87 } 88 89 return ProfileGenerator; 90 } 91 92 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 93 StringMap<FunctionSamples> &ProfileMap) { 94 if (std::error_code EC = Writer->write(ProfileMap)) 95 exitWithError(std::move(EC)); 96 } 97 98 void ProfileGenerator::write() { 99 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 100 if (std::error_code EC = WriterOrErr.getError()) 101 exitWithError(EC, OutputFilename); 102 write(std::move(WriterOrErr.get()), ProfileMap); 103 } 104 105 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges, 106 const RangeSample &Ranges) { 107 108 /* 109 Regions may overlap with each other. Using the boundary info, find all 110 disjoint ranges and their sample count. BoundaryPoint contains the count 111 multiple samples begin/end at this points. 112 113 |<--100-->| Sample1 114 |<------200------>| Sample2 115 A B C 116 117 In the example above, 118 Sample1 begins at A, ends at B, its value is 100. 119 Sample2 beings at A, ends at C, its value is 200. 120 For A, BeginCount is the sum of sample begins at A, which is 300 and no 121 samples ends at A, so EndCount is 0. 122 Then boundary points A, B, and C with begin/end counts are: 123 A: (300, 0) 124 B: (0, 100) 125 C: (0, 200) 126 */ 127 struct BoundaryPoint { 128 // Sum of sample counts beginning at this point 129 uint64_t BeginCount; 130 // Sum of sample counts ending at this point 131 uint64_t EndCount; 132 133 BoundaryPoint() : BeginCount(0), EndCount(0){}; 134 135 void addBeginCount(uint64_t Count) { BeginCount += Count; } 136 137 void addEndCount(uint64_t Count) { EndCount += Count; } 138 }; 139 140 /* 141 For the above example. With boundary points, follwing logic finds two 142 disjoint region of 143 144 [A,B]: 300 145 [B+1,C]: 200 146 147 If there is a boundary point that both begin and end, the point itself 148 becomes a separate disjoint region. For example, if we have original 149 ranges of 150 151 |<--- 100 --->| 152 |<--- 200 --->| 153 A B C 154 155 there are three boundary points with their begin/end counts of 156 157 A: (100, 0) 158 B: (200, 100) 159 C: (0, 200) 160 161 the disjoint ranges would be 162 163 [A, B-1]: 100 164 [B, B]: 300 165 [B+1, C]: 200. 166 */ 167 std::map<uint64_t, BoundaryPoint> Boundaries; 168 169 for (auto Item : Ranges) { 170 uint64_t Begin = Item.first.first; 171 uint64_t End = Item.first.second; 172 uint64_t Count = Item.second; 173 if (Boundaries.find(Begin) == Boundaries.end()) 174 Boundaries[Begin] = BoundaryPoint(); 175 Boundaries[Begin].addBeginCount(Count); 176 177 if (Boundaries.find(End) == Boundaries.end()) 178 Boundaries[End] = BoundaryPoint(); 179 Boundaries[End].addEndCount(Count); 180 } 181 182 uint64_t BeginAddress = UINT64_MAX; 183 int Count = 0; 184 for (auto Item : Boundaries) { 185 uint64_t Address = Item.first; 186 BoundaryPoint &Point = Item.second; 187 if (Point.BeginCount) { 188 if (BeginAddress != UINT64_MAX) 189 DisjointRanges[{BeginAddress, Address - 1}] = Count; 190 Count += Point.BeginCount; 191 BeginAddress = Address; 192 } 193 if (Point.EndCount) { 194 assert((BeginAddress != UINT64_MAX) && 195 "First boundary point cannot be 'end' point"); 196 DisjointRanges[{BeginAddress, Address}] = Count; 197 Count -= Point.EndCount; 198 BeginAddress = Address + 1; 199 } 200 } 201 } 202 203 FunctionSamples & 204 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr, 205 bool WasLeafInlined) { 206 auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples()); 207 if (Ret.second) { 208 // Make a copy of the underlying context string in string table 209 // before StringRef wrapper is used for context. 210 auto It = ContextStrings.insert(ContextStr.str()); 211 SampleContext FContext(*It.first, RawContext); 212 if (WasLeafInlined) 213 FContext.setAttribute(ContextWasInlined); 214 FunctionSamples &FProfile = Ret.first->second; 215 FProfile.setContext(FContext); 216 FProfile.setName(FContext.getNameWithoutContext()); 217 } 218 return Ret.first->second; 219 } 220 221 void CSProfileGenerator::generateProfile() { 222 FunctionSamples::ProfileIsCS = true; 223 for (const auto &CI : SampleCounters) { 224 const StringBasedCtxKey *CtxKey = 225 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 226 StringRef ContextId(CtxKey->Context); 227 // Get or create function profile for the range 228 FunctionSamples &FunctionProfile = 229 getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined); 230 231 // Fill in function body samples 232 populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter); 233 // Fill in boundary sample counts as well as call site samples for calls 234 populateFunctionBoundarySamples(ContextId, FunctionProfile, 235 CI.second.BranchCounter); 236 } 237 // Fill in call site value sample for inlined calls and also use context to 238 // infer missing samples. Since we don't have call count for inlined 239 // functions, we estimate it from inlinee's profile using the entry of the 240 // body sample. 241 populateInferredFunctionSamples(); 242 243 postProcessProfiles(); 244 } 245 246 void CSProfileGenerator::updateBodySamplesforFunctionProfile( 247 FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, 248 uint64_t Count) { 249 // Filter out invalid negative(int type) lineOffset 250 if (LeafLoc.second.LineOffset & 0x80000000) 251 return; 252 // Use the maximum count of samples with same line location 253 ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt( 254 LeafLoc.second.LineOffset, LeafLoc.second.Discriminator); 255 uint64_t PreviousCount = R ? R.get() : 0; 256 if (PreviousCount < Count) { 257 FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, 258 LeafLoc.second.Discriminator, 259 Count - PreviousCount); 260 } 261 } 262 263 void CSProfileGenerator::populateFunctionBodySamples( 264 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 265 // Compute disjoint ranges first, so we can use MAX 266 // for calculating count for each location. 267 RangeSample Ranges; 268 findDisjointRanges(Ranges, RangeCounter); 269 for (auto Range : Ranges) { 270 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 271 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 272 uint64_t Count = Range.second; 273 // Disjoint ranges have introduce zero-filled gap that 274 // doesn't belong to current context, filter them out. 275 if (Count == 0) 276 continue; 277 278 InstructionPointer IP(Binary, RangeBegin, true); 279 280 // Disjoint ranges may have range in the middle of two instr, 281 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 282 // can be Addr1+1 to Addr2-1. We should ignore such range. 283 if (IP.Address > RangeEnd) 284 continue; 285 286 while (IP.Address <= RangeEnd) { 287 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 288 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 289 if (LeafLoc.hasValue()) { 290 // Recording body sample for this specific context 291 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 292 } 293 // Accumulate total sample count even it's a line with invalid debug info 294 FunctionProfile.addTotalSamples(Count); 295 // Move to next IP within the range 296 IP.advance(); 297 } 298 } 299 } 300 301 void CSProfileGenerator::populateFunctionBoundarySamples( 302 StringRef ContextId, FunctionSamples &FunctionProfile, 303 const BranchSample &BranchCounters) { 304 305 for (auto Entry : BranchCounters) { 306 uint64_t SourceOffset = Entry.first.first; 307 uint64_t TargetOffset = Entry.first.second; 308 uint64_t Count = Entry.second; 309 // Get the callee name by branch target if it's a call branch 310 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 311 Binary->getFuncFromStartOffset(TargetOffset)); 312 if (CalleeName.size() == 0) 313 continue; 314 315 // Record called target sample and its count 316 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 317 if (!LeafLoc.hasValue()) 318 continue; 319 FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset, 320 LeafLoc->second.Discriminator, 321 CalleeName, Count); 322 323 // Record head sample for called target(callee) 324 std::ostringstream OCalleeCtxStr; 325 if (ContextId.find(" @ ") != StringRef::npos) { 326 OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); 327 OCalleeCtxStr << " @ "; 328 } 329 OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str(); 330 331 FunctionSamples &CalleeProfile = 332 getFunctionProfileForContext(OCalleeCtxStr.str()); 333 assert(Count != 0 && "Unexpected zero weight branch"); 334 CalleeProfile.addHeadSamples(Count); 335 } 336 } 337 338 static FrameLocation getCallerContext(StringRef CalleeContext, 339 StringRef &CallerNameWithContext) { 340 StringRef CallerContext = CalleeContext.rsplit(" @ ").first; 341 CallerNameWithContext = CallerContext.rsplit(':').first; 342 auto ContextSplit = CallerContext.rsplit(" @ "); 343 StringRef CallerFrameStr = ContextSplit.second.size() == 0 344 ? ContextSplit.first 345 : ContextSplit.second; 346 FrameLocation LeafFrameLoc = {"", {0, 0}}; 347 StringRef Funcname; 348 SampleContext::decodeContextString(CallerFrameStr, Funcname, 349 LeafFrameLoc.second); 350 LeafFrameLoc.first = Funcname.str(); 351 return LeafFrameLoc; 352 } 353 354 void CSProfileGenerator::populateInferredFunctionSamples() { 355 for (const auto &Item : ProfileMap) { 356 const StringRef CalleeContext = Item.first(); 357 const FunctionSamples &CalleeProfile = Item.second; 358 359 // If we already have head sample counts, we must have value profile 360 // for call sites added already. Skip to avoid double counting. 361 if (CalleeProfile.getHeadSamples()) 362 continue; 363 // If we don't have context, nothing to do for caller's call site. 364 // This could happen for entry point function. 365 if (CalleeContext.find(" @ ") == StringRef::npos) 366 continue; 367 368 // Infer Caller's frame loc and context ID through string splitting 369 StringRef CallerContextId; 370 FrameLocation &&CallerLeafFrameLoc = 371 getCallerContext(CalleeContext, CallerContextId); 372 373 // It's possible that we haven't seen any sample directly in the caller, 374 // in which case CallerProfile will not exist. But we can't modify 375 // ProfileMap while iterating it. 376 // TODO: created function profile for those callers too 377 if (ProfileMap.find(CallerContextId) == ProfileMap.end()) 378 continue; 379 FunctionSamples &CallerProfile = ProfileMap[CallerContextId]; 380 381 // Since we don't have call count for inlined functions, we 382 // estimate it from inlinee's profile using entry body sample. 383 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 384 // If we don't have samples with location, use 1 to indicate live. 385 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 386 EstimatedCallCount = 1; 387 CallerProfile.addCalledTargetSamples( 388 CallerLeafFrameLoc.second.LineOffset, 389 CallerLeafFrameLoc.second.Discriminator, 390 CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount); 391 CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset, 392 CallerLeafFrameLoc.second.Discriminator, 393 EstimatedCallCount); 394 CallerProfile.addTotalSamples(EstimatedCallCount); 395 } 396 } 397 398 void CSProfileGenerator::postProcessProfiles() { 399 // Compute hot/cold threshold based on profile. This will be used for cold 400 // context profile merging/trimming. 401 computeSummaryAndThreshold(); 402 403 // Run global pre-inliner to adjust/merge context profile based on estimated 404 // inline decisions. 405 if (EnableCSPreInliner) { 406 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) 407 .run(); 408 } 409 410 // Trim and merge cold context profile using cold threshold above; 411 SampleContextTrimmer(ProfileMap) 412 .trimAndMergeColdContextProfiles( 413 ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext, 414 CSProfMaxColdContextDepth); 415 } 416 417 void CSProfileGenerator::computeSummaryAndThreshold() { 418 // Update the default value of cold cutoff for llvm-profgen. 419 // Do it here because we don't want to change the global default, 420 // which would lead CS profile size too large. 421 if (!ProfileSummaryCutoffCold.getNumOccurrences()) 422 ProfileSummaryCutoffCold = 999000; 423 424 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 425 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 426 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 427 (Summary->getDetailedSummary())); 428 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 429 (Summary->getDetailedSummary())); 430 } 431 432 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 433 StringMap<FunctionSamples> &ProfileMap) { 434 if (std::error_code EC = Writer->write(ProfileMap)) 435 exitWithError(std::move(EC)); 436 } 437 438 // Helper function to extract context prefix string stack 439 // Extract context stack for reusing, leaf context stack will 440 // be added compressed while looking up function profile 441 static void extractPrefixContextStack( 442 SmallVectorImpl<std::string> &ContextStrStack, 443 const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes, 444 ProfiledBinary *Binary) { 445 for (const auto *P : Probes) { 446 Binary->getInlineContextForProbe(P, ContextStrStack, true); 447 } 448 } 449 450 void PseudoProbeCSProfileGenerator::generateProfile() { 451 // Enable pseudo probe functionalities in SampleProf 452 FunctionSamples::ProfileIsProbeBased = true; 453 FunctionSamples::ProfileIsCS = true; 454 for (const auto &CI : SampleCounters) { 455 const ProbeBasedCtxKey *CtxKey = 456 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 457 SmallVector<std::string, 16> ContextStrStack; 458 extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); 459 // Fill in function body samples from probes, also infer caller's samples 460 // from callee's probe 461 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack); 462 // Fill in boundary samples for a call probe 463 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStrStack); 464 } 465 466 postProcessProfiles(); 467 } 468 469 void PseudoProbeCSProfileGenerator::extractProbesFromRange( 470 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter) { 471 RangeSample Ranges; 472 findDisjointRanges(Ranges, RangeCounter); 473 for (const auto &Range : Ranges) { 474 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 475 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 476 uint64_t Count = Range.second; 477 // Disjoint ranges have introduce zero-filled gap that 478 // doesn't belong to current context, filter them out. 479 if (Count == 0) 480 continue; 481 482 InstructionPointer IP(Binary, RangeBegin, true); 483 484 // Disjoint ranges may have range in the middle of two instr, 485 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 486 // can be Addr1+1 to Addr2-1. We should ignore such range. 487 if (IP.Address > RangeEnd) 488 continue; 489 490 while (IP.Address <= RangeEnd) { 491 const AddressProbesMap &Address2ProbesMap = 492 Binary->getAddress2ProbesMap(); 493 auto It = Address2ProbesMap.find(IP.Address); 494 if (It != Address2ProbesMap.end()) { 495 for (const auto &Probe : It->second) { 496 if (!Probe.isBlock()) 497 continue; 498 ProbeCounter[&Probe] += Count; 499 } 500 } 501 502 IP.advance(); 503 } 504 } 505 } 506 507 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( 508 const RangeSample &RangeCounter, 509 SmallVectorImpl<std::string> &ContextStrStack) { 510 ProbeCounterMap ProbeCounter; 511 // Extract the top frame probes by looking up each address among the range in 512 // the Address2ProbeMap 513 extractProbesFromRange(RangeCounter, ProbeCounter); 514 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 515 std::unordered_set<FunctionSamples *>> 516 FrameSamples; 517 for (auto PI : ProbeCounter) { 518 const MCDecodedPseudoProbe *Probe = PI.first; 519 uint64_t Count = PI.second; 520 FunctionSamples &FunctionProfile = 521 getFunctionProfileForLeafProbe(ContextStrStack, Probe); 522 // Record the current frame and FunctionProfile whenever samples are 523 // collected for non-danglie probes. This is for reporting all of the 524 // zero count probes of the frame later. 525 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 526 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 527 FunctionProfile.addTotalSamples(Count); 528 if (Probe->isEntry()) { 529 FunctionProfile.addHeadSamples(Count); 530 // Look up for the caller's function profile 531 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 532 if (InlinerDesc != nullptr) { 533 // Since the context id will be compressed, we have to use callee's 534 // context id to infer caller's context id to ensure they share the 535 // same context prefix. 536 StringRef CalleeContextId = 537 FunctionProfile.getContext().getNameWithContext(); 538 StringRef CallerContextId; 539 FrameLocation &&CallerLeafFrameLoc = 540 getCallerContext(CalleeContextId, CallerContextId); 541 uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset; 542 assert(CallerIndex && 543 "Inferred caller's location index shouldn't be zero!"); 544 FunctionSamples &CallerProfile = 545 getFunctionProfileForContext(CallerContextId); 546 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 547 CallerProfile.addBodySamples(CallerIndex, 0, Count); 548 CallerProfile.addTotalSamples(Count); 549 CallerProfile.addCalledTargetSamples( 550 CallerIndex, 0, 551 FunctionProfile.getContext().getNameWithoutContext(), Count); 552 } 553 } 554 } 555 556 // Assign zero count for remaining probes without sample hits to 557 // differentiate from probes optimized away, of which the counts are unknown 558 // and will be inferred by the compiler. 559 for (auto &I : FrameSamples) { 560 for (auto *FunctionProfile : I.second) { 561 for (auto *Probe : I.first->getProbes()) { 562 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 563 } 564 } 565 } 566 } 567 568 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( 569 const BranchSample &BranchCounter, 570 SmallVectorImpl<std::string> &ContextStrStack) { 571 for (auto BI : BranchCounter) { 572 uint64_t SourceOffset = BI.first.first; 573 uint64_t TargetOffset = BI.first.second; 574 uint64_t Count = BI.second; 575 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 576 const MCDecodedPseudoProbe *CallProbe = 577 Binary->getCallProbeForAddr(SourceAddress); 578 if (CallProbe == nullptr) 579 continue; 580 FunctionSamples &FunctionProfile = 581 getFunctionProfileForLeafProbe(ContextStrStack, CallProbe); 582 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 583 FunctionProfile.addTotalSamples(Count); 584 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 585 Binary->getFuncFromStartOffset(TargetOffset)); 586 if (CalleeName.size() == 0) 587 continue; 588 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 589 Count); 590 } 591 } 592 593 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 594 SmallVectorImpl<std::string> &ContextStrStack, 595 const MCPseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) { 596 assert(ContextStrStack.size() && "Profile context must have the leaf frame"); 597 // Compress the context string except for the leaf frame 598 std::string LeafFrame = ContextStrStack.back(); 599 ContextStrStack.pop_back(); 600 CSProfileGenerator::compressRecursionContext(ContextStrStack); 601 CSProfileGenerator::trimContext(ContextStrStack); 602 603 std::ostringstream OContextStr; 604 for (uint32_t I = 0; I < ContextStrStack.size(); I++) { 605 if (OContextStr.str().size()) 606 OContextStr << " @ "; 607 OContextStr << ContextStrStack[I]; 608 } 609 // For leaf inlined context with the top frame, we should strip off the top 610 // frame's probe id, like: 611 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 612 if (OContextStr.str().size()) 613 OContextStr << " @ "; 614 OContextStr << StringRef(LeafFrame).split(":").first.str(); 615 616 FunctionSamples &FunctionProile = 617 getFunctionProfileForContext(OContextStr.str(), WasLeafInlined); 618 FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash); 619 return FunctionProile; 620 } 621 622 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 623 SmallVectorImpl<std::string> &ContextStrStack, 624 const MCDecodedPseudoProbe *LeafProbe) { 625 626 // Explicitly copy the context for appending the leaf context 627 SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(), 628 ContextStrStack.end()); 629 Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true); 630 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 631 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 632 return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc, 633 WasLeafInlined); 634 } 635 636 } // end namespace sampleprof 637 } // end namespace llvm 638