1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "llvm/ProfileData/ProfileCommon.h" 11 12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 13 cl::Required, 14 cl::desc("Output profile file")); 15 static cl::alias OutputA("o", cl::desc("Alias for --output"), 16 cl::aliasopt(OutputFilename)); 17 18 static cl::opt<SampleProfileFormat> OutputFormat( 19 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 20 cl::values( 21 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 22 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 23 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 24 clEnumValN(SPF_Text, "text", "Text encoding"), 25 clEnumValN(SPF_GCC, "gcc", 26 "GCC encoding (only meaningful for -sample)"))); 27 28 static cl::opt<int32_t, true> RecursionCompression( 29 "compress-recursion", 30 cl::desc("Compressing recursion by deduplicating adjacent frame " 31 "sequences up to the specified size. -1 means no size limit."), 32 cl::Hidden, 33 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 34 35 static cl::opt<bool> CSProfMergeColdContext( 36 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 37 cl::desc("If the total count of context profile is smaller than " 38 "the threshold, it will be merged into context-less base " 39 "profile.")); 40 41 static cl::opt<bool> CSProfTrimColdContext( 42 "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore, 43 cl::desc("If the total count of the profile after all merge is done " 44 "is still smaller than threshold, it will be trimmed.")); 45 46 static cl::opt<uint32_t> CSProfColdContextFrameDepth( 47 "csprof-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore, 48 cl::desc("Keep the last K frames while merging cold profile. 1 means the " 49 "context-less base profile")); 50 51 static cl::opt<bool> EnableCSPreInliner( 52 "csspgo-preinliner", cl::Hidden, cl::init(false), 53 cl::desc("Run a global pre-inliner to merge context profile based on " 54 "estimated global top-down inline decisions")); 55 56 extern cl::opt<int> ProfileSummaryCutoffCold; 57 58 using namespace llvm; 59 using namespace sampleprof; 60 61 namespace llvm { 62 namespace sampleprof { 63 64 // Initialize the MaxCompressionSize to -1 which means no size limit 65 int32_t CSProfileGenerator::MaxCompressionSize = -1; 66 67 static bool 68 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) { 69 return BinarySampleCounters.size() && 70 BinarySampleCounters.begin()->first->usePseudoProbes(); 71 } 72 73 std::unique_ptr<ProfileGenerator> 74 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters, 75 enum PerfScriptType SampleType) { 76 std::unique_ptr<ProfileGenerator> ProfileGenerator; 77 if (SampleType == PERF_LBR_STACK) { 78 if (usePseudoProbes(BinarySampleCounters)) { 79 ProfileGenerator.reset( 80 new PseudoProbeCSProfileGenerator(BinarySampleCounters)); 81 } else { 82 ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters)); 83 } 84 } else { 85 // TODO: 86 llvm_unreachable("Unsupported perfscript!"); 87 } 88 89 return ProfileGenerator; 90 } 91 92 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 93 StringMap<FunctionSamples> &ProfileMap) { 94 if (std::error_code EC = Writer->write(ProfileMap)) 95 exitWithError(std::move(EC)); 96 } 97 98 void ProfileGenerator::write() { 99 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 100 if (std::error_code EC = WriterOrErr.getError()) 101 exitWithError(EC, OutputFilename); 102 write(std::move(WriterOrErr.get()), ProfileMap); 103 } 104 105 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges, 106 const RangeSample &Ranges) { 107 108 /* 109 Regions may overlap with each other. Using the boundary info, find all 110 disjoint ranges and their sample count. BoundaryPoint contains the count 111 multiple samples begin/end at this points. 112 113 |<--100-->| Sample1 114 |<------200------>| Sample2 115 A B C 116 117 In the example above, 118 Sample1 begins at A, ends at B, its value is 100. 119 Sample2 beings at A, ends at C, its value is 200. 120 For A, BeginCount is the sum of sample begins at A, which is 300 and no 121 samples ends at A, so EndCount is 0. 122 Then boundary points A, B, and C with begin/end counts are: 123 A: (300, 0) 124 B: (0, 100) 125 C: (0, 200) 126 */ 127 struct BoundaryPoint { 128 // Sum of sample counts beginning at this point 129 uint64_t BeginCount; 130 // Sum of sample counts ending at this point 131 uint64_t EndCount; 132 133 BoundaryPoint() : BeginCount(0), EndCount(0){}; 134 135 void addBeginCount(uint64_t Count) { BeginCount += Count; } 136 137 void addEndCount(uint64_t Count) { EndCount += Count; } 138 }; 139 140 /* 141 For the above example. With boundary points, follwing logic finds two 142 disjoint region of 143 144 [A,B]: 300 145 [B+1,C]: 200 146 147 If there is a boundary point that both begin and end, the point itself 148 becomes a separate disjoint region. For example, if we have original 149 ranges of 150 151 |<--- 100 --->| 152 |<--- 200 --->| 153 A B C 154 155 there are three boundary points with their begin/end counts of 156 157 A: (100, 0) 158 B: (200, 100) 159 C: (0, 200) 160 161 the disjoint ranges would be 162 163 [A, B-1]: 100 164 [B, B]: 300 165 [B+1, C]: 200. 166 */ 167 std::map<uint64_t, BoundaryPoint> Boundaries; 168 169 for (auto Item : Ranges) { 170 uint64_t Begin = Item.first.first; 171 uint64_t End = Item.first.second; 172 uint64_t Count = Item.second; 173 if (Boundaries.find(Begin) == Boundaries.end()) 174 Boundaries[Begin] = BoundaryPoint(); 175 Boundaries[Begin].addBeginCount(Count); 176 177 if (Boundaries.find(End) == Boundaries.end()) 178 Boundaries[End] = BoundaryPoint(); 179 Boundaries[End].addEndCount(Count); 180 } 181 182 uint64_t BeginAddress = UINT64_MAX; 183 int Count = 0; 184 for (auto Item : Boundaries) { 185 uint64_t Address = Item.first; 186 BoundaryPoint &Point = Item.second; 187 if (Point.BeginCount) { 188 if (BeginAddress != UINT64_MAX) 189 DisjointRanges[{BeginAddress, Address - 1}] = Count; 190 Count += Point.BeginCount; 191 BeginAddress = Address; 192 } 193 if (Point.EndCount) { 194 assert((BeginAddress != UINT64_MAX) && 195 "First boundary point cannot be 'end' point"); 196 DisjointRanges[{BeginAddress, Address}] = Count; 197 Count -= Point.EndCount; 198 BeginAddress = Address + 1; 199 } 200 } 201 } 202 203 FunctionSamples & 204 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr, 205 bool WasLeafInlined) { 206 auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples()); 207 if (Ret.second) { 208 // Make a copy of the underlying context string in string table 209 // before StringRef wrapper is used for context. 210 auto It = ContextStrings.insert(ContextStr.str()); 211 SampleContext FContext(*It.first, RawContext); 212 if (WasLeafInlined) 213 FContext.setAttribute(ContextWasInlined); 214 FunctionSamples &FProfile = Ret.first->second; 215 FProfile.setContext(FContext); 216 FProfile.setName(FContext.getNameWithoutContext()); 217 } 218 return Ret.first->second; 219 } 220 221 void CSProfileGenerator::generateProfile() { 222 FunctionSamples::ProfileIsCS = true; 223 for (const auto &BI : BinarySampleCounters) { 224 ProfiledBinary *Binary = BI.first; 225 for (const auto &CI : BI.second) { 226 const StringBasedCtxKey *CtxKey = 227 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 228 StringRef ContextId(CtxKey->Context); 229 // Get or create function profile for the range 230 FunctionSamples &FunctionProfile = 231 getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined); 232 233 // Fill in function body samples 234 populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter, 235 Binary); 236 // Fill in boundary sample counts as well as call site samples for calls 237 populateFunctionBoundarySamples(ContextId, FunctionProfile, 238 CI.second.BranchCounter, Binary); 239 } 240 } 241 // Fill in call site value sample for inlined calls and also use context to 242 // infer missing samples. Since we don't have call count for inlined 243 // functions, we estimate it from inlinee's profile using the entry of the 244 // body sample. 245 populateInferredFunctionSamples(); 246 247 postProcessProfiles(); 248 } 249 250 void CSProfileGenerator::updateBodySamplesforFunctionProfile( 251 FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, 252 uint64_t Count) { 253 // Filter out invalid negative(int type) lineOffset 254 if (LeafLoc.second.LineOffset & 0x80000000) 255 return; 256 // Use the maximum count of samples with same line location 257 ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt( 258 LeafLoc.second.LineOffset, LeafLoc.second.Discriminator); 259 uint64_t PreviousCount = R ? R.get() : 0; 260 if (PreviousCount < Count) { 261 FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, 262 LeafLoc.second.Discriminator, 263 Count - PreviousCount); 264 } 265 } 266 267 void CSProfileGenerator::populateFunctionBodySamples( 268 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter, 269 ProfiledBinary *Binary) { 270 // Compute disjoint ranges first, so we can use MAX 271 // for calculating count for each location. 272 RangeSample Ranges; 273 findDisjointRanges(Ranges, RangeCounter); 274 for (auto Range : Ranges) { 275 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 276 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 277 uint64_t Count = Range.second; 278 // Disjoint ranges have introduce zero-filled gap that 279 // doesn't belong to current context, filter them out. 280 if (Count == 0) 281 continue; 282 283 InstructionPointer IP(Binary, RangeBegin, true); 284 285 // Disjoint ranges may have range in the middle of two instr, 286 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 287 // can be Addr1+1 to Addr2-1. We should ignore such range. 288 if (IP.Address > RangeEnd) 289 continue; 290 291 while (IP.Address <= RangeEnd) { 292 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 293 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 294 if (LeafLoc.hasValue()) { 295 // Recording body sample for this specific context 296 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 297 } 298 // Accumulate total sample count even it's a line with invalid debug info 299 FunctionProfile.addTotalSamples(Count); 300 // Move to next IP within the range 301 IP.advance(); 302 } 303 } 304 } 305 306 void CSProfileGenerator::populateFunctionBoundarySamples( 307 StringRef ContextId, FunctionSamples &FunctionProfile, 308 const BranchSample &BranchCounters, ProfiledBinary *Binary) { 309 310 for (auto Entry : BranchCounters) { 311 uint64_t SourceOffset = Entry.first.first; 312 uint64_t TargetOffset = Entry.first.second; 313 uint64_t Count = Entry.second; 314 // Get the callee name by branch target if it's a call branch 315 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 316 Binary->getFuncFromStartOffset(TargetOffset)); 317 if (CalleeName.size() == 0) 318 continue; 319 320 // Record called target sample and its count 321 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 322 if (!LeafLoc.hasValue()) 323 continue; 324 FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset, 325 LeafLoc->second.Discriminator, 326 CalleeName, Count); 327 328 // Record head sample for called target(callee) 329 std::ostringstream OCalleeCtxStr; 330 if (ContextId.find(" @ ") != StringRef::npos) { 331 OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); 332 OCalleeCtxStr << " @ "; 333 } 334 OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str(); 335 336 FunctionSamples &CalleeProfile = 337 getFunctionProfileForContext(OCalleeCtxStr.str()); 338 assert(Count != 0 && "Unexpected zero weight branch"); 339 CalleeProfile.addHeadSamples(Count); 340 } 341 } 342 343 static FrameLocation getCallerContext(StringRef CalleeContext, 344 StringRef &CallerNameWithContext) { 345 StringRef CallerContext = CalleeContext.rsplit(" @ ").first; 346 CallerNameWithContext = CallerContext.rsplit(':').first; 347 auto ContextSplit = CallerContext.rsplit(" @ "); 348 StringRef CallerFrameStr = ContextSplit.second.size() == 0 349 ? ContextSplit.first 350 : ContextSplit.second; 351 FrameLocation LeafFrameLoc = {"", {0, 0}}; 352 StringRef Funcname; 353 SampleContext::decodeContextString(CallerFrameStr, Funcname, 354 LeafFrameLoc.second); 355 LeafFrameLoc.first = Funcname.str(); 356 return LeafFrameLoc; 357 } 358 359 void CSProfileGenerator::populateInferredFunctionSamples() { 360 for (const auto &Item : ProfileMap) { 361 const StringRef CalleeContext = Item.first(); 362 const FunctionSamples &CalleeProfile = Item.second; 363 364 // If we already have head sample counts, we must have value profile 365 // for call sites added already. Skip to avoid double counting. 366 if (CalleeProfile.getHeadSamples()) 367 continue; 368 // If we don't have context, nothing to do for caller's call site. 369 // This could happen for entry point function. 370 if (CalleeContext.find(" @ ") == StringRef::npos) 371 continue; 372 373 // Infer Caller's frame loc and context ID through string splitting 374 StringRef CallerContextId; 375 FrameLocation &&CallerLeafFrameLoc = 376 getCallerContext(CalleeContext, CallerContextId); 377 378 // It's possible that we haven't seen any sample directly in the caller, 379 // in which case CallerProfile will not exist. But we can't modify 380 // ProfileMap while iterating it. 381 // TODO: created function profile for those callers too 382 if (ProfileMap.find(CallerContextId) == ProfileMap.end()) 383 continue; 384 FunctionSamples &CallerProfile = ProfileMap[CallerContextId]; 385 386 // Since we don't have call count for inlined functions, we 387 // estimate it from inlinee's profile using entry body sample. 388 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 389 // If we don't have samples with location, use 1 to indicate live. 390 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 391 EstimatedCallCount = 1; 392 CallerProfile.addCalledTargetSamples( 393 CallerLeafFrameLoc.second.LineOffset, 394 CallerLeafFrameLoc.second.Discriminator, 395 CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount); 396 CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset, 397 CallerLeafFrameLoc.second.Discriminator, 398 EstimatedCallCount); 399 CallerProfile.addTotalSamples(EstimatedCallCount); 400 } 401 } 402 403 void CSProfileGenerator::postProcessProfiles() { 404 // Compute hot/cold threshold based on profile. This will be used for cold 405 // context profile merging/trimming. 406 computeSummaryAndThreshold(); 407 408 // Run global pre-inliner to adjust/merge context profile based on estimated 409 // inline decisions. 410 if (EnableCSPreInliner) 411 CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run(); 412 413 // Trim and merge cold context profile using cold threshold above; 414 SampleContextTrimmer(ProfileMap) 415 .trimAndMergeColdContextProfiles( 416 ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext, 417 CSProfColdContextFrameDepth); 418 } 419 420 void CSProfileGenerator::computeSummaryAndThreshold() { 421 // Update the default value of cold cutoff for llvm-profgen. 422 // Do it here because we don't want to change the global default, 423 // which would lead CS profile size too large. 424 if (!ProfileSummaryCutoffCold.getNumOccurrences()) 425 ProfileSummaryCutoffCold = 999000; 426 427 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 428 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 429 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 430 (Summary->getDetailedSummary())); 431 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 432 (Summary->getDetailedSummary())); 433 } 434 435 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 436 StringMap<FunctionSamples> &ProfileMap) { 437 if (std::error_code EC = Writer->write(ProfileMap)) 438 exitWithError(std::move(EC)); 439 } 440 441 // Helper function to extract context prefix string stack 442 // Extract context stack for reusing, leaf context stack will 443 // be added compressed while looking up function profile 444 static void 445 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack, 446 const SmallVectorImpl<const PseudoProbe *> &Probes, 447 ProfiledBinary *Binary) { 448 for (const auto *P : Probes) { 449 Binary->getInlineContextForProbe(P, ContextStrStack, true); 450 } 451 } 452 453 void PseudoProbeCSProfileGenerator::generateProfile() { 454 // Enable pseudo probe functionalities in SampleProf 455 FunctionSamples::ProfileIsProbeBased = true; 456 FunctionSamples::ProfileIsCS = true; 457 for (const auto &BI : BinarySampleCounters) { 458 ProfiledBinary *Binary = BI.first; 459 for (const auto &CI : BI.second) { 460 const ProbeBasedCtxKey *CtxKey = 461 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 462 SmallVector<std::string, 16> ContextStrStack; 463 extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); 464 // Fill in function body samples from probes, also infer caller's samples 465 // from callee's probe 466 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack, 467 Binary); 468 // Fill in boundary samples for a call probe 469 populateBoundarySamplesWithProbes(CI.second.BranchCounter, 470 ContextStrStack, Binary); 471 } 472 } 473 474 postProcessProfiles(); 475 } 476 477 void PseudoProbeCSProfileGenerator::extractProbesFromRange( 478 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 479 ProfiledBinary *Binary) { 480 RangeSample Ranges; 481 findDisjointRanges(Ranges, RangeCounter); 482 for (const auto &Range : Ranges) { 483 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 484 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 485 uint64_t Count = Range.second; 486 // Disjoint ranges have introduce zero-filled gap that 487 // doesn't belong to current context, filter them out. 488 if (Count == 0) 489 continue; 490 491 InstructionPointer IP(Binary, RangeBegin, true); 492 493 // Disjoint ranges may have range in the middle of two instr, 494 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 495 // can be Addr1+1 to Addr2-1. We should ignore such range. 496 if (IP.Address > RangeEnd) 497 continue; 498 499 while (IP.Address <= RangeEnd) { 500 const AddressProbesMap &Address2ProbesMap = 501 Binary->getAddress2ProbesMap(); 502 auto It = Address2ProbesMap.find(IP.Address); 503 if (It != Address2ProbesMap.end()) { 504 for (const auto &Probe : It->second) { 505 if (!Probe.isBlock()) 506 continue; 507 ProbeCounter[&Probe] += Count; 508 } 509 } 510 511 IP.advance(); 512 } 513 } 514 } 515 516 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( 517 const RangeSample &RangeCounter, 518 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) { 519 ProbeCounterMap ProbeCounter; 520 // Extract the top frame probes by looking up each address among the range in 521 // the Address2ProbeMap 522 extractProbesFromRange(RangeCounter, ProbeCounter, Binary); 523 std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples; 524 for (auto PI : ProbeCounter) { 525 const PseudoProbe *Probe = PI.first; 526 uint64_t Count = PI.second; 527 FunctionSamples &FunctionProfile = 528 getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary); 529 // Record the current frame and FunctionProfile whenever samples are 530 // collected for non-danglie probes. This is for reporting all of the 531 // zero count probes of the frame later. 532 FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile; 533 FunctionProfile.addBodySamplesForProbe(Probe->Index, Count); 534 FunctionProfile.addTotalSamples(Count); 535 if (Probe->isEntry()) { 536 FunctionProfile.addHeadSamples(Count); 537 // Look up for the caller's function profile 538 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 539 if (InlinerDesc != nullptr) { 540 // Since the context id will be compressed, we have to use callee's 541 // context id to infer caller's context id to ensure they share the 542 // same context prefix. 543 StringRef CalleeContextId = 544 FunctionProfile.getContext().getNameWithContext(); 545 StringRef CallerContextId; 546 FrameLocation &&CallerLeafFrameLoc = 547 getCallerContext(CalleeContextId, CallerContextId); 548 uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset; 549 assert(CallerIndex && 550 "Inferred caller's location index shouldn't be zero!"); 551 FunctionSamples &CallerProfile = 552 getFunctionProfileForContext(CallerContextId); 553 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 554 CallerProfile.addBodySamples(CallerIndex, 0, Count); 555 CallerProfile.addTotalSamples(Count); 556 CallerProfile.addCalledTargetSamples( 557 CallerIndex, 0, 558 FunctionProfile.getContext().getNameWithoutContext(), Count); 559 } 560 } 561 562 // Assign zero count for remaining probes without sample hits to 563 // differentiate from probes optimized away, of which the counts are unknown 564 // and will be inferred by the compiler. 565 for (auto &I : FrameSamples) { 566 auto *FunctionProfile = I.second; 567 for (auto *Probe : I.first->getProbes()) { 568 FunctionProfile->addBodySamplesForProbe(Probe->Index, 0); 569 } 570 } 571 } 572 } 573 574 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( 575 const BranchSample &BranchCounter, 576 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) { 577 for (auto BI : BranchCounter) { 578 uint64_t SourceOffset = BI.first.first; 579 uint64_t TargetOffset = BI.first.second; 580 uint64_t Count = BI.second; 581 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 582 const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress); 583 if (CallProbe == nullptr) 584 continue; 585 FunctionSamples &FunctionProfile = 586 getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary); 587 FunctionProfile.addBodySamples(CallProbe->Index, 0, Count); 588 FunctionProfile.addTotalSamples(Count); 589 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 590 Binary->getFuncFromStartOffset(TargetOffset)); 591 if (CalleeName.size() == 0) 592 continue; 593 FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName, 594 Count); 595 } 596 } 597 598 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 599 SmallVectorImpl<std::string> &ContextStrStack, 600 const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) { 601 assert(ContextStrStack.size() && "Profile context must have the leaf frame"); 602 // Compress the context string except for the leaf frame 603 std::string LeafFrame = ContextStrStack.back(); 604 ContextStrStack.pop_back(); 605 CSProfileGenerator::compressRecursionContext(ContextStrStack); 606 607 std::ostringstream OContextStr; 608 for (uint32_t I = 0; I < ContextStrStack.size(); I++) { 609 if (OContextStr.str().size()) 610 OContextStr << " @ "; 611 OContextStr << ContextStrStack[I]; 612 } 613 // For leaf inlined context with the top frame, we should strip off the top 614 // frame's probe id, like: 615 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 616 if (OContextStr.str().size()) 617 OContextStr << " @ "; 618 OContextStr << StringRef(LeafFrame).split(":").first.str(); 619 620 FunctionSamples &FunctionProile = 621 getFunctionProfileForContext(OContextStr.str(), WasLeafInlined); 622 FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash); 623 return FunctionProile; 624 } 625 626 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 627 SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe, 628 ProfiledBinary *Binary) { 629 // Explicitly copy the context for appending the leaf context 630 SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(), 631 ContextStrStack.end()); 632 Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true); 633 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID); 634 bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite(); 635 return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc, 636 WasLeafInlined); 637 } 638 639 } // end namespace sampleprof 640 } // end namespace llvm 641