1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "ProfileGenerator.h" 9 #include "ErrorHandling.h" 10 #include "PerfReader.h" 11 #include "ProfiledBinary.h" 12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 13 #include "llvm/ProfileData/ProfileCommon.h" 14 #include <algorithm> 15 #include <float.h> 16 #include <unordered_set> 17 #include <utility> 18 19 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 20 cl::Required, 21 cl::desc("Output profile file")); 22 static cl::alias OutputA("o", cl::desc("Alias for --output"), 23 cl::aliasopt(OutputFilename)); 24 25 static cl::opt<SampleProfileFormat> OutputFormat( 26 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 27 cl::values( 28 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 29 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 31 clEnumValN(SPF_Text, "text", "Text encoding"), 32 clEnumValN(SPF_GCC, "gcc", 33 "GCC encoding (only meaningful for -sample)"))); 34 35 cl::opt<bool> UseMD5( 36 "use-md5", cl::init(false), cl::Hidden, 37 cl::desc("Use md5 to represent function names in the output profile (only " 38 "meaningful for -extbinary)")); 39 40 static cl::opt<bool> PopulateProfileSymbolList( 41 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 43 44 static cl::opt<bool> FillZeroForAllFuncs( 45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 46 cl::desc("Attribute all functions' range with zero count " 47 "even it's not hit by any samples.")); 48 49 static cl::opt<int32_t, true> RecursionCompression( 50 "compress-recursion", 51 cl::desc("Compressing recursion by deduplicating adjacent frame " 52 "sequences up to the specified size. -1 means no size limit."), 53 cl::Hidden, 54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 55 56 static cl::opt<bool> 57 TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore, 58 cl::desc("If the total count of the profile is smaller " 59 "than threshold, it will be trimmed.")); 60 61 static cl::opt<bool> CSProfMergeColdContext( 62 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 63 cl::desc("If the total count of context profile is smaller than " 64 "the threshold, it will be merged into context-less base " 65 "profile.")); 66 67 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 68 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 70 "context-less base profile")); 71 72 static cl::opt<int, true> CSProfMaxContextDepth( 73 "csprof-max-context-depth", cl::ZeroOrMore, 74 cl::desc("Keep the last K contexts while merging profile. -1 means no " 75 "depth limit."), 76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 77 78 static cl::opt<double> HotFunctionDensityThreshold( 79 "hot-function-density-threshold", llvm::cl::init(1000), 80 llvm::cl::desc( 81 "specify density threshold for hot functions (default: 1000)"), 82 llvm::cl::Optional); 83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 84 llvm::cl::desc("show profile density details"), 85 llvm::cl::Optional); 86 87 static cl::opt<bool> UpdateTotalSamples( 88 "update-total-samples", llvm::cl::init(false), 89 llvm::cl::desc( 90 "Update total samples by accumulating all its body samples."), 91 llvm::cl::Optional); 92 93 extern cl::opt<int> ProfileSummaryCutoffHot; 94 95 static cl::opt<bool> GenCSNestedProfile( 96 "gen-cs-nested-profile", cl::Hidden, cl::init(true), 97 cl::desc("Generate nested function profiles for CSSPGO")); 98 99 using namespace llvm; 100 using namespace sampleprof; 101 102 namespace llvm { 103 namespace sampleprof { 104 105 // Initialize the MaxCompressionSize to -1 which means no size limit 106 int32_t CSProfileGenerator::MaxCompressionSize = -1; 107 108 int CSProfileGenerator::MaxContextDepth = -1; 109 110 bool ProfileGeneratorBase::UseFSDiscriminator = false; 111 112 std::unique_ptr<ProfileGeneratorBase> 113 ProfileGeneratorBase::create(ProfiledBinary *Binary, 114 const ContextSampleCounterMap *SampleCounters, 115 bool ProfileIsCSFlat) { 116 std::unique_ptr<ProfileGeneratorBase> Generator; 117 if (ProfileIsCSFlat) { 118 if (Binary->useFSDiscriminator()) 119 exitWithError("FS discriminator is not supported in CS profile."); 120 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 121 } else { 122 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 123 } 124 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 125 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 126 127 return Generator; 128 } 129 130 std::unique_ptr<ProfileGeneratorBase> 131 ProfileGeneratorBase::create(ProfiledBinary *Binary, 132 const SampleProfileMap &&Profiles, 133 bool ProfileIsCSFlat) { 134 std::unique_ptr<ProfileGeneratorBase> Generator; 135 if (ProfileIsCSFlat) { 136 if (Binary->useFSDiscriminator()) 137 exitWithError("FS discriminator is not supported in CS profile."); 138 Generator.reset(new CSProfileGenerator(Binary, std::move(Profiles))); 139 } else { 140 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); 141 } 142 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 143 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 144 145 return Generator; 146 } 147 148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 149 SampleProfileMap &ProfileMap) { 150 // Populate profile symbol list if extended binary format is used. 151 ProfileSymbolList SymbolList; 152 153 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 154 Binary->populateSymbolListFromDWARF(SymbolList); 155 Writer->setProfileSymbolList(&SymbolList); 156 } 157 158 if (std::error_code EC = Writer->write(ProfileMap)) 159 exitWithError(std::move(EC)); 160 } 161 162 void ProfileGeneratorBase::write() { 163 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 164 if (std::error_code EC = WriterOrErr.getError()) 165 exitWithError(EC, OutputFilename); 166 167 if (UseMD5) { 168 if (OutputFormat != SPF_Ext_Binary) 169 WithColor::warning() << "-use-md5 is ignored. Specify " 170 "--format=extbinary to enable it\n"; 171 else 172 WriterOrErr.get()->setUseMD5(); 173 } 174 175 write(std::move(WriterOrErr.get()), ProfileMap); 176 } 177 178 void ProfileGeneratorBase::showDensitySuggestion(double Density) { 179 if (Density == 0.0) 180 WithColor::warning() << "The --profile-summary-cutoff-hot option may be " 181 "set too low. Please check your command.\n"; 182 else if (Density < HotFunctionDensityThreshold) 183 WithColor::warning() 184 << "AutoFDO is estimated to optimize better with " 185 << format("%.1f", HotFunctionDensityThreshold / Density) 186 << "x more samples. Please consider increasing sampling rate or " 187 "profiling for longer duration to get more samples.\n"; 188 189 if (ShowDensity) 190 outs() << "Minimum profile density for hot functions with top " 191 << format("%.2f", 192 static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 193 10000) 194 << "% total samples: " << format("%.1f", Density) << "\n"; 195 } 196 197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, 198 uint64_t HotCntThreshold) { 199 double Density = DBL_MAX; 200 std::vector<const FunctionSamples *> HotFuncs; 201 for (auto &I : Profiles) { 202 auto &FuncSamples = I.second; 203 if (FuncSamples.getTotalSamples() < HotCntThreshold) 204 continue; 205 HotFuncs.emplace_back(&FuncSamples); 206 } 207 208 for (auto *FuncSamples : HotFuncs) { 209 auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); 210 if (!Func) 211 continue; 212 uint64_t FuncSize = Func->getFuncSize(); 213 if (FuncSize == 0) 214 continue; 215 Density = 216 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / 217 FuncSize); 218 } 219 220 return Density == DBL_MAX ? 0.0 : Density; 221 } 222 223 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 224 const RangeSample &Ranges) { 225 226 /* 227 Regions may overlap with each other. Using the boundary info, find all 228 disjoint ranges and their sample count. BoundaryPoint contains the count 229 multiple samples begin/end at this points. 230 231 |<--100-->| Sample1 232 |<------200------>| Sample2 233 A B C 234 235 In the example above, 236 Sample1 begins at A, ends at B, its value is 100. 237 Sample2 beings at A, ends at C, its value is 200. 238 For A, BeginCount is the sum of sample begins at A, which is 300 and no 239 samples ends at A, so EndCount is 0. 240 Then boundary points A, B, and C with begin/end counts are: 241 A: (300, 0) 242 B: (0, 100) 243 C: (0, 200) 244 */ 245 struct BoundaryPoint { 246 // Sum of sample counts beginning at this point 247 uint64_t BeginCount = UINT64_MAX; 248 // Sum of sample counts ending at this point 249 uint64_t EndCount = UINT64_MAX; 250 // Is the begin point of a zero range. 251 bool IsZeroRangeBegin = false; 252 // Is the end point of a zero range. 253 bool IsZeroRangeEnd = false; 254 255 void addBeginCount(uint64_t Count) { 256 if (BeginCount == UINT64_MAX) 257 BeginCount = 0; 258 BeginCount += Count; 259 } 260 261 void addEndCount(uint64_t Count) { 262 if (EndCount == UINT64_MAX) 263 EndCount = 0; 264 EndCount += Count; 265 } 266 }; 267 268 /* 269 For the above example. With boundary points, follwing logic finds two 270 disjoint region of 271 272 [A,B]: 300 273 [B+1,C]: 200 274 275 If there is a boundary point that both begin and end, the point itself 276 becomes a separate disjoint region. For example, if we have original 277 ranges of 278 279 |<--- 100 --->| 280 |<--- 200 --->| 281 A B C 282 283 there are three boundary points with their begin/end counts of 284 285 A: (100, 0) 286 B: (200, 100) 287 C: (0, 200) 288 289 the disjoint ranges would be 290 291 [A, B-1]: 100 292 [B, B]: 300 293 [B+1, C]: 200. 294 295 Example for zero value range: 296 297 |<--- 100 --->| 298 |<--- 200 --->| 299 |<--------------- 0 ----------------->| 300 A B C D E F 301 302 [A, B-1] : 0 303 [B, C] : 100 304 [C+1, D-1]: 0 305 [D, E] : 200 306 [E+1, F] : 0 307 */ 308 std::map<uint64_t, BoundaryPoint> Boundaries; 309 310 for (const auto &Item : Ranges) { 311 assert(Item.first.first <= Item.first.second && 312 "Invalid instruction range"); 313 auto &BeginPoint = Boundaries[Item.first.first]; 314 auto &EndPoint = Boundaries[Item.first.second]; 315 uint64_t Count = Item.second; 316 317 BeginPoint.addBeginCount(Count); 318 EndPoint.addEndCount(Count); 319 if (Count == 0) { 320 BeginPoint.IsZeroRangeBegin = true; 321 EndPoint.IsZeroRangeEnd = true; 322 } 323 } 324 325 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 326 // and the next valid address 327 uint64_t BeginAddress = UINT64_MAX; 328 int ZeroRangeDepth = 0; 329 uint64_t Count = 0; 330 for (const auto &Item : Boundaries) { 331 uint64_t Address = Item.first; 332 const BoundaryPoint &Point = Item.second; 333 if (Point.BeginCount != UINT64_MAX) { 334 if (BeginAddress != UINT64_MAX) 335 DisjointRanges[{BeginAddress, Address - 1}] = Count; 336 Count += Point.BeginCount; 337 BeginAddress = Address; 338 ZeroRangeDepth += Point.IsZeroRangeBegin; 339 } 340 if (Point.EndCount != UINT64_MAX) { 341 assert((BeginAddress != UINT64_MAX) && 342 "First boundary point cannot be 'end' point"); 343 DisjointRanges[{BeginAddress, Address}] = Count; 344 assert(Count >= Point.EndCount && "Mismatched live ranges"); 345 Count -= Point.EndCount; 346 BeginAddress = Address + 1; 347 ZeroRangeDepth -= Point.IsZeroRangeEnd; 348 // If the remaining count is zero and it's no longer in a zero range, this 349 // means we consume all the ranges before, thus mark BeginAddress as 350 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 351 // [<---- 10 ---->] 352 // [<---- 20 ---->] 353 // A B C D 354 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 355 // have the [B+1, C-1] zero range. 356 if (Count == 0 && ZeroRangeDepth == 0) 357 BeginAddress = UINT64_MAX; 358 } 359 } 360 } 361 362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 363 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 364 uint64_t Count) { 365 // Use the maximum count of samples with same line location 366 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 367 368 // Use duplication factor to compensated for loop unroll/vectorization. 369 // Note that this is only needed when we're taking MAX of the counts at 370 // the location instead of SUM. 371 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 372 373 ErrorOr<uint64_t> R = 374 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 375 376 uint64_t PreviousCount = R ? R.get() : 0; 377 if (PreviousCount <= Count) { 378 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 379 Count - PreviousCount); 380 } 381 } 382 383 void ProfileGeneratorBase::updateTotalSamples() { 384 if (!UpdateTotalSamples) 385 return; 386 387 for (auto &Item : ProfileMap) { 388 FunctionSamples &FunctionProfile = Item.second; 389 FunctionProfile.updateTotalSamples(); 390 } 391 } 392 393 void ProfileGeneratorBase::collectProfiledFunctions() { 394 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 395 if (SampleCounters) { 396 // Go through all the stacks, ranges and branches in sample counters, use 397 // the start of the range to look up the function it belongs and record the 398 // function. 399 for (const auto &CI : *SampleCounters) { 400 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { 401 for (auto Addr : CtxKey->Context) { 402 if (FuncRange *FRange = Binary->findFuncRangeForOffset( 403 Binary->virtualAddrToOffset(Addr))) 404 ProfiledFunctions.insert(FRange->Func); 405 } 406 } 407 408 for (auto Item : CI.second.RangeCounter) { 409 uint64_t StartOffset = Item.first.first; 410 if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) 411 ProfiledFunctions.insert(FRange->Func); 412 } 413 414 for (auto Item : CI.second.BranchCounter) { 415 uint64_t SourceOffset = Item.first.first; 416 uint64_t TargetOffset = Item.first.first; 417 if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset)) 418 ProfiledFunctions.insert(FRange->Func); 419 if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset)) 420 ProfiledFunctions.insert(FRange->Func); 421 } 422 } 423 } else { 424 // This is for the case the input is a llvm sample profile. 425 for (const auto &FS : ProfileMap) { 426 if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) 427 ProfiledFunctions.insert(Func); 428 } 429 } 430 431 Binary->setProfiledFunctions(ProfiledFunctions); 432 } 433 434 FunctionSamples & 435 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 436 SampleContext Context(FuncName); 437 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 438 if (Ret.second) { 439 FunctionSamples &FProfile = Ret.first->second; 440 FProfile.setContext(Context); 441 } 442 return Ret.first->second; 443 } 444 445 void ProfileGenerator::generateProfile() { 446 collectProfiledFunctions(); 447 448 if (Binary->usePseudoProbes()) 449 Binary->decodePseudoProbe(); 450 451 if (SampleCounters) { 452 if (Binary->usePseudoProbes()) { 453 generateProbeBasedProfile(); 454 } else { 455 generateLineNumBasedProfile(); 456 } 457 } 458 459 postProcessProfiles(); 460 } 461 462 void ProfileGenerator::postProcessProfiles() { 463 computeSummaryAndThreshold(); 464 trimColdProfiles(ProfileMap, ColdCountThreshold); 465 calculateAndShowDensity(ProfileMap); 466 } 467 468 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, 469 uint64_t ColdCntThreshold) { 470 if (!TrimColdProfile) 471 return; 472 473 // Move cold profiles into a tmp container. 474 std::vector<SampleContext> ColdProfiles; 475 for (const auto &I : ProfileMap) { 476 if (I.second.getTotalSamples() < ColdCntThreshold) 477 ColdProfiles.emplace_back(I.first); 478 } 479 480 // Remove the cold profile from ProfileMap. 481 for (const auto &I : ColdProfiles) 482 ProfileMap.erase(I); 483 } 484 485 void ProfileGenerator::generateLineNumBasedProfile() { 486 assert(SampleCounters->size() == 1 && 487 "Must have one entry for profile generation."); 488 const SampleCounter &SC = SampleCounters->begin()->second; 489 // Fill in function body samples 490 populateBodySamplesForAllFunctions(SC.RangeCounter); 491 // Fill in boundary sample counts as well as call site samples for calls 492 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 493 494 updateTotalSamples(); 495 } 496 497 void ProfileGenerator::generateProbeBasedProfile() { 498 assert(SampleCounters->size() == 1 && 499 "Must have one entry for profile generation."); 500 // Enable pseudo probe functionalities in SampleProf 501 FunctionSamples::ProfileIsProbeBased = true; 502 const SampleCounter &SC = SampleCounters->begin()->second; 503 // Fill in function body samples 504 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); 505 // Fill in boundary sample counts as well as call site samples for calls 506 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); 507 508 updateTotalSamples(); 509 } 510 511 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( 512 const RangeSample &RangeCounter) { 513 ProbeCounterMap ProbeCounter; 514 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it 515 // inside extractProbesFromRange. 516 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, 517 false); 518 519 for (const auto &PI : ProbeCounter) { 520 const MCDecodedPseudoProbe *Probe = PI.first; 521 uint64_t Count = PI.second; 522 SampleContextFrameVector FrameVec; 523 Binary->getInlineContextForProbe(Probe, FrameVec, true); 524 FunctionSamples &FunctionProfile = 525 getLeafProfileAndAddTotalSamples(FrameVec, Count); 526 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 527 if (Probe->isEntry()) 528 FunctionProfile.addHeadSamples(Count); 529 } 530 } 531 532 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( 533 const BranchSample &BranchCounters) { 534 for (const auto &Entry : BranchCounters) { 535 uint64_t SourceOffset = Entry.first.first; 536 uint64_t TargetOffset = Entry.first.second; 537 uint64_t Count = Entry.second; 538 assert(Count != 0 && "Unexpected zero weight branch"); 539 540 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 541 if (CalleeName.size() == 0) 542 continue; 543 544 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 545 const MCDecodedPseudoProbe *CallProbe = 546 Binary->getCallProbeForAddr(SourceAddress); 547 if (CallProbe == nullptr) 548 continue; 549 550 // Record called target sample and its count. 551 SampleContextFrameVector FrameVec; 552 Binary->getInlineContextForProbe(CallProbe, FrameVec, true); 553 554 if (!FrameVec.empty()) { 555 FunctionSamples &FunctionProfile = 556 getLeafProfileAndAddTotalSamples(FrameVec, 0); 557 FunctionProfile.addCalledTargetSamples( 558 FrameVec.back().Location.LineOffset, 0, CalleeName, Count); 559 } 560 } 561 } 562 563 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 564 const SampleContextFrameVector &FrameVec, uint64_t Count) { 565 // Get top level profile 566 FunctionSamples *FunctionProfile = 567 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 568 FunctionProfile->addTotalSamples(Count); 569 if (Binary->usePseudoProbes()) { 570 const auto *FuncDesc = Binary->getFuncDescForGUID( 571 Function::getGUID(FunctionProfile->getName())); 572 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 573 } 574 575 for (size_t I = 1; I < FrameVec.size(); I++) { 576 LineLocation Callsite( 577 FrameVec[I - 1].Location.LineOffset, 578 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 579 FunctionSamplesMap &SamplesMap = 580 FunctionProfile->functionSamplesAt(Callsite); 581 auto Ret = 582 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 583 if (Ret.second) { 584 SampleContext Context(FrameVec[I].FuncName); 585 Ret.first->second.setContext(Context); 586 } 587 FunctionProfile = &Ret.first->second; 588 FunctionProfile->addTotalSamples(Count); 589 if (Binary->usePseudoProbes()) { 590 const auto *FuncDesc = Binary->getFuncDescForGUID( 591 Function::getGUID(FunctionProfile->getName())); 592 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 593 } 594 } 595 596 return *FunctionProfile; 597 } 598 599 RangeSample 600 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 601 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 602 if (FillZeroForAllFuncs) { 603 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 604 for (auto &R : FuncI.second.Ranges) { 605 Ranges[{R.first, R.second - 1}] += 0; 606 } 607 } 608 } else { 609 // For each range, we search for all ranges of the function it belongs to 610 // and initialize it with zero count, so it remains zero if doesn't hit any 611 // samples. This is to be consistent with compiler that interpret zero count 612 // as unexecuted(cold). 613 for (const auto &I : RangeCounter) { 614 uint64_t StartOffset = I.first.first; 615 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 616 Ranges[{Range.first, Range.second - 1}] += 0; 617 } 618 } 619 RangeSample DisjointRanges; 620 findDisjointRanges(DisjointRanges, Ranges); 621 return DisjointRanges; 622 } 623 624 void ProfileGenerator::populateBodySamplesForAllFunctions( 625 const RangeSample &RangeCounter) { 626 for (const auto &Range : preprocessRangeCounter(RangeCounter)) { 627 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 628 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 629 uint64_t Count = Range.second; 630 631 InstructionPointer IP(Binary, RangeBegin, true); 632 // Disjoint ranges may have range in the middle of two instr, 633 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 634 // can be Addr1+1 to Addr2-1. We should ignore such range. 635 if (IP.Address > RangeEnd) 636 continue; 637 638 do { 639 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 640 const SampleContextFrameVector &FrameVec = 641 Binary->getFrameLocationStack(Offset); 642 if (!FrameVec.empty()) { 643 // FIXME: As accumulating total count per instruction caused some 644 // regression, we changed to accumulate total count per byte as a 645 // workaround. Tuning hotness threshold on the compiler side might be 646 // necessary in the future. 647 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( 648 FrameVec, Count * Binary->getInstSize(Offset)); 649 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 650 Count); 651 } 652 } while (IP.advance() && IP.Address <= RangeEnd); 653 } 654 } 655 656 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { 657 // Get the function range by branch target if it's a call branch. 658 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); 659 660 // We won't accumulate sample count for a range whose start is not the real 661 // function entry such as outlined function or inner labels. 662 if (!FRange || !FRange->IsFuncEntry) 663 return StringRef(); 664 665 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 666 } 667 668 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 669 const BranchSample &BranchCounters) { 670 for (const auto &Entry : BranchCounters) { 671 uint64_t SourceOffset = Entry.first.first; 672 uint64_t TargetOffset = Entry.first.second; 673 uint64_t Count = Entry.second; 674 assert(Count != 0 && "Unexpected zero weight branch"); 675 676 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 677 if (CalleeName.size() == 0) 678 continue; 679 // Record called target sample and its count. 680 const SampleContextFrameVector &FrameVec = 681 Binary->getFrameLocationStack(SourceOffset); 682 if (!FrameVec.empty()) { 683 FunctionSamples &FunctionProfile = 684 getLeafProfileAndAddTotalSamples(FrameVec, 0); 685 FunctionProfile.addCalledTargetSamples( 686 FrameVec.back().Location.LineOffset, 687 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 688 CalleeName, Count); 689 } 690 // Add head samples for callee. 691 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 692 CalleeProfile.addHeadSamples(Count); 693 } 694 } 695 696 void ProfileGeneratorBase::calculateAndShowDensity( 697 const SampleProfileMap &Profiles) { 698 double Density = calculateDensity(Profiles, HotCountThreshold); 699 showDensitySuggestion(Density); 700 } 701 702 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( 703 const SampleContextFrameVector &Context, bool WasLeafInlined) { 704 auto I = ProfileMap.find(SampleContext(Context)); 705 if (I == ProfileMap.end()) { 706 // Save the new context for future references. 707 SampleContextFrames NewContext = *Contexts.insert(Context).first; 708 SampleContext FContext(NewContext, RawContext); 709 auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); 710 if (WasLeafInlined) 711 FContext.setAttribute(ContextWasInlined); 712 FunctionSamples &FProfile = Ret.first->second; 713 FProfile.setContext(FContext); 714 return Ret.first->second; 715 } else { 716 // Update ContextWasInlined attribute for existing contexts. 717 // The current function can be called in two ways: 718 // - when processing a probe of the current frame 719 // - when processing the entry probe of an inlinee's frame, which 720 // is then used to update the callsite count of the current frame. 721 // The two can happen in any order, hence here we are making sure 722 // `ContextWasInlined` is always set as expected. 723 // TODO: Note that the former does not always happen if no probes of the 724 // current frame has samples, and if the latter happens, we could lose the 725 // attribute. This should be fixed. 726 if (WasLeafInlined) 727 I->second.getContext().setAttribute(ContextWasInlined); 728 } 729 730 return I->second; 731 } 732 733 void CSProfileGenerator::generateProfile() { 734 FunctionSamples::ProfileIsCSFlat = true; 735 736 collectProfiledFunctions(); 737 738 if (Binary->usePseudoProbes()) 739 Binary->decodePseudoProbe(); 740 741 if (SampleCounters) { 742 if (Binary->usePseudoProbes()) { 743 generateProbeBasedProfile(); 744 } else { 745 generateLineNumBasedProfile(); 746 } 747 } 748 749 if (Binary->getTrackFuncContextSize()) 750 computeSizeForProfiledFunctions(); 751 752 postProcessProfiles(); 753 } 754 755 void CSProfileGenerator::computeSizeForProfiledFunctions() { 756 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 757 for (auto *Func : Binary->getProfiledFunctions()) 758 Binary->computeInlinedContextSizeForFunc(Func); 759 760 // Flush the symbolizer to save memory. 761 Binary->flushSymbolizer(); 762 } 763 764 void CSProfileGenerator::generateLineNumBasedProfile() { 765 for (const auto &CI : *SampleCounters) { 766 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); 767 768 FunctionSamples *FunctionProfile = nullptr; 769 // Sample context will be empty if the jump is an external-to-internal call 770 // pattern, the head samples should be added for the internal function. 771 if (!CtxKey->Context.empty()) { 772 // Get or create function profile for the range 773 FunctionProfile = &getFunctionProfileForContext(CtxKey->Context, 774 CtxKey->WasLeafInlined); 775 // Fill in function body samples 776 populateBodySamplesForFunction(*FunctionProfile, CI.second.RangeCounter); 777 } 778 // Fill in boundary sample counts as well as call site samples for calls 779 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, 780 CI.second.BranchCounter); 781 } 782 // Fill in call site value sample for inlined calls and also use context to 783 // infer missing samples. Since we don't have call count for inlined 784 // functions, we estimate it from inlinee's profile using the entry of the 785 // body sample. 786 populateInferredFunctionSamples(); 787 788 updateTotalSamples(); 789 } 790 791 void CSProfileGenerator::populateBodySamplesForFunction( 792 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 793 // Compute disjoint ranges first, so we can use MAX 794 // for calculating count for each location. 795 RangeSample Ranges; 796 findDisjointRanges(Ranges, RangeCounter); 797 for (const auto &Range : Ranges) { 798 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 799 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 800 uint64_t Count = Range.second; 801 // Disjoint ranges have introduce zero-filled gap that 802 // doesn't belong to current context, filter them out. 803 if (Count == 0) 804 continue; 805 806 InstructionPointer IP(Binary, RangeBegin, true); 807 // Disjoint ranges may have range in the middle of two instr, 808 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 809 // can be Addr1+1 to Addr2-1. We should ignore such range. 810 if (IP.Address > RangeEnd) 811 continue; 812 813 do { 814 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 815 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 816 if (LeafLoc.hasValue()) { 817 // Recording body sample for this specific context 818 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 819 FunctionProfile.addTotalSamples(Count); 820 } 821 } while (IP.advance() && IP.Address <= RangeEnd); 822 } 823 } 824 825 void CSProfileGenerator::populateBoundarySamplesForFunction( 826 SampleContextFrames ContextId, FunctionSamples *CallerProfile, 827 const BranchSample &BranchCounters) { 828 829 for (const auto &Entry : BranchCounters) { 830 uint64_t SourceOffset = Entry.first.first; 831 uint64_t TargetOffset = Entry.first.second; 832 uint64_t Count = Entry.second; 833 assert(Count != 0 && "Unexpected zero weight branch"); 834 835 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 836 if (CalleeName.size() == 0) 837 continue; 838 839 SampleContextFrameVector CalleeCtx; 840 if (CallerProfile) { 841 assert(!ContextId.empty() && 842 "CallerProfile is null only if ContextId is empty"); 843 // Record called target sample and its count 844 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 845 if (LeafLoc.hasValue()) { 846 CallerProfile->addCalledTargetSamples( 847 LeafLoc->Location.LineOffset, 848 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 849 Count); 850 851 // Record head sample for called target(callee) 852 CalleeCtx.append(ContextId.begin(), ContextId.end()); 853 assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && 854 "Leaf function name doesn't match"); 855 CalleeCtx.back() = *LeafLoc; 856 } 857 } 858 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); 859 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); 860 CalleeProfile.addHeadSamples(Count); 861 } 862 } 863 864 static SampleContextFrame 865 getCallerContext(SampleContextFrames CalleeContext, 866 SampleContextFrameVector &CallerContext) { 867 assert(CalleeContext.size() > 1 && "Unexpected empty context"); 868 CalleeContext = CalleeContext.drop_back(); 869 CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); 870 SampleContextFrame CallerFrame = CallerContext.back(); 871 CallerContext.back().Location = LineLocation(0, 0); 872 return CallerFrame; 873 } 874 875 void CSProfileGenerator::populateInferredFunctionSamples() { 876 for (const auto &Item : ProfileMap) { 877 const auto &CalleeContext = Item.first; 878 const FunctionSamples &CalleeProfile = Item.second; 879 880 // If we already have head sample counts, we must have value profile 881 // for call sites added already. Skip to avoid double counting. 882 if (CalleeProfile.getHeadSamples()) 883 continue; 884 // If we don't have context, nothing to do for caller's call site. 885 // This could happen for entry point function. 886 if (CalleeContext.isBaseContext()) 887 continue; 888 889 // Infer Caller's frame loc and context ID through string splitting 890 SampleContextFrameVector CallerContextId; 891 SampleContextFrame &&CallerLeafFrameLoc = 892 getCallerContext(CalleeContext.getContextFrames(), CallerContextId); 893 SampleContextFrames CallerContext(CallerContextId); 894 895 // It's possible that we haven't seen any sample directly in the caller, 896 // in which case CallerProfile will not exist. But we can't modify 897 // ProfileMap while iterating it. 898 // TODO: created function profile for those callers too 899 if (ProfileMap.find(CallerContext) == ProfileMap.end()) 900 continue; 901 FunctionSamples &CallerProfile = ProfileMap[CallerContext]; 902 903 // Since we don't have call count for inlined functions, we 904 // estimate it from inlinee's profile using entry body sample. 905 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 906 // If we don't have samples with location, use 1 to indicate live. 907 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 908 EstimatedCallCount = 1; 909 CallerProfile.addCalledTargetSamples( 910 CallerLeafFrameLoc.Location.LineOffset, 911 CallerLeafFrameLoc.Location.Discriminator, 912 CalleeProfile.getContext().getName(), EstimatedCallCount); 913 CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, 914 CallerLeafFrameLoc.Location.Discriminator, 915 EstimatedCallCount); 916 CallerProfile.addTotalSamples(EstimatedCallCount); 917 } 918 } 919 920 void CSProfileGenerator::postProcessProfiles() { 921 // Compute hot/cold threshold based on profile. This will be used for cold 922 // context profile merging/trimming. 923 computeSummaryAndThreshold(); 924 925 // Run global pre-inliner to adjust/merge context profile based on estimated 926 // inline decisions. 927 if (EnableCSPreInliner) { 928 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) 929 .run(); 930 // Turn off the profile merger by default unless it is explicitly enabled. 931 if (!CSProfMergeColdContext.getNumOccurrences()) 932 CSProfMergeColdContext = false; 933 } 934 935 // Trim and merge cold context profile using cold threshold above. 936 if (TrimColdProfile || CSProfMergeColdContext) { 937 SampleContextTrimmer(ProfileMap) 938 .trimAndMergeColdContextProfiles( 939 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, 940 CSProfMaxColdContextDepth, EnableCSPreInliner); 941 } 942 943 // Merge function samples of CS profile to calculate profile density. 944 sampleprof::SampleProfileMap ContextLessProfiles; 945 for (const auto &I : ProfileMap) { 946 ContextLessProfiles[I.second.getName()].merge(I.second); 947 } 948 949 calculateAndShowDensity(ContextLessProfiles); 950 if (GenCSNestedProfile) { 951 CSProfileConverter CSConverter(ProfileMap); 952 CSConverter.convertProfiles(); 953 FunctionSamples::ProfileIsCSFlat = false; 954 FunctionSamples::ProfileIsCSNested = EnableCSPreInliner; 955 } 956 } 957 958 void ProfileGeneratorBase::computeSummaryAndThreshold() { 959 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 960 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 961 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 962 (Summary->getDetailedSummary())); 963 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 964 (Summary->getDetailedSummary())); 965 } 966 967 void ProfileGeneratorBase::extractProbesFromRange( 968 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 969 bool FindDisjointRanges) { 970 const RangeSample *PRanges = &RangeCounter; 971 RangeSample Ranges; 972 if (FindDisjointRanges) { 973 findDisjointRanges(Ranges, RangeCounter); 974 PRanges = &Ranges; 975 } 976 977 for (const auto &Range : *PRanges) { 978 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 979 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 980 uint64_t Count = Range.second; 981 982 InstructionPointer IP(Binary, RangeBegin, true); 983 // Disjoint ranges may have range in the middle of two instr, 984 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 985 // can be Addr1+1 to Addr2-1. We should ignore such range. 986 if (IP.Address > RangeEnd) 987 continue; 988 989 do { 990 const AddressProbesMap &Address2ProbesMap = 991 Binary->getAddress2ProbesMap(); 992 auto It = Address2ProbesMap.find(IP.Address); 993 if (It != Address2ProbesMap.end()) { 994 for (const auto &Probe : It->second) { 995 ProbeCounter[&Probe] += Count; 996 } 997 } 998 } while (IP.advance() && IP.Address <= RangeEnd); 999 } 1000 } 1001 1002 static void 1003 extractPrefixContextStack(SampleContextFrameVector &ContextStack, 1004 const SmallVectorImpl<uint64_t> &Addresses, 1005 ProfiledBinary *Binary) { 1006 SmallVector<const MCDecodedPseudoProbe *, 16> Probes; 1007 for (auto Addr : reverse(Addresses)) { 1008 const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr); 1009 // These could be the cases when a probe is not found at a calliste. Cutting 1010 // off the context from here since the inliner will not know how to consume 1011 // a context with unknown callsites. 1012 // 1. for functions that are not sampled when 1013 // --decode-probe-for-profiled-functions-only is on. 1014 // 2. for a merged callsite. Callsite merging may cause the loss of original 1015 // probe IDs. 1016 // 3. for an external callsite. 1017 if (!CallProbe) 1018 break; 1019 Probes.push_back(CallProbe); 1020 } 1021 1022 std::reverse(Probes.begin(), Probes.end()); 1023 1024 // Extract context stack for reusing, leaf context stack will be added 1025 // compressed while looking up function profile. 1026 for (const auto *P : Probes) { 1027 Binary->getInlineContextForProbe(P, ContextStack, true); 1028 } 1029 } 1030 1031 void CSProfileGenerator::generateProbeBasedProfile() { 1032 // Enable pseudo probe functionalities in SampleProf 1033 FunctionSamples::ProfileIsProbeBased = true; 1034 for (const auto &CI : *SampleCounters) { 1035 const AddrBasedCtxKey *CtxKey = 1036 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()); 1037 SampleContextFrameVector ContextStack; 1038 extractPrefixContextStack(ContextStack, CtxKey->Context, Binary); 1039 // Fill in function body samples from probes, also infer caller's samples 1040 // from callee's probe 1041 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 1042 // Fill in boundary samples for a call probe 1043 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 1044 } 1045 } 1046 1047 void CSProfileGenerator::populateBodySamplesWithProbes( 1048 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 1049 ProbeCounterMap ProbeCounter; 1050 // Extract the top frame probes by looking up each address among the range in 1051 // the Address2ProbeMap 1052 extractProbesFromRange(RangeCounter, ProbeCounter); 1053 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 1054 std::unordered_set<FunctionSamples *>> 1055 FrameSamples; 1056 for (const auto &PI : ProbeCounter) { 1057 const MCDecodedPseudoProbe *Probe = PI.first; 1058 uint64_t Count = PI.second; 1059 // Disjoint ranges have introduce zero-filled gap that 1060 // doesn't belong to current context, filter them out. 1061 if (!Probe->isBlock() || Count == 0) 1062 continue; 1063 FunctionSamples &FunctionProfile = 1064 getFunctionProfileForLeafProbe(ContextStack, Probe); 1065 // Record the current frame and FunctionProfile whenever samples are 1066 // collected for non-danglie probes. This is for reporting all of the 1067 // zero count probes of the frame later. 1068 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 1069 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 1070 FunctionProfile.addTotalSamples(Count); 1071 if (Probe->isEntry()) { 1072 FunctionProfile.addHeadSamples(Count); 1073 // Look up for the caller's function profile 1074 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 1075 SampleContextFrames CalleeContextId = 1076 FunctionProfile.getContext().getContextFrames(); 1077 if (InlinerDesc != nullptr && CalleeContextId.size() > 1) { 1078 // Since the context id will be compressed, we have to use callee's 1079 // context id to infer caller's context id to ensure they share the 1080 // same context prefix. 1081 SampleContextFrameVector CallerContextId; 1082 SampleContextFrame &&CallerLeafFrameLoc = 1083 getCallerContext(CalleeContextId, CallerContextId); 1084 uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; 1085 assert(CallerIndex && 1086 "Inferred caller's location index shouldn't be zero!"); 1087 FunctionSamples &CallerProfile = 1088 getFunctionProfileForContext(CallerContextId); 1089 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 1090 CallerProfile.addBodySamples(CallerIndex, 0, Count); 1091 CallerProfile.addTotalSamples(Count); 1092 CallerProfile.addCalledTargetSamples( 1093 CallerIndex, 0, FunctionProfile.getContext().getName(), Count); 1094 } 1095 } 1096 } 1097 1098 // Assign zero count for remaining probes without sample hits to 1099 // differentiate from probes optimized away, of which the counts are unknown 1100 // and will be inferred by the compiler. 1101 for (auto &I : FrameSamples) { 1102 for (auto *FunctionProfile : I.second) { 1103 for (auto *Probe : I.first->getProbes()) { 1104 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 1105 } 1106 } 1107 } 1108 } 1109 1110 void CSProfileGenerator::populateBoundarySamplesWithProbes( 1111 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 1112 for (const auto &BI : BranchCounter) { 1113 uint64_t SourceOffset = BI.first.first; 1114 uint64_t TargetOffset = BI.first.second; 1115 uint64_t Count = BI.second; 1116 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 1117 const MCDecodedPseudoProbe *CallProbe = 1118 Binary->getCallProbeForAddr(SourceAddress); 1119 if (CallProbe == nullptr) 1120 continue; 1121 FunctionSamples &FunctionProfile = 1122 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 1123 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 1124 FunctionProfile.addTotalSamples(Count); 1125 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 1126 if (CalleeName.size() == 0) 1127 continue; 1128 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 1129 Count); 1130 } 1131 } 1132 1133 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 1134 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 1135 1136 // Explicitly copy the context for appending the leaf context 1137 SampleContextFrameVector NewContextStack(ContextStack.begin(), 1138 ContextStack.end()); 1139 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 1140 // For leaf inlined context with the top frame, we should strip off the top 1141 // frame's probe id, like: 1142 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 1143 auto LeafFrame = NewContextStack.back(); 1144 LeafFrame.Location = LineLocation(0, 0); 1145 NewContextStack.pop_back(); 1146 // Compress the context string except for the leaf frame 1147 CSProfileGenerator::compressRecursionContext(NewContextStack); 1148 CSProfileGenerator::trimContext(NewContextStack); 1149 NewContextStack.push_back(LeafFrame); 1150 1151 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 1152 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 1153 FunctionSamples &FunctionProile = 1154 getFunctionProfileForContext(NewContextStack, WasLeafInlined); 1155 FunctionProile.setFunctionHash(FuncDesc->FuncHash); 1156 return FunctionProile; 1157 } 1158 1159 } // end namespace sampleprof 1160 } // end namespace llvm 1161