1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "ProfileGenerator.h" 9 #include "ErrorHandling.h" 10 #include "PerfReader.h" 11 #include "ProfiledBinary.h" 12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 13 #include "llvm/ProfileData/ProfileCommon.h" 14 #include <algorithm> 15 #include <float.h> 16 #include <unordered_set> 17 #include <utility> 18 19 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 20 cl::Required, 21 cl::desc("Output profile file")); 22 static cl::alias OutputA("o", cl::desc("Alias for --output"), 23 cl::aliasopt(OutputFilename)); 24 25 static cl::opt<SampleProfileFormat> OutputFormat( 26 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 27 cl::values( 28 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 29 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 31 clEnumValN(SPF_Text, "text", "Text encoding"), 32 clEnumValN(SPF_GCC, "gcc", 33 "GCC encoding (only meaningful for -sample)"))); 34 35 cl::opt<bool> UseMD5( 36 "use-md5", cl::init(false), cl::Hidden, 37 cl::desc("Use md5 to represent function names in the output profile (only " 38 "meaningful for -extbinary)")); 39 40 static cl::opt<bool> PopulateProfileSymbolList( 41 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 43 44 static cl::opt<bool> FillZeroForAllFuncs( 45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 46 cl::desc("Attribute all functions' range with zero count " 47 "even it's not hit by any samples.")); 48 49 static cl::opt<int32_t, true> RecursionCompression( 50 "compress-recursion", 51 cl::desc("Compressing recursion by deduplicating adjacent frame " 52 "sequences up to the specified size. -1 means no size limit."), 53 cl::Hidden, 54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 55 56 static cl::opt<bool> 57 TrimColdProfile("trim-cold-profile", 58 cl::desc("If the total count of the profile is smaller " 59 "than threshold, it will be trimmed.")); 60 61 static cl::opt<bool> CSProfMergeColdContext( 62 "csprof-merge-cold-context", cl::init(true), 63 cl::desc("If the total count of context profile is smaller than " 64 "the threshold, it will be merged into context-less base " 65 "profile.")); 66 67 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 68 "csprof-max-cold-context-depth", cl::init(1), 69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 70 "context-less base profile")); 71 72 static cl::opt<int, true> CSProfMaxContextDepth( 73 "csprof-max-context-depth", 74 cl::desc("Keep the last K contexts while merging profile. -1 means no " 75 "depth limit."), 76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 77 78 static cl::opt<double> HotFunctionDensityThreshold( 79 "hot-function-density-threshold", llvm::cl::init(1000), 80 llvm::cl::desc( 81 "specify density threshold for hot functions (default: 1000)"), 82 llvm::cl::Optional); 83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 84 llvm::cl::desc("show profile density details"), 85 llvm::cl::Optional); 86 87 static cl::opt<bool> UpdateTotalSamples( 88 "update-total-samples", llvm::cl::init(false), 89 llvm::cl::desc( 90 "Update total samples by accumulating all its body samples."), 91 llvm::cl::Optional); 92 93 extern cl::opt<int> ProfileSummaryCutoffHot; 94 95 static cl::opt<bool> GenCSNestedProfile( 96 "gen-cs-nested-profile", cl::Hidden, cl::init(true), 97 cl::desc("Generate nested function profiles for CSSPGO")); 98 99 using namespace llvm; 100 using namespace sampleprof; 101 102 namespace llvm { 103 namespace sampleprof { 104 105 // Initialize the MaxCompressionSize to -1 which means no size limit 106 int32_t CSProfileGenerator::MaxCompressionSize = -1; 107 108 int CSProfileGenerator::MaxContextDepth = -1; 109 110 bool ProfileGeneratorBase::UseFSDiscriminator = false; 111 112 std::unique_ptr<ProfileGeneratorBase> 113 ProfileGeneratorBase::create(ProfiledBinary *Binary, 114 const ContextSampleCounterMap *SampleCounters, 115 bool ProfileIsCS) { 116 std::unique_ptr<ProfileGeneratorBase> Generator; 117 if (ProfileIsCS) { 118 if (Binary->useFSDiscriminator()) 119 exitWithError("FS discriminator is not supported in CS profile."); 120 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 121 } else { 122 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 123 } 124 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 125 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 126 127 return Generator; 128 } 129 130 std::unique_ptr<ProfileGeneratorBase> 131 ProfileGeneratorBase::create(ProfiledBinary *Binary, 132 const SampleProfileMap &&Profiles, 133 bool ProfileIsCS) { 134 std::unique_ptr<ProfileGeneratorBase> Generator; 135 if (ProfileIsCS) { 136 if (Binary->useFSDiscriminator()) 137 exitWithError("FS discriminator is not supported in CS profile."); 138 Generator.reset(new CSProfileGenerator(Binary, std::move(Profiles))); 139 } else { 140 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); 141 } 142 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 143 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 144 145 return Generator; 146 } 147 148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 149 SampleProfileMap &ProfileMap) { 150 // Populate profile symbol list if extended binary format is used. 151 ProfileSymbolList SymbolList; 152 153 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 154 Binary->populateSymbolListFromDWARF(SymbolList); 155 Writer->setProfileSymbolList(&SymbolList); 156 } 157 158 if (std::error_code EC = Writer->write(ProfileMap)) 159 exitWithError(std::move(EC)); 160 } 161 162 void ProfileGeneratorBase::write() { 163 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 164 if (std::error_code EC = WriterOrErr.getError()) 165 exitWithError(EC, OutputFilename); 166 167 if (UseMD5) { 168 if (OutputFormat != SPF_Ext_Binary) 169 WithColor::warning() << "-use-md5 is ignored. Specify " 170 "--format=extbinary to enable it\n"; 171 else 172 WriterOrErr.get()->setUseMD5(); 173 } 174 175 write(std::move(WriterOrErr.get()), ProfileMap); 176 } 177 178 void ProfileGeneratorBase::showDensitySuggestion(double Density) { 179 if (Density == 0.0) 180 WithColor::warning() << "The --profile-summary-cutoff-hot option may be " 181 "set too low. Please check your command.\n"; 182 else if (Density < HotFunctionDensityThreshold) 183 WithColor::warning() 184 << "AutoFDO is estimated to optimize better with " 185 << format("%.1f", HotFunctionDensityThreshold / Density) 186 << "x more samples. Please consider increasing sampling rate or " 187 "profiling for longer duration to get more samples.\n"; 188 189 if (ShowDensity) 190 outs() << "Minimum profile density for hot functions with top " 191 << format("%.2f", 192 static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 193 10000) 194 << "% total samples: " << format("%.1f", Density) << "\n"; 195 } 196 197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, 198 uint64_t HotCntThreshold) { 199 double Density = DBL_MAX; 200 std::vector<const FunctionSamples *> HotFuncs; 201 for (auto &I : Profiles) { 202 auto &FuncSamples = I.second; 203 if (FuncSamples.getTotalSamples() < HotCntThreshold) 204 continue; 205 HotFuncs.emplace_back(&FuncSamples); 206 } 207 208 for (auto *FuncSamples : HotFuncs) { 209 auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); 210 if (!Func) 211 continue; 212 uint64_t FuncSize = Func->getFuncSize(); 213 if (FuncSize == 0) 214 continue; 215 Density = 216 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / 217 FuncSize); 218 } 219 220 return Density == DBL_MAX ? 0.0 : Density; 221 } 222 223 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 224 const RangeSample &Ranges) { 225 226 /* 227 Regions may overlap with each other. Using the boundary info, find all 228 disjoint ranges and their sample count. BoundaryPoint contains the count 229 multiple samples begin/end at this points. 230 231 |<--100-->| Sample1 232 |<------200------>| Sample2 233 A B C 234 235 In the example above, 236 Sample1 begins at A, ends at B, its value is 100. 237 Sample2 beings at A, ends at C, its value is 200. 238 For A, BeginCount is the sum of sample begins at A, which is 300 and no 239 samples ends at A, so EndCount is 0. 240 Then boundary points A, B, and C with begin/end counts are: 241 A: (300, 0) 242 B: (0, 100) 243 C: (0, 200) 244 */ 245 struct BoundaryPoint { 246 // Sum of sample counts beginning at this point 247 uint64_t BeginCount = UINT64_MAX; 248 // Sum of sample counts ending at this point 249 uint64_t EndCount = UINT64_MAX; 250 // Is the begin point of a zero range. 251 bool IsZeroRangeBegin = false; 252 // Is the end point of a zero range. 253 bool IsZeroRangeEnd = false; 254 255 void addBeginCount(uint64_t Count) { 256 if (BeginCount == UINT64_MAX) 257 BeginCount = 0; 258 BeginCount += Count; 259 } 260 261 void addEndCount(uint64_t Count) { 262 if (EndCount == UINT64_MAX) 263 EndCount = 0; 264 EndCount += Count; 265 } 266 }; 267 268 /* 269 For the above example. With boundary points, follwing logic finds two 270 disjoint region of 271 272 [A,B]: 300 273 [B+1,C]: 200 274 275 If there is a boundary point that both begin and end, the point itself 276 becomes a separate disjoint region. For example, if we have original 277 ranges of 278 279 |<--- 100 --->| 280 |<--- 200 --->| 281 A B C 282 283 there are three boundary points with their begin/end counts of 284 285 A: (100, 0) 286 B: (200, 100) 287 C: (0, 200) 288 289 the disjoint ranges would be 290 291 [A, B-1]: 100 292 [B, B]: 300 293 [B+1, C]: 200. 294 295 Example for zero value range: 296 297 |<--- 100 --->| 298 |<--- 200 --->| 299 |<--------------- 0 ----------------->| 300 A B C D E F 301 302 [A, B-1] : 0 303 [B, C] : 100 304 [C+1, D-1]: 0 305 [D, E] : 200 306 [E+1, F] : 0 307 */ 308 std::map<uint64_t, BoundaryPoint> Boundaries; 309 310 for (const auto &Item : Ranges) { 311 assert(Item.first.first <= Item.first.second && 312 "Invalid instruction range"); 313 auto &BeginPoint = Boundaries[Item.first.first]; 314 auto &EndPoint = Boundaries[Item.first.second]; 315 uint64_t Count = Item.second; 316 317 BeginPoint.addBeginCount(Count); 318 EndPoint.addEndCount(Count); 319 if (Count == 0) { 320 BeginPoint.IsZeroRangeBegin = true; 321 EndPoint.IsZeroRangeEnd = true; 322 } 323 } 324 325 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 326 // and the next valid address 327 uint64_t BeginAddress = UINT64_MAX; 328 int ZeroRangeDepth = 0; 329 uint64_t Count = 0; 330 for (const auto &Item : Boundaries) { 331 uint64_t Address = Item.first; 332 const BoundaryPoint &Point = Item.second; 333 if (Point.BeginCount != UINT64_MAX) { 334 if (BeginAddress != UINT64_MAX) 335 DisjointRanges[{BeginAddress, Address - 1}] = Count; 336 Count += Point.BeginCount; 337 BeginAddress = Address; 338 ZeroRangeDepth += Point.IsZeroRangeBegin; 339 } 340 if (Point.EndCount != UINT64_MAX) { 341 assert((BeginAddress != UINT64_MAX) && 342 "First boundary point cannot be 'end' point"); 343 DisjointRanges[{BeginAddress, Address}] = Count; 344 assert(Count >= Point.EndCount && "Mismatched live ranges"); 345 Count -= Point.EndCount; 346 BeginAddress = Address + 1; 347 ZeroRangeDepth -= Point.IsZeroRangeEnd; 348 // If the remaining count is zero and it's no longer in a zero range, this 349 // means we consume all the ranges before, thus mark BeginAddress as 350 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 351 // [<---- 10 ---->] 352 // [<---- 20 ---->] 353 // A B C D 354 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 355 // have the [B+1, C-1] zero range. 356 if (Count == 0 && ZeroRangeDepth == 0) 357 BeginAddress = UINT64_MAX; 358 } 359 } 360 } 361 362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 363 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 364 uint64_t Count) { 365 // Use the maximum count of samples with same line location 366 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 367 368 // Use duplication factor to compensated for loop unroll/vectorization. 369 // Note that this is only needed when we're taking MAX of the counts at 370 // the location instead of SUM. 371 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 372 373 ErrorOr<uint64_t> R = 374 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 375 376 uint64_t PreviousCount = R ? R.get() : 0; 377 if (PreviousCount <= Count) { 378 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 379 Count - PreviousCount); 380 } 381 } 382 383 void ProfileGeneratorBase::updateTotalSamples() { 384 for (auto &Item : ProfileMap) { 385 FunctionSamples &FunctionProfile = Item.second; 386 FunctionProfile.updateTotalSamples(); 387 } 388 } 389 390 void ProfileGeneratorBase::updateCallsiteSamples() { 391 for (auto &Item : ProfileMap) { 392 FunctionSamples &FunctionProfile = Item.second; 393 FunctionProfile.updateCallsiteSamples(); 394 } 395 } 396 397 void ProfileGeneratorBase::updateFunctionSamples() { 398 updateCallsiteSamples(); 399 400 if (UpdateTotalSamples) 401 updateTotalSamples(); 402 } 403 404 void ProfileGeneratorBase::collectProfiledFunctions() { 405 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 406 if (SampleCounters) { 407 // Go through all the stacks, ranges and branches in sample counters, use 408 // the start of the range to look up the function it belongs and record the 409 // function. 410 for (const auto &CI : *SampleCounters) { 411 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { 412 for (auto Addr : CtxKey->Context) { 413 if (FuncRange *FRange = Binary->findFuncRangeForOffset( 414 Binary->virtualAddrToOffset(Addr))) 415 ProfiledFunctions.insert(FRange->Func); 416 } 417 } 418 419 for (auto Item : CI.second.RangeCounter) { 420 uint64_t StartOffset = Item.first.first; 421 if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) 422 ProfiledFunctions.insert(FRange->Func); 423 } 424 425 for (auto Item : CI.second.BranchCounter) { 426 uint64_t SourceOffset = Item.first.first; 427 uint64_t TargetOffset = Item.first.first; 428 if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset)) 429 ProfiledFunctions.insert(FRange->Func); 430 if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset)) 431 ProfiledFunctions.insert(FRange->Func); 432 } 433 } 434 } else { 435 // This is for the case the input is a llvm sample profile. 436 for (const auto &FS : ProfileMap) { 437 if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) 438 ProfiledFunctions.insert(Func); 439 } 440 } 441 442 Binary->setProfiledFunctions(ProfiledFunctions); 443 } 444 445 FunctionSamples & 446 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 447 SampleContext Context(FuncName); 448 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 449 if (Ret.second) { 450 FunctionSamples &FProfile = Ret.first->second; 451 FProfile.setContext(Context); 452 } 453 return Ret.first->second; 454 } 455 456 void ProfileGenerator::generateProfile() { 457 collectProfiledFunctions(); 458 459 if (Binary->usePseudoProbes()) 460 Binary->decodePseudoProbe(); 461 462 if (SampleCounters) { 463 if (Binary->usePseudoProbes()) { 464 generateProbeBasedProfile(); 465 } else { 466 generateLineNumBasedProfile(); 467 } 468 } 469 470 postProcessProfiles(); 471 } 472 473 void ProfileGenerator::postProcessProfiles() { 474 computeSummaryAndThreshold(); 475 trimColdProfiles(ProfileMap, ColdCountThreshold); 476 calculateAndShowDensity(ProfileMap); 477 } 478 479 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, 480 uint64_t ColdCntThreshold) { 481 if (!TrimColdProfile) 482 return; 483 484 // Move cold profiles into a tmp container. 485 std::vector<SampleContext> ColdProfiles; 486 for (const auto &I : ProfileMap) { 487 if (I.second.getTotalSamples() < ColdCntThreshold) 488 ColdProfiles.emplace_back(I.first); 489 } 490 491 // Remove the cold profile from ProfileMap. 492 for (const auto &I : ColdProfiles) 493 ProfileMap.erase(I); 494 } 495 496 void ProfileGenerator::generateLineNumBasedProfile() { 497 assert(SampleCounters->size() == 1 && 498 "Must have one entry for profile generation."); 499 const SampleCounter &SC = SampleCounters->begin()->second; 500 // Fill in function body samples 501 populateBodySamplesForAllFunctions(SC.RangeCounter); 502 // Fill in boundary sample counts as well as call site samples for calls 503 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 504 505 updateFunctionSamples(); 506 } 507 508 void ProfileGenerator::generateProbeBasedProfile() { 509 assert(SampleCounters->size() == 1 && 510 "Must have one entry for profile generation."); 511 // Enable pseudo probe functionalities in SampleProf 512 FunctionSamples::ProfileIsProbeBased = true; 513 const SampleCounter &SC = SampleCounters->begin()->second; 514 // Fill in function body samples 515 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); 516 // Fill in boundary sample counts as well as call site samples for calls 517 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); 518 519 updateFunctionSamples(); 520 } 521 522 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( 523 const RangeSample &RangeCounter) { 524 ProbeCounterMap ProbeCounter; 525 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it 526 // inside extractProbesFromRange. 527 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, 528 false); 529 530 for (const auto &PI : ProbeCounter) { 531 const MCDecodedPseudoProbe *Probe = PI.first; 532 uint64_t Count = PI.second; 533 SampleContextFrameVector FrameVec; 534 Binary->getInlineContextForProbe(Probe, FrameVec, true); 535 FunctionSamples &FunctionProfile = 536 getLeafProfileAndAddTotalSamples(FrameVec, Count); 537 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 538 if (Probe->isEntry()) 539 FunctionProfile.addHeadSamples(Count); 540 } 541 } 542 543 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( 544 const BranchSample &BranchCounters) { 545 for (const auto &Entry : BranchCounters) { 546 uint64_t SourceOffset = Entry.first.first; 547 uint64_t TargetOffset = Entry.first.second; 548 uint64_t Count = Entry.second; 549 assert(Count != 0 && "Unexpected zero weight branch"); 550 551 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 552 if (CalleeName.size() == 0) 553 continue; 554 555 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 556 const MCDecodedPseudoProbe *CallProbe = 557 Binary->getCallProbeForAddr(SourceAddress); 558 if (CallProbe == nullptr) 559 continue; 560 561 // Record called target sample and its count. 562 SampleContextFrameVector FrameVec; 563 Binary->getInlineContextForProbe(CallProbe, FrameVec, true); 564 565 if (!FrameVec.empty()) { 566 FunctionSamples &FunctionProfile = 567 getLeafProfileAndAddTotalSamples(FrameVec, 0); 568 FunctionProfile.addCalledTargetSamples( 569 FrameVec.back().Location.LineOffset, 0, CalleeName, Count); 570 } 571 } 572 } 573 574 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 575 const SampleContextFrameVector &FrameVec, uint64_t Count) { 576 // Get top level profile 577 FunctionSamples *FunctionProfile = 578 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 579 FunctionProfile->addTotalSamples(Count); 580 if (Binary->usePseudoProbes()) { 581 const auto *FuncDesc = Binary->getFuncDescForGUID( 582 Function::getGUID(FunctionProfile->getName())); 583 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 584 } 585 586 for (size_t I = 1; I < FrameVec.size(); I++) { 587 LineLocation Callsite( 588 FrameVec[I - 1].Location.LineOffset, 589 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 590 FunctionSamplesMap &SamplesMap = 591 FunctionProfile->functionSamplesAt(Callsite); 592 auto Ret = 593 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 594 if (Ret.second) { 595 SampleContext Context(FrameVec[I].FuncName); 596 Ret.first->second.setContext(Context); 597 } 598 FunctionProfile = &Ret.first->second; 599 FunctionProfile->addTotalSamples(Count); 600 if (Binary->usePseudoProbes()) { 601 const auto *FuncDesc = Binary->getFuncDescForGUID( 602 Function::getGUID(FunctionProfile->getName())); 603 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 604 } 605 } 606 607 return *FunctionProfile; 608 } 609 610 RangeSample 611 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 612 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 613 if (FillZeroForAllFuncs) { 614 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 615 for (auto &R : FuncI.second.Ranges) { 616 Ranges[{R.first, R.second - 1}] += 0; 617 } 618 } 619 } else { 620 // For each range, we search for all ranges of the function it belongs to 621 // and initialize it with zero count, so it remains zero if doesn't hit any 622 // samples. This is to be consistent with compiler that interpret zero count 623 // as unexecuted(cold). 624 for (const auto &I : RangeCounter) { 625 uint64_t StartOffset = I.first.first; 626 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 627 Ranges[{Range.first, Range.second - 1}] += 0; 628 } 629 } 630 RangeSample DisjointRanges; 631 findDisjointRanges(DisjointRanges, Ranges); 632 return DisjointRanges; 633 } 634 635 void ProfileGenerator::populateBodySamplesForAllFunctions( 636 const RangeSample &RangeCounter) { 637 for (const auto &Range : preprocessRangeCounter(RangeCounter)) { 638 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 639 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 640 uint64_t Count = Range.second; 641 642 InstructionPointer IP(Binary, RangeBegin, true); 643 // Disjoint ranges may have range in the middle of two instr, 644 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 645 // can be Addr1+1 to Addr2-1. We should ignore such range. 646 if (IP.Address > RangeEnd) 647 continue; 648 649 do { 650 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 651 const SampleContextFrameVector &FrameVec = 652 Binary->getFrameLocationStack(Offset); 653 if (!FrameVec.empty()) { 654 // FIXME: As accumulating total count per instruction caused some 655 // regression, we changed to accumulate total count per byte as a 656 // workaround. Tuning hotness threshold on the compiler side might be 657 // necessary in the future. 658 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( 659 FrameVec, Count * Binary->getInstSize(Offset)); 660 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 661 Count); 662 } 663 } while (IP.advance() && IP.Address <= RangeEnd); 664 } 665 } 666 667 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { 668 // Get the function range by branch target if it's a call branch. 669 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); 670 671 // We won't accumulate sample count for a range whose start is not the real 672 // function entry such as outlined function or inner labels. 673 if (!FRange || !FRange->IsFuncEntry) 674 return StringRef(); 675 676 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 677 } 678 679 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 680 const BranchSample &BranchCounters) { 681 for (const auto &Entry : BranchCounters) { 682 uint64_t SourceOffset = Entry.first.first; 683 uint64_t TargetOffset = Entry.first.second; 684 uint64_t Count = Entry.second; 685 assert(Count != 0 && "Unexpected zero weight branch"); 686 687 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 688 if (CalleeName.size() == 0) 689 continue; 690 // Record called target sample and its count. 691 const SampleContextFrameVector &FrameVec = 692 Binary->getFrameLocationStack(SourceOffset); 693 if (!FrameVec.empty()) { 694 FunctionSamples &FunctionProfile = 695 getLeafProfileAndAddTotalSamples(FrameVec, 0); 696 FunctionProfile.addCalledTargetSamples( 697 FrameVec.back().Location.LineOffset, 698 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 699 CalleeName, Count); 700 } 701 // Add head samples for callee. 702 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 703 CalleeProfile.addHeadSamples(Count); 704 } 705 } 706 707 void ProfileGeneratorBase::calculateAndShowDensity( 708 const SampleProfileMap &Profiles) { 709 double Density = calculateDensity(Profiles, HotCountThreshold); 710 showDensitySuggestion(Density); 711 } 712 713 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( 714 const SampleContextFrameVector &Context, bool WasLeafInlined) { 715 auto I = ProfileMap.find(SampleContext(Context)); 716 if (I == ProfileMap.end()) { 717 // Save the new context for future references. 718 SampleContextFrames NewContext = *Contexts.insert(Context).first; 719 SampleContext FContext(NewContext, RawContext); 720 auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); 721 if (WasLeafInlined) 722 FContext.setAttribute(ContextWasInlined); 723 FunctionSamples &FProfile = Ret.first->second; 724 FProfile.setContext(FContext); 725 return Ret.first->second; 726 } else { 727 // Update ContextWasInlined attribute for existing contexts. 728 // The current function can be called in two ways: 729 // - when processing a probe of the current frame 730 // - when processing the entry probe of an inlinee's frame, which 731 // is then used to update the callsite count of the current frame. 732 // The two can happen in any order, hence here we are making sure 733 // `ContextWasInlined` is always set as expected. 734 // TODO: Note that the former does not always happen if no probes of the 735 // current frame has samples, and if the latter happens, we could lose the 736 // attribute. This should be fixed. 737 if (WasLeafInlined) 738 I->second.getContext().setAttribute(ContextWasInlined); 739 } 740 741 return I->second; 742 } 743 744 void CSProfileGenerator::generateProfile() { 745 FunctionSamples::ProfileIsCS = true; 746 747 collectProfiledFunctions(); 748 749 if (Binary->usePseudoProbes()) 750 Binary->decodePseudoProbe(); 751 752 if (SampleCounters) { 753 if (Binary->usePseudoProbes()) { 754 generateProbeBasedProfile(); 755 } else { 756 generateLineNumBasedProfile(); 757 } 758 } 759 760 if (Binary->getTrackFuncContextSize()) 761 computeSizeForProfiledFunctions(); 762 763 postProcessProfiles(); 764 } 765 766 void CSProfileGenerator::computeSizeForProfiledFunctions() { 767 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 768 for (auto *Func : Binary->getProfiledFunctions()) 769 Binary->computeInlinedContextSizeForFunc(Func); 770 771 // Flush the symbolizer to save memory. 772 Binary->flushSymbolizer(); 773 } 774 775 void CSProfileGenerator::generateLineNumBasedProfile() { 776 for (const auto &CI : *SampleCounters) { 777 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); 778 779 FunctionSamples *FunctionProfile = nullptr; 780 // Sample context will be empty if the jump is an external-to-internal call 781 // pattern, the head samples should be added for the internal function. 782 if (!CtxKey->Context.empty()) { 783 // Get or create function profile for the range 784 FunctionProfile = &getFunctionProfileForContext(CtxKey->Context, 785 CtxKey->WasLeafInlined); 786 // Fill in function body samples 787 populateBodySamplesForFunction(*FunctionProfile, CI.second.RangeCounter); 788 } 789 // Fill in boundary sample counts as well as call site samples for calls 790 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, 791 CI.second.BranchCounter); 792 } 793 // Fill in call site value sample for inlined calls and also use context to 794 // infer missing samples. Since we don't have call count for inlined 795 // functions, we estimate it from inlinee's profile using the entry of the 796 // body sample. 797 populateInferredFunctionSamples(); 798 799 updateFunctionSamples(); 800 } 801 802 void CSProfileGenerator::populateBodySamplesForFunction( 803 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 804 // Compute disjoint ranges first, so we can use MAX 805 // for calculating count for each location. 806 RangeSample Ranges; 807 findDisjointRanges(Ranges, RangeCounter); 808 for (const auto &Range : Ranges) { 809 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 810 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 811 uint64_t Count = Range.second; 812 // Disjoint ranges have introduce zero-filled gap that 813 // doesn't belong to current context, filter them out. 814 if (Count == 0) 815 continue; 816 817 InstructionPointer IP(Binary, RangeBegin, true); 818 // Disjoint ranges may have range in the middle of two instr, 819 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 820 // can be Addr1+1 to Addr2-1. We should ignore such range. 821 if (IP.Address > RangeEnd) 822 continue; 823 824 do { 825 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 826 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 827 if (LeafLoc.hasValue()) { 828 // Recording body sample for this specific context 829 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 830 FunctionProfile.addTotalSamples(Count); 831 } 832 } while (IP.advance() && IP.Address <= RangeEnd); 833 } 834 } 835 836 void CSProfileGenerator::populateBoundarySamplesForFunction( 837 SampleContextFrames ContextId, FunctionSamples *CallerProfile, 838 const BranchSample &BranchCounters) { 839 840 for (const auto &Entry : BranchCounters) { 841 uint64_t SourceOffset = Entry.first.first; 842 uint64_t TargetOffset = Entry.first.second; 843 uint64_t Count = Entry.second; 844 assert(Count != 0 && "Unexpected zero weight branch"); 845 846 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 847 if (CalleeName.size() == 0) 848 continue; 849 850 SampleContextFrameVector CalleeCtx; 851 if (CallerProfile) { 852 assert(!ContextId.empty() && 853 "CallerProfile is null only if ContextId is empty"); 854 // Record called target sample and its count 855 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 856 if (LeafLoc.hasValue()) { 857 CallerProfile->addCalledTargetSamples( 858 LeafLoc->Location.LineOffset, 859 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 860 Count); 861 862 // Record head sample for called target(callee) 863 CalleeCtx.append(ContextId.begin(), ContextId.end()); 864 assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && 865 "Leaf function name doesn't match"); 866 CalleeCtx.back() = *LeafLoc; 867 } 868 } 869 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); 870 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); 871 CalleeProfile.addHeadSamples(Count); 872 } 873 } 874 875 static SampleContextFrame 876 getCallerContext(SampleContextFrames CalleeContext, 877 SampleContextFrameVector &CallerContext) { 878 assert(CalleeContext.size() > 1 && "Unexpected empty context"); 879 CalleeContext = CalleeContext.drop_back(); 880 CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); 881 SampleContextFrame CallerFrame = CallerContext.back(); 882 CallerContext.back().Location = LineLocation(0, 0); 883 return CallerFrame; 884 } 885 886 void CSProfileGenerator::populateInferredFunctionSamples() { 887 for (const auto &Item : ProfileMap) { 888 const auto &CalleeContext = Item.first; 889 const FunctionSamples &CalleeProfile = Item.second; 890 891 // If we already have head sample counts, we must have value profile 892 // for call sites added already. Skip to avoid double counting. 893 if (CalleeProfile.getHeadSamples()) 894 continue; 895 // If we don't have context, nothing to do for caller's call site. 896 // This could happen for entry point function. 897 if (CalleeContext.isBaseContext()) 898 continue; 899 900 // Infer Caller's frame loc and context ID through string splitting 901 SampleContextFrameVector CallerContextId; 902 SampleContextFrame &&CallerLeafFrameLoc = 903 getCallerContext(CalleeContext.getContextFrames(), CallerContextId); 904 SampleContextFrames CallerContext(CallerContextId); 905 906 // It's possible that we haven't seen any sample directly in the caller, 907 // in which case CallerProfile will not exist. But we can't modify 908 // ProfileMap while iterating it. 909 // TODO: created function profile for those callers too 910 if (ProfileMap.find(CallerContext) == ProfileMap.end()) 911 continue; 912 FunctionSamples &CallerProfile = ProfileMap[CallerContext]; 913 914 // Since we don't have call count for inlined functions, we 915 // estimate it from inlinee's profile using entry body sample. 916 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 917 // If we don't have samples with location, use 1 to indicate live. 918 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 919 EstimatedCallCount = 1; 920 CallerProfile.addCalledTargetSamples( 921 CallerLeafFrameLoc.Location.LineOffset, 922 CallerLeafFrameLoc.Location.Discriminator, 923 CalleeProfile.getContext().getName(), EstimatedCallCount); 924 CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, 925 CallerLeafFrameLoc.Location.Discriminator, 926 EstimatedCallCount); 927 CallerProfile.addTotalSamples(EstimatedCallCount); 928 } 929 } 930 931 void CSProfileGenerator::postProcessProfiles() { 932 // Compute hot/cold threshold based on profile. This will be used for cold 933 // context profile merging/trimming. 934 computeSummaryAndThreshold(); 935 936 // Run global pre-inliner to adjust/merge context profile based on estimated 937 // inline decisions. 938 if (EnableCSPreInliner) { 939 CSPreInliner(ProfileMap, *Binary, Summary.get()).run(); 940 // Turn off the profile merger by default unless it is explicitly enabled. 941 if (!CSProfMergeColdContext.getNumOccurrences()) 942 CSProfMergeColdContext = false; 943 } 944 945 // Trim and merge cold context profile using cold threshold above. 946 if (TrimColdProfile || CSProfMergeColdContext) { 947 SampleContextTrimmer(ProfileMap) 948 .trimAndMergeColdContextProfiles( 949 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, 950 CSProfMaxColdContextDepth, EnableCSPreInliner); 951 } 952 953 // Merge function samples of CS profile to calculate profile density. 954 sampleprof::SampleProfileMap ContextLessProfiles; 955 for (const auto &I : ProfileMap) { 956 ContextLessProfiles[I.second.getName()].merge(I.second); 957 } 958 959 calculateAndShowDensity(ContextLessProfiles); 960 if (GenCSNestedProfile) { 961 CSProfileConverter CSConverter(ProfileMap); 962 CSConverter.convertProfiles(); 963 FunctionSamples::ProfileIsCS = false; 964 } 965 } 966 967 void ProfileGeneratorBase::computeSummaryAndThreshold() { 968 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 969 Summary = Builder.computeSummaryForProfiles(ProfileMap); 970 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 971 (Summary->getDetailedSummary())); 972 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 973 (Summary->getDetailedSummary())); 974 } 975 976 void ProfileGeneratorBase::extractProbesFromRange( 977 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 978 bool FindDisjointRanges) { 979 const RangeSample *PRanges = &RangeCounter; 980 RangeSample Ranges; 981 if (FindDisjointRanges) { 982 findDisjointRanges(Ranges, RangeCounter); 983 PRanges = &Ranges; 984 } 985 986 for (const auto &Range : *PRanges) { 987 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 988 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 989 uint64_t Count = Range.second; 990 991 InstructionPointer IP(Binary, RangeBegin, true); 992 // Disjoint ranges may have range in the middle of two instr, 993 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 994 // can be Addr1+1 to Addr2-1. We should ignore such range. 995 if (IP.Address > RangeEnd) 996 continue; 997 998 do { 999 const AddressProbesMap &Address2ProbesMap = 1000 Binary->getAddress2ProbesMap(); 1001 auto It = Address2ProbesMap.find(IP.Address); 1002 if (It != Address2ProbesMap.end()) { 1003 for (const auto &Probe : It->second) { 1004 ProbeCounter[&Probe] += Count; 1005 } 1006 } 1007 } while (IP.advance() && IP.Address <= RangeEnd); 1008 } 1009 } 1010 1011 static void 1012 extractPrefixContextStack(SampleContextFrameVector &ContextStack, 1013 const SmallVectorImpl<uint64_t> &Addresses, 1014 ProfiledBinary *Binary) { 1015 SmallVector<const MCDecodedPseudoProbe *, 16> Probes; 1016 for (auto Addr : reverse(Addresses)) { 1017 const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr); 1018 // These could be the cases when a probe is not found at a calliste. Cutting 1019 // off the context from here since the inliner will not know how to consume 1020 // a context with unknown callsites. 1021 // 1. for functions that are not sampled when 1022 // --decode-probe-for-profiled-functions-only is on. 1023 // 2. for a merged callsite. Callsite merging may cause the loss of original 1024 // probe IDs. 1025 // 3. for an external callsite. 1026 if (!CallProbe) 1027 break; 1028 Probes.push_back(CallProbe); 1029 } 1030 1031 std::reverse(Probes.begin(), Probes.end()); 1032 1033 // Extract context stack for reusing, leaf context stack will be added 1034 // compressed while looking up function profile. 1035 for (const auto *P : Probes) { 1036 Binary->getInlineContextForProbe(P, ContextStack, true); 1037 } 1038 } 1039 1040 void CSProfileGenerator::generateProbeBasedProfile() { 1041 // Enable pseudo probe functionalities in SampleProf 1042 FunctionSamples::ProfileIsProbeBased = true; 1043 for (const auto &CI : *SampleCounters) { 1044 const AddrBasedCtxKey *CtxKey = 1045 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()); 1046 SampleContextFrameVector ContextStack; 1047 extractPrefixContextStack(ContextStack, CtxKey->Context, Binary); 1048 // Fill in function body samples from probes, also infer caller's samples 1049 // from callee's probe 1050 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 1051 // Fill in boundary samples for a call probe 1052 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 1053 } 1054 } 1055 1056 void CSProfileGenerator::populateBodySamplesWithProbes( 1057 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 1058 ProbeCounterMap ProbeCounter; 1059 // Extract the top frame probes by looking up each address among the range in 1060 // the Address2ProbeMap 1061 extractProbesFromRange(RangeCounter, ProbeCounter); 1062 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 1063 std::unordered_set<FunctionSamples *>> 1064 FrameSamples; 1065 for (const auto &PI : ProbeCounter) { 1066 const MCDecodedPseudoProbe *Probe = PI.first; 1067 uint64_t Count = PI.second; 1068 // Disjoint ranges have introduce zero-filled gap that 1069 // doesn't belong to current context, filter them out. 1070 if (!Probe->isBlock() || Count == 0) 1071 continue; 1072 FunctionSamples &FunctionProfile = 1073 getFunctionProfileForLeafProbe(ContextStack, Probe); 1074 // Record the current frame and FunctionProfile whenever samples are 1075 // collected for non-danglie probes. This is for reporting all of the 1076 // zero count probes of the frame later. 1077 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 1078 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 1079 FunctionProfile.addTotalSamples(Count); 1080 if (Probe->isEntry()) { 1081 FunctionProfile.addHeadSamples(Count); 1082 // Look up for the caller's function profile 1083 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 1084 SampleContextFrames CalleeContextId = 1085 FunctionProfile.getContext().getContextFrames(); 1086 if (InlinerDesc != nullptr && CalleeContextId.size() > 1) { 1087 // Since the context id will be compressed, we have to use callee's 1088 // context id to infer caller's context id to ensure they share the 1089 // same context prefix. 1090 SampleContextFrameVector CallerContextId; 1091 SampleContextFrame &&CallerLeafFrameLoc = 1092 getCallerContext(CalleeContextId, CallerContextId); 1093 uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; 1094 assert(CallerIndex && 1095 "Inferred caller's location index shouldn't be zero!"); 1096 FunctionSamples &CallerProfile = 1097 getFunctionProfileForContext(CallerContextId); 1098 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 1099 CallerProfile.addBodySamples(CallerIndex, 0, Count); 1100 CallerProfile.addTotalSamples(Count); 1101 CallerProfile.addCalledTargetSamples( 1102 CallerIndex, 0, FunctionProfile.getContext().getName(), Count); 1103 } 1104 } 1105 } 1106 1107 // Assign zero count for remaining probes without sample hits to 1108 // differentiate from probes optimized away, of which the counts are unknown 1109 // and will be inferred by the compiler. 1110 for (auto &I : FrameSamples) { 1111 for (auto *FunctionProfile : I.second) { 1112 for (auto *Probe : I.first->getProbes()) { 1113 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 1114 } 1115 } 1116 } 1117 } 1118 1119 void CSProfileGenerator::populateBoundarySamplesWithProbes( 1120 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 1121 for (const auto &BI : BranchCounter) { 1122 uint64_t SourceOffset = BI.first.first; 1123 uint64_t TargetOffset = BI.first.second; 1124 uint64_t Count = BI.second; 1125 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 1126 const MCDecodedPseudoProbe *CallProbe = 1127 Binary->getCallProbeForAddr(SourceAddress); 1128 if (CallProbe == nullptr) 1129 continue; 1130 FunctionSamples &FunctionProfile = 1131 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 1132 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 1133 FunctionProfile.addTotalSamples(Count); 1134 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 1135 if (CalleeName.size() == 0) 1136 continue; 1137 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 1138 Count); 1139 } 1140 } 1141 1142 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 1143 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 1144 1145 // Explicitly copy the context for appending the leaf context 1146 SampleContextFrameVector NewContextStack(ContextStack.begin(), 1147 ContextStack.end()); 1148 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 1149 // For leaf inlined context with the top frame, we should strip off the top 1150 // frame's probe id, like: 1151 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 1152 auto LeafFrame = NewContextStack.back(); 1153 LeafFrame.Location = LineLocation(0, 0); 1154 NewContextStack.pop_back(); 1155 // Compress the context string except for the leaf frame 1156 CSProfileGenerator::compressRecursionContext(NewContextStack); 1157 CSProfileGenerator::trimContext(NewContextStack); 1158 NewContextStack.push_back(LeafFrame); 1159 1160 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 1161 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 1162 FunctionSamples &FunctionProile = 1163 getFunctionProfileForContext(NewContextStack, WasLeafInlined); 1164 FunctionProile.setFunctionHash(FuncDesc->FuncHash); 1165 return FunctionProile; 1166 } 1167 1168 } // end namespace sampleprof 1169 } // end namespace llvm 1170