1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "ProfileGenerator.h" 9 #include "ErrorHandling.h" 10 #include "PerfReader.h" 11 #include "ProfiledBinary.h" 12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 13 #include "llvm/ProfileData/ProfileCommon.h" 14 #include <algorithm> 15 #include <float.h> 16 #include <unordered_set> 17 #include <utility> 18 19 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 20 cl::Required, 21 cl::desc("Output profile file")); 22 static cl::alias OutputA("o", cl::desc("Alias for --output"), 23 cl::aliasopt(OutputFilename)); 24 25 static cl::opt<SampleProfileFormat> OutputFormat( 26 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 27 cl::values( 28 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 29 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 31 clEnumValN(SPF_Text, "text", "Text encoding"), 32 clEnumValN(SPF_GCC, "gcc", 33 "GCC encoding (only meaningful for -sample)"))); 34 35 cl::opt<bool> UseMD5( 36 "use-md5", cl::init(false), cl::Hidden, 37 cl::desc("Use md5 to represent function names in the output profile (only " 38 "meaningful for -extbinary)")); 39 40 static cl::opt<bool> PopulateProfileSymbolList( 41 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 43 44 static cl::opt<bool> FillZeroForAllFuncs( 45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 46 cl::desc("Attribute all functions' range with zero count " 47 "even it's not hit by any samples.")); 48 49 static cl::opt<int32_t, true> RecursionCompression( 50 "compress-recursion", 51 cl::desc("Compressing recursion by deduplicating adjacent frame " 52 "sequences up to the specified size. -1 means no size limit."), 53 cl::Hidden, 54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 55 56 static cl::opt<bool> 57 TrimColdProfile("trim-cold-profile", 58 cl::desc("If the total count of the profile is smaller " 59 "than threshold, it will be trimmed.")); 60 61 static cl::opt<bool> CSProfMergeColdContext( 62 "csprof-merge-cold-context", cl::init(true), 63 cl::desc("If the total count of context profile is smaller than " 64 "the threshold, it will be merged into context-less base " 65 "profile.")); 66 67 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 68 "csprof-max-cold-context-depth", cl::init(1), 69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 70 "context-less base profile")); 71 72 static cl::opt<int, true> CSProfMaxContextDepth( 73 "csprof-max-context-depth", 74 cl::desc("Keep the last K contexts while merging profile. -1 means no " 75 "depth limit."), 76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 77 78 static cl::opt<double> HotFunctionDensityThreshold( 79 "hot-function-density-threshold", llvm::cl::init(1000), 80 llvm::cl::desc( 81 "specify density threshold for hot functions (default: 1000)"), 82 llvm::cl::Optional); 83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 84 llvm::cl::desc("show profile density details"), 85 llvm::cl::Optional); 86 87 static cl::opt<bool> UpdateTotalSamples( 88 "update-total-samples", llvm::cl::init(false), 89 llvm::cl::desc( 90 "Update total samples by accumulating all its body samples."), 91 llvm::cl::Optional); 92 93 extern cl::opt<int> ProfileSummaryCutoffHot; 94 extern cl::opt<bool> UseContextLessSummary; 95 96 static cl::opt<bool> GenCSNestedProfile( 97 "gen-cs-nested-profile", cl::Hidden, cl::init(true), 98 cl::desc("Generate nested function profiles for CSSPGO")); 99 100 using namespace llvm; 101 using namespace sampleprof; 102 103 namespace llvm { 104 namespace sampleprof { 105 106 // Initialize the MaxCompressionSize to -1 which means no size limit 107 int32_t CSProfileGenerator::MaxCompressionSize = -1; 108 109 int CSProfileGenerator::MaxContextDepth = -1; 110 111 bool ProfileGeneratorBase::UseFSDiscriminator = false; 112 113 std::unique_ptr<ProfileGeneratorBase> 114 ProfileGeneratorBase::create(ProfiledBinary *Binary, 115 const ContextSampleCounterMap *SampleCounters, 116 bool ProfileIsCS) { 117 std::unique_ptr<ProfileGeneratorBase> Generator; 118 if (ProfileIsCS) { 119 if (Binary->useFSDiscriminator()) 120 exitWithError("FS discriminator is not supported in CS profile."); 121 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 122 } else { 123 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 124 } 125 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 126 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 127 128 return Generator; 129 } 130 131 std::unique_ptr<ProfileGeneratorBase> 132 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles, 133 bool ProfileIsCS) { 134 std::unique_ptr<ProfileGeneratorBase> Generator; 135 if (ProfileIsCS) { 136 if (Binary->useFSDiscriminator()) 137 exitWithError("FS discriminator is not supported in CS profile."); 138 Generator.reset(new CSProfileGenerator(Binary, Profiles)); 139 } else { 140 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); 141 } 142 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 143 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 144 145 return Generator; 146 } 147 148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 149 SampleProfileMap &ProfileMap) { 150 // Populate profile symbol list if extended binary format is used. 151 ProfileSymbolList SymbolList; 152 153 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 154 Binary->populateSymbolListFromDWARF(SymbolList); 155 Writer->setProfileSymbolList(&SymbolList); 156 } 157 158 if (std::error_code EC = Writer->write(ProfileMap)) 159 exitWithError(std::move(EC)); 160 } 161 162 void ProfileGeneratorBase::write() { 163 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 164 if (std::error_code EC = WriterOrErr.getError()) 165 exitWithError(EC, OutputFilename); 166 167 if (UseMD5) { 168 if (OutputFormat != SPF_Ext_Binary) 169 WithColor::warning() << "-use-md5 is ignored. Specify " 170 "--format=extbinary to enable it\n"; 171 else 172 WriterOrErr.get()->setUseMD5(); 173 } 174 175 write(std::move(WriterOrErr.get()), ProfileMap); 176 } 177 178 void ProfileGeneratorBase::showDensitySuggestion(double Density) { 179 if (Density == 0.0) 180 WithColor::warning() << "The --profile-summary-cutoff-hot option may be " 181 "set too low. Please check your command.\n"; 182 else if (Density < HotFunctionDensityThreshold) 183 WithColor::warning() 184 << "AutoFDO is estimated to optimize better with " 185 << format("%.1f", HotFunctionDensityThreshold / Density) 186 << "x more samples. Please consider increasing sampling rate or " 187 "profiling for longer duration to get more samples.\n"; 188 189 if (ShowDensity) 190 outs() << "Minimum profile density for hot functions with top " 191 << format("%.2f", 192 static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 193 10000) 194 << "% total samples: " << format("%.1f", Density) << "\n"; 195 } 196 197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, 198 uint64_t HotCntThreshold) { 199 double Density = DBL_MAX; 200 std::vector<const FunctionSamples *> HotFuncs; 201 for (auto &I : Profiles) { 202 auto &FuncSamples = I.second; 203 if (FuncSamples.getTotalSamples() < HotCntThreshold) 204 continue; 205 HotFuncs.emplace_back(&FuncSamples); 206 } 207 208 for (auto *FuncSamples : HotFuncs) { 209 auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); 210 if (!Func) 211 continue; 212 uint64_t FuncSize = Func->getFuncSize(); 213 if (FuncSize == 0) 214 continue; 215 Density = 216 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / 217 FuncSize); 218 } 219 220 return Density == DBL_MAX ? 0.0 : Density; 221 } 222 223 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 224 const RangeSample &Ranges) { 225 226 /* 227 Regions may overlap with each other. Using the boundary info, find all 228 disjoint ranges and their sample count. BoundaryPoint contains the count 229 multiple samples begin/end at this points. 230 231 |<--100-->| Sample1 232 |<------200------>| Sample2 233 A B C 234 235 In the example above, 236 Sample1 begins at A, ends at B, its value is 100. 237 Sample2 beings at A, ends at C, its value is 200. 238 For A, BeginCount is the sum of sample begins at A, which is 300 and no 239 samples ends at A, so EndCount is 0. 240 Then boundary points A, B, and C with begin/end counts are: 241 A: (300, 0) 242 B: (0, 100) 243 C: (0, 200) 244 */ 245 struct BoundaryPoint { 246 // Sum of sample counts beginning at this point 247 uint64_t BeginCount = UINT64_MAX; 248 // Sum of sample counts ending at this point 249 uint64_t EndCount = UINT64_MAX; 250 // Is the begin point of a zero range. 251 bool IsZeroRangeBegin = false; 252 // Is the end point of a zero range. 253 bool IsZeroRangeEnd = false; 254 255 void addBeginCount(uint64_t Count) { 256 if (BeginCount == UINT64_MAX) 257 BeginCount = 0; 258 BeginCount += Count; 259 } 260 261 void addEndCount(uint64_t Count) { 262 if (EndCount == UINT64_MAX) 263 EndCount = 0; 264 EndCount += Count; 265 } 266 }; 267 268 /* 269 For the above example. With boundary points, follwing logic finds two 270 disjoint region of 271 272 [A,B]: 300 273 [B+1,C]: 200 274 275 If there is a boundary point that both begin and end, the point itself 276 becomes a separate disjoint region. For example, if we have original 277 ranges of 278 279 |<--- 100 --->| 280 |<--- 200 --->| 281 A B C 282 283 there are three boundary points with their begin/end counts of 284 285 A: (100, 0) 286 B: (200, 100) 287 C: (0, 200) 288 289 the disjoint ranges would be 290 291 [A, B-1]: 100 292 [B, B]: 300 293 [B+1, C]: 200. 294 295 Example for zero value range: 296 297 |<--- 100 --->| 298 |<--- 200 --->| 299 |<--------------- 0 ----------------->| 300 A B C D E F 301 302 [A, B-1] : 0 303 [B, C] : 100 304 [C+1, D-1]: 0 305 [D, E] : 200 306 [E+1, F] : 0 307 */ 308 std::map<uint64_t, BoundaryPoint> Boundaries; 309 310 for (const auto &Item : Ranges) { 311 assert(Item.first.first <= Item.first.second && 312 "Invalid instruction range"); 313 auto &BeginPoint = Boundaries[Item.first.first]; 314 auto &EndPoint = Boundaries[Item.first.second]; 315 uint64_t Count = Item.second; 316 317 BeginPoint.addBeginCount(Count); 318 EndPoint.addEndCount(Count); 319 if (Count == 0) { 320 BeginPoint.IsZeroRangeBegin = true; 321 EndPoint.IsZeroRangeEnd = true; 322 } 323 } 324 325 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 326 // and the next valid address 327 uint64_t BeginAddress = UINT64_MAX; 328 int ZeroRangeDepth = 0; 329 uint64_t Count = 0; 330 for (const auto &Item : Boundaries) { 331 uint64_t Address = Item.first; 332 const BoundaryPoint &Point = Item.second; 333 if (Point.BeginCount != UINT64_MAX) { 334 if (BeginAddress != UINT64_MAX) 335 DisjointRanges[{BeginAddress, Address - 1}] = Count; 336 Count += Point.BeginCount; 337 BeginAddress = Address; 338 ZeroRangeDepth += Point.IsZeroRangeBegin; 339 } 340 if (Point.EndCount != UINT64_MAX) { 341 assert((BeginAddress != UINT64_MAX) && 342 "First boundary point cannot be 'end' point"); 343 DisjointRanges[{BeginAddress, Address}] = Count; 344 assert(Count >= Point.EndCount && "Mismatched live ranges"); 345 Count -= Point.EndCount; 346 BeginAddress = Address + 1; 347 ZeroRangeDepth -= Point.IsZeroRangeEnd; 348 // If the remaining count is zero and it's no longer in a zero range, this 349 // means we consume all the ranges before, thus mark BeginAddress as 350 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 351 // [<---- 10 ---->] 352 // [<---- 20 ---->] 353 // A B C D 354 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 355 // have the [B+1, C-1] zero range. 356 if (Count == 0 && ZeroRangeDepth == 0) 357 BeginAddress = UINT64_MAX; 358 } 359 } 360 } 361 362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 363 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 364 uint64_t Count) { 365 // Use the maximum count of samples with same line location 366 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 367 368 // Use duplication factor to compensated for loop unroll/vectorization. 369 // Note that this is only needed when we're taking MAX of the counts at 370 // the location instead of SUM. 371 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 372 373 ErrorOr<uint64_t> R = 374 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 375 376 uint64_t PreviousCount = R ? R.get() : 0; 377 if (PreviousCount <= Count) { 378 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 379 Count - PreviousCount); 380 } 381 } 382 383 void ProfileGeneratorBase::updateTotalSamples() { 384 for (auto &Item : ProfileMap) { 385 FunctionSamples &FunctionProfile = Item.second; 386 FunctionProfile.updateTotalSamples(); 387 } 388 } 389 390 void ProfileGeneratorBase::updateCallsiteSamples() { 391 for (auto &Item : ProfileMap) { 392 FunctionSamples &FunctionProfile = Item.second; 393 FunctionProfile.updateCallsiteSamples(); 394 } 395 } 396 397 void ProfileGeneratorBase::updateFunctionSamples() { 398 updateCallsiteSamples(); 399 400 if (UpdateTotalSamples) 401 updateTotalSamples(); 402 } 403 404 void ProfileGeneratorBase::collectProfiledFunctions() { 405 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 406 if (collectFunctionsFromRawProfile(ProfiledFunctions)) 407 Binary->setProfiledFunctions(ProfiledFunctions); 408 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions)) 409 Binary->setProfiledFunctions(ProfiledFunctions); 410 else 411 llvm_unreachable("Unsupported input profile"); 412 } 413 414 bool ProfileGeneratorBase::collectFunctionsFromRawProfile( 415 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 416 if (!SampleCounters) 417 return false; 418 // Go through all the stacks, ranges and branches in sample counters, use 419 // the start of the range to look up the function it belongs and record the 420 // function. 421 for (const auto &CI : *SampleCounters) { 422 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { 423 for (auto Addr : CtxKey->Context) { 424 if (FuncRange *FRange = Binary->findFuncRangeForOffset( 425 Binary->virtualAddrToOffset(Addr))) 426 ProfiledFunctions.insert(FRange->Func); 427 } 428 } 429 430 for (auto Item : CI.second.RangeCounter) { 431 uint64_t StartOffset = Item.first.first; 432 if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) 433 ProfiledFunctions.insert(FRange->Func); 434 } 435 436 for (auto Item : CI.second.BranchCounter) { 437 uint64_t SourceOffset = Item.first.first; 438 uint64_t TargetOffset = Item.first.first; 439 if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset)) 440 ProfiledFunctions.insert(FRange->Func); 441 if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset)) 442 ProfiledFunctions.insert(FRange->Func); 443 } 444 } 445 return true; 446 } 447 448 bool ProfileGenerator::collectFunctionsFromLLVMProfile( 449 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 450 for (const auto &FS : ProfileMap) { 451 if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) 452 ProfiledFunctions.insert(Func); 453 } 454 return true; 455 } 456 457 bool CSProfileGenerator::collectFunctionsFromLLVMProfile( 458 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 459 for (auto *Node : ContextTracker) { 460 if (!Node->getFuncName().empty()) 461 if (auto *Func = Binary->getBinaryFunction(Node->getFuncName())) 462 ProfiledFunctions.insert(Func); 463 } 464 return true; 465 } 466 467 FunctionSamples & 468 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 469 SampleContext Context(FuncName); 470 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 471 if (Ret.second) { 472 FunctionSamples &FProfile = Ret.first->second; 473 FProfile.setContext(Context); 474 } 475 return Ret.first->second; 476 } 477 478 void ProfileGenerator::generateProfile() { 479 collectProfiledFunctions(); 480 481 if (Binary->usePseudoProbes()) 482 Binary->decodePseudoProbe(); 483 484 if (SampleCounters) { 485 if (Binary->usePseudoProbes()) { 486 generateProbeBasedProfile(); 487 } else { 488 generateLineNumBasedProfile(); 489 } 490 } 491 492 postProcessProfiles(); 493 } 494 495 void ProfileGenerator::postProcessProfiles() { 496 computeSummaryAndThreshold(ProfileMap); 497 trimColdProfiles(ProfileMap, ColdCountThreshold); 498 calculateAndShowDensity(ProfileMap); 499 } 500 501 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, 502 uint64_t ColdCntThreshold) { 503 if (!TrimColdProfile) 504 return; 505 506 // Move cold profiles into a tmp container. 507 std::vector<SampleContext> ColdProfiles; 508 for (const auto &I : ProfileMap) { 509 if (I.second.getTotalSamples() < ColdCntThreshold) 510 ColdProfiles.emplace_back(I.first); 511 } 512 513 // Remove the cold profile from ProfileMap. 514 for (const auto &I : ColdProfiles) 515 ProfileMap.erase(I); 516 } 517 518 void ProfileGenerator::generateLineNumBasedProfile() { 519 assert(SampleCounters->size() == 1 && 520 "Must have one entry for profile generation."); 521 const SampleCounter &SC = SampleCounters->begin()->second; 522 // Fill in function body samples 523 populateBodySamplesForAllFunctions(SC.RangeCounter); 524 // Fill in boundary sample counts as well as call site samples for calls 525 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 526 527 updateFunctionSamples(); 528 } 529 530 void ProfileGenerator::generateProbeBasedProfile() { 531 assert(SampleCounters->size() == 1 && 532 "Must have one entry for profile generation."); 533 // Enable pseudo probe functionalities in SampleProf 534 FunctionSamples::ProfileIsProbeBased = true; 535 const SampleCounter &SC = SampleCounters->begin()->second; 536 // Fill in function body samples 537 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); 538 // Fill in boundary sample counts as well as call site samples for calls 539 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); 540 541 updateFunctionSamples(); 542 } 543 544 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( 545 const RangeSample &RangeCounter) { 546 ProbeCounterMap ProbeCounter; 547 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it 548 // inside extractProbesFromRange. 549 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, 550 false); 551 552 for (const auto &PI : ProbeCounter) { 553 const MCDecodedPseudoProbe *Probe = PI.first; 554 uint64_t Count = PI.second; 555 SampleContextFrameVector FrameVec; 556 Binary->getInlineContextForProbe(Probe, FrameVec, true); 557 FunctionSamples &FunctionProfile = 558 getLeafProfileAndAddTotalSamples(FrameVec, Count); 559 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 560 if (Probe->isEntry()) 561 FunctionProfile.addHeadSamples(Count); 562 } 563 } 564 565 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( 566 const BranchSample &BranchCounters) { 567 for (const auto &Entry : BranchCounters) { 568 uint64_t SourceOffset = Entry.first.first; 569 uint64_t TargetOffset = Entry.first.second; 570 uint64_t Count = Entry.second; 571 assert(Count != 0 && "Unexpected zero weight branch"); 572 573 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 574 if (CalleeName.size() == 0) 575 continue; 576 577 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 578 const MCDecodedPseudoProbe *CallProbe = 579 Binary->getCallProbeForAddr(SourceAddress); 580 if (CallProbe == nullptr) 581 continue; 582 583 // Record called target sample and its count. 584 SampleContextFrameVector FrameVec; 585 Binary->getInlineContextForProbe(CallProbe, FrameVec, true); 586 587 if (!FrameVec.empty()) { 588 FunctionSamples &FunctionProfile = 589 getLeafProfileAndAddTotalSamples(FrameVec, 0); 590 FunctionProfile.addCalledTargetSamples( 591 FrameVec.back().Location.LineOffset, 0, CalleeName, Count); 592 } 593 } 594 } 595 596 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 597 const SampleContextFrameVector &FrameVec, uint64_t Count) { 598 // Get top level profile 599 FunctionSamples *FunctionProfile = 600 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 601 FunctionProfile->addTotalSamples(Count); 602 if (Binary->usePseudoProbes()) { 603 const auto *FuncDesc = Binary->getFuncDescForGUID( 604 Function::getGUID(FunctionProfile->getName())); 605 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 606 } 607 608 for (size_t I = 1; I < FrameVec.size(); I++) { 609 LineLocation Callsite( 610 FrameVec[I - 1].Location.LineOffset, 611 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 612 FunctionSamplesMap &SamplesMap = 613 FunctionProfile->functionSamplesAt(Callsite); 614 auto Ret = 615 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 616 if (Ret.second) { 617 SampleContext Context(FrameVec[I].FuncName); 618 Ret.first->second.setContext(Context); 619 } 620 FunctionProfile = &Ret.first->second; 621 FunctionProfile->addTotalSamples(Count); 622 if (Binary->usePseudoProbes()) { 623 const auto *FuncDesc = Binary->getFuncDescForGUID( 624 Function::getGUID(FunctionProfile->getName())); 625 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 626 } 627 } 628 629 return *FunctionProfile; 630 } 631 632 RangeSample 633 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 634 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 635 if (FillZeroForAllFuncs) { 636 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 637 for (auto &R : FuncI.second.Ranges) { 638 Ranges[{R.first, R.second - 1}] += 0; 639 } 640 } 641 } else { 642 // For each range, we search for all ranges of the function it belongs to 643 // and initialize it with zero count, so it remains zero if doesn't hit any 644 // samples. This is to be consistent with compiler that interpret zero count 645 // as unexecuted(cold). 646 for (const auto &I : RangeCounter) { 647 uint64_t StartOffset = I.first.first; 648 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 649 Ranges[{Range.first, Range.second - 1}] += 0; 650 } 651 } 652 RangeSample DisjointRanges; 653 findDisjointRanges(DisjointRanges, Ranges); 654 return DisjointRanges; 655 } 656 657 void ProfileGenerator::populateBodySamplesForAllFunctions( 658 const RangeSample &RangeCounter) { 659 for (const auto &Range : preprocessRangeCounter(RangeCounter)) { 660 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 661 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 662 uint64_t Count = Range.second; 663 664 InstructionPointer IP(Binary, RangeBegin, true); 665 // Disjoint ranges may have range in the middle of two instr, 666 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 667 // can be Addr1+1 to Addr2-1. We should ignore such range. 668 if (IP.Address > RangeEnd) 669 continue; 670 671 do { 672 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 673 const SampleContextFrameVector &FrameVec = 674 Binary->getFrameLocationStack(Offset); 675 if (!FrameVec.empty()) { 676 // FIXME: As accumulating total count per instruction caused some 677 // regression, we changed to accumulate total count per byte as a 678 // workaround. Tuning hotness threshold on the compiler side might be 679 // necessary in the future. 680 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( 681 FrameVec, Count * Binary->getInstSize(Offset)); 682 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 683 Count); 684 } 685 } while (IP.advance() && IP.Address <= RangeEnd); 686 } 687 } 688 689 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { 690 // Get the function range by branch target if it's a call branch. 691 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); 692 693 // We won't accumulate sample count for a range whose start is not the real 694 // function entry such as outlined function or inner labels. 695 if (!FRange || !FRange->IsFuncEntry) 696 return StringRef(); 697 698 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 699 } 700 701 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 702 const BranchSample &BranchCounters) { 703 for (const auto &Entry : BranchCounters) { 704 uint64_t SourceOffset = Entry.first.first; 705 uint64_t TargetOffset = Entry.first.second; 706 uint64_t Count = Entry.second; 707 assert(Count != 0 && "Unexpected zero weight branch"); 708 709 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 710 if (CalleeName.size() == 0) 711 continue; 712 // Record called target sample and its count. 713 const SampleContextFrameVector &FrameVec = 714 Binary->getFrameLocationStack(SourceOffset); 715 if (!FrameVec.empty()) { 716 FunctionSamples &FunctionProfile = 717 getLeafProfileAndAddTotalSamples(FrameVec, 0); 718 FunctionProfile.addCalledTargetSamples( 719 FrameVec.back().Location.LineOffset, 720 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 721 CalleeName, Count); 722 } 723 // Add head samples for callee. 724 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 725 CalleeProfile.addHeadSamples(Count); 726 } 727 } 728 729 void ProfileGeneratorBase::calculateAndShowDensity( 730 const SampleProfileMap &Profiles) { 731 double Density = calculateDensity(Profiles, HotCountThreshold); 732 showDensitySuggestion(Density); 733 } 734 735 FunctionSamples * 736 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, 737 bool WasLeafInlined) { 738 FunctionSamples *FProfile = ContextNode->getFunctionSamples(); 739 if (!FProfile) { 740 FSamplesList.emplace_back(); 741 FProfile = &FSamplesList.back(); 742 FProfile->setName(ContextNode->getFuncName()); 743 ContextNode->setFunctionSamples(FProfile); 744 } 745 // Update ContextWasInlined attribute for existing contexts. 746 // The current function can be called in two ways: 747 // - when processing a probe of the current frame 748 // - when processing the entry probe of an inlinee's frame, which 749 // is then used to update the callsite count of the current frame. 750 // The two can happen in any order, hence here we are making sure 751 // `ContextWasInlined` is always set as expected. 752 // TODO: Note that the former does not always happen if no probes of the 753 // current frame has samples, and if the latter happens, we could lose the 754 // attribute. This should be fixed. 755 if (WasLeafInlined) 756 FProfile->getContext().setAttribute(ContextWasInlined); 757 return FProfile; 758 } 759 760 ContextTrieNode * 761 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context, 762 bool WasLeafInlined) { 763 ContextTrieNode *ContextNode = 764 ContextTracker.getOrCreateContextPath(Context, true); 765 getOrCreateFunctionSamples(ContextNode, WasLeafInlined); 766 return ContextNode; 767 } 768 769 void CSProfileGenerator::generateProfile() { 770 FunctionSamples::ProfileIsCS = true; 771 772 collectProfiledFunctions(); 773 774 if (Binary->usePseudoProbes()) 775 Binary->decodePseudoProbe(); 776 777 if (SampleCounters) { 778 if (Binary->usePseudoProbes()) { 779 generateProbeBasedProfile(); 780 } else { 781 generateLineNumBasedProfile(); 782 } 783 } 784 785 if (Binary->getTrackFuncContextSize()) 786 computeSizeForProfiledFunctions(); 787 788 postProcessProfiles(); 789 } 790 791 void CSProfileGenerator::computeSizeForProfiledFunctions() { 792 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 793 for (auto *Func : Binary->getProfiledFunctions()) 794 Binary->computeInlinedContextSizeForFunc(Func); 795 796 // Flush the symbolizer to save memory. 797 Binary->flushSymbolizer(); 798 } 799 800 void CSProfileGenerator::updateFunctionSamples() { 801 for (auto *Node : ContextTracker) { 802 FunctionSamples *FSamples = Node->getFunctionSamples(); 803 if (FSamples) { 804 if (UpdateTotalSamples) 805 FSamples->updateTotalSamples(); 806 FSamples->updateCallsiteSamples(); 807 } 808 } 809 } 810 811 void CSProfileGenerator::generateLineNumBasedProfile() { 812 for (const auto &CI : *SampleCounters) { 813 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); 814 815 ContextTrieNode *ContextNode = &getRootContext(); 816 // Sample context will be empty if the jump is an external-to-internal call 817 // pattern, the head samples should be added for the internal function. 818 if (!CtxKey->Context.empty()) { 819 // Get or create function profile for the range 820 ContextNode = 821 getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined); 822 // Fill in function body samples 823 populateBodySamplesForFunction(*ContextNode->getFunctionSamples(), 824 CI.second.RangeCounter); 825 } 826 // Fill in boundary sample counts as well as call site samples for calls 827 populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter); 828 } 829 // Fill in call site value sample for inlined calls and also use context to 830 // infer missing samples. Since we don't have call count for inlined 831 // functions, we estimate it from inlinee's profile using the entry of the 832 // body sample. 833 populateInferredFunctionSamples(getRootContext()); 834 835 updateFunctionSamples(); 836 } 837 838 void CSProfileGenerator::populateBodySamplesForFunction( 839 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 840 // Compute disjoint ranges first, so we can use MAX 841 // for calculating count for each location. 842 RangeSample Ranges; 843 findDisjointRanges(Ranges, RangeCounter); 844 for (const auto &Range : Ranges) { 845 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 846 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 847 uint64_t Count = Range.second; 848 // Disjoint ranges have introduce zero-filled gap that 849 // doesn't belong to current context, filter them out. 850 if (Count == 0) 851 continue; 852 853 InstructionPointer IP(Binary, RangeBegin, true); 854 // Disjoint ranges may have range in the middle of two instr, 855 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 856 // can be Addr1+1 to Addr2-1. We should ignore such range. 857 if (IP.Address > RangeEnd) 858 continue; 859 860 do { 861 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 862 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 863 if (LeafLoc) { 864 // Recording body sample for this specific context 865 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 866 FunctionProfile.addTotalSamples(Count); 867 } 868 } while (IP.advance() && IP.Address <= RangeEnd); 869 } 870 } 871 872 void CSProfileGenerator::populateBoundarySamplesForFunction( 873 ContextTrieNode *Node, const BranchSample &BranchCounters) { 874 875 for (const auto &Entry : BranchCounters) { 876 uint64_t SourceOffset = Entry.first.first; 877 uint64_t TargetOffset = Entry.first.second; 878 uint64_t Count = Entry.second; 879 assert(Count != 0 && "Unexpected zero weight branch"); 880 881 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 882 if (CalleeName.size() == 0) 883 continue; 884 885 ContextTrieNode *CallerNode = Node; 886 LineLocation CalleeCallSite(0, 0); 887 if (CallerNode != &getRootContext()) { 888 // Record called target sample and its count 889 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 890 if (LeafLoc) { 891 CallerNode->getFunctionSamples()->addCalledTargetSamples( 892 LeafLoc->Location.LineOffset, 893 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 894 Count); 895 // Record head sample for called target(callee) 896 CalleeCallSite = LeafLoc->Location; 897 } 898 } 899 900 ContextTrieNode *CalleeNode = 901 CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName); 902 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode); 903 CalleeProfile->addHeadSamples(Count); 904 } 905 } 906 907 void CSProfileGenerator::populateInferredFunctionSamples( 908 ContextTrieNode &Node) { 909 // There is no call jmp sample between the inliner and inlinee, we need to use 910 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s 911 // sample depends on child(inlinee)'s sample, so traverse the tree in 912 // post-order. 913 for (auto &It : Node.getAllChildContext()) 914 populateInferredFunctionSamples(It.second); 915 916 FunctionSamples *CalleeProfile = Node.getFunctionSamples(); 917 if (!CalleeProfile) 918 return; 919 // If we already have head sample counts, we must have value profile 920 // for call sites added already. Skip to avoid double counting. 921 if (CalleeProfile->getHeadSamples()) 922 return; 923 ContextTrieNode *CallerNode = Node.getParentContext(); 924 // If we don't have context, nothing to do for caller's call site. 925 // This could happen for entry point function. 926 if (CallerNode == &getRootContext()) 927 return; 928 929 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc(); 930 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); 931 // Since we don't have call count for inlined functions, we 932 // estimate it from inlinee's profile using entry body sample. 933 uint64_t EstimatedCallCount = CalleeProfile->getEntrySamples(); 934 // If we don't have samples with location, use 1 to indicate live. 935 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) 936 EstimatedCallCount = 1; 937 CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset, 938 CallerLeafFrameLoc.Discriminator, 939 Node.getFuncName(), EstimatedCallCount); 940 CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset, 941 CallerLeafFrameLoc.Discriminator, 942 EstimatedCallCount); 943 CallerProfile.addTotalSamples(EstimatedCallCount); 944 } 945 946 void CSProfileGenerator::convertToProfileMap( 947 ContextTrieNode &Node, SampleContextFrameVector &Context) { 948 FunctionSamples *FProfile = Node.getFunctionSamples(); 949 if (FProfile) { 950 Context.emplace_back(Node.getFuncName(), LineLocation(0, 0)); 951 // Save the new context for future references. 952 SampleContextFrames NewContext = *Contexts.insert(Context).first; 953 auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile)); 954 FunctionSamples &NewProfile = Ret.first->second; 955 NewProfile.getContext().setContext(NewContext); 956 Context.pop_back(); 957 } 958 959 for (auto &It : Node.getAllChildContext()) { 960 ContextTrieNode &ChildNode = It.second; 961 Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc()); 962 convertToProfileMap(ChildNode, Context); 963 Context.pop_back(); 964 } 965 } 966 967 void CSProfileGenerator::convertToProfileMap() { 968 assert(ProfileMap.empty() && 969 "ProfileMap should be empty before converting from the trie"); 970 assert(IsProfileValidOnTrie && 971 "Do not convert the trie twice, it's already destroyed"); 972 973 SampleContextFrameVector Context; 974 for (auto &It : getRootContext().getAllChildContext()) 975 convertToProfileMap(It.second, Context); 976 977 IsProfileValidOnTrie = false; 978 } 979 980 void CSProfileGenerator::postProcessProfiles() { 981 // Compute hot/cold threshold based on profile. This will be used for cold 982 // context profile merging/trimming. 983 computeSummaryAndThreshold(); 984 985 // Run global pre-inliner to adjust/merge context profile based on estimated 986 // inline decisions. 987 if (EnableCSPreInliner) { 988 ContextTracker.populateFuncToCtxtMap(); 989 CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); 990 // Turn off the profile merger by default unless it is explicitly enabled. 991 if (!CSProfMergeColdContext.getNumOccurrences()) 992 CSProfMergeColdContext = false; 993 } 994 995 convertToProfileMap(); 996 997 // Trim and merge cold context profile using cold threshold above. 998 if (TrimColdProfile || CSProfMergeColdContext) { 999 SampleContextTrimmer(ProfileMap) 1000 .trimAndMergeColdContextProfiles( 1001 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, 1002 CSProfMaxColdContextDepth, EnableCSPreInliner); 1003 } 1004 1005 // Merge function samples of CS profile to calculate profile density. 1006 sampleprof::SampleProfileMap ContextLessProfiles; 1007 for (const auto &I : ProfileMap) { 1008 ContextLessProfiles[I.second.getName()].merge(I.second); 1009 } 1010 1011 calculateAndShowDensity(ContextLessProfiles); 1012 if (GenCSNestedProfile) { 1013 CSProfileConverter CSConverter(ProfileMap); 1014 CSConverter.convertProfiles(); 1015 FunctionSamples::ProfileIsCS = false; 1016 } 1017 } 1018 1019 void ProfileGeneratorBase::computeSummaryAndThreshold( 1020 SampleProfileMap &Profiles) { 1021 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1022 Summary = Builder.computeSummaryForProfiles(Profiles); 1023 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 1024 (Summary->getDetailedSummary())); 1025 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 1026 (Summary->getDetailedSummary())); 1027 } 1028 1029 void CSProfileGenerator::computeSummaryAndThreshold() { 1030 // Always merge and use context-less profile map to compute summary. 1031 SampleProfileMap ContextLessProfiles; 1032 ContextTracker.createContextLessProfileMap(ContextLessProfiles); 1033 1034 // Set the flag below to avoid merging the profile again in 1035 // computeSummaryAndThreshold 1036 FunctionSamples::ProfileIsCS = false; 1037 assert( 1038 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && 1039 "Don't set --profile-summary-contextless to false for profile " 1040 "generation"); 1041 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles); 1042 // Recover the old value. 1043 FunctionSamples::ProfileIsCS = true; 1044 } 1045 1046 void ProfileGeneratorBase::extractProbesFromRange( 1047 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 1048 bool FindDisjointRanges) { 1049 const RangeSample *PRanges = &RangeCounter; 1050 RangeSample Ranges; 1051 if (FindDisjointRanges) { 1052 findDisjointRanges(Ranges, RangeCounter); 1053 PRanges = &Ranges; 1054 } 1055 1056 for (const auto &Range : *PRanges) { 1057 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 1058 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 1059 uint64_t Count = Range.second; 1060 1061 InstructionPointer IP(Binary, RangeBegin, true); 1062 // Disjoint ranges may have range in the middle of two instr, 1063 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 1064 // can be Addr1+1 to Addr2-1. We should ignore such range. 1065 if (IP.Address > RangeEnd) 1066 continue; 1067 1068 do { 1069 const AddressProbesMap &Address2ProbesMap = 1070 Binary->getAddress2ProbesMap(); 1071 auto It = Address2ProbesMap.find(IP.Address); 1072 if (It != Address2ProbesMap.end()) { 1073 for (const auto &Probe : It->second) { 1074 ProbeCounter[&Probe] += Count; 1075 } 1076 } 1077 } while (IP.advance() && IP.Address <= RangeEnd); 1078 } 1079 } 1080 1081 static void 1082 extractPrefixContextStack(SampleContextFrameVector &ContextStack, 1083 const SmallVectorImpl<uint64_t> &Addresses, 1084 ProfiledBinary *Binary) { 1085 SmallVector<const MCDecodedPseudoProbe *, 16> Probes; 1086 for (auto Addr : reverse(Addresses)) { 1087 const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr); 1088 // These could be the cases when a probe is not found at a calliste. Cutting 1089 // off the context from here since the inliner will not know how to consume 1090 // a context with unknown callsites. 1091 // 1. for functions that are not sampled when 1092 // --decode-probe-for-profiled-functions-only is on. 1093 // 2. for a merged callsite. Callsite merging may cause the loss of original 1094 // probe IDs. 1095 // 3. for an external callsite. 1096 if (!CallProbe) 1097 break; 1098 Probes.push_back(CallProbe); 1099 } 1100 1101 std::reverse(Probes.begin(), Probes.end()); 1102 1103 // Extract context stack for reusing, leaf context stack will be added 1104 // compressed while looking up function profile. 1105 for (const auto *P : Probes) { 1106 Binary->getInlineContextForProbe(P, ContextStack, true); 1107 } 1108 } 1109 1110 void CSProfileGenerator::generateProbeBasedProfile() { 1111 // Enable pseudo probe functionalities in SampleProf 1112 FunctionSamples::ProfileIsProbeBased = true; 1113 for (const auto &CI : *SampleCounters) { 1114 const AddrBasedCtxKey *CtxKey = 1115 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()); 1116 SampleContextFrameVector ContextStack; 1117 extractPrefixContextStack(ContextStack, CtxKey->Context, Binary); 1118 // Fill in function body samples from probes, also infer caller's samples 1119 // from callee's probe 1120 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 1121 // Fill in boundary samples for a call probe 1122 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 1123 } 1124 } 1125 1126 void CSProfileGenerator::populateBodySamplesWithProbes( 1127 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 1128 ProbeCounterMap ProbeCounter; 1129 // Extract the top frame probes by looking up each address among the range in 1130 // the Address2ProbeMap 1131 extractProbesFromRange(RangeCounter, ProbeCounter); 1132 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 1133 std::unordered_set<FunctionSamples *>> 1134 FrameSamples; 1135 for (const auto &PI : ProbeCounter) { 1136 const MCDecodedPseudoProbe *Probe = PI.first; 1137 uint64_t Count = PI.second; 1138 // Disjoint ranges have introduce zero-filled gap that 1139 // doesn't belong to current context, filter them out. 1140 if (!Probe->isBlock() || Count == 0) 1141 continue; 1142 1143 ContextTrieNode *ContextNode = 1144 getContextNodeForLeafProbe(ContextStack, Probe); 1145 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); 1146 // Record the current frame and FunctionProfile whenever samples are 1147 // collected for non-danglie probes. This is for reporting all of the 1148 // zero count probes of the frame later. 1149 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 1150 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 1151 FunctionProfile.addTotalSamples(Count); 1152 if (Probe->isEntry()) { 1153 FunctionProfile.addHeadSamples(Count); 1154 // Look up for the caller's function profile 1155 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 1156 ContextTrieNode *CallerNode = ContextNode->getParentContext(); 1157 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) { 1158 // Since the context id will be compressed, we have to use callee's 1159 // context id to infer caller's context id to ensure they share the 1160 // same context prefix. 1161 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset; 1162 assert(CallerIndex && 1163 "Inferred caller's location index shouldn't be zero!"); 1164 FunctionSamples &CallerProfile = 1165 *getOrCreateFunctionSamples(CallerNode); 1166 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 1167 CallerProfile.addBodySamples(CallerIndex, 0, Count); 1168 CallerProfile.addTotalSamples(Count); 1169 CallerProfile.addCalledTargetSamples(CallerIndex, 0, 1170 ContextNode->getFuncName(), Count); 1171 } 1172 } 1173 } 1174 1175 // Assign zero count for remaining probes without sample hits to 1176 // differentiate from probes optimized away, of which the counts are unknown 1177 // and will be inferred by the compiler. 1178 for (auto &I : FrameSamples) { 1179 for (auto *FunctionProfile : I.second) { 1180 for (auto *Probe : I.first->getProbes()) { 1181 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 1182 } 1183 } 1184 } 1185 } 1186 1187 void CSProfileGenerator::populateBoundarySamplesWithProbes( 1188 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 1189 for (const auto &BI : BranchCounter) { 1190 uint64_t SourceOffset = BI.first.first; 1191 uint64_t TargetOffset = BI.first.second; 1192 uint64_t Count = BI.second; 1193 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 1194 const MCDecodedPseudoProbe *CallProbe = 1195 Binary->getCallProbeForAddr(SourceAddress); 1196 if (CallProbe == nullptr) 1197 continue; 1198 FunctionSamples &FunctionProfile = 1199 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 1200 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 1201 FunctionProfile.addTotalSamples(Count); 1202 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 1203 if (CalleeName.size() == 0) 1204 continue; 1205 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 1206 Count); 1207 } 1208 } 1209 1210 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( 1211 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 1212 1213 // Explicitly copy the context for appending the leaf context 1214 SampleContextFrameVector NewContextStack(ContextStack.begin(), 1215 ContextStack.end()); 1216 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 1217 // For leaf inlined context with the top frame, we should strip off the top 1218 // frame's probe id, like: 1219 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 1220 auto LeafFrame = NewContextStack.back(); 1221 LeafFrame.Location = LineLocation(0, 0); 1222 NewContextStack.pop_back(); 1223 // Compress the context string except for the leaf frame 1224 CSProfileGenerator::compressRecursionContext(NewContextStack); 1225 CSProfileGenerator::trimContext(NewContextStack); 1226 NewContextStack.push_back(LeafFrame); 1227 1228 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 1229 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 1230 ContextTrieNode *ContextNode = 1231 getOrCreateContextNode(NewContextStack, WasLeafInlined); 1232 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); 1233 return ContextNode; 1234 } 1235 1236 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 1237 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 1238 return *getContextNodeForLeafProbe(ContextStack, LeafProbe) 1239 ->getFunctionSamples(); 1240 } 1241 1242 } // end namespace sampleprof 1243 } // end namespace llvm 1244