1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "ErrorHandling.h" 11 #include "ProfiledBinary.h" 12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 13 #include "llvm/ProfileData/ProfileCommon.h" 14 #include <float.h> 15 #include <unordered_set> 16 17 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 18 cl::Required, 19 cl::desc("Output profile file")); 20 static cl::alias OutputA("o", cl::desc("Alias for --output"), 21 cl::aliasopt(OutputFilename)); 22 23 static cl::opt<SampleProfileFormat> OutputFormat( 24 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 25 cl::values( 26 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 27 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 28 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 29 clEnumValN(SPF_Text, "text", "Text encoding"), 30 clEnumValN(SPF_GCC, "gcc", 31 "GCC encoding (only meaningful for -sample)"))); 32 33 cl::opt<bool> UseMD5( 34 "use-md5", cl::init(false), cl::Hidden, 35 cl::desc("Use md5 to represent function names in the output profile (only " 36 "meaningful for -extbinary)")); 37 38 static cl::opt<bool> PopulateProfileSymbolList( 39 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 40 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 41 42 static cl::opt<bool> FillZeroForAllFuncs( 43 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 44 cl::desc("Attribute all functions' range with zero count " 45 "even it's not hit by any samples.")); 46 47 static cl::opt<int32_t, true> RecursionCompression( 48 "compress-recursion", 49 cl::desc("Compressing recursion by deduplicating adjacent frame " 50 "sequences up to the specified size. -1 means no size limit."), 51 cl::Hidden, 52 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 53 54 static cl::opt<bool> 55 TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore, 56 cl::desc("If the total count of the profile is smaller " 57 "than threshold, it will be trimmed.")); 58 59 static cl::opt<bool> CSProfMergeColdContext( 60 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 61 cl::desc("If the total count of context profile is smaller than " 62 "the threshold, it will be merged into context-less base " 63 "profile.")); 64 65 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 66 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 67 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 68 "context-less base profile")); 69 70 static cl::opt<int, true> CSProfMaxContextDepth( 71 "csprof-max-context-depth", cl::ZeroOrMore, 72 cl::desc("Keep the last K contexts while merging profile. -1 means no " 73 "depth limit."), 74 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 75 76 static cl::opt<double> HotFunctionDensityThreshold( 77 "hot-function-density-threshold", llvm::cl::init(1000), 78 llvm::cl::desc( 79 "specify density threshold for hot functions (default: 1000)"), 80 llvm::cl::Optional); 81 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 82 llvm::cl::desc("show profile density details"), 83 llvm::cl::Optional); 84 85 static cl::opt<bool> UpdateTotalSamples( 86 "update-total-samples", llvm::cl::init(false), 87 llvm::cl::desc( 88 "Update total samples by accumulating all its body samples."), 89 llvm::cl::Optional); 90 91 extern cl::opt<int> ProfileSummaryCutoffHot; 92 93 static cl::opt<bool> GenCSNestedProfile( 94 "gen-cs-nested-profile", cl::Hidden, cl::init(false), 95 cl::desc("Generate nested function profiles for CSSPGO")); 96 97 using namespace llvm; 98 using namespace sampleprof; 99 100 namespace llvm { 101 namespace sampleprof { 102 103 // Initialize the MaxCompressionSize to -1 which means no size limit 104 int32_t CSProfileGenerator::MaxCompressionSize = -1; 105 106 int CSProfileGenerator::MaxContextDepth = -1; 107 108 bool ProfileGeneratorBase::UseFSDiscriminator = false; 109 110 std::unique_ptr<ProfileGeneratorBase> 111 ProfileGeneratorBase::create(ProfiledBinary *Binary, 112 const ContextSampleCounterMap &SampleCounters, 113 bool ProfileIsCSFlat) { 114 std::unique_ptr<ProfileGeneratorBase> Generator; 115 if (ProfileIsCSFlat) { 116 if (Binary->useFSDiscriminator()) 117 exitWithError("FS discriminator is not supported in CS profile."); 118 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 119 } else { 120 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 121 } 122 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 123 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 124 125 return Generator; 126 } 127 128 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 129 SampleProfileMap &ProfileMap) { 130 // Populate profile symbol list if extended binary format is used. 131 ProfileSymbolList SymbolList; 132 133 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 134 Binary->populateSymbolListFromDWARF(SymbolList); 135 Writer->setProfileSymbolList(&SymbolList); 136 } 137 138 if (std::error_code EC = Writer->write(ProfileMap)) 139 exitWithError(std::move(EC)); 140 } 141 142 void ProfileGeneratorBase::write() { 143 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 144 if (std::error_code EC = WriterOrErr.getError()) 145 exitWithError(EC, OutputFilename); 146 147 if (UseMD5) { 148 if (OutputFormat != SPF_Ext_Binary) 149 WithColor::warning() << "-use-md5 is ignored. Specify " 150 "--format=extbinary to enable it\n"; 151 else 152 WriterOrErr.get()->setUseMD5(); 153 } 154 155 write(std::move(WriterOrErr.get()), ProfileMap); 156 } 157 158 void ProfileGeneratorBase::showDensitySuggestion(double Density) { 159 if (Density == 0.0) 160 WithColor::warning() << "The --profile-summary-cutoff-hot option may be " 161 "set too low. Please check your command.\n"; 162 else if (Density < HotFunctionDensityThreshold) 163 WithColor::warning() 164 << "AutoFDO is estimated to optimize better with " 165 << format("%.1f", HotFunctionDensityThreshold / Density) 166 << "x more samples. Please consider increasing sampling rate or " 167 "profiling for longer duration to get more samples.\n"; 168 169 if (ShowDensity) 170 outs() << "Minimum profile density for hot functions with top " 171 << format("%.2f", 172 static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 173 10000) 174 << "% total samples: " << format("%.1f", Density) << "\n"; 175 } 176 177 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, 178 uint64_t HotCntThreshold) { 179 double Density = DBL_MAX; 180 std::vector<const FunctionSamples *> HotFuncs; 181 for (auto &I : Profiles) { 182 auto &FuncSamples = I.second; 183 if (FuncSamples.getTotalSamples() < HotCntThreshold) 184 continue; 185 HotFuncs.emplace_back(&FuncSamples); 186 } 187 188 for (auto *FuncSamples : HotFuncs) { 189 auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); 190 if (!Func) 191 continue; 192 uint64_t FuncSize = Func->getFuncSize(); 193 if (FuncSize == 0) 194 continue; 195 Density = 196 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / 197 FuncSize); 198 } 199 200 return Density == DBL_MAX ? 0.0 : Density; 201 } 202 203 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 204 const RangeSample &Ranges) { 205 206 /* 207 Regions may overlap with each other. Using the boundary info, find all 208 disjoint ranges and their sample count. BoundaryPoint contains the count 209 multiple samples begin/end at this points. 210 211 |<--100-->| Sample1 212 |<------200------>| Sample2 213 A B C 214 215 In the example above, 216 Sample1 begins at A, ends at B, its value is 100. 217 Sample2 beings at A, ends at C, its value is 200. 218 For A, BeginCount is the sum of sample begins at A, which is 300 and no 219 samples ends at A, so EndCount is 0. 220 Then boundary points A, B, and C with begin/end counts are: 221 A: (300, 0) 222 B: (0, 100) 223 C: (0, 200) 224 */ 225 struct BoundaryPoint { 226 // Sum of sample counts beginning at this point 227 uint64_t BeginCount = UINT64_MAX; 228 // Sum of sample counts ending at this point 229 uint64_t EndCount = UINT64_MAX; 230 // Is the begin point of a zero range. 231 bool IsZeroRangeBegin = false; 232 // Is the end point of a zero range. 233 bool IsZeroRangeEnd = false; 234 235 void addBeginCount(uint64_t Count) { 236 if (BeginCount == UINT64_MAX) 237 BeginCount = 0; 238 BeginCount += Count; 239 } 240 241 void addEndCount(uint64_t Count) { 242 if (EndCount == UINT64_MAX) 243 EndCount = 0; 244 EndCount += Count; 245 } 246 }; 247 248 /* 249 For the above example. With boundary points, follwing logic finds two 250 disjoint region of 251 252 [A,B]: 300 253 [B+1,C]: 200 254 255 If there is a boundary point that both begin and end, the point itself 256 becomes a separate disjoint region. For example, if we have original 257 ranges of 258 259 |<--- 100 --->| 260 |<--- 200 --->| 261 A B C 262 263 there are three boundary points with their begin/end counts of 264 265 A: (100, 0) 266 B: (200, 100) 267 C: (0, 200) 268 269 the disjoint ranges would be 270 271 [A, B-1]: 100 272 [B, B]: 300 273 [B+1, C]: 200. 274 275 Example for zero value range: 276 277 |<--- 100 --->| 278 |<--- 200 --->| 279 |<--------------- 0 ----------------->| 280 A B C D E F 281 282 [A, B-1] : 0 283 [B, C] : 100 284 [C+1, D-1]: 0 285 [D, E] : 200 286 [E+1, F] : 0 287 */ 288 std::map<uint64_t, BoundaryPoint> Boundaries; 289 290 for (const auto &Item : Ranges) { 291 assert(Item.first.first <= Item.first.second && 292 "Invalid instruction range"); 293 auto &BeginPoint = Boundaries[Item.first.first]; 294 auto &EndPoint = Boundaries[Item.first.second]; 295 uint64_t Count = Item.second; 296 297 BeginPoint.addBeginCount(Count); 298 EndPoint.addEndCount(Count); 299 if (Count == 0) { 300 BeginPoint.IsZeroRangeBegin = true; 301 EndPoint.IsZeroRangeEnd = true; 302 } 303 } 304 305 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 306 // and the next valid address 307 uint64_t BeginAddress = UINT64_MAX; 308 int ZeroRangeDepth = 0; 309 uint64_t Count = 0; 310 for (const auto &Item : Boundaries) { 311 uint64_t Address = Item.first; 312 const BoundaryPoint &Point = Item.second; 313 if (Point.BeginCount != UINT64_MAX) { 314 if (BeginAddress != UINT64_MAX) 315 DisjointRanges[{BeginAddress, Address - 1}] = Count; 316 Count += Point.BeginCount; 317 BeginAddress = Address; 318 ZeroRangeDepth += Point.IsZeroRangeBegin; 319 } 320 if (Point.EndCount != UINT64_MAX) { 321 assert((BeginAddress != UINT64_MAX) && 322 "First boundary point cannot be 'end' point"); 323 DisjointRanges[{BeginAddress, Address}] = Count; 324 assert(Count >= Point.EndCount && "Mismatched live ranges"); 325 Count -= Point.EndCount; 326 BeginAddress = Address + 1; 327 ZeroRangeDepth -= Point.IsZeroRangeEnd; 328 // If the remaining count is zero and it's no longer in a zero range, this 329 // means we consume all the ranges before, thus mark BeginAddress as 330 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 331 // [<---- 10 ---->] 332 // [<---- 20 ---->] 333 // A B C D 334 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 335 // have the [B+1, C-1] zero range. 336 if (Count == 0 && ZeroRangeDepth == 0) 337 BeginAddress = UINT64_MAX; 338 } 339 } 340 } 341 342 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 343 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 344 uint64_t Count) { 345 // Use the maximum count of samples with same line location 346 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 347 348 // Use duplication factor to compensated for loop unroll/vectorization. 349 // Note that this is only needed when we're taking MAX of the counts at 350 // the location instead of SUM. 351 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 352 353 ErrorOr<uint64_t> R = 354 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 355 356 uint64_t PreviousCount = R ? R.get() : 0; 357 if (PreviousCount <= Count) { 358 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 359 Count - PreviousCount); 360 } 361 } 362 363 void ProfileGeneratorBase::updateTotalSamples() { 364 if (!UpdateTotalSamples) 365 return; 366 367 for (auto &Item : ProfileMap) { 368 FunctionSamples &FunctionProfile = Item.second; 369 FunctionProfile.updateTotalSamples(); 370 } 371 } 372 373 FunctionSamples & 374 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 375 SampleContext Context(FuncName); 376 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 377 if (Ret.second) { 378 FunctionSamples &FProfile = Ret.first->second; 379 FProfile.setContext(Context); 380 } 381 return Ret.first->second; 382 } 383 384 void ProfileGenerator::generateProfile() { 385 if (Binary->usePseudoProbes()) { 386 generateProbeBasedProfile(); 387 } else { 388 generateLineNumBasedProfile(); 389 } 390 postProcessProfiles(); 391 } 392 393 void ProfileGenerator::postProcessProfiles() { 394 computeSummaryAndThreshold(); 395 trimColdProfiles(ProfileMap, ColdCountThreshold); 396 calculateAndShowDensity(ProfileMap); 397 } 398 399 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, 400 uint64_t ColdCntThreshold) { 401 if (!TrimColdProfile) 402 return; 403 404 // Move cold profiles into a tmp container. 405 std::vector<SampleContext> ColdProfiles; 406 for (const auto &I : ProfileMap) { 407 if (I.second.getTotalSamples() < ColdCntThreshold) 408 ColdProfiles.emplace_back(I.first); 409 } 410 411 // Remove the cold profile from ProfileMap. 412 for (const auto &I : ColdProfiles) 413 ProfileMap.erase(I); 414 } 415 416 void ProfileGenerator::generateLineNumBasedProfile() { 417 assert(SampleCounters.size() == 1 && 418 "Must have one entry for profile generation."); 419 const SampleCounter &SC = SampleCounters.begin()->second; 420 // Fill in function body samples 421 populateBodySamplesForAllFunctions(SC.RangeCounter); 422 // Fill in boundary sample counts as well as call site samples for calls 423 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 424 425 updateTotalSamples(); 426 } 427 428 void ProfileGenerator::generateProbeBasedProfile() { 429 assert(SampleCounters.size() == 1 && 430 "Must have one entry for profile generation."); 431 // Enable pseudo probe functionalities in SampleProf 432 FunctionSamples::ProfileIsProbeBased = true; 433 const SampleCounter &SC = SampleCounters.begin()->second; 434 // Fill in function body samples 435 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); 436 // Fill in boundary sample counts as well as call site samples for calls 437 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); 438 439 updateTotalSamples(); 440 } 441 442 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( 443 const RangeSample &RangeCounter) { 444 ProbeCounterMap ProbeCounter; 445 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside 446 // extractProbesFromRange. 447 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false); 448 449 for (const auto &PI : ProbeCounter) { 450 const MCDecodedPseudoProbe *Probe = PI.first; 451 uint64_t Count = PI.second; 452 SampleContextFrameVector FrameVec; 453 Binary->getInlineContextForProbe(Probe, FrameVec, true); 454 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count); 455 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 456 if (Probe->isEntry()) 457 FunctionProfile.addHeadSamples(Count); 458 } 459 } 460 461 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( 462 const BranchSample &BranchCounters) { 463 for (const auto &Entry : BranchCounters) { 464 uint64_t SourceOffset = Entry.first.first; 465 uint64_t TargetOffset = Entry.first.second; 466 uint64_t Count = Entry.second; 467 assert(Count != 0 && "Unexpected zero weight branch"); 468 469 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 470 if (CalleeName.size() == 0) 471 continue; 472 473 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 474 const MCDecodedPseudoProbe *CallProbe = 475 Binary->getCallProbeForAddr(SourceAddress); 476 if (CallProbe == nullptr) 477 continue; 478 479 // Record called target sample and its count. 480 SampleContextFrameVector FrameVec; 481 Binary->getInlineContextForProbe(CallProbe, FrameVec, true); 482 483 if (!FrameVec.empty()) { 484 FunctionSamples &FunctionProfile = 485 getLeafProfileAndAddTotalSamples(FrameVec, 0); 486 FunctionProfile.addCalledTargetSamples( 487 FrameVec.back().Location.LineOffset, 0, CalleeName, Count); 488 } 489 } 490 } 491 492 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 493 const SampleContextFrameVector &FrameVec, uint64_t Count) { 494 // Get top level profile 495 FunctionSamples *FunctionProfile = 496 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 497 FunctionProfile->addTotalSamples(Count); 498 if (Binary->usePseudoProbes()) { 499 const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); 500 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 501 } 502 503 for (size_t I = 1; I < FrameVec.size(); I++) { 504 LineLocation Callsite( 505 FrameVec[I - 1].Location.LineOffset, 506 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 507 FunctionSamplesMap &SamplesMap = 508 FunctionProfile->functionSamplesAt(Callsite); 509 auto Ret = 510 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 511 if (Ret.second) { 512 SampleContext Context(FrameVec[I].FuncName); 513 Ret.first->second.setContext(Context); 514 } 515 FunctionProfile = &Ret.first->second; 516 FunctionProfile->addTotalSamples(Count); 517 if (Binary->usePseudoProbes()) { 518 const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); 519 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 520 } 521 } 522 523 return *FunctionProfile; 524 } 525 526 RangeSample 527 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 528 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 529 if (FillZeroForAllFuncs) { 530 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 531 for (auto &R : FuncI.second.Ranges) { 532 Ranges[{R.first, R.second - 1}] += 0; 533 } 534 } 535 } else { 536 // For each range, we search for all ranges of the function it belongs to 537 // and initialize it with zero count, so it remains zero if doesn't hit any 538 // samples. This is to be consistent with compiler that interpret zero count 539 // as unexecuted(cold). 540 for (const auto &I : RangeCounter) { 541 uint64_t StartOffset = I.first.first; 542 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 543 Ranges[{Range.first, Range.second - 1}] += 0; 544 } 545 } 546 RangeSample DisjointRanges; 547 findDisjointRanges(DisjointRanges, Ranges); 548 return DisjointRanges; 549 } 550 551 void ProfileGenerator::populateBodySamplesForAllFunctions( 552 const RangeSample &RangeCounter) { 553 for (const auto &Range : preprocessRangeCounter(RangeCounter)) { 554 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 555 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 556 uint64_t Count = Range.second; 557 558 InstructionPointer IP(Binary, RangeBegin, true); 559 // Disjoint ranges may have range in the middle of two instr, 560 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 561 // can be Addr1+1 to Addr2-1. We should ignore such range. 562 if (IP.Address > RangeEnd) 563 continue; 564 565 do { 566 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 567 const SampleContextFrameVector &FrameVec = 568 Binary->getFrameLocationStack(Offset); 569 if (!FrameVec.empty()) { 570 // FIXME: As accumulating total count per instruction caused some 571 // regression, we changed to accumulate total count per byte as a 572 // workaround. Tuning hotness threshold on the compiler side might be 573 // necessary in the future. 574 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( 575 FrameVec, Count * Binary->getInstSize(Offset)); 576 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 577 Count); 578 } 579 } while (IP.advance() && IP.Address <= RangeEnd); 580 } 581 } 582 583 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { 584 // Get the function range by branch target if it's a call branch. 585 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); 586 587 // We won't accumulate sample count for a range whose start is not the real 588 // function entry such as outlined function or inner labels. 589 if (!FRange || !FRange->IsFuncEntry) 590 return StringRef(); 591 592 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 593 } 594 595 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 596 const BranchSample &BranchCounters) { 597 for (const auto &Entry : BranchCounters) { 598 uint64_t SourceOffset = Entry.first.first; 599 uint64_t TargetOffset = Entry.first.second; 600 uint64_t Count = Entry.second; 601 assert(Count != 0 && "Unexpected zero weight branch"); 602 603 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 604 if (CalleeName.size() == 0) 605 continue; 606 // Record called target sample and its count. 607 const SampleContextFrameVector &FrameVec = 608 Binary->getFrameLocationStack(SourceOffset); 609 if (!FrameVec.empty()) { 610 FunctionSamples &FunctionProfile = 611 getLeafProfileAndAddTotalSamples(FrameVec, 0); 612 FunctionProfile.addCalledTargetSamples( 613 FrameVec.back().Location.LineOffset, 614 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 615 CalleeName, Count); 616 } 617 // Add head samples for callee. 618 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 619 CalleeProfile.addHeadSamples(Count); 620 } 621 } 622 623 void ProfileGeneratorBase::calculateAndShowDensity( 624 const SampleProfileMap &Profiles) { 625 double Density = calculateDensity(Profiles, HotCountThreshold); 626 showDensitySuggestion(Density); 627 } 628 629 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( 630 const SampleContextFrameVector &Context, bool WasLeafInlined) { 631 auto I = ProfileMap.find(SampleContext(Context)); 632 if (I == ProfileMap.end()) { 633 // Save the new context for future references. 634 SampleContextFrames NewContext = *Contexts.insert(Context).first; 635 SampleContext FContext(NewContext, RawContext); 636 auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); 637 if (WasLeafInlined) 638 FContext.setAttribute(ContextWasInlined); 639 FunctionSamples &FProfile = Ret.first->second; 640 FProfile.setContext(FContext); 641 return Ret.first->second; 642 } 643 return I->second; 644 } 645 646 void CSProfileGenerator::generateProfile() { 647 FunctionSamples::ProfileIsCSFlat = true; 648 649 if (Binary->getTrackFuncContextSize()) 650 computeSizeForProfiledFunctions(); 651 652 if (Binary->usePseudoProbes()) { 653 generateProbeBasedProfile(); 654 } else { 655 generateLineNumBasedProfile(); 656 } 657 postProcessProfiles(); 658 } 659 660 void CSProfileGenerator::computeSizeForProfiledFunctions() { 661 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 662 663 // Go through all the ranges in the CS counters, use the start of the range to 664 // look up the function it belongs and record the function. 665 for (const auto &CI : SampleCounters) { 666 for (const auto &Item : CI.second.RangeCounter) { 667 // FIXME: Filter the bogus crossing function range. 668 uint64_t StartOffset = Item.first.first; 669 if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) 670 ProfiledFunctions.insert(FRange->Func); 671 } 672 } 673 674 for (auto *Func : ProfiledFunctions) 675 Binary->computeInlinedContextSizeForFunc(Func); 676 677 // Flush the symbolizer to save memory. 678 Binary->flushSymbolizer(); 679 } 680 681 void CSProfileGenerator::generateLineNumBasedProfile() { 682 for (const auto &CI : SampleCounters) { 683 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); 684 685 // Get or create function profile for the range 686 FunctionSamples &FunctionProfile = 687 getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); 688 689 // Fill in function body samples 690 populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); 691 // Fill in boundary sample counts as well as call site samples for calls 692 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, 693 CI.second.BranchCounter); 694 } 695 // Fill in call site value sample for inlined calls and also use context to 696 // infer missing samples. Since we don't have call count for inlined 697 // functions, we estimate it from inlinee's profile using the entry of the 698 // body sample. 699 populateInferredFunctionSamples(); 700 701 updateTotalSamples(); 702 } 703 704 void CSProfileGenerator::populateBodySamplesForFunction( 705 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 706 // Compute disjoint ranges first, so we can use MAX 707 // for calculating count for each location. 708 RangeSample Ranges; 709 findDisjointRanges(Ranges, RangeCounter); 710 for (const auto &Range : Ranges) { 711 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 712 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 713 uint64_t Count = Range.second; 714 // Disjoint ranges have introduce zero-filled gap that 715 // doesn't belong to current context, filter them out. 716 if (Count == 0) 717 continue; 718 719 InstructionPointer IP(Binary, RangeBegin, true); 720 // Disjoint ranges may have range in the middle of two instr, 721 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 722 // can be Addr1+1 to Addr2-1. We should ignore such range. 723 if (IP.Address > RangeEnd) 724 continue; 725 726 do { 727 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 728 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 729 if (LeafLoc.hasValue()) { 730 // Recording body sample for this specific context 731 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 732 FunctionProfile.addTotalSamples(Count); 733 } 734 } while (IP.advance() && IP.Address <= RangeEnd); 735 } 736 } 737 738 void CSProfileGenerator::populateBoundarySamplesForFunction( 739 SampleContextFrames ContextId, FunctionSamples &FunctionProfile, 740 const BranchSample &BranchCounters) { 741 742 for (const auto &Entry : BranchCounters) { 743 uint64_t SourceOffset = Entry.first.first; 744 uint64_t TargetOffset = Entry.first.second; 745 uint64_t Count = Entry.second; 746 assert(Count != 0 && "Unexpected zero weight branch"); 747 748 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 749 if (CalleeName.size() == 0) 750 continue; 751 752 // Record called target sample and its count 753 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 754 if (!LeafLoc.hasValue()) 755 continue; 756 FunctionProfile.addCalledTargetSamples( 757 LeafLoc->Location.LineOffset, 758 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 759 Count); 760 761 // Record head sample for called target(callee) 762 SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); 763 assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && 764 "Leaf function name doesn't match"); 765 CalleeCtx.back() = *LeafLoc; 766 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); 767 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); 768 CalleeProfile.addHeadSamples(Count); 769 } 770 } 771 772 static SampleContextFrame 773 getCallerContext(SampleContextFrames CalleeContext, 774 SampleContextFrameVector &CallerContext) { 775 assert(CalleeContext.size() > 1 && "Unexpected empty context"); 776 CalleeContext = CalleeContext.drop_back(); 777 CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); 778 SampleContextFrame CallerFrame = CallerContext.back(); 779 CallerContext.back().Location = LineLocation(0, 0); 780 return CallerFrame; 781 } 782 783 void CSProfileGenerator::populateInferredFunctionSamples() { 784 for (const auto &Item : ProfileMap) { 785 const auto &CalleeContext = Item.first; 786 const FunctionSamples &CalleeProfile = Item.second; 787 788 // If we already have head sample counts, we must have value profile 789 // for call sites added already. Skip to avoid double counting. 790 if (CalleeProfile.getHeadSamples()) 791 continue; 792 // If we don't have context, nothing to do for caller's call site. 793 // This could happen for entry point function. 794 if (CalleeContext.isBaseContext()) 795 continue; 796 797 // Infer Caller's frame loc and context ID through string splitting 798 SampleContextFrameVector CallerContextId; 799 SampleContextFrame &&CallerLeafFrameLoc = 800 getCallerContext(CalleeContext.getContextFrames(), CallerContextId); 801 SampleContextFrames CallerContext(CallerContextId); 802 803 // It's possible that we haven't seen any sample directly in the caller, 804 // in which case CallerProfile will not exist. But we can't modify 805 // ProfileMap while iterating it. 806 // TODO: created function profile for those callers too 807 if (ProfileMap.find(CallerContext) == ProfileMap.end()) 808 continue; 809 FunctionSamples &CallerProfile = ProfileMap[CallerContext]; 810 811 // Since we don't have call count for inlined functions, we 812 // estimate it from inlinee's profile using entry body sample. 813 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 814 // If we don't have samples with location, use 1 to indicate live. 815 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 816 EstimatedCallCount = 1; 817 CallerProfile.addCalledTargetSamples( 818 CallerLeafFrameLoc.Location.LineOffset, 819 CallerLeafFrameLoc.Location.Discriminator, 820 CalleeProfile.getContext().getName(), EstimatedCallCount); 821 CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, 822 CallerLeafFrameLoc.Location.Discriminator, 823 EstimatedCallCount); 824 CallerProfile.addTotalSamples(EstimatedCallCount); 825 } 826 } 827 828 void CSProfileGenerator::postProcessProfiles() { 829 // Compute hot/cold threshold based on profile. This will be used for cold 830 // context profile merging/trimming. 831 computeSummaryAndThreshold(); 832 833 // Run global pre-inliner to adjust/merge context profile based on estimated 834 // inline decisions. 835 if (EnableCSPreInliner) { 836 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) 837 .run(); 838 // Turn off the profile merger by default unless it is explicitly enabled. 839 if (!CSProfMergeColdContext.getNumOccurrences()) 840 CSProfMergeColdContext = false; 841 } 842 843 // Trim and merge cold context profile using cold threshold above. 844 if (TrimColdProfile || CSProfMergeColdContext) { 845 SampleContextTrimmer(ProfileMap) 846 .trimAndMergeColdContextProfiles( 847 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, 848 CSProfMaxColdContextDepth, EnableCSPreInliner); 849 } 850 851 // Merge function samples of CS profile to calculate profile density. 852 sampleprof::SampleProfileMap ContextLessProfiles; 853 for (const auto &I : ProfileMap) { 854 ContextLessProfiles[I.second.getName()].merge(I.second); 855 } 856 857 calculateAndShowDensity(ContextLessProfiles); 858 if (GenCSNestedProfile) { 859 CSProfileConverter CSConverter(ProfileMap); 860 CSConverter.convertProfiles(); 861 FunctionSamples::ProfileIsCSFlat = false; 862 FunctionSamples::ProfileIsCSNested = EnableCSPreInliner; 863 } 864 } 865 866 void ProfileGeneratorBase::computeSummaryAndThreshold() { 867 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 868 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 869 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 870 (Summary->getDetailedSummary())); 871 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 872 (Summary->getDetailedSummary())); 873 } 874 875 void ProfileGeneratorBase::extractProbesFromRange( 876 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 877 bool FindDisjointRanges) { 878 const RangeSample *PRanges = &RangeCounter; 879 RangeSample Ranges; 880 if (FindDisjointRanges) { 881 findDisjointRanges(Ranges, RangeCounter); 882 PRanges = &Ranges; 883 } 884 885 for (const auto &Range : *PRanges) { 886 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 887 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 888 uint64_t Count = Range.second; 889 890 InstructionPointer IP(Binary, RangeBegin, true); 891 // Disjoint ranges may have range in the middle of two instr, 892 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 893 // can be Addr1+1 to Addr2-1. We should ignore such range. 894 if (IP.Address > RangeEnd) 895 continue; 896 897 do { 898 const AddressProbesMap &Address2ProbesMap = 899 Binary->getAddress2ProbesMap(); 900 auto It = Address2ProbesMap.find(IP.Address); 901 if (It != Address2ProbesMap.end()) { 902 for (const auto &Probe : It->second) { 903 ProbeCounter[&Probe] += Count; 904 } 905 } 906 } while (IP.advance() && IP.Address <= RangeEnd); 907 } 908 } 909 910 // Helper function to extract context prefix string stack 911 // Extract context stack for reusing, leaf context stack will 912 // be added compressed while looking up function profile 913 static void extractPrefixContextStack( 914 SampleContextFrameVector &ContextStack, 915 const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes, 916 ProfiledBinary *Binary) { 917 for (const auto *P : Probes) { 918 Binary->getInlineContextForProbe(P, ContextStack, true); 919 } 920 } 921 922 void CSProfileGenerator::generateProbeBasedProfile() { 923 // Enable pseudo probe functionalities in SampleProf 924 FunctionSamples::ProfileIsProbeBased = true; 925 for (const auto &CI : SampleCounters) { 926 const auto *CtxKey = cast<ProbeBasedCtxKey>(CI.first.getPtr()); 927 SampleContextFrameVector ContextStack; 928 extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); 929 // Fill in function body samples from probes, also infer caller's samples 930 // from callee's probe 931 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 932 // Fill in boundary samples for a call probe 933 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 934 } 935 } 936 937 void CSProfileGenerator::populateBodySamplesWithProbes( 938 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 939 ProbeCounterMap ProbeCounter; 940 // Extract the top frame probes by looking up each address among the range in 941 // the Address2ProbeMap 942 extractProbesFromRange(RangeCounter, ProbeCounter); 943 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 944 std::unordered_set<FunctionSamples *>> 945 FrameSamples; 946 for (const auto &PI : ProbeCounter) { 947 const MCDecodedPseudoProbe *Probe = PI.first; 948 uint64_t Count = PI.second; 949 // Disjoint ranges have introduce zero-filled gap that 950 // doesn't belong to current context, filter them out. 951 if (!Probe->isBlock() || Count == 0) 952 continue; 953 FunctionSamples &FunctionProfile = 954 getFunctionProfileForLeafProbe(ContextStack, Probe); 955 // Record the current frame and FunctionProfile whenever samples are 956 // collected for non-danglie probes. This is for reporting all of the 957 // zero count probes of the frame later. 958 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 959 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 960 FunctionProfile.addTotalSamples(Count); 961 if (Probe->isEntry()) { 962 FunctionProfile.addHeadSamples(Count); 963 // Look up for the caller's function profile 964 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 965 SampleContextFrames CalleeContextId = 966 FunctionProfile.getContext().getContextFrames(); 967 if (InlinerDesc != nullptr && CalleeContextId.size() > 1) { 968 // Since the context id will be compressed, we have to use callee's 969 // context id to infer caller's context id to ensure they share the 970 // same context prefix. 971 SampleContextFrameVector CallerContextId; 972 SampleContextFrame &&CallerLeafFrameLoc = 973 getCallerContext(CalleeContextId, CallerContextId); 974 uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; 975 assert(CallerIndex && 976 "Inferred caller's location index shouldn't be zero!"); 977 FunctionSamples &CallerProfile = 978 getFunctionProfileForContext(CallerContextId); 979 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 980 CallerProfile.addBodySamples(CallerIndex, 0, Count); 981 CallerProfile.addTotalSamples(Count); 982 CallerProfile.addCalledTargetSamples( 983 CallerIndex, 0, FunctionProfile.getContext().getName(), Count); 984 } 985 } 986 } 987 988 // Assign zero count for remaining probes without sample hits to 989 // differentiate from probes optimized away, of which the counts are unknown 990 // and will be inferred by the compiler. 991 for (auto &I : FrameSamples) { 992 for (auto *FunctionProfile : I.second) { 993 for (auto *Probe : I.first->getProbes()) { 994 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 995 } 996 } 997 } 998 } 999 1000 void CSProfileGenerator::populateBoundarySamplesWithProbes( 1001 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 1002 for (const auto &BI : BranchCounter) { 1003 uint64_t SourceOffset = BI.first.first; 1004 uint64_t TargetOffset = BI.first.second; 1005 uint64_t Count = BI.second; 1006 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 1007 const MCDecodedPseudoProbe *CallProbe = 1008 Binary->getCallProbeForAddr(SourceAddress); 1009 if (CallProbe == nullptr) 1010 continue; 1011 FunctionSamples &FunctionProfile = 1012 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 1013 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 1014 FunctionProfile.addTotalSamples(Count); 1015 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 1016 if (CalleeName.size() == 0) 1017 continue; 1018 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 1019 Count); 1020 } 1021 } 1022 1023 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 1024 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 1025 1026 // Explicitly copy the context for appending the leaf context 1027 SampleContextFrameVector NewContextStack(ContextStack.begin(), 1028 ContextStack.end()); 1029 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 1030 // For leaf inlined context with the top frame, we should strip off the top 1031 // frame's probe id, like: 1032 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 1033 auto LeafFrame = NewContextStack.back(); 1034 LeafFrame.Location = LineLocation(0, 0); 1035 NewContextStack.pop_back(); 1036 // Compress the context string except for the leaf frame 1037 CSProfileGenerator::compressRecursionContext(NewContextStack); 1038 CSProfileGenerator::trimContext(NewContextStack); 1039 NewContextStack.push_back(LeafFrame); 1040 1041 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 1042 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 1043 FunctionSamples &FunctionProile = 1044 getFunctionProfileForContext(NewContextStack, WasLeafInlined); 1045 FunctionProile.setFunctionHash(FuncDesc->FuncHash); 1046 return FunctionProile; 1047 } 1048 1049 } // end namespace sampleprof 1050 } // end namespace llvm 1051