1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "ProfiledBinary.h" 11 #include "llvm/ProfileData/ProfileCommon.h" 12 #include <float.h> 13 #include <unordered_set> 14 15 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 16 cl::Required, 17 cl::desc("Output profile file")); 18 static cl::alias OutputA("o", cl::desc("Alias for --output"), 19 cl::aliasopt(OutputFilename)); 20 21 static cl::opt<SampleProfileFormat> OutputFormat( 22 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 23 cl::values( 24 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 25 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 26 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 27 clEnumValN(SPF_Text, "text", "Text encoding"), 28 clEnumValN(SPF_GCC, "gcc", 29 "GCC encoding (only meaningful for -sample)"))); 30 31 cl::opt<bool> UseMD5( 32 "use-md5", cl::init(false), cl::Hidden, 33 cl::desc("Use md5 to represent function names in the output profile (only " 34 "meaningful for -extbinary)")); 35 36 static cl::opt<bool> PopulateProfileSymbolList( 37 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 38 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 39 40 static cl::opt<bool> FillZeroForAllFuncs( 41 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 42 cl::desc("Attribute all functions' range with zero count " 43 "even it's not hit by any samples.")); 44 45 static cl::opt<int32_t, true> RecursionCompression( 46 "compress-recursion", 47 cl::desc("Compressing recursion by deduplicating adjacent frame " 48 "sequences up to the specified size. -1 means no size limit."), 49 cl::Hidden, 50 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 51 52 static cl::opt<bool> CSProfMergeColdContext( 53 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 54 cl::desc("If the total count of context profile is smaller than " 55 "the threshold, it will be merged into context-less base " 56 "profile.")); 57 58 static cl::opt<bool> CSProfTrimColdContext( 59 "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore, 60 cl::desc("If the total count of the profile after all merge is done " 61 "is still smaller than threshold, it will be trimmed.")); 62 63 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 64 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 65 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 66 "context-less base profile")); 67 68 static cl::opt<int, true> CSProfMaxContextDepth( 69 "csprof-max-context-depth", cl::ZeroOrMore, 70 cl::desc("Keep the last K contexts while merging profile. -1 means no " 71 "depth limit."), 72 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 73 74 static cl::opt<double> HotFunctionDensityThreshold( 75 "hot-function-density-threshold", llvm::cl::init(1000), 76 llvm::cl::desc( 77 "specify density threshold for hot functions (default: 1000)"), 78 llvm::cl::Optional); 79 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 80 llvm::cl::desc("show profile density details"), 81 llvm::cl::Optional); 82 83 extern cl::opt<int> ProfileSummaryCutoffHot; 84 85 using namespace llvm; 86 using namespace sampleprof; 87 88 namespace llvm { 89 namespace sampleprof { 90 91 // Initialize the MaxCompressionSize to -1 which means no size limit 92 int32_t CSProfileGenerator::MaxCompressionSize = -1; 93 94 int CSProfileGenerator::MaxContextDepth = -1; 95 96 std::unique_ptr<ProfileGeneratorBase> 97 ProfileGeneratorBase::create(ProfiledBinary *Binary, 98 const ContextSampleCounterMap &SampleCounters, 99 bool ProfileIsCS) { 100 std::unique_ptr<ProfileGeneratorBase> Generator; 101 if (ProfileIsCS) { 102 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 103 } else { 104 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 105 } 106 107 return Generator; 108 } 109 110 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 111 SampleProfileMap &ProfileMap) { 112 // Populate profile symbol list if extended binary format is used. 113 ProfileSymbolList SymbolList; 114 115 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 116 Binary->populateSymbolListFromDWARF(SymbolList); 117 Writer->setProfileSymbolList(&SymbolList); 118 } 119 120 if (std::error_code EC = Writer->write(ProfileMap)) 121 exitWithError(std::move(EC)); 122 } 123 124 void ProfileGeneratorBase::write() { 125 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 126 if (std::error_code EC = WriterOrErr.getError()) 127 exitWithError(EC, OutputFilename); 128 129 if (UseMD5) { 130 if (OutputFormat != SPF_Ext_Binary) 131 WithColor::warning() << "-use-md5 is ignored. Specify " 132 "--format=extbinary to enable it\n"; 133 else 134 WriterOrErr.get()->setUseMD5(); 135 } 136 137 write(std::move(WriterOrErr.get()), ProfileMap); 138 } 139 140 void ProfileGeneratorBase::showDensitySuggestion(double Density) { 141 if (Density == 0.0) 142 WithColor::warning() << "The --profile-summary-cutoff-hot option may be " 143 "set too low. Please check your command.\n"; 144 else if (Density < HotFunctionDensityThreshold) 145 WithColor::warning() 146 << "AutoFDO is estimated to optimize better with " 147 << format("%.1f", HotFunctionDensityThreshold / Density) 148 << "x more samples. Please consider increasing sampling rate or " 149 "profiling for longer duration to get more samples.\n"; 150 151 if (ShowDensity) 152 outs() << "Minimum profile density for hot functions with top " 153 << format("%.2f", 154 static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 155 10000) 156 << "% total samples: " << format("%.1f", Density) << "\n"; 157 } 158 159 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, 160 uint64_t HotCntThreshold) { 161 double Density = DBL_MAX; 162 std::vector<const FunctionSamples *> HotFuncs; 163 for (auto &I : Profiles) { 164 auto &FuncSamples = I.second; 165 if (FuncSamples.getTotalSamples() < HotCntThreshold) 166 continue; 167 HotFuncs.emplace_back(&FuncSamples); 168 } 169 170 for (auto *FuncSamples : HotFuncs) { 171 auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); 172 if (!Func) 173 continue; 174 uint64_t FuncSize = Func->getFuncSize(); 175 if (FuncSize == 0) 176 continue; 177 Density = 178 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / 179 FuncSize); 180 } 181 182 return Density == DBL_MAX ? 0.0 : Density; 183 } 184 185 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 186 const RangeSample &Ranges) { 187 188 /* 189 Regions may overlap with each other. Using the boundary info, find all 190 disjoint ranges and their sample count. BoundaryPoint contains the count 191 multiple samples begin/end at this points. 192 193 |<--100-->| Sample1 194 |<------200------>| Sample2 195 A B C 196 197 In the example above, 198 Sample1 begins at A, ends at B, its value is 100. 199 Sample2 beings at A, ends at C, its value is 200. 200 For A, BeginCount is the sum of sample begins at A, which is 300 and no 201 samples ends at A, so EndCount is 0. 202 Then boundary points A, B, and C with begin/end counts are: 203 A: (300, 0) 204 B: (0, 100) 205 C: (0, 200) 206 */ 207 struct BoundaryPoint { 208 // Sum of sample counts beginning at this point 209 uint64_t BeginCount = UINT64_MAX; 210 // Sum of sample counts ending at this point 211 uint64_t EndCount = UINT64_MAX; 212 // Is the begin point of a zero range. 213 bool IsZeroRangeBegin = false; 214 // Is the end point of a zero range. 215 bool IsZeroRangeEnd = false; 216 217 void addBeginCount(uint64_t Count) { 218 if (BeginCount == UINT64_MAX) 219 BeginCount = 0; 220 BeginCount += Count; 221 } 222 223 void addEndCount(uint64_t Count) { 224 if (EndCount == UINT64_MAX) 225 EndCount = 0; 226 EndCount += Count; 227 } 228 }; 229 230 /* 231 For the above example. With boundary points, follwing logic finds two 232 disjoint region of 233 234 [A,B]: 300 235 [B+1,C]: 200 236 237 If there is a boundary point that both begin and end, the point itself 238 becomes a separate disjoint region. For example, if we have original 239 ranges of 240 241 |<--- 100 --->| 242 |<--- 200 --->| 243 A B C 244 245 there are three boundary points with their begin/end counts of 246 247 A: (100, 0) 248 B: (200, 100) 249 C: (0, 200) 250 251 the disjoint ranges would be 252 253 [A, B-1]: 100 254 [B, B]: 300 255 [B+1, C]: 200. 256 257 Example for zero value range: 258 259 |<--- 100 --->| 260 |<--- 200 --->| 261 |<--------------- 0 ----------------->| 262 A B C D E F 263 264 [A, B-1] : 0 265 [B, C] : 100 266 [C+1, D-1]: 0 267 [D, E] : 200 268 [E+1, F] : 0 269 */ 270 std::map<uint64_t, BoundaryPoint> Boundaries; 271 272 for (auto Item : Ranges) { 273 assert(Item.first.first <= Item.first.second && 274 "Invalid instruction range"); 275 auto &BeginPoint = Boundaries[Item.first.first]; 276 auto &EndPoint = Boundaries[Item.first.second]; 277 uint64_t Count = Item.second; 278 279 BeginPoint.addBeginCount(Count); 280 EndPoint.addEndCount(Count); 281 if (Count == 0) { 282 BeginPoint.IsZeroRangeBegin = true; 283 EndPoint.IsZeroRangeEnd = true; 284 } 285 } 286 287 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 288 // and the next valid address 289 uint64_t BeginAddress = UINT64_MAX; 290 int ZeroRangeDepth = 0; 291 uint64_t Count = 0; 292 for (auto Item : Boundaries) { 293 uint64_t Address = Item.first; 294 BoundaryPoint &Point = Item.second; 295 if (Point.BeginCount != UINT64_MAX) { 296 if (BeginAddress != UINT64_MAX) 297 DisjointRanges[{BeginAddress, Address - 1}] = Count; 298 Count += Point.BeginCount; 299 BeginAddress = Address; 300 ZeroRangeDepth += Point.IsZeroRangeBegin; 301 } 302 if (Point.EndCount != UINT64_MAX) { 303 assert((BeginAddress != UINT64_MAX) && 304 "First boundary point cannot be 'end' point"); 305 DisjointRanges[{BeginAddress, Address}] = Count; 306 assert(Count >= Point.EndCount && "Mismatched live ranges"); 307 Count -= Point.EndCount; 308 BeginAddress = Address + 1; 309 ZeroRangeDepth -= Point.IsZeroRangeEnd; 310 // If the remaining count is zero and it's no longer in a zero range, this 311 // means we consume all the ranges before, thus mark BeginAddress as 312 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 313 // [<---- 10 ---->] 314 // [<---- 20 ---->] 315 // A B C D 316 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 317 // have the [B+1, C-1] zero range. 318 if (Count == 0 && ZeroRangeDepth == 0) 319 BeginAddress = UINT64_MAX; 320 } 321 } 322 } 323 324 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 325 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 326 uint64_t Count) { 327 // Use the maximum count of samples with same line location 328 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 329 330 // Use duplication factor to compensated for loop unroll/vectorization. 331 // Note that this is only needed when we're taking MAX of the counts at 332 // the location instead of SUM. 333 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 334 335 ErrorOr<uint64_t> R = 336 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 337 338 uint64_t PreviousCount = R ? R.get() : 0; 339 if (PreviousCount <= Count) { 340 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 341 Count - PreviousCount); 342 } 343 } 344 345 void ProfileGeneratorBase::updateTotalSamples() { 346 for (auto &Item : ProfileMap) { 347 FunctionSamples &FunctionProfile = Item.second; 348 FunctionProfile.updateTotalSamples(); 349 } 350 } 351 352 FunctionSamples & 353 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 354 SampleContext Context(FuncName); 355 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 356 if (Ret.second) { 357 FunctionSamples &FProfile = Ret.first->second; 358 FProfile.setContext(Context); 359 } 360 return Ret.first->second; 361 } 362 363 void ProfileGenerator::generateProfile() { 364 if (Binary->usePseudoProbes()) { 365 // TODO: Support probe based profile generation 366 } else { 367 generateLineNumBasedProfile(); 368 } 369 postProcessProfiles(); 370 } 371 372 void ProfileGenerator::postProcessProfiles() { 373 computeSummaryAndThreshold(); 374 calculateAndShowDensity(ProfileMap); 375 } 376 377 void ProfileGenerator::generateLineNumBasedProfile() { 378 assert(SampleCounters.size() == 1 && 379 "Must have one entry for profile generation."); 380 const SampleCounter &SC = SampleCounters.begin()->second; 381 // Fill in function body samples 382 populateBodySamplesForAllFunctions(SC.RangeCounter); 383 // Fill in boundary sample counts as well as call site samples for calls 384 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 385 386 updateTotalSamples(); 387 } 388 389 FunctionSamples &ProfileGenerator::getLeafFrameProfile( 390 const SampleContextFrameVector &FrameVec) { 391 // Get top level profile 392 FunctionSamples *FunctionProfile = 393 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 394 395 for (size_t I = 1; I < FrameVec.size(); I++) { 396 LineLocation Callsite( 397 FrameVec[I - 1].Location.LineOffset, 398 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 399 FunctionSamplesMap &SamplesMap = 400 FunctionProfile->functionSamplesAt(Callsite); 401 auto Ret = 402 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 403 if (Ret.second) { 404 SampleContext Context(FrameVec[I].FuncName); 405 Ret.first->second.setContext(Context); 406 } 407 FunctionProfile = &Ret.first->second; 408 } 409 410 return *FunctionProfile; 411 } 412 413 RangeSample 414 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 415 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 416 if (FillZeroForAllFuncs) { 417 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 418 for (auto &R : FuncI.second.Ranges) { 419 Ranges[{R.first, R.second - 1}] += 0; 420 } 421 } 422 } else { 423 // For each range, we search for all ranges of the function it belongs to 424 // and initialize it with zero count, so it remains zero if doesn't hit any 425 // samples. This is to be consistent with compiler that interpret zero count 426 // as unexecuted(cold). 427 for (auto I : RangeCounter) { 428 uint64_t StartOffset = I.first.first; 429 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 430 Ranges[{Range.first, Range.second - 1}] += 0; 431 } 432 } 433 RangeSample DisjointRanges; 434 findDisjointRanges(DisjointRanges, Ranges); 435 return DisjointRanges; 436 } 437 438 void ProfileGenerator::populateBodySamplesForAllFunctions( 439 const RangeSample &RangeCounter) { 440 for (auto Range : preprocessRangeCounter(RangeCounter)) { 441 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 442 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 443 uint64_t Count = Range.second; 444 445 InstructionPointer IP(Binary, RangeBegin, true); 446 // Disjoint ranges may have range in the middle of two instr, 447 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 448 // can be Addr1+1 to Addr2-1. We should ignore such range. 449 if (IP.Address > RangeEnd) 450 continue; 451 452 do { 453 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 454 const SampleContextFrameVector &FrameVec = 455 Binary->getFrameLocationStack(Offset); 456 if (!FrameVec.empty()) { 457 FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec); 458 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 459 Count); 460 } 461 } while (IP.advance() && IP.Address <= RangeEnd); 462 } 463 } 464 465 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { 466 // Get the function range by branch target if it's a call branch. 467 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); 468 469 // We won't accumulate sample count for a range whose start is not the real 470 // function entry such as outlined function or inner labels. 471 if (!FRange || !FRange->IsFuncEntry) 472 return StringRef(); 473 474 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 475 } 476 477 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 478 const BranchSample &BranchCounters) { 479 for (auto Entry : BranchCounters) { 480 uint64_t SourceOffset = Entry.first.first; 481 uint64_t TargetOffset = Entry.first.second; 482 uint64_t Count = Entry.second; 483 assert(Count != 0 && "Unexpected zero weight branch"); 484 485 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 486 if (CalleeName.size() == 0) 487 continue; 488 // Record called target sample and its count. 489 const SampleContextFrameVector &FrameVec = 490 Binary->getFrameLocationStack(SourceOffset); 491 if (!FrameVec.empty()) { 492 FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec); 493 FunctionProfile.addCalledTargetSamples( 494 FrameVec.back().Location.LineOffset, 495 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 496 CalleeName, Count); 497 } 498 // Add head samples for callee. 499 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 500 CalleeProfile.addHeadSamples(Count); 501 } 502 } 503 504 void ProfileGeneratorBase::calculateAndShowDensity( 505 const SampleProfileMap &Profiles) { 506 double Density = calculateDensity(Profiles, HotCountThreshold); 507 showDensitySuggestion(Density); 508 } 509 510 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( 511 const SampleContextFrameVector &Context, bool WasLeafInlined) { 512 auto I = ProfileMap.find(SampleContext(Context)); 513 if (I == ProfileMap.end()) { 514 // Save the new context for future references. 515 SampleContextFrames NewContext = *Contexts.insert(Context).first; 516 SampleContext FContext(NewContext, RawContext); 517 auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); 518 if (WasLeafInlined) 519 FContext.setAttribute(ContextWasInlined); 520 FunctionSamples &FProfile = Ret.first->second; 521 FProfile.setContext(FContext); 522 return Ret.first->second; 523 } 524 return I->second; 525 } 526 527 void CSProfileGenerator::generateProfile() { 528 FunctionSamples::ProfileIsCS = true; 529 530 if (Binary->getTrackFuncContextSize()) 531 computeSizeForProfiledFunctions(); 532 533 if (Binary->usePseudoProbes()) { 534 // Enable pseudo probe functionalities in SampleProf 535 FunctionSamples::ProfileIsProbeBased = true; 536 generateProbeBasedProfile(); 537 } else { 538 generateLineNumBasedProfile(); 539 } 540 postProcessProfiles(); 541 } 542 543 void CSProfileGenerator::computeSizeForProfiledFunctions() { 544 // Hash map to deduplicate the function range and the item is a pair of 545 // function start and end offset. 546 std::unordered_map<uint64_t, uint64_t> AggregatedRanges; 547 // Go through all the ranges in the CS counters, use the start of the range to 548 // look up the function it belongs and record the function range. 549 for (const auto &CI : SampleCounters) { 550 for (auto Item : CI.second.RangeCounter) { 551 // FIXME: Filter the bogus crossing function range. 552 uint64_t StartOffset = Item.first.first; 553 // Note that a function can be spilt into multiple ranges, so get all 554 // ranges of the function. 555 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 556 AggregatedRanges[Range.first] = Range.second; 557 } 558 } 559 560 for (auto I : AggregatedRanges) { 561 uint64_t StartOffset = I.first; 562 uint64_t EndOffset = I.second; 563 Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); 564 } 565 } 566 567 void CSProfileGenerator::generateLineNumBasedProfile() { 568 for (const auto &CI : SampleCounters) { 569 const StringBasedCtxKey *CtxKey = 570 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 571 // Get or create function profile for the range 572 FunctionSamples &FunctionProfile = 573 getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); 574 575 // Fill in function body samples 576 populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); 577 // Fill in boundary sample counts as well as call site samples for calls 578 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, 579 CI.second.BranchCounter); 580 } 581 // Fill in call site value sample for inlined calls and also use context to 582 // infer missing samples. Since we don't have call count for inlined 583 // functions, we estimate it from inlinee's profile using the entry of the 584 // body sample. 585 populateInferredFunctionSamples(); 586 587 updateTotalSamples(); 588 } 589 590 void CSProfileGenerator::populateBodySamplesForFunction( 591 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 592 // Compute disjoint ranges first, so we can use MAX 593 // for calculating count for each location. 594 RangeSample Ranges; 595 findDisjointRanges(Ranges, RangeCounter); 596 for (auto Range : Ranges) { 597 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 598 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 599 uint64_t Count = Range.second; 600 // Disjoint ranges have introduce zero-filled gap that 601 // doesn't belong to current context, filter them out. 602 if (Count == 0) 603 continue; 604 605 InstructionPointer IP(Binary, RangeBegin, true); 606 // Disjoint ranges may have range in the middle of two instr, 607 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 608 // can be Addr1+1 to Addr2-1. We should ignore such range. 609 if (IP.Address > RangeEnd) 610 continue; 611 612 do { 613 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 614 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 615 if (LeafLoc.hasValue()) { 616 // Recording body sample for this specific context 617 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 618 } 619 } while (IP.advance() && IP.Address <= RangeEnd); 620 } 621 } 622 623 void CSProfileGenerator::populateBoundarySamplesForFunction( 624 SampleContextFrames ContextId, FunctionSamples &FunctionProfile, 625 const BranchSample &BranchCounters) { 626 627 for (auto Entry : BranchCounters) { 628 uint64_t SourceOffset = Entry.first.first; 629 uint64_t TargetOffset = Entry.first.second; 630 uint64_t Count = Entry.second; 631 assert(Count != 0 && "Unexpected zero weight branch"); 632 633 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 634 if (CalleeName.size() == 0) 635 continue; 636 637 // Record called target sample and its count 638 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 639 if (!LeafLoc.hasValue()) 640 continue; 641 FunctionProfile.addCalledTargetSamples( 642 LeafLoc->Location.LineOffset, 643 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 644 Count); 645 646 // Record head sample for called target(callee) 647 SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); 648 assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && 649 "Leaf function name doesn't match"); 650 CalleeCtx.back() = *LeafLoc; 651 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); 652 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); 653 CalleeProfile.addHeadSamples(Count); 654 } 655 } 656 657 static SampleContextFrame 658 getCallerContext(SampleContextFrames CalleeContext, 659 SampleContextFrameVector &CallerContext) { 660 assert(CalleeContext.size() > 1 && "Unexpected empty context"); 661 CalleeContext = CalleeContext.drop_back(); 662 CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); 663 SampleContextFrame CallerFrame = CallerContext.back(); 664 CallerContext.back().Location = LineLocation(0, 0); 665 return CallerFrame; 666 } 667 668 void CSProfileGenerator::populateInferredFunctionSamples() { 669 for (const auto &Item : ProfileMap) { 670 const auto &CalleeContext = Item.first; 671 const FunctionSamples &CalleeProfile = Item.second; 672 673 // If we already have head sample counts, we must have value profile 674 // for call sites added already. Skip to avoid double counting. 675 if (CalleeProfile.getHeadSamples()) 676 continue; 677 // If we don't have context, nothing to do for caller's call site. 678 // This could happen for entry point function. 679 if (CalleeContext.isBaseContext()) 680 continue; 681 682 // Infer Caller's frame loc and context ID through string splitting 683 SampleContextFrameVector CallerContextId; 684 SampleContextFrame &&CallerLeafFrameLoc = 685 getCallerContext(CalleeContext.getContextFrames(), CallerContextId); 686 SampleContextFrames CallerContext(CallerContextId); 687 688 // It's possible that we haven't seen any sample directly in the caller, 689 // in which case CallerProfile will not exist. But we can't modify 690 // ProfileMap while iterating it. 691 // TODO: created function profile for those callers too 692 if (ProfileMap.find(CallerContext) == ProfileMap.end()) 693 continue; 694 FunctionSamples &CallerProfile = ProfileMap[CallerContext]; 695 696 // Since we don't have call count for inlined functions, we 697 // estimate it from inlinee's profile using entry body sample. 698 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 699 // If we don't have samples with location, use 1 to indicate live. 700 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 701 EstimatedCallCount = 1; 702 CallerProfile.addCalledTargetSamples( 703 CallerLeafFrameLoc.Location.LineOffset, 704 CallerLeafFrameLoc.Location.Discriminator, 705 CalleeProfile.getContext().getName(), EstimatedCallCount); 706 CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, 707 CallerLeafFrameLoc.Location.Discriminator, 708 EstimatedCallCount); 709 } 710 } 711 712 void CSProfileGenerator::postProcessProfiles() { 713 // Compute hot/cold threshold based on profile. This will be used for cold 714 // context profile merging/trimming. 715 computeSummaryAndThreshold(); 716 717 // Run global pre-inliner to adjust/merge context profile based on estimated 718 // inline decisions. 719 if (EnableCSPreInliner) { 720 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) 721 .run(); 722 // Turn off the profile merger by default unless it is explicitly enabled. 723 if (!CSProfMergeColdContext.getNumOccurrences()) 724 CSProfMergeColdContext = false; 725 } 726 727 // Trim and merge cold context profile using cold threshold above. 728 if (CSProfTrimColdContext || CSProfMergeColdContext) { 729 SampleContextTrimmer(ProfileMap) 730 .trimAndMergeColdContextProfiles( 731 HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext, 732 CSProfMaxColdContextDepth, EnableCSPreInliner); 733 } 734 735 // Merge function samples of CS profile to calculate profile density. 736 sampleprof::SampleProfileMap ContextLessProfiles; 737 for (const auto &I : ProfileMap) { 738 ContextLessProfiles[I.second.getName()].merge(I.second); 739 } 740 741 calculateAndShowDensity(ContextLessProfiles); 742 } 743 744 void ProfileGeneratorBase::computeSummaryAndThreshold() { 745 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 746 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 747 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 748 (Summary->getDetailedSummary())); 749 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 750 (Summary->getDetailedSummary())); 751 } 752 753 // Helper function to extract context prefix string stack 754 // Extract context stack for reusing, leaf context stack will 755 // be added compressed while looking up function profile 756 static void extractPrefixContextStack( 757 SampleContextFrameVector &ContextStack, 758 const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes, 759 ProfiledBinary *Binary) { 760 for (const auto *P : Probes) { 761 Binary->getInlineContextForProbe(P, ContextStack, true); 762 } 763 } 764 765 void CSProfileGenerator::generateProbeBasedProfile() { 766 for (const auto &CI : SampleCounters) { 767 const ProbeBasedCtxKey *CtxKey = 768 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 769 SampleContextFrameVector ContextStack; 770 extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); 771 // Fill in function body samples from probes, also infer caller's samples 772 // from callee's probe 773 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 774 // Fill in boundary samples for a call probe 775 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 776 } 777 } 778 779 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, 780 ProbeCounterMap &ProbeCounter) { 781 RangeSample Ranges; 782 findDisjointRanges(Ranges, RangeCounter); 783 for (const auto &Range : Ranges) { 784 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 785 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 786 uint64_t Count = Range.second; 787 // Disjoint ranges have introduce zero-filled gap that 788 // doesn't belong to current context, filter them out. 789 if (Count == 0) 790 continue; 791 792 InstructionPointer IP(Binary, RangeBegin, true); 793 // Disjoint ranges may have range in the middle of two instr, 794 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 795 // can be Addr1+1 to Addr2-1. We should ignore such range. 796 if (IP.Address > RangeEnd) 797 continue; 798 799 do { 800 const AddressProbesMap &Address2ProbesMap = 801 Binary->getAddress2ProbesMap(); 802 auto It = Address2ProbesMap.find(IP.Address); 803 if (It != Address2ProbesMap.end()) { 804 for (const auto &Probe : It->second) { 805 if (!Probe.isBlock()) 806 continue; 807 ProbeCounter[&Probe] += Count; 808 } 809 } 810 } while (IP.advance() && IP.Address <= RangeEnd); 811 } 812 } 813 814 void CSProfileGenerator::populateBodySamplesWithProbes( 815 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 816 ProbeCounterMap ProbeCounter; 817 // Extract the top frame probes by looking up each address among the range in 818 // the Address2ProbeMap 819 extractProbesFromRange(RangeCounter, ProbeCounter); 820 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 821 std::unordered_set<FunctionSamples *>> 822 FrameSamples; 823 for (auto PI : ProbeCounter) { 824 const MCDecodedPseudoProbe *Probe = PI.first; 825 uint64_t Count = PI.second; 826 FunctionSamples &FunctionProfile = 827 getFunctionProfileForLeafProbe(ContextStack, Probe); 828 // Record the current frame and FunctionProfile whenever samples are 829 // collected for non-danglie probes. This is for reporting all of the 830 // zero count probes of the frame later. 831 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 832 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 833 FunctionProfile.addTotalSamples(Count); 834 if (Probe->isEntry()) { 835 FunctionProfile.addHeadSamples(Count); 836 // Look up for the caller's function profile 837 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 838 if (InlinerDesc != nullptr) { 839 // Since the context id will be compressed, we have to use callee's 840 // context id to infer caller's context id to ensure they share the 841 // same context prefix. 842 SampleContextFrames CalleeContextId = 843 FunctionProfile.getContext().getContextFrames(); 844 SampleContextFrameVector CallerContextId; 845 SampleContextFrame &&CallerLeafFrameLoc = 846 getCallerContext(CalleeContextId, CallerContextId); 847 uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; 848 assert(CallerIndex && 849 "Inferred caller's location index shouldn't be zero!"); 850 FunctionSamples &CallerProfile = 851 getFunctionProfileForContext(CallerContextId); 852 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 853 CallerProfile.addBodySamples(CallerIndex, 0, Count); 854 CallerProfile.addTotalSamples(Count); 855 CallerProfile.addCalledTargetSamples( 856 CallerIndex, 0, FunctionProfile.getContext().getName(), Count); 857 } 858 } 859 } 860 861 // Assign zero count for remaining probes without sample hits to 862 // differentiate from probes optimized away, of which the counts are unknown 863 // and will be inferred by the compiler. 864 for (auto &I : FrameSamples) { 865 for (auto *FunctionProfile : I.second) { 866 for (auto *Probe : I.first->getProbes()) { 867 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 868 } 869 } 870 } 871 } 872 873 void CSProfileGenerator::populateBoundarySamplesWithProbes( 874 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 875 for (auto BI : BranchCounter) { 876 uint64_t SourceOffset = BI.first.first; 877 uint64_t TargetOffset = BI.first.second; 878 uint64_t Count = BI.second; 879 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 880 const MCDecodedPseudoProbe *CallProbe = 881 Binary->getCallProbeForAddr(SourceAddress); 882 if (CallProbe == nullptr) 883 continue; 884 FunctionSamples &FunctionProfile = 885 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 886 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 887 FunctionProfile.addTotalSamples(Count); 888 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 889 if (CalleeName.size() == 0) 890 continue; 891 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 892 Count); 893 } 894 } 895 896 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 897 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 898 899 // Explicitly copy the context for appending the leaf context 900 SampleContextFrameVector NewContextStack(ContextStack.begin(), 901 ContextStack.end()); 902 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 903 // For leaf inlined context with the top frame, we should strip off the top 904 // frame's probe id, like: 905 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 906 auto LeafFrame = NewContextStack.back(); 907 LeafFrame.Location = LineLocation(0, 0); 908 NewContextStack.pop_back(); 909 // Compress the context string except for the leaf frame 910 CSProfileGenerator::compressRecursionContext(NewContextStack); 911 CSProfileGenerator::trimContext(NewContextStack); 912 NewContextStack.push_back(LeafFrame); 913 914 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 915 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 916 FunctionSamples &FunctionProile = 917 getFunctionProfileForContext(NewContextStack, WasLeafInlined); 918 FunctionProile.setFunctionHash(FuncDesc->FuncHash); 919 return FunctionProile; 920 } 921 922 } // end namespace sampleprof 923 } // end namespace llvm 924