1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "ProfiledBinary.h" 11 #include "llvm/ProfileData/ProfileCommon.h" 12 #include <unordered_set> 13 14 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 15 cl::Required, 16 cl::desc("Output profile file")); 17 static cl::alias OutputA("o", cl::desc("Alias for --output"), 18 cl::aliasopt(OutputFilename)); 19 20 static cl::opt<SampleProfileFormat> OutputFormat( 21 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 22 cl::values( 23 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 24 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 25 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 26 clEnumValN(SPF_Text, "text", "Text encoding"), 27 clEnumValN(SPF_GCC, "gcc", 28 "GCC encoding (only meaningful for -sample)"))); 29 30 cl::opt<bool> UseMD5( 31 "use-md5", cl::init(false), cl::Hidden, 32 cl::desc("Use md5 to represent function names in the output profile (only " 33 "meaningful for -extbinary)")); 34 35 static cl::opt<bool> PopulateProfileSymbolList( 36 "populate-profile-symbol-list", cl::init(true), cl::Hidden, 37 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 38 39 static cl::opt<int32_t, true> RecursionCompression( 40 "compress-recursion", 41 cl::desc("Compressing recursion by deduplicating adjacent frame " 42 "sequences up to the specified size. -1 means no size limit."), 43 cl::Hidden, 44 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 45 46 static cl::opt<bool> CSProfMergeColdContext( 47 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 48 cl::desc("If the total count of context profile is smaller than " 49 "the threshold, it will be merged into context-less base " 50 "profile.")); 51 52 static cl::opt<bool> CSProfTrimColdContext( 53 "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore, 54 cl::desc("If the total count of the profile after all merge is done " 55 "is still smaller than threshold, it will be trimmed.")); 56 57 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 58 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 59 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 60 "context-less base profile")); 61 62 static cl::opt<int, true> CSProfMaxContextDepth( 63 "csprof-max-context-depth", cl::ZeroOrMore, 64 cl::desc("Keep the last K contexts while merging profile. -1 means no " 65 "depth limit."), 66 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 67 68 extern cl::opt<int> ProfileSummaryCutoffCold; 69 70 using namespace llvm; 71 using namespace sampleprof; 72 73 namespace llvm { 74 namespace sampleprof { 75 76 // Initialize the MaxCompressionSize to -1 which means no size limit 77 int32_t CSProfileGenerator::MaxCompressionSize = -1; 78 79 int CSProfileGenerator::MaxContextDepth = -1; 80 81 std::unique_ptr<ProfileGeneratorBase> 82 ProfileGeneratorBase::create(ProfiledBinary *Binary, 83 const ContextSampleCounterMap &SampleCounters, 84 bool ProfileIsCS) { 85 std::unique_ptr<ProfileGeneratorBase> Generator; 86 if (ProfileIsCS) { 87 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 88 } else { 89 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 90 } 91 92 return Generator; 93 } 94 95 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 96 SampleProfileMap &ProfileMap) { 97 // Populate profile symbol list if extended binary format is used. 98 ProfileSymbolList SymbolList; 99 100 // Turn it off temporarily for CS profile. 101 if (FunctionSamples::ProfileIsCS && 102 !PopulateProfileSymbolList.getNumOccurrences()) 103 PopulateProfileSymbolList = false; 104 105 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 106 for (const auto &Item : ProfileMap) { 107 auto &Profile = Item.second; 108 SymbolList.add(Profile.getName(), true); 109 } 110 Writer->setProfileSymbolList(&SymbolList); 111 } 112 113 if (std::error_code EC = Writer->write(ProfileMap)) 114 exitWithError(std::move(EC)); 115 } 116 117 void ProfileGeneratorBase::write() { 118 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 119 if (std::error_code EC = WriterOrErr.getError()) 120 exitWithError(EC, OutputFilename); 121 122 if (UseMD5) { 123 if (OutputFormat != SPF_Ext_Binary) 124 WithColor::warning() << "-use-md5 is ignored. Specify " 125 "--format=extbinary to enable it\n"; 126 else 127 WriterOrErr.get()->setUseMD5(); 128 } 129 130 write(std::move(WriterOrErr.get()), ProfileMap); 131 } 132 133 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 134 const RangeSample &Ranges) { 135 136 /* 137 Regions may overlap with each other. Using the boundary info, find all 138 disjoint ranges and their sample count. BoundaryPoint contains the count 139 multiple samples begin/end at this points. 140 141 |<--100-->| Sample1 142 |<------200------>| Sample2 143 A B C 144 145 In the example above, 146 Sample1 begins at A, ends at B, its value is 100. 147 Sample2 beings at A, ends at C, its value is 200. 148 For A, BeginCount is the sum of sample begins at A, which is 300 and no 149 samples ends at A, so EndCount is 0. 150 Then boundary points A, B, and C with begin/end counts are: 151 A: (300, 0) 152 B: (0, 100) 153 C: (0, 200) 154 */ 155 struct BoundaryPoint { 156 // Sum of sample counts beginning at this point 157 uint64_t BeginCount = UINT64_MAX; 158 // Sum of sample counts ending at this point 159 uint64_t EndCount = UINT64_MAX; 160 // Is the begin point of a zero range. 161 bool IsZeroRangeBegin = false; 162 // Is the end point of a zero range. 163 bool IsZeroRangeEnd = false; 164 165 void addBeginCount(uint64_t Count) { 166 if (BeginCount == UINT64_MAX) 167 BeginCount = 0; 168 BeginCount += Count; 169 } 170 171 void addEndCount(uint64_t Count) { 172 if (EndCount == UINT64_MAX) 173 EndCount = 0; 174 EndCount += Count; 175 } 176 }; 177 178 /* 179 For the above example. With boundary points, follwing logic finds two 180 disjoint region of 181 182 [A,B]: 300 183 [B+1,C]: 200 184 185 If there is a boundary point that both begin and end, the point itself 186 becomes a separate disjoint region. For example, if we have original 187 ranges of 188 189 |<--- 100 --->| 190 |<--- 200 --->| 191 A B C 192 193 there are three boundary points with their begin/end counts of 194 195 A: (100, 0) 196 B: (200, 100) 197 C: (0, 200) 198 199 the disjoint ranges would be 200 201 [A, B-1]: 100 202 [B, B]: 300 203 [B+1, C]: 200. 204 205 Example for zero value range: 206 207 |<--- 100 --->| 208 |<--- 200 --->| 209 |<--------------- 0 ----------------->| 210 A B C D E F 211 212 [A, B-1] : 0 213 [B, C] : 100 214 [C+1, D-1]: 0 215 [D, E] : 200 216 [E+1, F] : 0 217 */ 218 std::map<uint64_t, BoundaryPoint> Boundaries; 219 220 for (auto Item : Ranges) { 221 assert(Item.first.first <= Item.first.second && 222 "Invalid instruction range"); 223 auto &BeginPoint = Boundaries[Item.first.first]; 224 auto &EndPoint = Boundaries[Item.first.second]; 225 uint64_t Count = Item.second; 226 227 BeginPoint.addBeginCount(Count); 228 EndPoint.addEndCount(Count); 229 if (Count == 0) { 230 BeginPoint.IsZeroRangeBegin = true; 231 EndPoint.IsZeroRangeEnd = true; 232 } 233 } 234 235 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 236 // and the next valid address 237 uint64_t BeginAddress = UINT64_MAX; 238 int ZeroRangeDepth = 0; 239 uint64_t Count = 0; 240 for (auto Item : Boundaries) { 241 uint64_t Address = Item.first; 242 BoundaryPoint &Point = Item.second; 243 if (Point.BeginCount != UINT64_MAX) { 244 if (BeginAddress != UINT64_MAX) 245 DisjointRanges[{BeginAddress, Address - 1}] = Count; 246 Count += Point.BeginCount; 247 BeginAddress = Address; 248 ZeroRangeDepth += Point.IsZeroRangeBegin; 249 } 250 if (Point.EndCount != UINT64_MAX) { 251 assert((BeginAddress != UINT64_MAX) && 252 "First boundary point cannot be 'end' point"); 253 DisjointRanges[{BeginAddress, Address}] = Count; 254 assert(Count >= Point.EndCount && "Mismatched live ranges"); 255 Count -= Point.EndCount; 256 BeginAddress = Address + 1; 257 ZeroRangeDepth -= Point.IsZeroRangeEnd; 258 // If the remaining count is zero and it's no longer in a zero range, this 259 // means we consume all the ranges before, thus mark BeginAddress as 260 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 261 // [<---- 10 ---->] 262 // [<---- 20 ---->] 263 // A B C D 264 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 265 // have the [B+1, C-1] zero range. 266 if (Count == 0 && ZeroRangeDepth == 0) 267 BeginAddress = UINT64_MAX; 268 } 269 } 270 } 271 272 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 273 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 274 uint64_t Count) { 275 // Use the maximum count of samples with same line location 276 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 277 ErrorOr<uint64_t> R = 278 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 279 280 uint64_t PreviousCount = R ? R.get() : 0; 281 if (PreviousCount <= Count) { 282 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 283 Count - PreviousCount); 284 } 285 } 286 287 FunctionSamples & 288 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 289 SampleContext Context(FuncName); 290 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 291 if (Ret.second) { 292 FunctionSamples &FProfile = Ret.first->second; 293 FProfile.setContext(Context); 294 } 295 return Ret.first->second; 296 } 297 298 void ProfileGenerator::generateProfile() { 299 if (Binary->usePseudoProbes()) { 300 // TODO: Support probe based profile generation 301 } else { 302 generateLineNumBasedProfile(); 303 } 304 } 305 306 void ProfileGenerator::generateLineNumBasedProfile() { 307 assert(SampleCounters.size() == 1 && 308 "Must have one entry for profile generation."); 309 const SampleCounter &SC = SampleCounters.begin()->second; 310 // Fill in function body samples 311 populateBodySamplesForAllFunctions(SC.RangeCounter); 312 // Fill in boundary sample counts as well as call site samples for calls 313 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 314 } 315 316 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 317 const SampleContextFrameVector &FrameVec, uint64_t Count) { 318 // Get top level profile 319 FunctionSamples *FunctionProfile = 320 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 321 FunctionProfile->addTotalSamples(Count); 322 323 for (size_t I = 1; I < FrameVec.size(); I++) { 324 LineLocation Callsite( 325 FrameVec[I - 1].Location.LineOffset, 326 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 327 FunctionSamplesMap &SamplesMap = 328 FunctionProfile->functionSamplesAt(Callsite); 329 auto Ret = 330 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 331 if (Ret.second) { 332 SampleContext Context(FrameVec[I].FuncName); 333 Ret.first->second.setContext(Context); 334 } 335 FunctionProfile = &Ret.first->second; 336 FunctionProfile->addTotalSamples(Count); 337 } 338 339 return *FunctionProfile; 340 } 341 342 RangeSample 343 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 344 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 345 // For each range, we search for the range of the function it belongs to and 346 // initialize it with zero count, so it remains zero if doesn't hit any 347 // samples. This is to be consistent with compiler that interpret zero count 348 // as unexecuted(cold). 349 for (auto I : RangeCounter) { 350 uint64_t RangeBegin = I.first.first; 351 uint64_t RangeEnd = I.first.second; 352 // Find the function offset range the current range begin belongs to. 353 auto FuncRange = Binary->findFuncOffsetRange(RangeBegin); 354 if (FuncRange.second == 0) 355 WithColor::warning() 356 << "[" << format("%8" PRIx64, RangeBegin) << " - " 357 << format("%8" PRIx64, RangeEnd) 358 << "]: Invalid range or disassembling error in profiled binary.\n"; 359 else if (RangeEnd > FuncRange.second) 360 WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - " 361 << format("%8" PRIx64, RangeEnd) 362 << "]: Range is across different functions.\n"; 363 else 364 Ranges[FuncRange] += 0; 365 } 366 RangeSample DisjointRanges; 367 findDisjointRanges(DisjointRanges, Ranges); 368 return DisjointRanges; 369 } 370 371 void ProfileGenerator::populateBodySamplesForAllFunctions( 372 const RangeSample &RangeCounter) { 373 for (auto Range : preprocessRangeCounter(RangeCounter)) { 374 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 375 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 376 uint64_t Count = Range.second; 377 378 InstructionPointer IP(Binary, RangeBegin, true); 379 // Disjoint ranges may have range in the middle of two instr, 380 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 381 // can be Addr1+1 to Addr2-1. We should ignore such range. 382 while (IP.Address <= RangeEnd) { 383 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 384 const SampleContextFrameVector &FrameVec = 385 Binary->getFrameLocationStack(Offset); 386 if (!FrameVec.empty()) { 387 uint64_t DC = Count * getDuplicationFactor( 388 FrameVec.back().Location.Discriminator); 389 FunctionSamples &FunctionProfile = 390 getLeafProfileAndAddTotalSamples(FrameVec, DC); 391 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 392 DC); 393 } 394 // Move to next IP within the range. 395 IP.advance(); 396 } 397 } 398 } 399 400 static bool isOutlinedFunction(StringRef CalleeName) { 401 // Check whether it's from hot-cold func split or coro split. 402 return CalleeName.find(".resume") != StringRef::npos || 403 CalleeName.find(".cold") != StringRef::npos; 404 } 405 406 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { 407 // Get the callee name by branch target if it's a call branch. 408 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 409 Binary->getFuncFromStartOffset(TargetOffset)); 410 411 // We won't accumulate sample count againt outlined function. 412 if (CalleeName.size() == 0 || isOutlinedFunction(CalleeName)) 413 return StringRef(); 414 415 return CalleeName; 416 } 417 418 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 419 const BranchSample &BranchCounters) { 420 for (auto Entry : BranchCounters) { 421 uint64_t SourceOffset = Entry.first.first; 422 uint64_t TargetOffset = Entry.first.second; 423 uint64_t Count = Entry.second; 424 assert(Count != 0 && "Unexpected zero weight branch"); 425 426 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 427 if (CalleeName.size() == 0) 428 continue; 429 // Record called target sample and its count. 430 const SampleContextFrameVector &FrameVec = 431 Binary->getFrameLocationStack(SourceOffset); 432 if (!FrameVec.empty()) { 433 Count *= getDuplicationFactor(FrameVec.back().Location.Discriminator); 434 FunctionSamples &FunctionProfile = 435 getLeafProfileAndAddTotalSamples(FrameVec, Count); 436 FunctionProfile.addCalledTargetSamples( 437 FrameVec.back().Location.LineOffset, 438 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 439 CalleeName, Count); 440 } 441 // Add head samples for callee. 442 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 443 CalleeProfile.addHeadSamples(Count); 444 } 445 } 446 447 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( 448 const SampleContextFrameVector &Context, bool WasLeafInlined) { 449 auto I = ProfileMap.find(SampleContext(Context)); 450 if (I == ProfileMap.end()) { 451 // Save the new context for future references. 452 SampleContextFrames NewContext = *Contexts.insert(Context).first; 453 SampleContext FContext(NewContext, RawContext); 454 auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); 455 if (WasLeafInlined) 456 FContext.setAttribute(ContextWasInlined); 457 FunctionSamples &FProfile = Ret.first->second; 458 FProfile.setContext(FContext); 459 return Ret.first->second; 460 } 461 return I->second; 462 } 463 464 void CSProfileGenerator::generateProfile() { 465 FunctionSamples::ProfileIsCS = true; 466 467 if (Binary->getTrackFuncContextSize()) 468 computeSizeForProfiledFunctions(); 469 470 if (Binary->usePseudoProbes()) { 471 // Enable pseudo probe functionalities in SampleProf 472 FunctionSamples::ProfileIsProbeBased = true; 473 generateProbeBasedProfile(); 474 } else { 475 generateLineNumBasedProfile(); 476 } 477 postProcessProfiles(); 478 } 479 480 void CSProfileGenerator::computeSizeForProfiledFunctions() { 481 // Hash map to deduplicate the function range and the item is a pair of 482 // function start and end offset. 483 std::unordered_map<uint64_t, uint64_t> FuncRanges; 484 // Go through all the ranges in the CS counters, use the start of the range to 485 // look up the function it belongs and record the function range. 486 for (const auto &CI : SampleCounters) { 487 for (auto Item : CI.second.RangeCounter) { 488 // FIXME: Filter the bogus crossing function range. 489 uint64_t RangeStartOffset = Item.first.first; 490 auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset); 491 if (FuncRange.second != 0) 492 FuncRanges[FuncRange.first] = FuncRange.second; 493 } 494 } 495 496 for (auto I : FuncRanges) { 497 uint64_t StartOffset = I.first; 498 uint64_t EndOffset = I.second; 499 Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); 500 } 501 } 502 503 void CSProfileGenerator::generateLineNumBasedProfile() { 504 for (const auto &CI : SampleCounters) { 505 const StringBasedCtxKey *CtxKey = 506 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 507 // Get or create function profile for the range 508 FunctionSamples &FunctionProfile = 509 getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); 510 511 // Fill in function body samples 512 populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); 513 // Fill in boundary sample counts as well as call site samples for calls 514 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, 515 CI.second.BranchCounter); 516 } 517 // Fill in call site value sample for inlined calls and also use context to 518 // infer missing samples. Since we don't have call count for inlined 519 // functions, we estimate it from inlinee's profile using the entry of the 520 // body sample. 521 populateInferredFunctionSamples(); 522 } 523 524 void CSProfileGenerator::populateBodySamplesForFunction( 525 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 526 // Compute disjoint ranges first, so we can use MAX 527 // for calculating count for each location. 528 RangeSample Ranges; 529 findDisjointRanges(Ranges, RangeCounter); 530 for (auto Range : Ranges) { 531 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 532 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 533 uint64_t Count = Range.second; 534 // Disjoint ranges have introduce zero-filled gap that 535 // doesn't belong to current context, filter them out. 536 if (Count == 0) 537 continue; 538 539 InstructionPointer IP(Binary, RangeBegin, true); 540 // Disjoint ranges may have range in the middle of two instr, 541 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 542 // can be Addr1+1 to Addr2-1. We should ignore such range. 543 while (IP.Address <= RangeEnd) { 544 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 545 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 546 if (LeafLoc.hasValue()) { 547 // Recording body sample for this specific context 548 uint64_t DC = 549 Count * getDuplicationFactor(LeafLoc->Location.Discriminator); 550 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, DC); 551 FunctionProfile.addTotalSamples(DC); 552 } 553 554 // Move to next IP within the range 555 IP.advance(); 556 } 557 } 558 } 559 560 void CSProfileGenerator::populateBoundarySamplesForFunction( 561 SampleContextFrames ContextId, FunctionSamples &FunctionProfile, 562 const BranchSample &BranchCounters) { 563 564 for (auto Entry : BranchCounters) { 565 uint64_t SourceOffset = Entry.first.first; 566 uint64_t TargetOffset = Entry.first.second; 567 uint64_t Count = Entry.second; 568 assert(Count != 0 && "Unexpected zero weight branch"); 569 570 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 571 if (CalleeName.size() == 0) 572 continue; 573 574 // Record called target sample and its count 575 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 576 if (!LeafLoc.hasValue()) 577 continue; 578 Count *= getDuplicationFactor(LeafLoc->Location.Discriminator); 579 FunctionProfile.addCalledTargetSamples( 580 LeafLoc->Location.LineOffset, 581 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 582 Count); 583 584 // Record head sample for called target(callee) 585 SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); 586 assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && 587 "Leaf function name doesn't match"); 588 CalleeCtx.back() = *LeafLoc; 589 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); 590 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); 591 CalleeProfile.addHeadSamples(Count); 592 } 593 } 594 595 static SampleContextFrame 596 getCallerContext(SampleContextFrames CalleeContext, 597 SampleContextFrameVector &CallerContext) { 598 assert(CalleeContext.size() > 1 && "Unexpected empty context"); 599 CalleeContext = CalleeContext.drop_back(); 600 CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); 601 SampleContextFrame CallerFrame = CallerContext.back(); 602 CallerContext.back().Location = LineLocation(0, 0); 603 return CallerFrame; 604 } 605 606 void CSProfileGenerator::populateInferredFunctionSamples() { 607 for (const auto &Item : ProfileMap) { 608 const auto &CalleeContext = Item.first; 609 const FunctionSamples &CalleeProfile = Item.second; 610 611 // If we already have head sample counts, we must have value profile 612 // for call sites added already. Skip to avoid double counting. 613 if (CalleeProfile.getHeadSamples()) 614 continue; 615 // If we don't have context, nothing to do for caller's call site. 616 // This could happen for entry point function. 617 if (CalleeContext.isBaseContext()) 618 continue; 619 620 // Infer Caller's frame loc and context ID through string splitting 621 SampleContextFrameVector CallerContextId; 622 SampleContextFrame &&CallerLeafFrameLoc = 623 getCallerContext(CalleeContext.getContextFrames(), CallerContextId); 624 SampleContextFrames CallerContext(CallerContextId); 625 626 // It's possible that we haven't seen any sample directly in the caller, 627 // in which case CallerProfile will not exist. But we can't modify 628 // ProfileMap while iterating it. 629 // TODO: created function profile for those callers too 630 if (ProfileMap.find(CallerContext) == ProfileMap.end()) 631 continue; 632 FunctionSamples &CallerProfile = ProfileMap[CallerContext]; 633 634 // Since we don't have call count for inlined functions, we 635 // estimate it from inlinee's profile using entry body sample. 636 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 637 // If we don't have samples with location, use 1 to indicate live. 638 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 639 EstimatedCallCount = 1; 640 CallerProfile.addCalledTargetSamples( 641 CallerLeafFrameLoc.Location.LineOffset, 642 CallerLeafFrameLoc.Location.Discriminator, 643 CalleeProfile.getContext().getName(), EstimatedCallCount); 644 CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, 645 CallerLeafFrameLoc.Location.Discriminator, 646 EstimatedCallCount); 647 CallerProfile.addTotalSamples(EstimatedCallCount); 648 } 649 } 650 651 void CSProfileGenerator::postProcessProfiles() { 652 // Compute hot/cold threshold based on profile. This will be used for cold 653 // context profile merging/trimming. 654 computeSummaryAndThreshold(); 655 656 // Run global pre-inliner to adjust/merge context profile based on estimated 657 // inline decisions. 658 if (EnableCSPreInliner) { 659 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) 660 .run(); 661 } 662 663 // Trim and merge cold context profile using cold threshold above. By default, 664 // we skip such merging and trimming when preinliner is on. 665 if (!EnableCSPreInliner || CSProfTrimColdContext.getNumOccurrences() || 666 CSProfMergeColdContext.getNumOccurrences()) { 667 SampleContextTrimmer(ProfileMap) 668 .trimAndMergeColdContextProfiles( 669 HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext, 670 CSProfMaxColdContextDepth); 671 } 672 } 673 674 void CSProfileGenerator::computeSummaryAndThreshold() { 675 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 676 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 677 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 678 (Summary->getDetailedSummary())); 679 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 680 (Summary->getDetailedSummary())); 681 } 682 683 // Helper function to extract context prefix string stack 684 // Extract context stack for reusing, leaf context stack will 685 // be added compressed while looking up function profile 686 static void extractPrefixContextStack( 687 SampleContextFrameVector &ContextStack, 688 const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes, 689 ProfiledBinary *Binary) { 690 for (const auto *P : Probes) { 691 Binary->getInlineContextForProbe(P, ContextStack, true); 692 } 693 } 694 695 void CSProfileGenerator::generateProbeBasedProfile() { 696 for (const auto &CI : SampleCounters) { 697 const ProbeBasedCtxKey *CtxKey = 698 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 699 SampleContextFrameVector ContextStack; 700 extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); 701 // Fill in function body samples from probes, also infer caller's samples 702 // from callee's probe 703 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 704 // Fill in boundary samples for a call probe 705 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 706 } 707 } 708 709 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, 710 ProbeCounterMap &ProbeCounter) { 711 RangeSample Ranges; 712 findDisjointRanges(Ranges, RangeCounter); 713 for (const auto &Range : Ranges) { 714 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 715 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 716 uint64_t Count = Range.second; 717 // Disjoint ranges have introduce zero-filled gap that 718 // doesn't belong to current context, filter them out. 719 if (Count == 0) 720 continue; 721 722 InstructionPointer IP(Binary, RangeBegin, true); 723 724 // Disjoint ranges may have range in the middle of two instr, 725 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 726 // can be Addr1+1 to Addr2-1. We should ignore such range. 727 if (IP.Address > RangeEnd) 728 continue; 729 730 while (IP.Address <= RangeEnd) { 731 const AddressProbesMap &Address2ProbesMap = 732 Binary->getAddress2ProbesMap(); 733 auto It = Address2ProbesMap.find(IP.Address); 734 if (It != Address2ProbesMap.end()) { 735 for (const auto &Probe : It->second) { 736 if (!Probe.isBlock()) 737 continue; 738 ProbeCounter[&Probe] += Count; 739 } 740 } 741 742 IP.advance(); 743 } 744 } 745 } 746 747 void CSProfileGenerator::populateBodySamplesWithProbes( 748 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 749 ProbeCounterMap ProbeCounter; 750 // Extract the top frame probes by looking up each address among the range in 751 // the Address2ProbeMap 752 extractProbesFromRange(RangeCounter, ProbeCounter); 753 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 754 std::unordered_set<FunctionSamples *>> 755 FrameSamples; 756 for (auto PI : ProbeCounter) { 757 const MCDecodedPseudoProbe *Probe = PI.first; 758 uint64_t Count = PI.second; 759 FunctionSamples &FunctionProfile = 760 getFunctionProfileForLeafProbe(ContextStack, Probe); 761 // Record the current frame and FunctionProfile whenever samples are 762 // collected for non-danglie probes. This is for reporting all of the 763 // zero count probes of the frame later. 764 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 765 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 766 FunctionProfile.addTotalSamples(Count); 767 if (Probe->isEntry()) { 768 FunctionProfile.addHeadSamples(Count); 769 // Look up for the caller's function profile 770 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 771 if (InlinerDesc != nullptr) { 772 // Since the context id will be compressed, we have to use callee's 773 // context id to infer caller's context id to ensure they share the 774 // same context prefix. 775 SampleContextFrames CalleeContextId = 776 FunctionProfile.getContext().getContextFrames(); 777 SampleContextFrameVector CallerContextId; 778 SampleContextFrame &&CallerLeafFrameLoc = 779 getCallerContext(CalleeContextId, CallerContextId); 780 uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; 781 assert(CallerIndex && 782 "Inferred caller's location index shouldn't be zero!"); 783 FunctionSamples &CallerProfile = 784 getFunctionProfileForContext(CallerContextId); 785 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 786 CallerProfile.addBodySamples(CallerIndex, 0, Count); 787 CallerProfile.addTotalSamples(Count); 788 CallerProfile.addCalledTargetSamples( 789 CallerIndex, 0, FunctionProfile.getContext().getName(), Count); 790 } 791 } 792 } 793 794 // Assign zero count for remaining probes without sample hits to 795 // differentiate from probes optimized away, of which the counts are unknown 796 // and will be inferred by the compiler. 797 for (auto &I : FrameSamples) { 798 for (auto *FunctionProfile : I.second) { 799 for (auto *Probe : I.first->getProbes()) { 800 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 801 } 802 } 803 } 804 } 805 806 void CSProfileGenerator::populateBoundarySamplesWithProbes( 807 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 808 for (auto BI : BranchCounter) { 809 uint64_t SourceOffset = BI.first.first; 810 uint64_t TargetOffset = BI.first.second; 811 uint64_t Count = BI.second; 812 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 813 const MCDecodedPseudoProbe *CallProbe = 814 Binary->getCallProbeForAddr(SourceAddress); 815 if (CallProbe == nullptr) 816 continue; 817 FunctionSamples &FunctionProfile = 818 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 819 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 820 FunctionProfile.addTotalSamples(Count); 821 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 822 if (CalleeName.size() == 0) 823 continue; 824 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 825 Count); 826 } 827 } 828 829 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 830 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 831 832 // Explicitly copy the context for appending the leaf context 833 SampleContextFrameVector NewContextStack(ContextStack.begin(), 834 ContextStack.end()); 835 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 836 // For leaf inlined context with the top frame, we should strip off the top 837 // frame's probe id, like: 838 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 839 auto LeafFrame = NewContextStack.back(); 840 LeafFrame.Location = LineLocation(0, 0); 841 NewContextStack.pop_back(); 842 // Compress the context string except for the leaf frame 843 CSProfileGenerator::compressRecursionContext(NewContextStack); 844 CSProfileGenerator::trimContext(NewContextStack); 845 NewContextStack.push_back(LeafFrame); 846 847 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 848 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 849 FunctionSamples &FunctionProile = 850 getFunctionProfileForContext(NewContextStack, WasLeafInlined); 851 FunctionProile.setFunctionHash(FuncDesc->FuncHash); 852 return FunctionProile; 853 } 854 855 } // end namespace sampleprof 856 } // end namespace llvm 857