1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "ProfiledBinary.h" 11 #include "llvm/ProfileData/ProfileCommon.h" 12 #include <unordered_set> 13 14 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 15 cl::Required, 16 cl::desc("Output profile file")); 17 static cl::alias OutputA("o", cl::desc("Alias for --output"), 18 cl::aliasopt(OutputFilename)); 19 20 static cl::opt<SampleProfileFormat> OutputFormat( 21 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 22 cl::values( 23 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 24 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 25 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 26 clEnumValN(SPF_Text, "text", "Text encoding"), 27 clEnumValN(SPF_GCC, "gcc", 28 "GCC encoding (only meaningful for -sample)"))); 29 30 cl::opt<bool> UseMD5( 31 "use-md5", cl::init(false), cl::Hidden, 32 cl::desc("Use md5 to represent function names in the output profile (only " 33 "meaningful for -extbinary)")); 34 35 static cl::opt<bool> PopulateProfileSymbolList( 36 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 37 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 38 39 static cl::opt<bool> FillZeroForAllFuncs( 40 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 41 cl::desc("Attribute all functions' range with zero count " 42 "even it's not hit by any samples.")); 43 44 static cl::opt<int32_t, true> RecursionCompression( 45 "compress-recursion", 46 cl::desc("Compressing recursion by deduplicating adjacent frame " 47 "sequences up to the specified size. -1 means no size limit."), 48 cl::Hidden, 49 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 50 51 static cl::opt<bool> CSProfMergeColdContext( 52 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 53 cl::desc("If the total count of context profile is smaller than " 54 "the threshold, it will be merged into context-less base " 55 "profile.")); 56 57 static cl::opt<bool> CSProfTrimColdContext( 58 "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore, 59 cl::desc("If the total count of the profile after all merge is done " 60 "is still smaller than threshold, it will be trimmed.")); 61 62 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 63 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 64 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 65 "context-less base profile")); 66 67 static cl::opt<int, true> CSProfMaxContextDepth( 68 "csprof-max-context-depth", cl::ZeroOrMore, 69 cl::desc("Keep the last K contexts while merging profile. -1 means no " 70 "depth limit."), 71 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 72 73 extern cl::opt<int> ProfileSummaryCutoffCold; 74 75 using namespace llvm; 76 using namespace sampleprof; 77 78 namespace llvm { 79 namespace sampleprof { 80 81 // Initialize the MaxCompressionSize to -1 which means no size limit 82 int32_t CSProfileGenerator::MaxCompressionSize = -1; 83 84 int CSProfileGenerator::MaxContextDepth = -1; 85 86 std::unique_ptr<ProfileGeneratorBase> 87 ProfileGeneratorBase::create(ProfiledBinary *Binary, 88 const ContextSampleCounterMap &SampleCounters, 89 bool ProfileIsCS) { 90 std::unique_ptr<ProfileGeneratorBase> Generator; 91 if (ProfileIsCS) { 92 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 93 } else { 94 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 95 } 96 97 return Generator; 98 } 99 100 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 101 SampleProfileMap &ProfileMap) { 102 // Populate profile symbol list if extended binary format is used. 103 ProfileSymbolList SymbolList; 104 105 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 106 Binary->populateSymbolListFromDWARF(SymbolList); 107 Writer->setProfileSymbolList(&SymbolList); 108 } 109 110 if (std::error_code EC = Writer->write(ProfileMap)) 111 exitWithError(std::move(EC)); 112 } 113 114 void ProfileGeneratorBase::write() { 115 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 116 if (std::error_code EC = WriterOrErr.getError()) 117 exitWithError(EC, OutputFilename); 118 119 if (UseMD5) { 120 if (OutputFormat != SPF_Ext_Binary) 121 WithColor::warning() << "-use-md5 is ignored. Specify " 122 "--format=extbinary to enable it\n"; 123 else 124 WriterOrErr.get()->setUseMD5(); 125 } 126 127 write(std::move(WriterOrErr.get()), ProfileMap); 128 } 129 130 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 131 const RangeSample &Ranges) { 132 133 /* 134 Regions may overlap with each other. Using the boundary info, find all 135 disjoint ranges and their sample count. BoundaryPoint contains the count 136 multiple samples begin/end at this points. 137 138 |<--100-->| Sample1 139 |<------200------>| Sample2 140 A B C 141 142 In the example above, 143 Sample1 begins at A, ends at B, its value is 100. 144 Sample2 beings at A, ends at C, its value is 200. 145 For A, BeginCount is the sum of sample begins at A, which is 300 and no 146 samples ends at A, so EndCount is 0. 147 Then boundary points A, B, and C with begin/end counts are: 148 A: (300, 0) 149 B: (0, 100) 150 C: (0, 200) 151 */ 152 struct BoundaryPoint { 153 // Sum of sample counts beginning at this point 154 uint64_t BeginCount = UINT64_MAX; 155 // Sum of sample counts ending at this point 156 uint64_t EndCount = UINT64_MAX; 157 // Is the begin point of a zero range. 158 bool IsZeroRangeBegin = false; 159 // Is the end point of a zero range. 160 bool IsZeroRangeEnd = false; 161 162 void addBeginCount(uint64_t Count) { 163 if (BeginCount == UINT64_MAX) 164 BeginCount = 0; 165 BeginCount += Count; 166 } 167 168 void addEndCount(uint64_t Count) { 169 if (EndCount == UINT64_MAX) 170 EndCount = 0; 171 EndCount += Count; 172 } 173 }; 174 175 /* 176 For the above example. With boundary points, follwing logic finds two 177 disjoint region of 178 179 [A,B]: 300 180 [B+1,C]: 200 181 182 If there is a boundary point that both begin and end, the point itself 183 becomes a separate disjoint region. For example, if we have original 184 ranges of 185 186 |<--- 100 --->| 187 |<--- 200 --->| 188 A B C 189 190 there are three boundary points with their begin/end counts of 191 192 A: (100, 0) 193 B: (200, 100) 194 C: (0, 200) 195 196 the disjoint ranges would be 197 198 [A, B-1]: 100 199 [B, B]: 300 200 [B+1, C]: 200. 201 202 Example for zero value range: 203 204 |<--- 100 --->| 205 |<--- 200 --->| 206 |<--------------- 0 ----------------->| 207 A B C D E F 208 209 [A, B-1] : 0 210 [B, C] : 100 211 [C+1, D-1]: 0 212 [D, E] : 200 213 [E+1, F] : 0 214 */ 215 std::map<uint64_t, BoundaryPoint> Boundaries; 216 217 for (auto Item : Ranges) { 218 assert(Item.first.first <= Item.first.second && 219 "Invalid instruction range"); 220 auto &BeginPoint = Boundaries[Item.first.first]; 221 auto &EndPoint = Boundaries[Item.first.second]; 222 uint64_t Count = Item.second; 223 224 BeginPoint.addBeginCount(Count); 225 EndPoint.addEndCount(Count); 226 if (Count == 0) { 227 BeginPoint.IsZeroRangeBegin = true; 228 EndPoint.IsZeroRangeEnd = true; 229 } 230 } 231 232 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 233 // and the next valid address 234 uint64_t BeginAddress = UINT64_MAX; 235 int ZeroRangeDepth = 0; 236 uint64_t Count = 0; 237 for (auto Item : Boundaries) { 238 uint64_t Address = Item.first; 239 BoundaryPoint &Point = Item.second; 240 if (Point.BeginCount != UINT64_MAX) { 241 if (BeginAddress != UINT64_MAX) 242 DisjointRanges[{BeginAddress, Address - 1}] = Count; 243 Count += Point.BeginCount; 244 BeginAddress = Address; 245 ZeroRangeDepth += Point.IsZeroRangeBegin; 246 } 247 if (Point.EndCount != UINT64_MAX) { 248 assert((BeginAddress != UINT64_MAX) && 249 "First boundary point cannot be 'end' point"); 250 DisjointRanges[{BeginAddress, Address}] = Count; 251 assert(Count >= Point.EndCount && "Mismatched live ranges"); 252 Count -= Point.EndCount; 253 BeginAddress = Address + 1; 254 ZeroRangeDepth -= Point.IsZeroRangeEnd; 255 // If the remaining count is zero and it's no longer in a zero range, this 256 // means we consume all the ranges before, thus mark BeginAddress as 257 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 258 // [<---- 10 ---->] 259 // [<---- 20 ---->] 260 // A B C D 261 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 262 // have the [B+1, C-1] zero range. 263 if (Count == 0 && ZeroRangeDepth == 0) 264 BeginAddress = UINT64_MAX; 265 } 266 } 267 } 268 269 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 270 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 271 uint64_t Count) { 272 // Use the maximum count of samples with same line location 273 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 274 275 // Use duplication factor to compensated for loop unroll/vectorization. 276 // Note that this is only needed when we're taking MAX of the counts at 277 // the location instead of SUM. 278 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 279 280 ErrorOr<uint64_t> R = 281 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 282 283 uint64_t PreviousCount = R ? R.get() : 0; 284 if (PreviousCount <= Count) { 285 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 286 Count - PreviousCount); 287 } 288 } 289 290 void ProfileGeneratorBase::updateTotalSamples() { 291 for (auto &Item : ProfileMap) { 292 FunctionSamples &FunctionProfile = Item.second; 293 FunctionProfile.updateTotalSamples(); 294 } 295 } 296 297 FunctionSamples & 298 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 299 SampleContext Context(FuncName); 300 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 301 if (Ret.second) { 302 FunctionSamples &FProfile = Ret.first->second; 303 FProfile.setContext(Context); 304 } 305 return Ret.first->second; 306 } 307 308 void ProfileGenerator::generateProfile() { 309 if (Binary->usePseudoProbes()) { 310 // TODO: Support probe based profile generation 311 } else { 312 generateLineNumBasedProfile(); 313 } 314 } 315 316 void ProfileGenerator::generateLineNumBasedProfile() { 317 assert(SampleCounters.size() == 1 && 318 "Must have one entry for profile generation."); 319 const SampleCounter &SC = SampleCounters.begin()->second; 320 // Fill in function body samples 321 populateBodySamplesForAllFunctions(SC.RangeCounter); 322 // Fill in boundary sample counts as well as call site samples for calls 323 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 324 325 updateTotalSamples(); 326 } 327 328 FunctionSamples &ProfileGenerator::getLeafFrameProfile( 329 const SampleContextFrameVector &FrameVec) { 330 // Get top level profile 331 FunctionSamples *FunctionProfile = 332 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 333 334 for (size_t I = 1; I < FrameVec.size(); I++) { 335 LineLocation Callsite( 336 FrameVec[I - 1].Location.LineOffset, 337 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 338 FunctionSamplesMap &SamplesMap = 339 FunctionProfile->functionSamplesAt(Callsite); 340 auto Ret = 341 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 342 if (Ret.second) { 343 SampleContext Context(FrameVec[I].FuncName); 344 Ret.first->second.setContext(Context); 345 } 346 FunctionProfile = &Ret.first->second; 347 } 348 349 return *FunctionProfile; 350 } 351 352 RangeSample 353 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 354 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 355 if (FillZeroForAllFuncs) { 356 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 357 for (auto &R : FuncI.second.Ranges) { 358 Ranges[{R.first, R.second - 1}] += 0; 359 } 360 } 361 } else { 362 // For each range, we search for all ranges of the function it belongs to 363 // and initialize it with zero count, so it remains zero if doesn't hit any 364 // samples. This is to be consistent with compiler that interpret zero count 365 // as unexecuted(cold). 366 for (auto I : RangeCounter) { 367 uint64_t StartOffset = I.first.first; 368 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 369 Ranges[{Range.first, Range.second - 1}] += 0; 370 } 371 } 372 RangeSample DisjointRanges; 373 findDisjointRanges(DisjointRanges, Ranges); 374 return DisjointRanges; 375 } 376 377 void ProfileGenerator::populateBodySamplesForAllFunctions( 378 const RangeSample &RangeCounter) { 379 for (auto Range : preprocessRangeCounter(RangeCounter)) { 380 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 381 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 382 uint64_t Count = Range.second; 383 384 InstructionPointer IP(Binary, RangeBegin, true); 385 // Disjoint ranges may have range in the middle of two instr, 386 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 387 // can be Addr1+1 to Addr2-1. We should ignore such range. 388 if (IP.Address > RangeEnd) 389 continue; 390 391 do { 392 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 393 const SampleContextFrameVector &FrameVec = 394 Binary->getFrameLocationStack(Offset); 395 if (!FrameVec.empty()) { 396 FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec); 397 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 398 Count); 399 } 400 } while (IP.advance() && IP.Address <= RangeEnd); 401 } 402 } 403 404 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { 405 // Get the function range by branch target if it's a call branch. 406 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); 407 408 // We won't accumulate sample count for a range whose start is not the real 409 // function entry such as outlined function or inner labels. 410 if (!FRange || !FRange->IsFuncEntry) 411 return StringRef(); 412 413 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 414 } 415 416 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 417 const BranchSample &BranchCounters) { 418 for (auto Entry : BranchCounters) { 419 uint64_t SourceOffset = Entry.first.first; 420 uint64_t TargetOffset = Entry.first.second; 421 uint64_t Count = Entry.second; 422 assert(Count != 0 && "Unexpected zero weight branch"); 423 424 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 425 if (CalleeName.size() == 0) 426 continue; 427 // Record called target sample and its count. 428 const SampleContextFrameVector &FrameVec = 429 Binary->getFrameLocationStack(SourceOffset); 430 if (!FrameVec.empty()) { 431 FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec); 432 FunctionProfile.addCalledTargetSamples( 433 FrameVec.back().Location.LineOffset, 434 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 435 CalleeName, Count); 436 } 437 // Add head samples for callee. 438 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 439 CalleeProfile.addHeadSamples(Count); 440 } 441 } 442 443 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( 444 const SampleContextFrameVector &Context, bool WasLeafInlined) { 445 auto I = ProfileMap.find(SampleContext(Context)); 446 if (I == ProfileMap.end()) { 447 // Save the new context for future references. 448 SampleContextFrames NewContext = *Contexts.insert(Context).first; 449 SampleContext FContext(NewContext, RawContext); 450 auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); 451 if (WasLeafInlined) 452 FContext.setAttribute(ContextWasInlined); 453 FunctionSamples &FProfile = Ret.first->second; 454 FProfile.setContext(FContext); 455 return Ret.first->second; 456 } 457 return I->second; 458 } 459 460 void CSProfileGenerator::generateProfile() { 461 FunctionSamples::ProfileIsCS = true; 462 463 if (Binary->getTrackFuncContextSize()) 464 computeSizeForProfiledFunctions(); 465 466 if (Binary->usePseudoProbes()) { 467 // Enable pseudo probe functionalities in SampleProf 468 FunctionSamples::ProfileIsProbeBased = true; 469 generateProbeBasedProfile(); 470 } else { 471 generateLineNumBasedProfile(); 472 } 473 postProcessProfiles(); 474 } 475 476 void CSProfileGenerator::computeSizeForProfiledFunctions() { 477 // Hash map to deduplicate the function range and the item is a pair of 478 // function start and end offset. 479 std::unordered_map<uint64_t, uint64_t> AggregatedRanges; 480 // Go through all the ranges in the CS counters, use the start of the range to 481 // look up the function it belongs and record the function range. 482 for (const auto &CI : SampleCounters) { 483 for (auto Item : CI.second.RangeCounter) { 484 // FIXME: Filter the bogus crossing function range. 485 uint64_t StartOffset = Item.first.first; 486 // Note that a function can be spilt into multiple ranges, so get all 487 // ranges of the function. 488 for (const auto &Range : Binary->getRangesForOffset(StartOffset)) 489 AggregatedRanges[Range.first] = Range.second; 490 } 491 } 492 493 for (auto I : AggregatedRanges) { 494 uint64_t StartOffset = I.first; 495 uint64_t EndOffset = I.second; 496 Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); 497 } 498 } 499 500 void CSProfileGenerator::generateLineNumBasedProfile() { 501 for (const auto &CI : SampleCounters) { 502 const StringBasedCtxKey *CtxKey = 503 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 504 // Get or create function profile for the range 505 FunctionSamples &FunctionProfile = 506 getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); 507 508 // Fill in function body samples 509 populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); 510 // Fill in boundary sample counts as well as call site samples for calls 511 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, 512 CI.second.BranchCounter); 513 } 514 // Fill in call site value sample for inlined calls and also use context to 515 // infer missing samples. Since we don't have call count for inlined 516 // functions, we estimate it from inlinee's profile using the entry of the 517 // body sample. 518 populateInferredFunctionSamples(); 519 520 updateTotalSamples(); 521 } 522 523 void CSProfileGenerator::populateBodySamplesForFunction( 524 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 525 // Compute disjoint ranges first, so we can use MAX 526 // for calculating count for each location. 527 RangeSample Ranges; 528 findDisjointRanges(Ranges, RangeCounter); 529 for (auto Range : Ranges) { 530 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 531 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 532 uint64_t Count = Range.second; 533 // Disjoint ranges have introduce zero-filled gap that 534 // doesn't belong to current context, filter them out. 535 if (Count == 0) 536 continue; 537 538 InstructionPointer IP(Binary, RangeBegin, true); 539 // Disjoint ranges may have range in the middle of two instr, 540 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 541 // can be Addr1+1 to Addr2-1. We should ignore such range. 542 if (IP.Address > RangeEnd) 543 continue; 544 545 do { 546 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 547 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 548 if (LeafLoc.hasValue()) { 549 // Recording body sample for this specific context 550 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 551 } 552 } while (IP.advance() && IP.Address <= RangeEnd); 553 } 554 } 555 556 void CSProfileGenerator::populateBoundarySamplesForFunction( 557 SampleContextFrames ContextId, FunctionSamples &FunctionProfile, 558 const BranchSample &BranchCounters) { 559 560 for (auto Entry : BranchCounters) { 561 uint64_t SourceOffset = Entry.first.first; 562 uint64_t TargetOffset = Entry.first.second; 563 uint64_t Count = Entry.second; 564 assert(Count != 0 && "Unexpected zero weight branch"); 565 566 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 567 if (CalleeName.size() == 0) 568 continue; 569 570 // Record called target sample and its count 571 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 572 if (!LeafLoc.hasValue()) 573 continue; 574 FunctionProfile.addCalledTargetSamples( 575 LeafLoc->Location.LineOffset, 576 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 577 Count); 578 579 // Record head sample for called target(callee) 580 SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); 581 assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && 582 "Leaf function name doesn't match"); 583 CalleeCtx.back() = *LeafLoc; 584 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); 585 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); 586 CalleeProfile.addHeadSamples(Count); 587 } 588 } 589 590 static SampleContextFrame 591 getCallerContext(SampleContextFrames CalleeContext, 592 SampleContextFrameVector &CallerContext) { 593 assert(CalleeContext.size() > 1 && "Unexpected empty context"); 594 CalleeContext = CalleeContext.drop_back(); 595 CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); 596 SampleContextFrame CallerFrame = CallerContext.back(); 597 CallerContext.back().Location = LineLocation(0, 0); 598 return CallerFrame; 599 } 600 601 void CSProfileGenerator::populateInferredFunctionSamples() { 602 for (const auto &Item : ProfileMap) { 603 const auto &CalleeContext = Item.first; 604 const FunctionSamples &CalleeProfile = Item.second; 605 606 // If we already have head sample counts, we must have value profile 607 // for call sites added already. Skip to avoid double counting. 608 if (CalleeProfile.getHeadSamples()) 609 continue; 610 // If we don't have context, nothing to do for caller's call site. 611 // This could happen for entry point function. 612 if (CalleeContext.isBaseContext()) 613 continue; 614 615 // Infer Caller's frame loc and context ID through string splitting 616 SampleContextFrameVector CallerContextId; 617 SampleContextFrame &&CallerLeafFrameLoc = 618 getCallerContext(CalleeContext.getContextFrames(), CallerContextId); 619 SampleContextFrames CallerContext(CallerContextId); 620 621 // It's possible that we haven't seen any sample directly in the caller, 622 // in which case CallerProfile will not exist. But we can't modify 623 // ProfileMap while iterating it. 624 // TODO: created function profile for those callers too 625 if (ProfileMap.find(CallerContext) == ProfileMap.end()) 626 continue; 627 FunctionSamples &CallerProfile = ProfileMap[CallerContext]; 628 629 // Since we don't have call count for inlined functions, we 630 // estimate it from inlinee's profile using entry body sample. 631 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 632 // If we don't have samples with location, use 1 to indicate live. 633 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 634 EstimatedCallCount = 1; 635 CallerProfile.addCalledTargetSamples( 636 CallerLeafFrameLoc.Location.LineOffset, 637 CallerLeafFrameLoc.Location.Discriminator, 638 CalleeProfile.getContext().getName(), EstimatedCallCount); 639 CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, 640 CallerLeafFrameLoc.Location.Discriminator, 641 EstimatedCallCount); 642 } 643 } 644 645 void CSProfileGenerator::postProcessProfiles() { 646 // Compute hot/cold threshold based on profile. This will be used for cold 647 // context profile merging/trimming. 648 computeSummaryAndThreshold(); 649 650 // Run global pre-inliner to adjust/merge context profile based on estimated 651 // inline decisions. 652 if (EnableCSPreInliner) { 653 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) 654 .run(); 655 // Turn off the profile merger by default unless it is explicitly enabled. 656 if (!CSProfMergeColdContext.getNumOccurrences()) 657 CSProfMergeColdContext = false; 658 } 659 660 // Trim and merge cold context profile using cold threshold above. 661 if (CSProfTrimColdContext || CSProfMergeColdContext) { 662 SampleContextTrimmer(ProfileMap) 663 .trimAndMergeColdContextProfiles( 664 HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext, 665 CSProfMaxColdContextDepth, EnableCSPreInliner); 666 } 667 } 668 669 void CSProfileGenerator::computeSummaryAndThreshold() { 670 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 671 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 672 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 673 (Summary->getDetailedSummary())); 674 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 675 (Summary->getDetailedSummary())); 676 } 677 678 // Helper function to extract context prefix string stack 679 // Extract context stack for reusing, leaf context stack will 680 // be added compressed while looking up function profile 681 static void extractPrefixContextStack( 682 SampleContextFrameVector &ContextStack, 683 const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes, 684 ProfiledBinary *Binary) { 685 for (const auto *P : Probes) { 686 Binary->getInlineContextForProbe(P, ContextStack, true); 687 } 688 } 689 690 void CSProfileGenerator::generateProbeBasedProfile() { 691 for (const auto &CI : SampleCounters) { 692 const ProbeBasedCtxKey *CtxKey = 693 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 694 SampleContextFrameVector ContextStack; 695 extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); 696 // Fill in function body samples from probes, also infer caller's samples 697 // from callee's probe 698 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 699 // Fill in boundary samples for a call probe 700 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 701 } 702 } 703 704 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, 705 ProbeCounterMap &ProbeCounter) { 706 RangeSample Ranges; 707 findDisjointRanges(Ranges, RangeCounter); 708 for (const auto &Range : Ranges) { 709 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 710 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 711 uint64_t Count = Range.second; 712 // Disjoint ranges have introduce zero-filled gap that 713 // doesn't belong to current context, filter them out. 714 if (Count == 0) 715 continue; 716 717 InstructionPointer IP(Binary, RangeBegin, true); 718 // Disjoint ranges may have range in the middle of two instr, 719 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 720 // can be Addr1+1 to Addr2-1. We should ignore such range. 721 if (IP.Address > RangeEnd) 722 continue; 723 724 do { 725 const AddressProbesMap &Address2ProbesMap = 726 Binary->getAddress2ProbesMap(); 727 auto It = Address2ProbesMap.find(IP.Address); 728 if (It != Address2ProbesMap.end()) { 729 for (const auto &Probe : It->second) { 730 if (!Probe.isBlock()) 731 continue; 732 ProbeCounter[&Probe] += Count; 733 } 734 } 735 } while (IP.advance() && IP.Address <= RangeEnd); 736 } 737 } 738 739 void CSProfileGenerator::populateBodySamplesWithProbes( 740 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 741 ProbeCounterMap ProbeCounter; 742 // Extract the top frame probes by looking up each address among the range in 743 // the Address2ProbeMap 744 extractProbesFromRange(RangeCounter, ProbeCounter); 745 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 746 std::unordered_set<FunctionSamples *>> 747 FrameSamples; 748 for (auto PI : ProbeCounter) { 749 const MCDecodedPseudoProbe *Probe = PI.first; 750 uint64_t Count = PI.second; 751 FunctionSamples &FunctionProfile = 752 getFunctionProfileForLeafProbe(ContextStack, Probe); 753 // Record the current frame and FunctionProfile whenever samples are 754 // collected for non-danglie probes. This is for reporting all of the 755 // zero count probes of the frame later. 756 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 757 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 758 FunctionProfile.addTotalSamples(Count); 759 if (Probe->isEntry()) { 760 FunctionProfile.addHeadSamples(Count); 761 // Look up for the caller's function profile 762 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 763 if (InlinerDesc != nullptr) { 764 // Since the context id will be compressed, we have to use callee's 765 // context id to infer caller's context id to ensure they share the 766 // same context prefix. 767 SampleContextFrames CalleeContextId = 768 FunctionProfile.getContext().getContextFrames(); 769 SampleContextFrameVector CallerContextId; 770 SampleContextFrame &&CallerLeafFrameLoc = 771 getCallerContext(CalleeContextId, CallerContextId); 772 uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; 773 assert(CallerIndex && 774 "Inferred caller's location index shouldn't be zero!"); 775 FunctionSamples &CallerProfile = 776 getFunctionProfileForContext(CallerContextId); 777 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 778 CallerProfile.addBodySamples(CallerIndex, 0, Count); 779 CallerProfile.addTotalSamples(Count); 780 CallerProfile.addCalledTargetSamples( 781 CallerIndex, 0, FunctionProfile.getContext().getName(), Count); 782 } 783 } 784 } 785 786 // Assign zero count for remaining probes without sample hits to 787 // differentiate from probes optimized away, of which the counts are unknown 788 // and will be inferred by the compiler. 789 for (auto &I : FrameSamples) { 790 for (auto *FunctionProfile : I.second) { 791 for (auto *Probe : I.first->getProbes()) { 792 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 793 } 794 } 795 } 796 } 797 798 void CSProfileGenerator::populateBoundarySamplesWithProbes( 799 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 800 for (auto BI : BranchCounter) { 801 uint64_t SourceOffset = BI.first.first; 802 uint64_t TargetOffset = BI.first.second; 803 uint64_t Count = BI.second; 804 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 805 const MCDecodedPseudoProbe *CallProbe = 806 Binary->getCallProbeForAddr(SourceAddress); 807 if (CallProbe == nullptr) 808 continue; 809 FunctionSamples &FunctionProfile = 810 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 811 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 812 FunctionProfile.addTotalSamples(Count); 813 StringRef CalleeName = getCalleeNameForOffset(TargetOffset); 814 if (CalleeName.size() == 0) 815 continue; 816 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 817 Count); 818 } 819 } 820 821 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 822 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 823 824 // Explicitly copy the context for appending the leaf context 825 SampleContextFrameVector NewContextStack(ContextStack.begin(), 826 ContextStack.end()); 827 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 828 // For leaf inlined context with the top frame, we should strip off the top 829 // frame's probe id, like: 830 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 831 auto LeafFrame = NewContextStack.back(); 832 LeafFrame.Location = LineLocation(0, 0); 833 NewContextStack.pop_back(); 834 // Compress the context string except for the leaf frame 835 CSProfileGenerator::compressRecursionContext(NewContextStack); 836 CSProfileGenerator::trimContext(NewContextStack); 837 NewContextStack.push_back(LeafFrame); 838 839 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 840 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 841 FunctionSamples &FunctionProile = 842 getFunctionProfileForContext(NewContextStack, WasLeafInlined); 843 FunctionProile.setFunctionHash(FuncDesc->FuncHash); 844 return FunctionProile; 845 } 846 847 } // end namespace sampleprof 848 } // end namespace llvm 849