1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "ProfiledBinary.h" 11 #include "llvm/ProfileData/ProfileCommon.h" 12 #include <unordered_set> 13 14 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 15 cl::Required, 16 cl::desc("Output profile file")); 17 static cl::alias OutputA("o", cl::desc("Alias for --output"), 18 cl::aliasopt(OutputFilename)); 19 20 static cl::opt<SampleProfileFormat> OutputFormat( 21 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 22 cl::values( 23 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 24 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 25 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 26 clEnumValN(SPF_Text, "text", "Text encoding"), 27 clEnumValN(SPF_GCC, "gcc", 28 "GCC encoding (only meaningful for -sample)"))); 29 30 cl::opt<bool> UseMD5( 31 "use-md5", cl::init(false), cl::Hidden, 32 cl::desc("Use md5 to represent function names in the output profile (only " 33 "meaningful for -extbinary)")); 34 35 static cl::opt<int32_t, true> RecursionCompression( 36 "compress-recursion", 37 cl::desc("Compressing recursion by deduplicating adjacent frame " 38 "sequences up to the specified size. -1 means no size limit."), 39 cl::Hidden, 40 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 41 42 static cl::opt<bool> CSProfMergeColdContext( 43 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 44 cl::desc("If the total count of context profile is smaller than " 45 "the threshold, it will be merged into context-less base " 46 "profile.")); 47 48 static cl::opt<bool> CSProfTrimColdContext( 49 "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore, 50 cl::desc("If the total count of the profile after all merge is done " 51 "is still smaller than threshold, it will be trimmed.")); 52 53 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 54 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 55 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 56 "context-less base profile")); 57 58 static cl::opt<int, true> CSProfMaxContextDepth( 59 "csprof-max-context-depth", cl::ZeroOrMore, 60 cl::desc("Keep the last K contexts while merging profile. -1 means no " 61 "depth limit."), 62 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 63 64 extern cl::opt<int> ProfileSummaryCutoffCold; 65 66 using namespace llvm; 67 using namespace sampleprof; 68 69 namespace llvm { 70 namespace sampleprof { 71 72 // Initialize the MaxCompressionSize to -1 which means no size limit 73 int32_t CSProfileGenerator::MaxCompressionSize = -1; 74 75 int CSProfileGenerator::MaxContextDepth = -1; 76 77 std::unique_ptr<ProfileGeneratorBase> 78 ProfileGeneratorBase::create(ProfiledBinary *Binary, 79 const ContextSampleCounterMap &SampleCounters, 80 bool ProfileIsCS) { 81 std::unique_ptr<ProfileGeneratorBase> Generator; 82 if (ProfileIsCS) { 83 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 84 } else { 85 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 86 } 87 88 return Generator; 89 } 90 91 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 92 SampleProfileMap &ProfileMap) { 93 if (std::error_code EC = Writer->write(ProfileMap)) 94 exitWithError(std::move(EC)); 95 } 96 97 void ProfileGeneratorBase::write() { 98 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 99 if (std::error_code EC = WriterOrErr.getError()) 100 exitWithError(EC, OutputFilename); 101 102 if (UseMD5) { 103 if (OutputFormat != SPF_Ext_Binary) 104 WithColor::warning() << "-use-md5 is ignored. Specify " 105 "--format=extbinary to enable it\n"; 106 else 107 WriterOrErr.get()->setUseMD5(); 108 } 109 110 write(std::move(WriterOrErr.get()), ProfileMap); 111 } 112 113 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 114 const RangeSample &Ranges) { 115 116 /* 117 Regions may overlap with each other. Using the boundary info, find all 118 disjoint ranges and their sample count. BoundaryPoint contains the count 119 multiple samples begin/end at this points. 120 121 |<--100-->| Sample1 122 |<------200------>| Sample2 123 A B C 124 125 In the example above, 126 Sample1 begins at A, ends at B, its value is 100. 127 Sample2 beings at A, ends at C, its value is 200. 128 For A, BeginCount is the sum of sample begins at A, which is 300 and no 129 samples ends at A, so EndCount is 0. 130 Then boundary points A, B, and C with begin/end counts are: 131 A: (300, 0) 132 B: (0, 100) 133 C: (0, 200) 134 */ 135 struct BoundaryPoint { 136 // Sum of sample counts beginning at this point 137 uint64_t BeginCount = UINT64_MAX; 138 // Sum of sample counts ending at this point 139 uint64_t EndCount = UINT64_MAX; 140 // Is the begin point of a zero range. 141 bool IsZeroRangeBegin = false; 142 // Is the end point of a zero range. 143 bool IsZeroRangeEnd = false; 144 145 void addBeginCount(uint64_t Count) { 146 if (BeginCount == UINT64_MAX) 147 BeginCount = 0; 148 BeginCount += Count; 149 } 150 151 void addEndCount(uint64_t Count) { 152 if (EndCount == UINT64_MAX) 153 EndCount = 0; 154 EndCount += Count; 155 } 156 }; 157 158 /* 159 For the above example. With boundary points, follwing logic finds two 160 disjoint region of 161 162 [A,B]: 300 163 [B+1,C]: 200 164 165 If there is a boundary point that both begin and end, the point itself 166 becomes a separate disjoint region. For example, if we have original 167 ranges of 168 169 |<--- 100 --->| 170 |<--- 200 --->| 171 A B C 172 173 there are three boundary points with their begin/end counts of 174 175 A: (100, 0) 176 B: (200, 100) 177 C: (0, 200) 178 179 the disjoint ranges would be 180 181 [A, B-1]: 100 182 [B, B]: 300 183 [B+1, C]: 200. 184 185 Example for zero value range: 186 187 |<--- 100 --->| 188 |<--- 200 --->| 189 |<--------------- 0 ----------------->| 190 A B C D E F 191 192 [A, B-1] : 0 193 [B, C] : 100 194 [C+1, D-1]: 0 195 [D, E] : 200 196 [E+1, F] : 0 197 */ 198 std::map<uint64_t, BoundaryPoint> Boundaries; 199 200 for (auto Item : Ranges) { 201 assert(Item.first.first <= Item.first.second && 202 "Invalid instruction range"); 203 auto &BeginPoint = Boundaries[Item.first.first]; 204 auto &EndPoint = Boundaries[Item.first.second]; 205 uint64_t Count = Item.second; 206 207 BeginPoint.addBeginCount(Count); 208 EndPoint.addEndCount(Count); 209 if (Count == 0) { 210 BeginPoint.IsZeroRangeBegin = true; 211 EndPoint.IsZeroRangeEnd = true; 212 } 213 } 214 215 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 216 // and the next valid address 217 uint64_t BeginAddress = UINT64_MAX; 218 int ZeroRangeDepth = 0; 219 uint64_t Count = 0; 220 for (auto Item : Boundaries) { 221 uint64_t Address = Item.first; 222 BoundaryPoint &Point = Item.second; 223 if (Point.BeginCount != UINT64_MAX) { 224 if (BeginAddress != UINT64_MAX) 225 DisjointRanges[{BeginAddress, Address - 1}] = Count; 226 Count += Point.BeginCount; 227 BeginAddress = Address; 228 ZeroRangeDepth += Point.IsZeroRangeBegin; 229 } 230 if (Point.EndCount != UINT64_MAX) { 231 assert((BeginAddress != UINT64_MAX) && 232 "First boundary point cannot be 'end' point"); 233 DisjointRanges[{BeginAddress, Address}] = Count; 234 assert(Count >= Point.EndCount && "Mismatched live ranges"); 235 Count -= Point.EndCount; 236 BeginAddress = Address + 1; 237 ZeroRangeDepth -= Point.IsZeroRangeEnd; 238 // If the remaining count is zero and it's no longer in a zero range, this 239 // means we consume all the ranges before, thus mark BeginAddress as 240 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 241 // [<---- 10 ---->] 242 // [<---- 20 ---->] 243 // A B C D 244 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 245 // have the [B+1, C-1] zero range. 246 if (Count == 0 && ZeroRangeDepth == 0) 247 BeginAddress = UINT64_MAX; 248 } 249 } 250 } 251 252 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 253 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 254 uint64_t Count) { 255 // Filter out invalid negative(int type) lineOffset 256 if (LeafLoc.Callsite.LineOffset & 0x80000000) 257 return; 258 // Use the maximum count of samples with same line location 259 ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt( 260 LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); 261 uint64_t PreviousCount = R ? R.get() : 0; 262 if (PreviousCount <= Count) { 263 FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, 264 LeafLoc.Callsite.Discriminator, 265 Count - PreviousCount); 266 } 267 } 268 269 FunctionSamples & 270 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 271 SampleContext Context(FuncName); 272 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 273 if (Ret.second) { 274 FunctionSamples &FProfile = Ret.first->second; 275 FProfile.setContext(Context); 276 } 277 return Ret.first->second; 278 } 279 280 void ProfileGenerator::generateProfile() { 281 if (Binary->usePseudoProbes()) { 282 // TODO: Support probe based profile generation 283 } else { 284 generateLineNumBasedProfile(); 285 } 286 } 287 288 void ProfileGenerator::generateLineNumBasedProfile() { 289 assert(SampleCounters.size() == 1 && 290 "Must have one entry for profile generation."); 291 const SampleCounter &SC = SampleCounters.begin()->second; 292 // Fill in function body samples 293 populateBodySamplesForAllFunctions(SC.RangeCounter); 294 // Fill in boundary sample counts as well as call site samples for calls 295 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 296 } 297 298 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 299 const SampleContextFrameVector &FrameVec, uint64_t Count) { 300 // Get top level profile 301 FunctionSamples *FunctionProfile = 302 &getTopLevelFunctionProfile(FrameVec[0].CallerName); 303 FunctionProfile->addTotalSamples(Count); 304 305 for (size_t I = 1; I < FrameVec.size(); I++) { 306 FunctionSamplesMap &SamplesMap = 307 FunctionProfile->functionSamplesAt(FrameVec[I - 1].Callsite); 308 auto Ret = 309 SamplesMap.emplace(FrameVec[I].CallerName.str(), FunctionSamples()); 310 if (Ret.second) { 311 SampleContext Context(FrameVec[I].CallerName); 312 Ret.first->second.setContext(Context); 313 } 314 FunctionProfile = &Ret.first->second; 315 FunctionProfile->addTotalSamples(Count); 316 } 317 318 return *FunctionProfile; 319 } 320 321 RangeSample 322 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 323 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 324 // For each range, we search for the range of the function it belongs to and 325 // initialize it with zero count, so it remains zero if doesn't hit any 326 // samples. This is to be consistent with compiler that interpret zero count 327 // as unexecuted(cold). 328 for (auto I : RangeCounter) { 329 uint64_t RangeBegin = I.first.first; 330 uint64_t RangeEnd = I.first.second; 331 // Find the function offset range the current range begin belongs to. 332 auto FuncRange = Binary->findFuncOffsetRange(RangeBegin); 333 if (FuncRange.second == 0) 334 WithColor::warning() 335 << "[" << format("%8" PRIx64, RangeBegin) << " - " 336 << format("%8" PRIx64, RangeEnd) 337 << "]: Invalid range or disassembling error in profiled binary.\n"; 338 else if (RangeEnd > FuncRange.second) 339 WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - " 340 << format("%8" PRIx64, RangeEnd) 341 << "]: Range is across different functions.\n"; 342 else 343 Ranges[FuncRange] += 0; 344 } 345 RangeSample DisjointRanges; 346 findDisjointRanges(DisjointRanges, Ranges); 347 return DisjointRanges; 348 } 349 350 void ProfileGenerator::populateBodySamplesForAllFunctions( 351 const RangeSample &RangeCounter) { 352 for (auto Range : preprocessRangeCounter(RangeCounter)) { 353 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 354 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 355 uint64_t Count = Range.second; 356 357 InstructionPointer IP(Binary, RangeBegin, true); 358 // Disjoint ranges may have range in the middle of two instr, 359 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 360 // can be Addr1+1 to Addr2-1. We should ignore such range. 361 while (IP.Address <= RangeEnd) { 362 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 363 const SampleContextFrameVector &FrameVec = 364 Binary->getFrameLocationStack(Offset); 365 if (!FrameVec.empty()) { 366 FunctionSamples &FunctionProfile = 367 getLeafProfileAndAddTotalSamples(FrameVec, Count); 368 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 369 Count); 370 } 371 // Move to next IP within the range. 372 IP.advance(); 373 } 374 } 375 } 376 377 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 378 const BranchSample &BranchCounters) { 379 for (auto Entry : BranchCounters) { 380 uint64_t SourceOffset = Entry.first.first; 381 uint64_t TargetOffset = Entry.first.second; 382 uint64_t Count = Entry.second; 383 assert(Count != 0 && "Unexpected zero weight branch"); 384 385 // Get the callee name by branch target if it's a call branch. 386 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 387 Binary->getFuncFromStartOffset(TargetOffset)); 388 if (CalleeName.size() == 0) 389 continue; 390 // Record called target sample and its count. 391 const SampleContextFrameVector &FrameVec = 392 Binary->getFrameLocationStack(SourceOffset); 393 if (!FrameVec.empty()) { 394 FunctionSamples &FunctionProfile = 395 getLeafProfileAndAddTotalSamples(FrameVec, Count); 396 FunctionProfile.addCalledTargetSamples( 397 FrameVec.back().Callsite.LineOffset, 398 FrameVec.back().Callsite.Discriminator, CalleeName, Count); 399 } 400 // Add head samples for callee. 401 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 402 CalleeProfile.addHeadSamples(Count); 403 } 404 } 405 406 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( 407 const SampleContextFrameVector &Context, bool WasLeafInlined) { 408 auto I = ProfileMap.find(SampleContext(Context)); 409 if (I == ProfileMap.end()) { 410 // Save the new context for future references. 411 SampleContextFrames NewContext = *Contexts.insert(Context).first; 412 SampleContext FContext(NewContext, RawContext); 413 auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); 414 if (WasLeafInlined) 415 FContext.setAttribute(ContextWasInlined); 416 FunctionSamples &FProfile = Ret.first->second; 417 FProfile.setContext(FContext); 418 return Ret.first->second; 419 } 420 return I->second; 421 } 422 423 void CSProfileGenerator::generateProfile() { 424 FunctionSamples::ProfileIsCS = true; 425 426 if (Binary->getTrackFuncContextSize()) 427 computeSizeForProfiledFunctions(); 428 429 if (Binary->usePseudoProbes()) { 430 // Enable pseudo probe functionalities in SampleProf 431 FunctionSamples::ProfileIsProbeBased = true; 432 generateProbeBasedProfile(); 433 } else { 434 generateLineNumBasedProfile(); 435 } 436 postProcessProfiles(); 437 } 438 439 void CSProfileGenerator::computeSizeForProfiledFunctions() { 440 // Hash map to deduplicate the function range and the item is a pair of 441 // function start and end offset. 442 std::unordered_map<uint64_t, uint64_t> FuncRanges; 443 // Go through all the ranges in the CS counters, use the start of the range to 444 // look up the function it belongs and record the function range. 445 for (const auto &CI : SampleCounters) { 446 for (auto Item : CI.second.RangeCounter) { 447 // FIXME: Filter the bogus crossing function range. 448 uint64_t RangeStartOffset = Item.first.first; 449 auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset); 450 if (FuncRange.second != 0) 451 FuncRanges[FuncRange.first] = FuncRange.second; 452 } 453 } 454 455 for (auto I : FuncRanges) { 456 uint64_t StartOffset = I.first; 457 uint64_t EndOffset = I.second; 458 Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); 459 } 460 } 461 462 void CSProfileGenerator::generateLineNumBasedProfile() { 463 for (const auto &CI : SampleCounters) { 464 const StringBasedCtxKey *CtxKey = 465 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 466 // Get or create function profile for the range 467 FunctionSamples &FunctionProfile = 468 getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); 469 470 // Fill in function body samples 471 populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); 472 // Fill in boundary sample counts as well as call site samples for calls 473 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, 474 CI.second.BranchCounter); 475 } 476 // Fill in call site value sample for inlined calls and also use context to 477 // infer missing samples. Since we don't have call count for inlined 478 // functions, we estimate it from inlinee's profile using the entry of the 479 // body sample. 480 populateInferredFunctionSamples(); 481 } 482 483 void CSProfileGenerator::populateBodySamplesForFunction( 484 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 485 // Compute disjoint ranges first, so we can use MAX 486 // for calculating count for each location. 487 RangeSample Ranges; 488 findDisjointRanges(Ranges, RangeCounter); 489 for (auto Range : Ranges) { 490 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 491 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 492 uint64_t Count = Range.second; 493 // Disjoint ranges have introduce zero-filled gap that 494 // doesn't belong to current context, filter them out. 495 if (Count == 0) 496 continue; 497 498 InstructionPointer IP(Binary, RangeBegin, true); 499 // Disjoint ranges may have range in the middle of two instr, 500 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 501 // can be Addr1+1 to Addr2-1. We should ignore such range. 502 while (IP.Address <= RangeEnd) { 503 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 504 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 505 if (LeafLoc.hasValue()) { 506 // Recording body sample for this specific context 507 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 508 } 509 // Accumulate total sample count even it's a line with invalid debug info 510 FunctionProfile.addTotalSamples(Count); 511 // Move to next IP within the range 512 IP.advance(); 513 } 514 } 515 } 516 517 void CSProfileGenerator::populateBoundarySamplesForFunction( 518 SampleContextFrames ContextId, FunctionSamples &FunctionProfile, 519 const BranchSample &BranchCounters) { 520 521 for (auto Entry : BranchCounters) { 522 uint64_t SourceOffset = Entry.first.first; 523 uint64_t TargetOffset = Entry.first.second; 524 uint64_t Count = Entry.second; 525 assert(Count != 0 && "Unexpected zero weight branch"); 526 527 // Get the callee name by branch target if it's a call branch 528 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 529 Binary->getFuncFromStartOffset(TargetOffset)); 530 if (CalleeName.size() == 0) 531 continue; 532 533 // Record called target sample and its count 534 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 535 if (!LeafLoc.hasValue()) 536 continue; 537 FunctionProfile.addCalledTargetSamples(LeafLoc->Callsite.LineOffset, 538 LeafLoc->Callsite.Discriminator, 539 CalleeName, Count); 540 541 // Record head sample for called target(callee) 542 SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); 543 assert(CalleeCtx.back().CallerName == LeafLoc->CallerName && 544 "Leaf function name doesn't match"); 545 CalleeCtx.back() = *LeafLoc; 546 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); 547 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); 548 CalleeProfile.addHeadSamples(Count); 549 } 550 } 551 552 static SampleContextFrame 553 getCallerContext(SampleContextFrames CalleeContext, 554 SampleContextFrameVector &CallerContext) { 555 assert(CalleeContext.size() > 1 && "Unexpected empty context"); 556 CalleeContext = CalleeContext.drop_back(); 557 CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); 558 SampleContextFrame CallerFrame = CallerContext.back(); 559 CallerContext.back().Callsite = LineLocation(0, 0); 560 return CallerFrame; 561 } 562 563 void CSProfileGenerator::populateInferredFunctionSamples() { 564 for (const auto &Item : ProfileMap) { 565 const auto &CalleeContext = Item.first; 566 const FunctionSamples &CalleeProfile = Item.second; 567 568 // If we already have head sample counts, we must have value profile 569 // for call sites added already. Skip to avoid double counting. 570 if (CalleeProfile.getHeadSamples()) 571 continue; 572 // If we don't have context, nothing to do for caller's call site. 573 // This could happen for entry point function. 574 if (CalleeContext.isBaseContext()) 575 continue; 576 577 // Infer Caller's frame loc and context ID through string splitting 578 SampleContextFrameVector CallerContextId; 579 SampleContextFrame &&CallerLeafFrameLoc = 580 getCallerContext(CalleeContext.getContextFrames(), CallerContextId); 581 SampleContextFrames CallerContext(CallerContextId); 582 583 // It's possible that we haven't seen any sample directly in the caller, 584 // in which case CallerProfile will not exist. But we can't modify 585 // ProfileMap while iterating it. 586 // TODO: created function profile for those callers too 587 if (ProfileMap.find(CallerContext) == ProfileMap.end()) 588 continue; 589 FunctionSamples &CallerProfile = ProfileMap[CallerContext]; 590 591 // Since we don't have call count for inlined functions, we 592 // estimate it from inlinee's profile using entry body sample. 593 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 594 // If we don't have samples with location, use 1 to indicate live. 595 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 596 EstimatedCallCount = 1; 597 CallerProfile.addCalledTargetSamples( 598 CallerLeafFrameLoc.Callsite.LineOffset, 599 CallerLeafFrameLoc.Callsite.Discriminator, 600 CalleeProfile.getContext().getName(), EstimatedCallCount); 601 CallerProfile.addBodySamples(CallerLeafFrameLoc.Callsite.LineOffset, 602 CallerLeafFrameLoc.Callsite.Discriminator, 603 EstimatedCallCount); 604 CallerProfile.addTotalSamples(EstimatedCallCount); 605 } 606 } 607 608 void CSProfileGenerator::postProcessProfiles() { 609 // Compute hot/cold threshold based on profile. This will be used for cold 610 // context profile merging/trimming. 611 computeSummaryAndThreshold(); 612 613 // Run global pre-inliner to adjust/merge context profile based on estimated 614 // inline decisions. 615 if (EnableCSPreInliner) { 616 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) 617 .run(); 618 } 619 620 // Trim and merge cold context profile using cold threshold above. By default, 621 // we skip such merging and trimming when preinliner is on. 622 if (!EnableCSPreInliner || CSProfTrimColdContext.getNumOccurrences() || 623 CSProfMergeColdContext.getNumOccurrences()) { 624 SampleContextTrimmer(ProfileMap) 625 .trimAndMergeColdContextProfiles( 626 HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext, 627 CSProfMaxColdContextDepth); 628 } 629 } 630 631 void CSProfileGenerator::computeSummaryAndThreshold() { 632 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 633 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 634 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 635 (Summary->getDetailedSummary())); 636 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 637 (Summary->getDetailedSummary())); 638 } 639 640 // Helper function to extract context prefix string stack 641 // Extract context stack for reusing, leaf context stack will 642 // be added compressed while looking up function profile 643 static void extractPrefixContextStack( 644 SampleContextFrameVector &ContextStack, 645 const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes, 646 ProfiledBinary *Binary) { 647 for (const auto *P : Probes) { 648 Binary->getInlineContextForProbe(P, ContextStack, true); 649 } 650 } 651 652 void CSProfileGenerator::generateProbeBasedProfile() { 653 for (const auto &CI : SampleCounters) { 654 const ProbeBasedCtxKey *CtxKey = 655 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 656 SampleContextFrameVector ContextStack; 657 extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); 658 // Fill in function body samples from probes, also infer caller's samples 659 // from callee's probe 660 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 661 // Fill in boundary samples for a call probe 662 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 663 } 664 } 665 666 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, 667 ProbeCounterMap &ProbeCounter) { 668 RangeSample Ranges; 669 findDisjointRanges(Ranges, RangeCounter); 670 for (const auto &Range : Ranges) { 671 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 672 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 673 uint64_t Count = Range.second; 674 // Disjoint ranges have introduce zero-filled gap that 675 // doesn't belong to current context, filter them out. 676 if (Count == 0) 677 continue; 678 679 InstructionPointer IP(Binary, RangeBegin, true); 680 681 // Disjoint ranges may have range in the middle of two instr, 682 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 683 // can be Addr1+1 to Addr2-1. We should ignore such range. 684 if (IP.Address > RangeEnd) 685 continue; 686 687 while (IP.Address <= RangeEnd) { 688 const AddressProbesMap &Address2ProbesMap = 689 Binary->getAddress2ProbesMap(); 690 auto It = Address2ProbesMap.find(IP.Address); 691 if (It != Address2ProbesMap.end()) { 692 for (const auto &Probe : It->second) { 693 if (!Probe.isBlock()) 694 continue; 695 ProbeCounter[&Probe] += Count; 696 } 697 } 698 699 IP.advance(); 700 } 701 } 702 } 703 704 void CSProfileGenerator::populateBodySamplesWithProbes( 705 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 706 ProbeCounterMap ProbeCounter; 707 // Extract the top frame probes by looking up each address among the range in 708 // the Address2ProbeMap 709 extractProbesFromRange(RangeCounter, ProbeCounter); 710 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 711 std::unordered_set<FunctionSamples *>> 712 FrameSamples; 713 for (auto PI : ProbeCounter) { 714 const MCDecodedPseudoProbe *Probe = PI.first; 715 uint64_t Count = PI.second; 716 FunctionSamples &FunctionProfile = 717 getFunctionProfileForLeafProbe(ContextStack, Probe); 718 // Record the current frame and FunctionProfile whenever samples are 719 // collected for non-danglie probes. This is for reporting all of the 720 // zero count probes of the frame later. 721 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 722 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 723 FunctionProfile.addTotalSamples(Count); 724 if (Probe->isEntry()) { 725 FunctionProfile.addHeadSamples(Count); 726 // Look up for the caller's function profile 727 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 728 if (InlinerDesc != nullptr) { 729 // Since the context id will be compressed, we have to use callee's 730 // context id to infer caller's context id to ensure they share the 731 // same context prefix. 732 SampleContextFrames CalleeContextId = 733 FunctionProfile.getContext().getContextFrames(); 734 SampleContextFrameVector CallerContextId; 735 SampleContextFrame &&CallerLeafFrameLoc = 736 getCallerContext(CalleeContextId, CallerContextId); 737 uint64_t CallerIndex = CallerLeafFrameLoc.Callsite.LineOffset; 738 assert(CallerIndex && 739 "Inferred caller's location index shouldn't be zero!"); 740 FunctionSamples &CallerProfile = 741 getFunctionProfileForContext(CallerContextId); 742 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 743 CallerProfile.addBodySamples(CallerIndex, 0, Count); 744 CallerProfile.addTotalSamples(Count); 745 CallerProfile.addCalledTargetSamples( 746 CallerIndex, 0, FunctionProfile.getContext().getName(), Count); 747 } 748 } 749 } 750 751 // Assign zero count for remaining probes without sample hits to 752 // differentiate from probes optimized away, of which the counts are unknown 753 // and will be inferred by the compiler. 754 for (auto &I : FrameSamples) { 755 for (auto *FunctionProfile : I.second) { 756 for (auto *Probe : I.first->getProbes()) { 757 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 758 } 759 } 760 } 761 } 762 763 void CSProfileGenerator::populateBoundarySamplesWithProbes( 764 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 765 for (auto BI : BranchCounter) { 766 uint64_t SourceOffset = BI.first.first; 767 uint64_t TargetOffset = BI.first.second; 768 uint64_t Count = BI.second; 769 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 770 const MCDecodedPseudoProbe *CallProbe = 771 Binary->getCallProbeForAddr(SourceAddress); 772 if (CallProbe == nullptr) 773 continue; 774 FunctionSamples &FunctionProfile = 775 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 776 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 777 FunctionProfile.addTotalSamples(Count); 778 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 779 Binary->getFuncFromStartOffset(TargetOffset)); 780 if (CalleeName.size() == 0) 781 continue; 782 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 783 Count); 784 } 785 } 786 787 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 788 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 789 790 // Explicitly copy the context for appending the leaf context 791 SampleContextFrameVector NewContextStack(ContextStack.begin(), 792 ContextStack.end()); 793 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 794 // For leaf inlined context with the top frame, we should strip off the top 795 // frame's probe id, like: 796 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 797 auto LeafFrame = NewContextStack.back(); 798 LeafFrame.Callsite = LineLocation(0, 0); 799 NewContextStack.pop_back(); 800 // Compress the context string except for the leaf frame 801 CSProfileGenerator::compressRecursionContext(NewContextStack); 802 CSProfileGenerator::trimContext(NewContextStack); 803 NewContextStack.push_back(LeafFrame); 804 805 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 806 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 807 FunctionSamples &FunctionProile = 808 getFunctionProfileForContext(NewContextStack, WasLeafInlined); 809 FunctionProile.setFunctionHash(FuncDesc->FuncHash); 810 return FunctionProile; 811 } 812 813 } // end namespace sampleprof 814 } // end namespace llvm 815