1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "llvm/ProfileData/ProfileCommon.h" 11 12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 13 cl::Required, 14 cl::desc("Output profile file")); 15 static cl::alias OutputA("o", cl::desc("Alias for --output"), 16 cl::aliasopt(OutputFilename)); 17 18 static cl::opt<SampleProfileFormat> OutputFormat( 19 "format", cl::desc("Format of output profile"), cl::init(SPF_Text), 20 cl::values( 21 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 22 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 23 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 24 clEnumValN(SPF_Text, "text", "Text encoding"), 25 clEnumValN(SPF_GCC, "gcc", 26 "GCC encoding (only meaningful for -sample)"))); 27 28 static cl::opt<int32_t, true> RecursionCompression( 29 "compress-recursion", 30 cl::desc("Compressing recursion by deduplicating adjacent frame " 31 "sequences up to the specified size. -1 means no size limit."), 32 cl::Hidden, 33 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 34 35 static cl::opt<uint64_t> CSProfColdThreshold( 36 "csprof-cold-thres", cl::init(100), cl::ZeroOrMore, 37 cl::desc("Specify the total samples threshold for a context profile to " 38 "be considered cold, any cold profiles will be merged into " 39 "context-less base profiles")); 40 41 static cl::opt<bool> CSProfMergeColdContext( 42 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 43 cl::desc("This works together with --csprof-cold-thres. If the total count " 44 "of context profile is smaller than the threshold, it will be " 45 "merged into context-less base profile.")); 46 47 static cl::opt<bool> CSProfTrimColdContext( 48 "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore, 49 cl::desc("This works together with --csprof-cold-thres. If the total count " 50 "of the profile after all merge is done is still smaller than " 51 "threshold, it will be trimmed.")); 52 53 using namespace llvm; 54 using namespace sampleprof; 55 56 namespace llvm { 57 namespace sampleprof { 58 59 // Initialize the MaxCompressionSize to -1 which means no size limit 60 int32_t CSProfileGenerator::MaxCompressionSize = -1; 61 62 static bool 63 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) { 64 return BinarySampleCounters.size() && 65 BinarySampleCounters.begin()->first->usePseudoProbes(); 66 } 67 68 std::unique_ptr<ProfileGenerator> 69 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters, 70 enum PerfScriptType SampleType) { 71 std::unique_ptr<ProfileGenerator> ProfileGenerator; 72 if (SampleType == PERF_LBR_STACK) { 73 if (usePseudoProbes(BinarySampleCounters)) { 74 ProfileGenerator.reset( 75 new PseudoProbeCSProfileGenerator(BinarySampleCounters)); 76 } else { 77 ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters)); 78 } 79 } else { 80 // TODO: 81 llvm_unreachable("Unsupported perfscript!"); 82 } 83 84 return ProfileGenerator; 85 } 86 87 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 88 StringMap<FunctionSamples> &ProfileMap) { 89 Writer->write(ProfileMap); 90 } 91 92 void ProfileGenerator::write() { 93 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 94 if (std::error_code EC = WriterOrErr.getError()) 95 exitWithError(EC, OutputFilename); 96 write(std::move(WriterOrErr.get()), ProfileMap); 97 } 98 99 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges, 100 const RangeSample &Ranges) { 101 102 /* 103 Regions may overlap with each other. Using the boundary info, find all 104 disjoint ranges and their sample count. BoundaryPoint contains the count 105 multiple samples begin/end at this points. 106 107 |<--100-->| Sample1 108 |<------200------>| Sample2 109 A B C 110 111 In the example above, 112 Sample1 begins at A, ends at B, its value is 100. 113 Sample2 beings at A, ends at C, its value is 200. 114 For A, BeginCount is the sum of sample begins at A, which is 300 and no 115 samples ends at A, so EndCount is 0. 116 Then boundary points A, B, and C with begin/end counts are: 117 A: (300, 0) 118 B: (0, 100) 119 C: (0, 200) 120 */ 121 struct BoundaryPoint { 122 // Sum of sample counts beginning at this point 123 uint64_t BeginCount; 124 // Sum of sample counts ending at this point 125 uint64_t EndCount; 126 127 BoundaryPoint() : BeginCount(0), EndCount(0){}; 128 129 void addBeginCount(uint64_t Count) { BeginCount += Count; } 130 131 void addEndCount(uint64_t Count) { EndCount += Count; } 132 }; 133 134 /* 135 For the above example. With boundary points, follwing logic finds two 136 disjoint region of 137 138 [A,B]: 300 139 [B+1,C]: 200 140 141 If there is a boundary point that both begin and end, the point itself 142 becomes a separate disjoint region. For example, if we have original 143 ranges of 144 145 |<--- 100 --->| 146 |<--- 200 --->| 147 A B C 148 149 there are three boundary points with their begin/end counts of 150 151 A: (100, 0) 152 B: (200, 100) 153 C: (0, 200) 154 155 the disjoint ranges would be 156 157 [A, B-1]: 100 158 [B, B]: 300 159 [B+1, C]: 200. 160 */ 161 std::map<uint64_t, BoundaryPoint> Boundaries; 162 163 for (auto Item : Ranges) { 164 uint64_t Begin = Item.first.first; 165 uint64_t End = Item.first.second; 166 uint64_t Count = Item.second; 167 if (Boundaries.find(Begin) == Boundaries.end()) 168 Boundaries[Begin] = BoundaryPoint(); 169 Boundaries[Begin].addBeginCount(Count); 170 171 if (Boundaries.find(End) == Boundaries.end()) 172 Boundaries[End] = BoundaryPoint(); 173 Boundaries[End].addEndCount(Count); 174 } 175 176 uint64_t BeginAddress = 0; 177 int Count = 0; 178 for (auto Item : Boundaries) { 179 uint64_t Address = Item.first; 180 BoundaryPoint &Point = Item.second; 181 if (Point.BeginCount) { 182 if (BeginAddress) 183 DisjointRanges[{BeginAddress, Address - 1}] = Count; 184 Count += Point.BeginCount; 185 BeginAddress = Address; 186 } 187 if (Point.EndCount) { 188 assert(BeginAddress && "First boundary point cannot be 'end' point"); 189 DisjointRanges[{BeginAddress, Address}] = Count; 190 Count -= Point.EndCount; 191 BeginAddress = Address + 1; 192 } 193 } 194 } 195 196 FunctionSamples & 197 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr, 198 bool WasLeafInlined) { 199 auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples()); 200 if (Ret.second) { 201 SampleContext FContext(Ret.first->first(), RawContext); 202 if (WasLeafInlined) 203 FContext.setAttribute(ContextWasInlined); 204 FunctionSamples &FProfile = Ret.first->second; 205 FProfile.setContext(FContext); 206 FProfile.setName(FContext.getNameWithoutContext()); 207 } 208 return Ret.first->second; 209 } 210 211 void CSProfileGenerator::generateProfile() { 212 FunctionSamples::ProfileIsCS = true; 213 for (const auto &BI : BinarySampleCounters) { 214 ProfiledBinary *Binary = BI.first; 215 for (const auto &CI : BI.second) { 216 const StringBasedCtxKey *CtxKey = 217 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 218 StringRef ContextId(CtxKey->Context); 219 // Get or create function profile for the range 220 FunctionSamples &FunctionProfile = 221 getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined); 222 223 // Fill in function body samples 224 populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter, 225 Binary); 226 // Fill in boundary sample counts as well as call site samples for calls 227 populateFunctionBoundarySamples(ContextId, FunctionProfile, 228 CI.second.BranchCounter, Binary); 229 } 230 } 231 // Fill in call site value sample for inlined calls and also use context to 232 // infer missing samples. Since we don't have call count for inlined 233 // functions, we estimate it from inlinee's profile using the entry of the 234 // body sample. 235 populateInferredFunctionSamples(); 236 237 // Compute hot/cold threshold based on profile. This will be used for cold 238 // context profile merging/trimming. 239 computeSummaryAndThreshold(); 240 } 241 242 void CSProfileGenerator::updateBodySamplesforFunctionProfile( 243 FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, 244 uint64_t Count) { 245 // Filter out invalid negative(int type) lineOffset 246 if (LeafLoc.second.LineOffset & 0x80000000) 247 return; 248 // Use the maximum count of samples with same line location 249 ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt( 250 LeafLoc.second.LineOffset, LeafLoc.second.Discriminator); 251 uint64_t PreviousCount = R ? R.get() : 0; 252 if (PreviousCount < Count) { 253 FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, 254 LeafLoc.second.Discriminator, 255 Count - PreviousCount); 256 } 257 } 258 259 void CSProfileGenerator::populateFunctionBodySamples( 260 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter, 261 ProfiledBinary *Binary) { 262 // Compute disjoint ranges first, so we can use MAX 263 // for calculating count for each location. 264 RangeSample Ranges; 265 findDisjointRanges(Ranges, RangeCounter); 266 for (auto Range : Ranges) { 267 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 268 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 269 uint64_t Count = Range.second; 270 // Disjoint ranges have introduce zero-filled gap that 271 // doesn't belong to current context, filter them out. 272 if (Count == 0) 273 continue; 274 275 InstructionPointer IP(Binary, RangeBegin, true); 276 277 // Disjoint ranges may have range in the middle of two instr, 278 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 279 // can be Addr1+1 to Addr2-1. We should ignore such range. 280 if (IP.Address > RangeEnd) 281 continue; 282 283 while (IP.Address <= RangeEnd) { 284 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 285 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 286 if (LeafLoc.hasValue()) { 287 // Recording body sample for this specific context 288 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 289 } 290 // Accumulate total sample count even it's a line with invalid debug info 291 FunctionProfile.addTotalSamples(Count); 292 // Move to next IP within the range 293 IP.advance(); 294 } 295 } 296 } 297 298 void CSProfileGenerator::populateFunctionBoundarySamples( 299 StringRef ContextId, FunctionSamples &FunctionProfile, 300 const BranchSample &BranchCounters, ProfiledBinary *Binary) { 301 302 for (auto Entry : BranchCounters) { 303 uint64_t SourceOffset = Entry.first.first; 304 uint64_t TargetOffset = Entry.first.second; 305 uint64_t Count = Entry.second; 306 // Get the callee name by branch target if it's a call branch 307 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 308 Binary->getFuncFromStartOffset(TargetOffset)); 309 if (CalleeName.size() == 0) 310 continue; 311 312 // Record called target sample and its count 313 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 314 if (!LeafLoc.hasValue()) 315 continue; 316 FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset, 317 LeafLoc->second.Discriminator, 318 CalleeName, Count); 319 320 // Record head sample for called target(callee) 321 std::ostringstream OCalleeCtxStr; 322 if (ContextId.find(" @ ") != StringRef::npos) { 323 OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); 324 OCalleeCtxStr << " @ "; 325 } 326 OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str(); 327 328 FunctionSamples &CalleeProfile = 329 getFunctionProfileForContext(OCalleeCtxStr.str()); 330 assert(Count != 0 && "Unexpected zero weight branch"); 331 CalleeProfile.addHeadSamples(Count); 332 } 333 } 334 335 static FrameLocation getCallerContext(StringRef CalleeContext, 336 StringRef &CallerNameWithContext) { 337 StringRef CallerContext = CalleeContext.rsplit(" @ ").first; 338 CallerNameWithContext = CallerContext.rsplit(':').first; 339 auto ContextSplit = CallerContext.rsplit(" @ "); 340 StringRef CallerFrameStr = ContextSplit.second.size() == 0 341 ? ContextSplit.first 342 : ContextSplit.second; 343 FrameLocation LeafFrameLoc = {"", {0, 0}}; 344 StringRef Funcname; 345 SampleContext::decodeContextString(CallerFrameStr, Funcname, 346 LeafFrameLoc.second); 347 LeafFrameLoc.first = Funcname.str(); 348 return LeafFrameLoc; 349 } 350 351 void CSProfileGenerator::populateInferredFunctionSamples() { 352 for (const auto &Item : ProfileMap) { 353 const StringRef CalleeContext = Item.first(); 354 const FunctionSamples &CalleeProfile = Item.second; 355 356 // If we already have head sample counts, we must have value profile 357 // for call sites added already. Skip to avoid double counting. 358 if (CalleeProfile.getHeadSamples()) 359 continue; 360 // If we don't have context, nothing to do for caller's call site. 361 // This could happen for entry point function. 362 if (CalleeContext.find(" @ ") == StringRef::npos) 363 continue; 364 365 // Infer Caller's frame loc and context ID through string splitting 366 StringRef CallerContextId; 367 FrameLocation &&CallerLeafFrameLoc = 368 getCallerContext(CalleeContext, CallerContextId); 369 370 // It's possible that we haven't seen any sample directly in the caller, 371 // in which case CallerProfile will not exist. But we can't modify 372 // ProfileMap while iterating it. 373 // TODO: created function profile for those callers too 374 if (ProfileMap.find(CallerContextId) == ProfileMap.end()) 375 continue; 376 FunctionSamples &CallerProfile = ProfileMap[CallerContextId]; 377 378 // Since we don't have call count for inlined functions, we 379 // estimate it from inlinee's profile using entry body sample. 380 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 381 // If we don't have samples with location, use 1 to indicate live. 382 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 383 EstimatedCallCount = 1; 384 CallerProfile.addCalledTargetSamples( 385 CallerLeafFrameLoc.second.LineOffset, 386 CallerLeafFrameLoc.second.Discriminator, 387 CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount); 388 CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset, 389 CallerLeafFrameLoc.second.Discriminator, 390 EstimatedCallCount); 391 CallerProfile.addTotalSamples(EstimatedCallCount); 392 } 393 } 394 395 void CSProfileGenerator::computeSummaryAndThreshold() { 396 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 397 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 398 PSI.reset(new ProfileSummaryInfo(std::move(Summary))); 399 } 400 401 void CSProfileGenerator::mergeAndTrimColdProfile( 402 StringMap<FunctionSamples> &ProfileMap) { 403 if (!CSProfMergeColdContext && !CSProfTrimColdContext) 404 return; 405 406 // Use threshold calculated from profile summary unless specified 407 uint64_t ColdThreshold = PSI->getColdCountThreshold(); 408 if (CSProfColdThreshold.getNumOccurrences()) { 409 ColdThreshold = CSProfColdThreshold; 410 } 411 412 // Nothing to merge if sample threshold is zero 413 if (ColdThreshold == 0) 414 return; 415 416 // Filter the cold profiles from ProfileMap and move them into a tmp 417 // container 418 std::vector<std::pair<StringRef, const FunctionSamples *>> ColdProfiles; 419 for (const auto &I : ProfileMap) { 420 const FunctionSamples &FunctionProfile = I.second; 421 if (FunctionProfile.getTotalSamples() >= ColdThreshold) 422 continue; 423 ColdProfiles.emplace_back(I.getKey(), &I.second); 424 } 425 426 // Remove the code profile from ProfileMap and merge them into BaseProileMap 427 StringMap<FunctionSamples> BaseProfileMap; 428 for (const auto &I : ColdProfiles) { 429 if (CSProfMergeColdContext) { 430 auto Ret = BaseProfileMap.try_emplace( 431 I.second->getContext().getNameWithoutContext(), FunctionSamples()); 432 FunctionSamples &BaseProfile = Ret.first->second; 433 BaseProfile.merge(*I.second); 434 } 435 ProfileMap.erase(I.first); 436 } 437 438 // Merge the base profiles into ProfileMap; 439 for (const auto &I : BaseProfileMap) { 440 // Filter the cold base profile 441 if (CSProfTrimColdContext && 442 I.second.getTotalSamples() < CSProfColdThreshold && 443 ProfileMap.find(I.getKey()) == ProfileMap.end()) 444 continue; 445 // Merge the profile if the original profile exists, otherwise just insert 446 // as a new profile 447 FunctionSamples &OrigProfile = getFunctionProfileForContext(I.getKey()); 448 OrigProfile.merge(I.second); 449 } 450 } 451 452 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 453 StringMap<FunctionSamples> &ProfileMap) { 454 mergeAndTrimColdProfile(ProfileMap); 455 // Add bracket for context key to support different profile binary format 456 StringMap<FunctionSamples> CxtWithBracketPMap; 457 for (const auto &Item : ProfileMap) { 458 std::string ContextWithBracket = "[" + Item.first().str() + "]"; 459 auto Ret = CxtWithBracketPMap.try_emplace(ContextWithBracket, Item.second); 460 assert(Ret.second && "Must be a unique context"); 461 SampleContext FContext(Ret.first->first(), RawContext); 462 FunctionSamples &FProfile = Ret.first->second; 463 FContext.setAllAttributes(FProfile.getContext().getAllAttributes()); 464 FProfile.setName(FContext.getNameWithContext(true)); 465 FProfile.setContext(FContext); 466 } 467 Writer->write(CxtWithBracketPMap); 468 } 469 470 // Helper function to extract context prefix string stack 471 // Extract context stack for reusing, leaf context stack will 472 // be added compressed while looking up function profile 473 static void 474 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack, 475 const SmallVectorImpl<const PseudoProbe *> &Probes, 476 ProfiledBinary *Binary) { 477 for (const auto *P : Probes) { 478 Binary->getInlineContextForProbe(P, ContextStrStack, true); 479 } 480 } 481 482 void PseudoProbeCSProfileGenerator::generateProfile() { 483 // Enable pseudo probe functionalities in SampleProf 484 FunctionSamples::ProfileIsProbeBased = true; 485 FunctionSamples::ProfileIsCS = true; 486 for (const auto &BI : BinarySampleCounters) { 487 ProfiledBinary *Binary = BI.first; 488 for (const auto &CI : BI.second) { 489 const ProbeBasedCtxKey *CtxKey = 490 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 491 SmallVector<std::string, 16> ContextStrStack; 492 extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); 493 // Fill in function body samples from probes, also infer caller's samples 494 // from callee's probe 495 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack, 496 Binary); 497 // Fill in boundary samples for a call probe 498 populateBoundarySamplesWithProbes(CI.second.BranchCounter, 499 ContextStrStack, Binary); 500 } 501 } 502 503 // Compute hot/cold threshold based on profile. This will be used for cold 504 // context profile merging/trimming. 505 computeSummaryAndThreshold(); 506 } 507 508 void PseudoProbeCSProfileGenerator::extractProbesFromRange( 509 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 510 ProfiledBinary *Binary) { 511 RangeSample Ranges; 512 findDisjointRanges(Ranges, RangeCounter); 513 for (const auto &Range : Ranges) { 514 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 515 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 516 uint64_t Count = Range.second; 517 // Disjoint ranges have introduce zero-filled gap that 518 // doesn't belong to current context, filter them out. 519 if (Count == 0) 520 continue; 521 522 InstructionPointer IP(Binary, RangeBegin, true); 523 524 // Disjoint ranges may have range in the middle of two instr, 525 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 526 // can be Addr1+1 to Addr2-1. We should ignore such range. 527 if (IP.Address > RangeEnd) 528 continue; 529 530 while (IP.Address <= RangeEnd) { 531 const AddressProbesMap &Address2ProbesMap = 532 Binary->getAddress2ProbesMap(); 533 auto It = Address2ProbesMap.find(IP.Address); 534 if (It != Address2ProbesMap.end()) { 535 for (const auto &Probe : It->second) { 536 if (!Probe.isBlock()) 537 continue; 538 ProbeCounter[&Probe] += Count; 539 } 540 } 541 542 IP.advance(); 543 } 544 } 545 } 546 547 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( 548 const RangeSample &RangeCounter, 549 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) { 550 ProbeCounterMap ProbeCounter; 551 // Extract the top frame probes by looking up each address among the range in 552 // the Address2ProbeMap 553 extractProbesFromRange(RangeCounter, ProbeCounter, Binary); 554 for (auto PI : ProbeCounter) { 555 const PseudoProbe *Probe = PI.first; 556 uint64_t Count = PI.second; 557 FunctionSamples &FunctionProfile = 558 getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary); 559 560 // Use InvalidProbeCount(UINT64_MAX) to mark sample count for a dangling 561 // probe. Dangling probes are the probes associated to an empty block. With 562 // this place holder, sample count on dangling probe will not be trusted by 563 // the compiler and it will rely on the counts inference algorithm to get 564 // the probe a reasonable count. 565 if (Probe->isDangling()) { 566 FunctionProfile.addBodySamplesForProbe( 567 Probe->Index, FunctionSamples::InvalidProbeCount); 568 continue; 569 } 570 FunctionProfile.addBodySamplesForProbe(Probe->Index, Count); 571 FunctionProfile.addTotalSamples(Count); 572 if (Probe->isEntry()) { 573 FunctionProfile.addHeadSamples(Count); 574 // Look up for the caller's function profile 575 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 576 if (InlinerDesc != nullptr) { 577 // Since the context id will be compressed, we have to use callee's 578 // context id to infer caller's context id to ensure they share the 579 // same context prefix. 580 StringRef CalleeContextId = 581 FunctionProfile.getContext().getNameWithContext(true); 582 StringRef CallerContextId; 583 FrameLocation &&CallerLeafFrameLoc = 584 getCallerContext(CalleeContextId, CallerContextId); 585 uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset; 586 assert(CallerIndex && 587 "Inferred caller's location index shouldn't be zero!"); 588 FunctionSamples &CallerProfile = 589 getFunctionProfileForContext(CallerContextId); 590 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 591 CallerProfile.addBodySamples(CallerIndex, 0, Count); 592 CallerProfile.addTotalSamples(Count); 593 CallerProfile.addCalledTargetSamples( 594 CallerIndex, 0, 595 FunctionProfile.getContext().getNameWithoutContext(), Count); 596 } 597 } 598 } 599 } 600 601 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( 602 const BranchSample &BranchCounter, 603 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) { 604 for (auto BI : BranchCounter) { 605 uint64_t SourceOffset = BI.first.first; 606 uint64_t TargetOffset = BI.first.second; 607 uint64_t Count = BI.second; 608 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 609 const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress); 610 if (CallProbe == nullptr) 611 continue; 612 FunctionSamples &FunctionProfile = 613 getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary); 614 FunctionProfile.addBodySamples(CallProbe->Index, 0, Count); 615 FunctionProfile.addTotalSamples(Count); 616 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 617 Binary->getFuncFromStartOffset(TargetOffset)); 618 if (CalleeName.size() == 0) 619 continue; 620 FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName, 621 Count); 622 } 623 } 624 625 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 626 SmallVectorImpl<std::string> &ContextStrStack, 627 const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) { 628 assert(ContextStrStack.size() && "Profile context must have the leaf frame"); 629 // Compress the context string except for the leaf frame 630 std::string LeafFrame = ContextStrStack.back(); 631 ContextStrStack.pop_back(); 632 CSProfileGenerator::compressRecursionContext(ContextStrStack); 633 634 std::ostringstream OContextStr; 635 for (uint32_t I = 0; I < ContextStrStack.size(); I++) { 636 if (OContextStr.str().size()) 637 OContextStr << " @ "; 638 OContextStr << ContextStrStack[I]; 639 } 640 // For leaf inlined context with the top frame, we should strip off the top 641 // frame's probe id, like: 642 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 643 if (OContextStr.str().size()) 644 OContextStr << " @ "; 645 OContextStr << StringRef(LeafFrame).split(":").first.str(); 646 647 FunctionSamples &FunctionProile = 648 getFunctionProfileForContext(OContextStr.str(), WasLeafInlined); 649 FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash); 650 return FunctionProile; 651 } 652 653 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 654 SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe, 655 ProfiledBinary *Binary) { 656 // Explicitly copy the context for appending the leaf context 657 SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(), 658 ContextStrStack.end()); 659 Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true); 660 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID); 661 bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite(); 662 return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc, 663 WasLeafInlined); 664 } 665 666 } // end namespace sampleprof 667 } // end namespace llvm 668