1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfileGenerator.h" 10 #include "llvm/ProfileData/ProfileCommon.h" 11 12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 13 cl::Required, 14 cl::desc("Output profile file")); 15 static cl::alias OutputA("o", cl::desc("Alias for --output"), 16 cl::aliasopt(OutputFilename)); 17 18 static cl::opt<SampleProfileFormat> OutputFormat( 19 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 20 cl::values( 21 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 22 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 23 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 24 clEnumValN(SPF_Text, "text", "Text encoding"), 25 clEnumValN(SPF_GCC, "gcc", 26 "GCC encoding (only meaningful for -sample)"))); 27 28 static cl::opt<int32_t, true> RecursionCompression( 29 "compress-recursion", 30 cl::desc("Compressing recursion by deduplicating adjacent frame " 31 "sequences up to the specified size. -1 means no size limit."), 32 cl::Hidden, 33 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 34 35 static cl::opt<bool> CSProfMergeColdContext( 36 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, 37 cl::desc("If the total count of context profile is smaller than " 38 "the threshold, it will be merged into context-less base " 39 "profile.")); 40 41 static cl::opt<bool> CSProfTrimColdContext( 42 "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore, 43 cl::desc("If the total count of the profile after all merge is done " 44 "is still smaller than threshold, it will be trimmed.")); 45 46 extern cl::opt<int> ProfileSummaryCutoffCold; 47 48 using namespace llvm; 49 using namespace sampleprof; 50 51 namespace llvm { 52 namespace sampleprof { 53 54 // Initialize the MaxCompressionSize to -1 which means no size limit 55 int32_t CSProfileGenerator::MaxCompressionSize = -1; 56 57 static bool 58 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) { 59 return BinarySampleCounters.size() && 60 BinarySampleCounters.begin()->first->usePseudoProbes(); 61 } 62 63 std::unique_ptr<ProfileGenerator> 64 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters, 65 enum PerfScriptType SampleType) { 66 std::unique_ptr<ProfileGenerator> ProfileGenerator; 67 if (SampleType == PERF_LBR_STACK) { 68 if (usePseudoProbes(BinarySampleCounters)) { 69 ProfileGenerator.reset( 70 new PseudoProbeCSProfileGenerator(BinarySampleCounters)); 71 } else { 72 ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters)); 73 } 74 } else { 75 // TODO: 76 llvm_unreachable("Unsupported perfscript!"); 77 } 78 79 return ProfileGenerator; 80 } 81 82 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 83 StringMap<FunctionSamples> &ProfileMap) { 84 if (std::error_code EC = Writer->write(ProfileMap)) 85 exitWithError(std::move(EC)); 86 } 87 88 void ProfileGenerator::write() { 89 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 90 if (std::error_code EC = WriterOrErr.getError()) 91 exitWithError(EC, OutputFilename); 92 write(std::move(WriterOrErr.get()), ProfileMap); 93 } 94 95 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges, 96 const RangeSample &Ranges) { 97 98 /* 99 Regions may overlap with each other. Using the boundary info, find all 100 disjoint ranges and their sample count. BoundaryPoint contains the count 101 multiple samples begin/end at this points. 102 103 |<--100-->| Sample1 104 |<------200------>| Sample2 105 A B C 106 107 In the example above, 108 Sample1 begins at A, ends at B, its value is 100. 109 Sample2 beings at A, ends at C, its value is 200. 110 For A, BeginCount is the sum of sample begins at A, which is 300 and no 111 samples ends at A, so EndCount is 0. 112 Then boundary points A, B, and C with begin/end counts are: 113 A: (300, 0) 114 B: (0, 100) 115 C: (0, 200) 116 */ 117 struct BoundaryPoint { 118 // Sum of sample counts beginning at this point 119 uint64_t BeginCount; 120 // Sum of sample counts ending at this point 121 uint64_t EndCount; 122 123 BoundaryPoint() : BeginCount(0), EndCount(0){}; 124 125 void addBeginCount(uint64_t Count) { BeginCount += Count; } 126 127 void addEndCount(uint64_t Count) { EndCount += Count; } 128 }; 129 130 /* 131 For the above example. With boundary points, follwing logic finds two 132 disjoint region of 133 134 [A,B]: 300 135 [B+1,C]: 200 136 137 If there is a boundary point that both begin and end, the point itself 138 becomes a separate disjoint region. For example, if we have original 139 ranges of 140 141 |<--- 100 --->| 142 |<--- 200 --->| 143 A B C 144 145 there are three boundary points with their begin/end counts of 146 147 A: (100, 0) 148 B: (200, 100) 149 C: (0, 200) 150 151 the disjoint ranges would be 152 153 [A, B-1]: 100 154 [B, B]: 300 155 [B+1, C]: 200. 156 */ 157 std::map<uint64_t, BoundaryPoint> Boundaries; 158 159 for (auto Item : Ranges) { 160 uint64_t Begin = Item.first.first; 161 uint64_t End = Item.first.second; 162 uint64_t Count = Item.second; 163 if (Boundaries.find(Begin) == Boundaries.end()) 164 Boundaries[Begin] = BoundaryPoint(); 165 Boundaries[Begin].addBeginCount(Count); 166 167 if (Boundaries.find(End) == Boundaries.end()) 168 Boundaries[End] = BoundaryPoint(); 169 Boundaries[End].addEndCount(Count); 170 } 171 172 uint64_t BeginAddress = 0; 173 int Count = 0; 174 for (auto Item : Boundaries) { 175 uint64_t Address = Item.first; 176 BoundaryPoint &Point = Item.second; 177 if (Point.BeginCount) { 178 if (BeginAddress) 179 DisjointRanges[{BeginAddress, Address - 1}] = Count; 180 Count += Point.BeginCount; 181 BeginAddress = Address; 182 } 183 if (Point.EndCount) { 184 assert(BeginAddress && "First boundary point cannot be 'end' point"); 185 DisjointRanges[{BeginAddress, Address}] = Count; 186 Count -= Point.EndCount; 187 BeginAddress = Address + 1; 188 } 189 } 190 } 191 192 FunctionSamples & 193 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr, 194 bool WasLeafInlined) { 195 auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples()); 196 if (Ret.second) { 197 // Make a copy of the underlying context string in string table 198 // before StringRef wrapper is used for context. 199 auto It = ContextStrings.insert(ContextStr.str()); 200 SampleContext FContext(*It.first, RawContext); 201 if (WasLeafInlined) 202 FContext.setAttribute(ContextWasInlined); 203 FunctionSamples &FProfile = Ret.first->second; 204 FProfile.setContext(FContext); 205 FProfile.setName(FContext.getNameWithoutContext()); 206 } 207 return Ret.first->second; 208 } 209 210 void CSProfileGenerator::generateProfile() { 211 FunctionSamples::ProfileIsCS = true; 212 for (const auto &BI : BinarySampleCounters) { 213 ProfiledBinary *Binary = BI.first; 214 for (const auto &CI : BI.second) { 215 const StringBasedCtxKey *CtxKey = 216 dyn_cast<StringBasedCtxKey>(CI.first.getPtr()); 217 StringRef ContextId(CtxKey->Context); 218 // Get or create function profile for the range 219 FunctionSamples &FunctionProfile = 220 getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined); 221 222 // Fill in function body samples 223 populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter, 224 Binary); 225 // Fill in boundary sample counts as well as call site samples for calls 226 populateFunctionBoundarySamples(ContextId, FunctionProfile, 227 CI.second.BranchCounter, Binary); 228 } 229 } 230 // Fill in call site value sample for inlined calls and also use context to 231 // infer missing samples. Since we don't have call count for inlined 232 // functions, we estimate it from inlinee's profile using the entry of the 233 // body sample. 234 populateInferredFunctionSamples(); 235 236 postProcessProfiles(); 237 } 238 239 void CSProfileGenerator::updateBodySamplesforFunctionProfile( 240 FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, 241 uint64_t Count) { 242 // Filter out invalid negative(int type) lineOffset 243 if (LeafLoc.second.LineOffset & 0x80000000) 244 return; 245 // Use the maximum count of samples with same line location 246 ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt( 247 LeafLoc.second.LineOffset, LeafLoc.second.Discriminator); 248 uint64_t PreviousCount = R ? R.get() : 0; 249 if (PreviousCount < Count) { 250 FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, 251 LeafLoc.second.Discriminator, 252 Count - PreviousCount); 253 } 254 } 255 256 void CSProfileGenerator::populateFunctionBodySamples( 257 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter, 258 ProfiledBinary *Binary) { 259 // Compute disjoint ranges first, so we can use MAX 260 // for calculating count for each location. 261 RangeSample Ranges; 262 findDisjointRanges(Ranges, RangeCounter); 263 for (auto Range : Ranges) { 264 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 265 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 266 uint64_t Count = Range.second; 267 // Disjoint ranges have introduce zero-filled gap that 268 // doesn't belong to current context, filter them out. 269 if (Count == 0) 270 continue; 271 272 InstructionPointer IP(Binary, RangeBegin, true); 273 274 // Disjoint ranges may have range in the middle of two instr, 275 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 276 // can be Addr1+1 to Addr2-1. We should ignore such range. 277 if (IP.Address > RangeEnd) 278 continue; 279 280 while (IP.Address <= RangeEnd) { 281 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); 282 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); 283 if (LeafLoc.hasValue()) { 284 // Recording body sample for this specific context 285 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 286 } 287 // Accumulate total sample count even it's a line with invalid debug info 288 FunctionProfile.addTotalSamples(Count); 289 // Move to next IP within the range 290 IP.advance(); 291 } 292 } 293 } 294 295 void CSProfileGenerator::populateFunctionBoundarySamples( 296 StringRef ContextId, FunctionSamples &FunctionProfile, 297 const BranchSample &BranchCounters, ProfiledBinary *Binary) { 298 299 for (auto Entry : BranchCounters) { 300 uint64_t SourceOffset = Entry.first.first; 301 uint64_t TargetOffset = Entry.first.second; 302 uint64_t Count = Entry.second; 303 // Get the callee name by branch target if it's a call branch 304 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 305 Binary->getFuncFromStartOffset(TargetOffset)); 306 if (CalleeName.size() == 0) 307 continue; 308 309 // Record called target sample and its count 310 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); 311 if (!LeafLoc.hasValue()) 312 continue; 313 FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset, 314 LeafLoc->second.Discriminator, 315 CalleeName, Count); 316 317 // Record head sample for called target(callee) 318 std::ostringstream OCalleeCtxStr; 319 if (ContextId.find(" @ ") != StringRef::npos) { 320 OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); 321 OCalleeCtxStr << " @ "; 322 } 323 OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str(); 324 325 FunctionSamples &CalleeProfile = 326 getFunctionProfileForContext(OCalleeCtxStr.str()); 327 assert(Count != 0 && "Unexpected zero weight branch"); 328 CalleeProfile.addHeadSamples(Count); 329 } 330 } 331 332 static FrameLocation getCallerContext(StringRef CalleeContext, 333 StringRef &CallerNameWithContext) { 334 StringRef CallerContext = CalleeContext.rsplit(" @ ").first; 335 CallerNameWithContext = CallerContext.rsplit(':').first; 336 auto ContextSplit = CallerContext.rsplit(" @ "); 337 StringRef CallerFrameStr = ContextSplit.second.size() == 0 338 ? ContextSplit.first 339 : ContextSplit.second; 340 FrameLocation LeafFrameLoc = {"", {0, 0}}; 341 StringRef Funcname; 342 SampleContext::decodeContextString(CallerFrameStr, Funcname, 343 LeafFrameLoc.second); 344 LeafFrameLoc.first = Funcname.str(); 345 return LeafFrameLoc; 346 } 347 348 void CSProfileGenerator::populateInferredFunctionSamples() { 349 for (const auto &Item : ProfileMap) { 350 const StringRef CalleeContext = Item.first(); 351 const FunctionSamples &CalleeProfile = Item.second; 352 353 // If we already have head sample counts, we must have value profile 354 // for call sites added already. Skip to avoid double counting. 355 if (CalleeProfile.getHeadSamples()) 356 continue; 357 // If we don't have context, nothing to do for caller's call site. 358 // This could happen for entry point function. 359 if (CalleeContext.find(" @ ") == StringRef::npos) 360 continue; 361 362 // Infer Caller's frame loc and context ID through string splitting 363 StringRef CallerContextId; 364 FrameLocation &&CallerLeafFrameLoc = 365 getCallerContext(CalleeContext, CallerContextId); 366 367 // It's possible that we haven't seen any sample directly in the caller, 368 // in which case CallerProfile will not exist. But we can't modify 369 // ProfileMap while iterating it. 370 // TODO: created function profile for those callers too 371 if (ProfileMap.find(CallerContextId) == ProfileMap.end()) 372 continue; 373 FunctionSamples &CallerProfile = ProfileMap[CallerContextId]; 374 375 // Since we don't have call count for inlined functions, we 376 // estimate it from inlinee's profile using entry body sample. 377 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); 378 // If we don't have samples with location, use 1 to indicate live. 379 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) 380 EstimatedCallCount = 1; 381 CallerProfile.addCalledTargetSamples( 382 CallerLeafFrameLoc.second.LineOffset, 383 CallerLeafFrameLoc.second.Discriminator, 384 CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount); 385 CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset, 386 CallerLeafFrameLoc.second.Discriminator, 387 EstimatedCallCount); 388 CallerProfile.addTotalSamples(EstimatedCallCount); 389 } 390 } 391 392 void CSProfileGenerator::postProcessProfiles() { 393 // Compute hot/cold threshold based on profile. This will be used for cold 394 // context profile merging/trimming. 395 computeSummaryAndThreshold(); 396 397 // Run global pre-inliner to adjust/merge context profile based on estimated 398 // inline decisions. 399 CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run(); 400 401 // Trim and merge cold context profile using cold threshold above; 402 SampleContextTrimmer(ProfileMap) 403 .trimAndMergeColdContextProfiles( 404 ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext); 405 } 406 407 void CSProfileGenerator::computeSummaryAndThreshold() { 408 // Update the default value of cold cutoff for llvm-profgen. 409 // Do it here because we don't want to change the global default, 410 // which would lead CS profile size too large. 411 if (!ProfileSummaryCutoffCold.getNumOccurrences()) 412 ProfileSummaryCutoffCold = 999000; 413 414 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 415 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 416 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 417 (Summary->getDetailedSummary())); 418 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 419 (Summary->getDetailedSummary())); 420 } 421 422 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer, 423 StringMap<FunctionSamples> &ProfileMap) { 424 if (std::error_code EC = Writer->write(ProfileMap)) 425 exitWithError(std::move(EC)); 426 } 427 428 // Helper function to extract context prefix string stack 429 // Extract context stack for reusing, leaf context stack will 430 // be added compressed while looking up function profile 431 static void 432 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack, 433 const SmallVectorImpl<const PseudoProbe *> &Probes, 434 ProfiledBinary *Binary) { 435 for (const auto *P : Probes) { 436 Binary->getInlineContextForProbe(P, ContextStrStack, true); 437 } 438 } 439 440 void PseudoProbeCSProfileGenerator::generateProfile() { 441 // Enable pseudo probe functionalities in SampleProf 442 FunctionSamples::ProfileIsProbeBased = true; 443 FunctionSamples::ProfileIsCS = true; 444 for (const auto &BI : BinarySampleCounters) { 445 ProfiledBinary *Binary = BI.first; 446 for (const auto &CI : BI.second) { 447 const ProbeBasedCtxKey *CtxKey = 448 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); 449 SmallVector<std::string, 16> ContextStrStack; 450 extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); 451 // Fill in function body samples from probes, also infer caller's samples 452 // from callee's probe 453 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack, 454 Binary); 455 // Fill in boundary samples for a call probe 456 populateBoundarySamplesWithProbes(CI.second.BranchCounter, 457 ContextStrStack, Binary); 458 } 459 } 460 461 postProcessProfiles(); 462 } 463 464 void PseudoProbeCSProfileGenerator::extractProbesFromRange( 465 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 466 ProfiledBinary *Binary) { 467 RangeSample Ranges; 468 findDisjointRanges(Ranges, RangeCounter); 469 for (const auto &Range : Ranges) { 470 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); 471 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); 472 uint64_t Count = Range.second; 473 // Disjoint ranges have introduce zero-filled gap that 474 // doesn't belong to current context, filter them out. 475 if (Count == 0) 476 continue; 477 478 InstructionPointer IP(Binary, RangeBegin, true); 479 480 // Disjoint ranges may have range in the middle of two instr, 481 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 482 // can be Addr1+1 to Addr2-1. We should ignore such range. 483 if (IP.Address > RangeEnd) 484 continue; 485 486 while (IP.Address <= RangeEnd) { 487 const AddressProbesMap &Address2ProbesMap = 488 Binary->getAddress2ProbesMap(); 489 auto It = Address2ProbesMap.find(IP.Address); 490 if (It != Address2ProbesMap.end()) { 491 for (const auto &Probe : It->second) { 492 if (!Probe.isBlock()) 493 continue; 494 ProbeCounter[&Probe] += Count; 495 } 496 } 497 498 IP.advance(); 499 } 500 } 501 } 502 503 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( 504 const RangeSample &RangeCounter, 505 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) { 506 ProbeCounterMap ProbeCounter; 507 // Extract the top frame probes by looking up each address among the range in 508 // the Address2ProbeMap 509 extractProbesFromRange(RangeCounter, ProbeCounter, Binary); 510 std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples; 511 for (auto PI : ProbeCounter) { 512 const PseudoProbe *Probe = PI.first; 513 uint64_t Count = PI.second; 514 // Ignore dangling probes since they will be reported later if needed. 515 if (Probe->isDangling()) 516 continue; 517 FunctionSamples &FunctionProfile = 518 getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary); 519 // Record the current frame and FunctionProfile whenever samples are 520 // collected for non-danglie probes. This is for reporting all of the 521 // dangling probes of the frame later. 522 FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile; 523 FunctionProfile.addBodySamplesForProbe(Probe->Index, Count); 524 FunctionProfile.addTotalSamples(Count); 525 if (Probe->isEntry()) { 526 FunctionProfile.addHeadSamples(Count); 527 // Look up for the caller's function profile 528 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 529 if (InlinerDesc != nullptr) { 530 // Since the context id will be compressed, we have to use callee's 531 // context id to infer caller's context id to ensure they share the 532 // same context prefix. 533 StringRef CalleeContextId = 534 FunctionProfile.getContext().getNameWithContext(); 535 StringRef CallerContextId; 536 FrameLocation &&CallerLeafFrameLoc = 537 getCallerContext(CalleeContextId, CallerContextId); 538 uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset; 539 assert(CallerIndex && 540 "Inferred caller's location index shouldn't be zero!"); 541 FunctionSamples &CallerProfile = 542 getFunctionProfileForContext(CallerContextId); 543 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 544 CallerProfile.addBodySamples(CallerIndex, 0, Count); 545 CallerProfile.addTotalSamples(Count); 546 CallerProfile.addCalledTargetSamples( 547 CallerIndex, 0, 548 FunctionProfile.getContext().getNameWithoutContext(), Count); 549 } 550 } 551 552 // Report dangling probes for frames that have real samples collected. 553 // Dangling probes are the probes associated to an empty block. With this 554 // place holder, sample count on a dangling probe will not be trusted by the 555 // compiler and we will rely on the counts inference algorithm to get the 556 // probe a reasonable count. Use InvalidProbeCount to mark sample count for 557 // a dangling probe. 558 for (auto &I : FrameSamples) { 559 auto *FunctionProfile = I.second; 560 for (auto *Probe : I.first->getProbes()) { 561 if (Probe->isDangling()) { 562 FunctionProfile->addBodySamplesForProbe( 563 Probe->Index, FunctionSamples::InvalidProbeCount); 564 } 565 } 566 } 567 } 568 } 569 570 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( 571 const BranchSample &BranchCounter, 572 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) { 573 for (auto BI : BranchCounter) { 574 uint64_t SourceOffset = BI.first.first; 575 uint64_t TargetOffset = BI.first.second; 576 uint64_t Count = BI.second; 577 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); 578 const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress); 579 if (CallProbe == nullptr) 580 continue; 581 FunctionSamples &FunctionProfile = 582 getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary); 583 FunctionProfile.addBodySamples(CallProbe->Index, 0, Count); 584 FunctionProfile.addTotalSamples(Count); 585 StringRef CalleeName = FunctionSamples::getCanonicalFnName( 586 Binary->getFuncFromStartOffset(TargetOffset)); 587 if (CalleeName.size() == 0) 588 continue; 589 FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName, 590 Count); 591 } 592 } 593 594 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 595 SmallVectorImpl<std::string> &ContextStrStack, 596 const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) { 597 assert(ContextStrStack.size() && "Profile context must have the leaf frame"); 598 // Compress the context string except for the leaf frame 599 std::string LeafFrame = ContextStrStack.back(); 600 ContextStrStack.pop_back(); 601 CSProfileGenerator::compressRecursionContext(ContextStrStack); 602 603 std::ostringstream OContextStr; 604 for (uint32_t I = 0; I < ContextStrStack.size(); I++) { 605 if (OContextStr.str().size()) 606 OContextStr << " @ "; 607 OContextStr << ContextStrStack[I]; 608 } 609 // For leaf inlined context with the top frame, we should strip off the top 610 // frame's probe id, like: 611 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 612 if (OContextStr.str().size()) 613 OContextStr << " @ "; 614 OContextStr << StringRef(LeafFrame).split(":").first.str(); 615 616 FunctionSamples &FunctionProile = 617 getFunctionProfileForContext(OContextStr.str(), WasLeafInlined); 618 FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash); 619 return FunctionProile; 620 } 621 622 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( 623 SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe, 624 ProfiledBinary *Binary) { 625 // Explicitly copy the context for appending the leaf context 626 SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(), 627 ContextStrStack.end()); 628 Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true); 629 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID); 630 bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite(); 631 return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc, 632 WasLeafInlined); 633 } 634 635 } // end namespace sampleprof 636 } // end namespace llvm 637