1 //===- SampleContextTracker.cpp - Context-sensitive Profile Tracker -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SampleContextTracker used by CSSPGO. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/IPO/SampleContextTracker.h" 14 #include "llvm/ADT/StringMap.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/IR/DebugInfoMetadata.h" 17 #include "llvm/IR/Instructions.h" 18 #include "llvm/ProfileData/SampleProf.h" 19 #include <map> 20 #include <queue> 21 #include <vector> 22 23 using namespace llvm; 24 using namespace sampleprof; 25 26 #define DEBUG_TYPE "sample-context-tracker" 27 28 namespace llvm { 29 30 ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, 31 StringRef CalleeName) { 32 if (CalleeName.empty()) 33 return getHottestChildContext(CallSite); 34 35 uint32_t Hash = nodeHash(CalleeName, CallSite); 36 auto It = AllChildContext.find(Hash); 37 if (It != AllChildContext.end()) 38 return &It->second; 39 return nullptr; 40 } 41 42 ContextTrieNode * 43 ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) { 44 // CSFDO-TODO: This could be slow, change AllChildContext so we can 45 // do point look up for child node by call site alone. 46 // Retrieve the child node with max count for indirect call 47 ContextTrieNode *ChildNodeRet = nullptr; 48 uint64_t MaxCalleeSamples = 0; 49 for (auto &It : AllChildContext) { 50 ContextTrieNode &ChildNode = It.second; 51 if (ChildNode.CallSiteLoc != CallSite) 52 continue; 53 FunctionSamples *Samples = ChildNode.getFunctionSamples(); 54 if (!Samples) 55 continue; 56 if (Samples->getTotalSamples() > MaxCalleeSamples) { 57 ChildNodeRet = &ChildNode; 58 MaxCalleeSamples = Samples->getTotalSamples(); 59 } 60 } 61 62 return ChildNodeRet; 63 } 64 65 ContextTrieNode &ContextTrieNode::moveToChildContext( 66 const LineLocation &CallSite, ContextTrieNode &&NodeToMove, 67 StringRef ContextStrToRemove, bool DeleteNode) { 68 uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite); 69 assert(!AllChildContext.count(Hash) && "Node to remove must exist"); 70 LineLocation OldCallSite = NodeToMove.CallSiteLoc; 71 ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); 72 AllChildContext[Hash] = NodeToMove; 73 ContextTrieNode &NewNode = AllChildContext[Hash]; 74 NewNode.CallSiteLoc = CallSite; 75 76 // Walk through nodes in the moved the subtree, and update 77 // FunctionSamples' context as for the context promotion. 78 // We also need to set new parant link for all children. 79 std::queue<ContextTrieNode *> NodeToUpdate; 80 NewNode.setParentContext(this); 81 NodeToUpdate.push(&NewNode); 82 83 while (!NodeToUpdate.empty()) { 84 ContextTrieNode *Node = NodeToUpdate.front(); 85 NodeToUpdate.pop(); 86 FunctionSamples *FSamples = Node->getFunctionSamples(); 87 88 if (FSamples) { 89 FSamples->getContext().promoteOnPath(ContextStrToRemove); 90 FSamples->getContext().setState(SyntheticContext); 91 LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext() 92 << "\n"); 93 } 94 95 for (auto &It : Node->getAllChildContext()) { 96 ContextTrieNode *ChildNode = &It.second; 97 ChildNode->setParentContext(Node); 98 NodeToUpdate.push(ChildNode); 99 } 100 } 101 102 // Original context no longer needed, destroy if requested. 103 if (DeleteNode) 104 OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); 105 106 return NewNode; 107 } 108 109 void ContextTrieNode::removeChildContext(const LineLocation &CallSite, 110 StringRef CalleeName) { 111 uint32_t Hash = nodeHash(CalleeName, CallSite); 112 // Note this essentially calls dtor and destroys that child context 113 AllChildContext.erase(Hash); 114 } 115 116 std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() { 117 return AllChildContext; 118 } 119 120 StringRef ContextTrieNode::getFuncName() const { return FuncName; } 121 122 FunctionSamples *ContextTrieNode::getFunctionSamples() const { 123 return FuncSamples; 124 } 125 126 void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) { 127 FuncSamples = FSamples; 128 } 129 130 Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; } 131 132 void ContextTrieNode::addFunctionSize(uint32_t FSize) { 133 if (!FuncSize.hasValue()) 134 FuncSize = 0; 135 136 FuncSize = FuncSize.getValue() + FSize; 137 } 138 139 LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; } 140 141 ContextTrieNode *ContextTrieNode::getParentContext() const { 142 return ParentContext; 143 } 144 145 void ContextTrieNode::setParentContext(ContextTrieNode *Parent) { 146 ParentContext = Parent; 147 } 148 149 void ContextTrieNode::dumpNode() { 150 dbgs() << "Node: " << FuncName << "\n" 151 << " Callsite: " << CallSiteLoc << "\n" 152 << " Size: " << FuncSize << "\n" 153 << " Children:\n"; 154 155 for (auto &It : AllChildContext) { 156 dbgs() << " Node: " << It.second.getFuncName() << "\n"; 157 } 158 } 159 160 void ContextTrieNode::dumpTree() { 161 dbgs() << "Context Profile Tree:\n"; 162 std::queue<ContextTrieNode *> NodeQueue; 163 NodeQueue.push(this); 164 165 while (!NodeQueue.empty()) { 166 ContextTrieNode *Node = NodeQueue.front(); 167 NodeQueue.pop(); 168 Node->dumpNode(); 169 170 for (auto &It : Node->getAllChildContext()) { 171 ContextTrieNode *ChildNode = &It.second; 172 NodeQueue.push(ChildNode); 173 } 174 } 175 } 176 177 uint32_t ContextTrieNode::nodeHash(StringRef ChildName, 178 const LineLocation &Callsite) { 179 // We still use child's name for child hash, this is 180 // because for children of root node, we don't have 181 // different line/discriminator, and we'll rely on name 182 // to differentiate children. 183 uint32_t NameHash = std::hash<std::string>{}(ChildName.str()); 184 uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator; 185 return NameHash + (LocId << 5) + LocId; 186 } 187 188 ContextTrieNode *ContextTrieNode::getOrCreateChildContext( 189 const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) { 190 uint32_t Hash = nodeHash(CalleeName, CallSite); 191 auto It = AllChildContext.find(Hash); 192 if (It != AllChildContext.end()) { 193 assert(It->second.getFuncName() == CalleeName && 194 "Hash collision for child context node"); 195 return &It->second; 196 } 197 198 if (!AllowCreate) 199 return nullptr; 200 201 AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite); 202 return &AllChildContext[Hash]; 203 } 204 205 // Profiler tracker than manages profiles and its associated context 206 SampleContextTracker::SampleContextTracker( 207 StringMap<FunctionSamples> &Profiles) { 208 for (auto &FuncSample : Profiles) { 209 FunctionSamples *FSamples = &FuncSample.second; 210 SampleContext Context(FuncSample.first(), RawContext); 211 LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n"); 212 if (!Context.isBaseContext()) 213 FuncToCtxtProfiles[Context.getNameWithoutContext()].push_back(FSamples); 214 ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); 215 assert(!NewNode->getFunctionSamples() && 216 "New node can't have sample profile"); 217 NewNode->setFunctionSamples(FSamples); 218 } 219 } 220 221 FunctionSamples * 222 SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, 223 StringRef CalleeName) { 224 LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n"); 225 DILocation *DIL = Inst.getDebugLoc(); 226 if (!DIL) 227 return nullptr; 228 229 CalleeName = FunctionSamples::getCanonicalFnName(CalleeName); 230 231 // For indirect call, CalleeName will be empty, in which case the context 232 // profile for callee with largest total samples will be returned. 233 ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName); 234 if (CalleeContext) { 235 FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); 236 LLVM_DEBUG(if (FSamples) { 237 dbgs() << " Callee context found: " << FSamples->getContext() << "\n"; 238 }); 239 return FSamples; 240 } 241 242 return nullptr; 243 } 244 245 std::vector<const FunctionSamples *> 246 SampleContextTracker::getIndirectCalleeContextSamplesFor( 247 const DILocation *DIL) { 248 std::vector<const FunctionSamples *> R; 249 if (!DIL) 250 return R; 251 252 ContextTrieNode *CallerNode = getContextFor(DIL); 253 LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); 254 for (auto &It : CallerNode->getAllChildContext()) { 255 ContextTrieNode &ChildNode = It.second; 256 if (ChildNode.getCallSiteLoc() != CallSite) 257 continue; 258 if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples()) 259 R.push_back(CalleeSamples); 260 } 261 262 return R; 263 } 264 265 FunctionSamples * 266 SampleContextTracker::getContextSamplesFor(const DILocation *DIL) { 267 assert(DIL && "Expect non-null location"); 268 269 ContextTrieNode *ContextNode = getContextFor(DIL); 270 if (!ContextNode) 271 return nullptr; 272 273 // We may have inlined callees during pre-LTO compilation, in which case 274 // we need to rely on the inline stack from !dbg to mark context profile 275 // as inlined, instead of `MarkContextSamplesInlined` during inlining. 276 // Sample profile loader walks through all instructions to get profile, 277 // which calls this function. So once that is done, all previously inlined 278 // context profile should be marked properly. 279 FunctionSamples *Samples = ContextNode->getFunctionSamples(); 280 if (Samples && ContextNode->getParentContext() != &RootContext) 281 Samples->getContext().setState(InlinedContext); 282 283 return Samples; 284 } 285 286 FunctionSamples * 287 SampleContextTracker::getContextSamplesFor(const SampleContext &Context) { 288 ContextTrieNode *Node = getContextFor(Context); 289 if (!Node) 290 return nullptr; 291 292 return Node->getFunctionSamples(); 293 } 294 295 SampleContextTracker::ContextSamplesTy & 296 SampleContextTracker::getAllContextSamplesFor(const Function &Func) { 297 StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); 298 return FuncToCtxtProfiles[CanonName]; 299 } 300 301 SampleContextTracker::ContextSamplesTy & 302 SampleContextTracker::getAllContextSamplesFor(StringRef Name) { 303 return FuncToCtxtProfiles[Name]; 304 } 305 306 FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func, 307 bool MergeContext) { 308 StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); 309 return getBaseSamplesFor(CanonName, MergeContext); 310 } 311 312 FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, 313 bool MergeContext) { 314 LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); 315 // Base profile is top-level node (child of root node), so try to retrieve 316 // existing top-level node for given function first. If it exists, it could be 317 // that we've merged base profile before, or there's actually context-less 318 // profile from the input (e.g. due to unreliable stack walking). 319 ContextTrieNode *Node = getTopLevelContextNode(Name); 320 if (MergeContext) { 321 LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " << Name 322 << "\n"); 323 324 // We have profile for function under different contexts, 325 // create synthetic base profile and merge context profiles 326 // into base profile. 327 for (auto *CSamples : FuncToCtxtProfiles[Name]) { 328 SampleContext &Context = CSamples->getContext(); 329 ContextTrieNode *FromNode = getContextFor(Context); 330 if (FromNode == Node) 331 continue; 332 333 // Skip inlined context profile and also don't re-merge any context 334 if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) 335 continue; 336 337 ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode); 338 assert((!Node || Node == &ToNode) && "Expect only one base profile"); 339 Node = &ToNode; 340 } 341 } 342 343 // Still no profile even after merge/promotion (if allowed) 344 if (!Node) 345 return nullptr; 346 347 return Node->getFunctionSamples(); 348 } 349 350 void SampleContextTracker::markContextSamplesInlined( 351 const FunctionSamples *InlinedSamples) { 352 assert(InlinedSamples && "Expect non-null inlined samples"); 353 LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " 354 << InlinedSamples->getContext() << "\n"); 355 InlinedSamples->getContext().setState(InlinedContext); 356 } 357 358 ContextTrieNode &SampleContextTracker::getRootContext() { return RootContext; } 359 360 void SampleContextTracker::promoteMergeContextSamplesTree( 361 const Instruction &Inst, StringRef CalleeName) { 362 LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n" 363 << Inst << "\n"); 364 // Get the caller context for the call instruction, we don't use callee 365 // name from call because there can be context from indirect calls too. 366 DILocation *DIL = Inst.getDebugLoc(); 367 ContextTrieNode *CallerNode = getContextFor(DIL); 368 if (!CallerNode) 369 return; 370 371 // Get the context that needs to be promoted 372 LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); 373 // For indirect call, CalleeName will be empty, in which case we need to 374 // promote all non-inlined child context profiles. 375 if (CalleeName.empty()) { 376 for (auto &It : CallerNode->getAllChildContext()) { 377 ContextTrieNode *NodeToPromo = &It.second; 378 if (CallSite != NodeToPromo->getCallSiteLoc()) 379 continue; 380 FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples(); 381 if (FromSamples && FromSamples->getContext().hasState(InlinedContext)) 382 continue; 383 promoteMergeContextSamplesTree(*NodeToPromo); 384 } 385 return; 386 } 387 388 // Get the context for the given callee that needs to be promoted 389 ContextTrieNode *NodeToPromo = 390 CallerNode->getChildContext(CallSite, CalleeName); 391 if (!NodeToPromo) 392 return; 393 394 promoteMergeContextSamplesTree(*NodeToPromo); 395 } 396 397 ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( 398 ContextTrieNode &NodeToPromo) { 399 // Promote the input node to be directly under root. This can happen 400 // when we decided to not inline a function under context represented 401 // by the input node. The promote and merge is then needed to reflect 402 // the context profile in the base (context-less) profile. 403 FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); 404 assert(FromSamples && "Shouldn't promote a context without profile"); 405 LLVM_DEBUG(dbgs() << " Found context tree root to promote: " 406 << FromSamples->getContext() << "\n"); 407 408 assert(!FromSamples->getContext().hasState(InlinedContext) && 409 "Shouldn't promote inlined context profile"); 410 StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext(); 411 return promoteMergeContextSamplesTree(NodeToPromo, RootContext, 412 ContextStrToRemove); 413 } 414 415 void SampleContextTracker::dump() { RootContext.dumpTree(); } 416 417 ContextTrieNode * 418 SampleContextTracker::getContextFor(const SampleContext &Context) { 419 return getOrCreateContextPath(Context, false); 420 } 421 422 ContextTrieNode * 423 SampleContextTracker::getCalleeContextFor(const DILocation *DIL, 424 StringRef CalleeName) { 425 assert(DIL && "Expect non-null location"); 426 427 ContextTrieNode *CallContext = getContextFor(DIL); 428 if (!CallContext) 429 return nullptr; 430 431 // When CalleeName is empty, the child context profile with max 432 // total samples will be returned. 433 return CallContext->getChildContext( 434 FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); 435 } 436 437 ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { 438 assert(DIL && "Expect non-null location"); 439 SmallVector<std::pair<LineLocation, StringRef>, 10> S; 440 441 // Use C++ linkage name if possible. 442 const DILocation *PrevDIL = DIL; 443 for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { 444 StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName(); 445 if (Name.empty()) 446 Name = PrevDIL->getScope()->getSubprogram()->getName(); 447 S.push_back( 448 std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), Name)); 449 PrevDIL = DIL; 450 } 451 452 // Push root node, note that root node like main may only 453 // a name, but not linkage name. 454 StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName(); 455 if (RootName.empty()) 456 RootName = PrevDIL->getScope()->getSubprogram()->getName(); 457 S.push_back(std::make_pair(LineLocation(0, 0), RootName)); 458 459 ContextTrieNode *ContextNode = &RootContext; 460 int I = S.size(); 461 while (--I >= 0 && ContextNode) { 462 LineLocation &CallSite = S[I].first; 463 StringRef &CalleeName = S[I].second; 464 ContextNode = ContextNode->getChildContext(CallSite, CalleeName); 465 } 466 467 if (I < 0) 468 return ContextNode; 469 470 return nullptr; 471 } 472 473 ContextTrieNode * 474 SampleContextTracker::getOrCreateContextPath(const SampleContext &Context, 475 bool AllowCreate) { 476 ContextTrieNode *ContextNode = &RootContext; 477 StringRef ContextRemain = Context; 478 StringRef ChildContext; 479 StringRef CalleeName; 480 LineLocation CallSiteLoc(0, 0); 481 482 while (ContextNode && !ContextRemain.empty()) { 483 auto ContextSplit = SampleContext::splitContextString(ContextRemain); 484 ChildContext = ContextSplit.first; 485 ContextRemain = ContextSplit.second; 486 LineLocation NextCallSiteLoc(0, 0); 487 SampleContext::decodeContextString(ChildContext, CalleeName, 488 NextCallSiteLoc); 489 490 // Create child node at parent line/disc location 491 if (AllowCreate) { 492 ContextNode = 493 ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName); 494 } else { 495 ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName); 496 } 497 CallSiteLoc = NextCallSiteLoc; 498 } 499 500 assert((!AllowCreate || ContextNode) && 501 "Node must exist if creation is allowed"); 502 return ContextNode; 503 } 504 505 ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) { 506 assert(!FName.empty() && "Top level node query must provide valid name"); 507 return RootContext.getChildContext(LineLocation(0, 0), FName); 508 } 509 510 ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) { 511 assert(!getTopLevelContextNode(FName) && "Node to add must not exist"); 512 return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName); 513 } 514 515 void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, 516 ContextTrieNode &ToNode, 517 StringRef ContextStrToRemove) { 518 FunctionSamples *FromSamples = FromNode.getFunctionSamples(); 519 FunctionSamples *ToSamples = ToNode.getFunctionSamples(); 520 if (FromSamples && ToSamples) { 521 // Merge/duplicate FromSamples into ToSamples 522 ToSamples->merge(*FromSamples); 523 ToSamples->getContext().setState(SyntheticContext); 524 FromSamples->getContext().setState(MergedContext); 525 } else if (FromSamples) { 526 // Transfer FromSamples from FromNode to ToNode 527 ToNode.setFunctionSamples(FromSamples); 528 FromSamples->getContext().setState(SyntheticContext); 529 FromSamples->getContext().promoteOnPath(ContextStrToRemove); 530 FromNode.setFunctionSamples(nullptr); 531 } 532 } 533 534 ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( 535 ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, 536 StringRef ContextStrToRemove) { 537 assert(!ContextStrToRemove.empty() && "Context to remove can't be empty"); 538 539 // Ignore call site location if destination is top level under root 540 LineLocation NewCallSiteLoc = LineLocation(0, 0); 541 LineLocation OldCallSiteLoc = FromNode.getCallSiteLoc(); 542 ContextTrieNode &FromNodeParent = *FromNode.getParentContext(); 543 ContextTrieNode *ToNode = nullptr; 544 bool MoveToRoot = (&ToNodeParent == &RootContext); 545 if (!MoveToRoot) { 546 NewCallSiteLoc = OldCallSiteLoc; 547 } 548 549 // Locate destination node, create/move if not existing 550 ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncName()); 551 if (!ToNode) { 552 // Do not delete node to move from its parent here because 553 // caller is iterating over children of that parent node. 554 ToNode = &ToNodeParent.moveToChildContext( 555 NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false); 556 } else { 557 // Destination node exists, merge samples for the context tree 558 mergeContextNode(FromNode, *ToNode, ContextStrToRemove); 559 LLVM_DEBUG({ 560 if (ToNode->getFunctionSamples()) 561 dbgs() << " Context promoted and merged to: " 562 << ToNode->getFunctionSamples()->getContext() << "\n"; 563 }); 564 565 // Recursively promote and merge children 566 for (auto &It : FromNode.getAllChildContext()) { 567 ContextTrieNode &FromChildNode = It.second; 568 promoteMergeContextSamplesTree(FromChildNode, *ToNode, 569 ContextStrToRemove); 570 } 571 572 // Remove children once they're all merged 573 FromNode.getAllChildContext().clear(); 574 } 575 576 // For root of subtree, remove itself from old parent too 577 if (MoveToRoot) 578 FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncName()); 579 580 return *ToNode; 581 } 582 } // namespace llvm 583