1 //===- SampleContextTracker.cpp - Context-sensitive Profile Tracker -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SampleContextTracker used by CSSPGO. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/IPO/SampleContextTracker.h" 14 #include "llvm/ADT/StringMap.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/IR/DebugInfoMetadata.h" 17 #include "llvm/IR/Instructions.h" 18 #include "llvm/ProfileData/SampleProf.h" 19 #include <map> 20 #include <queue> 21 #include <vector> 22 23 using namespace llvm; 24 using namespace sampleprof; 25 26 #define DEBUG_TYPE "sample-context-tracker" 27 28 namespace llvm { 29 30 ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, 31 StringRef CalleeName) { 32 if (CalleeName.empty()) 33 return getHottestChildContext(CallSite); 34 35 uint32_t Hash = nodeHash(CalleeName, CallSite); 36 auto It = AllChildContext.find(Hash); 37 if (It != AllChildContext.end()) 38 return &It->second; 39 return nullptr; 40 } 41 42 ContextTrieNode * 43 ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) { 44 // CSFDO-TODO: This could be slow, change AllChildContext so we can 45 // do point look up for child node by call site alone. 46 // Retrieve the child node with max count for indirect call 47 ContextTrieNode *ChildNodeRet = nullptr; 48 uint64_t MaxCalleeSamples = 0; 49 for (auto &It : AllChildContext) { 50 ContextTrieNode &ChildNode = It.second; 51 if (ChildNode.CallSiteLoc != CallSite) 52 continue; 53 FunctionSamples *Samples = ChildNode.getFunctionSamples(); 54 if (!Samples) 55 continue; 56 if (Samples->getTotalSamples() > MaxCalleeSamples) { 57 ChildNodeRet = &ChildNode; 58 MaxCalleeSamples = Samples->getTotalSamples(); 59 } 60 } 61 62 return ChildNodeRet; 63 } 64 65 ContextTrieNode &ContextTrieNode::moveToChildContext( 66 const LineLocation &CallSite, ContextTrieNode &&NodeToMove, 67 uint32_t ContextFramesToRemove, bool DeleteNode) { 68 uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite); 69 assert(!AllChildContext.count(Hash) && "Node to remove must exist"); 70 LineLocation OldCallSite = NodeToMove.CallSiteLoc; 71 ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); 72 AllChildContext[Hash] = NodeToMove; 73 ContextTrieNode &NewNode = AllChildContext[Hash]; 74 NewNode.CallSiteLoc = CallSite; 75 76 // Walk through nodes in the moved the subtree, and update 77 // FunctionSamples' context as for the context promotion. 78 // We also need to set new parant link for all children. 79 std::queue<ContextTrieNode *> NodeToUpdate; 80 NewNode.setParentContext(this); 81 NodeToUpdate.push(&NewNode); 82 83 while (!NodeToUpdate.empty()) { 84 ContextTrieNode *Node = NodeToUpdate.front(); 85 NodeToUpdate.pop(); 86 FunctionSamples *FSamples = Node->getFunctionSamples(); 87 88 if (FSamples) { 89 FSamples->getContext().promoteOnPath(ContextFramesToRemove); 90 FSamples->getContext().setState(SyntheticContext); 91 LLVM_DEBUG(dbgs() << " Context promoted to: " 92 << FSamples->getContext().toString() << "\n"); 93 } 94 95 for (auto &It : Node->getAllChildContext()) { 96 ContextTrieNode *ChildNode = &It.second; 97 ChildNode->setParentContext(Node); 98 NodeToUpdate.push(ChildNode); 99 } 100 } 101 102 // Original context no longer needed, destroy if requested. 103 if (DeleteNode) 104 OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); 105 106 return NewNode; 107 } 108 109 void ContextTrieNode::removeChildContext(const LineLocation &CallSite, 110 StringRef CalleeName) { 111 uint32_t Hash = nodeHash(CalleeName, CallSite); 112 // Note this essentially calls dtor and destroys that child context 113 AllChildContext.erase(Hash); 114 } 115 116 std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() { 117 return AllChildContext; 118 } 119 120 StringRef ContextTrieNode::getFuncName() const { return FuncName; } 121 122 FunctionSamples *ContextTrieNode::getFunctionSamples() const { 123 return FuncSamples; 124 } 125 126 void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) { 127 FuncSamples = FSamples; 128 } 129 130 Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; } 131 132 void ContextTrieNode::addFunctionSize(uint32_t FSize) { 133 if (!FuncSize.hasValue()) 134 FuncSize = 0; 135 136 FuncSize = FuncSize.getValue() + FSize; 137 } 138 139 LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; } 140 141 ContextTrieNode *ContextTrieNode::getParentContext() const { 142 return ParentContext; 143 } 144 145 void ContextTrieNode::setParentContext(ContextTrieNode *Parent) { 146 ParentContext = Parent; 147 } 148 149 void ContextTrieNode::dumpNode() { 150 dbgs() << "Node: " << FuncName << "\n" 151 << " Callsite: " << CallSiteLoc << "\n" 152 << " Size: " << FuncSize << "\n" 153 << " Children:\n"; 154 155 for (auto &It : AllChildContext) { 156 dbgs() << " Node: " << It.second.getFuncName() << "\n"; 157 } 158 } 159 160 void ContextTrieNode::dumpTree() { 161 dbgs() << "Context Profile Tree:\n"; 162 std::queue<ContextTrieNode *> NodeQueue; 163 NodeQueue.push(this); 164 165 while (!NodeQueue.empty()) { 166 ContextTrieNode *Node = NodeQueue.front(); 167 NodeQueue.pop(); 168 Node->dumpNode(); 169 170 for (auto &It : Node->getAllChildContext()) { 171 ContextTrieNode *ChildNode = &It.second; 172 NodeQueue.push(ChildNode); 173 } 174 } 175 } 176 177 uint32_t ContextTrieNode::nodeHash(StringRef ChildName, 178 const LineLocation &Callsite) { 179 // We still use child's name for child hash, this is 180 // because for children of root node, we don't have 181 // different line/discriminator, and we'll rely on name 182 // to differentiate children. 183 uint32_t NameHash = std::hash<std::string>{}(ChildName.str()); 184 uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator; 185 return NameHash + (LocId << 5) + LocId; 186 } 187 188 ContextTrieNode *ContextTrieNode::getOrCreateChildContext( 189 const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) { 190 uint32_t Hash = nodeHash(CalleeName, CallSite); 191 auto It = AllChildContext.find(Hash); 192 if (It != AllChildContext.end()) { 193 assert(It->second.getFuncName() == CalleeName && 194 "Hash collision for child context node"); 195 return &It->second; 196 } 197 198 if (!AllowCreate) 199 return nullptr; 200 201 AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite); 202 return &AllChildContext[Hash]; 203 } 204 205 // Profiler tracker than manages profiles and its associated context 206 SampleContextTracker::SampleContextTracker(SampleProfileMap &Profiles) { 207 for (auto &FuncSample : Profiles) { 208 FunctionSamples *FSamples = &FuncSample.second; 209 SampleContext Context = FuncSample.first; 210 LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString() 211 << "\n"); 212 if (!Context.isBaseContext()) 213 FuncToCtxtProfiles[Context.getName()].insert(FSamples); 214 ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); 215 assert(!NewNode->getFunctionSamples() && 216 "New node can't have sample profile"); 217 NewNode->setFunctionSamples(FSamples); 218 } 219 } 220 221 FunctionSamples * 222 SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, 223 StringRef CalleeName) { 224 LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n"); 225 DILocation *DIL = Inst.getDebugLoc(); 226 if (!DIL) 227 return nullptr; 228 229 CalleeName = FunctionSamples::getCanonicalFnName(CalleeName); 230 231 // For indirect call, CalleeName will be empty, in which case the context 232 // profile for callee with largest total samples will be returned. 233 ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName); 234 if (CalleeContext) { 235 FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); 236 LLVM_DEBUG(if (FSamples) { 237 dbgs() << " Callee context found: " << FSamples->getContext().toString() 238 << "\n"; 239 }); 240 return FSamples; 241 } 242 243 return nullptr; 244 } 245 246 std::vector<const FunctionSamples *> 247 SampleContextTracker::getIndirectCalleeContextSamplesFor( 248 const DILocation *DIL) { 249 std::vector<const FunctionSamples *> R; 250 if (!DIL) 251 return R; 252 253 ContextTrieNode *CallerNode = getContextFor(DIL); 254 LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); 255 for (auto &It : CallerNode->getAllChildContext()) { 256 ContextTrieNode &ChildNode = It.second; 257 if (ChildNode.getCallSiteLoc() != CallSite) 258 continue; 259 if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples()) 260 R.push_back(CalleeSamples); 261 } 262 263 return R; 264 } 265 266 FunctionSamples * 267 SampleContextTracker::getContextSamplesFor(const DILocation *DIL) { 268 assert(DIL && "Expect non-null location"); 269 270 ContextTrieNode *ContextNode = getContextFor(DIL); 271 if (!ContextNode) 272 return nullptr; 273 274 // We may have inlined callees during pre-LTO compilation, in which case 275 // we need to rely on the inline stack from !dbg to mark context profile 276 // as inlined, instead of `MarkContextSamplesInlined` during inlining. 277 // Sample profile loader walks through all instructions to get profile, 278 // which calls this function. So once that is done, all previously inlined 279 // context profile should be marked properly. 280 FunctionSamples *Samples = ContextNode->getFunctionSamples(); 281 if (Samples && ContextNode->getParentContext() != &RootContext) 282 Samples->getContext().setState(InlinedContext); 283 284 return Samples; 285 } 286 287 FunctionSamples * 288 SampleContextTracker::getContextSamplesFor(const SampleContext &Context) { 289 ContextTrieNode *Node = getContextFor(Context); 290 if (!Node) 291 return nullptr; 292 293 return Node->getFunctionSamples(); 294 } 295 296 SampleContextTracker::ContextSamplesTy & 297 SampleContextTracker::getAllContextSamplesFor(const Function &Func) { 298 StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); 299 return FuncToCtxtProfiles[CanonName]; 300 } 301 302 SampleContextTracker::ContextSamplesTy & 303 SampleContextTracker::getAllContextSamplesFor(StringRef Name) { 304 return FuncToCtxtProfiles[Name]; 305 } 306 307 FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func, 308 bool MergeContext) { 309 StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); 310 return getBaseSamplesFor(CanonName, MergeContext); 311 } 312 313 FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, 314 bool MergeContext) { 315 LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); 316 // Base profile is top-level node (child of root node), so try to retrieve 317 // existing top-level node for given function first. If it exists, it could be 318 // that we've merged base profile before, or there's actually context-less 319 // profile from the input (e.g. due to unreliable stack walking). 320 ContextTrieNode *Node = getTopLevelContextNode(Name); 321 if (MergeContext) { 322 LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " << Name 323 << "\n"); 324 325 // We have profile for function under different contexts, 326 // create synthetic base profile and merge context profiles 327 // into base profile. 328 for (auto *CSamples : FuncToCtxtProfiles[Name]) { 329 SampleContext &Context = CSamples->getContext(); 330 // Skip inlined context profile and also don't re-merge any context 331 if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) 332 continue; 333 334 ContextTrieNode *FromNode = getContextFor(Context); 335 if (FromNode == Node) 336 continue; 337 338 ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode); 339 assert((!Node || Node == &ToNode) && "Expect only one base profile"); 340 Node = &ToNode; 341 } 342 } 343 344 // Still no profile even after merge/promotion (if allowed) 345 if (!Node) 346 return nullptr; 347 348 return Node->getFunctionSamples(); 349 } 350 351 void SampleContextTracker::markContextSamplesInlined( 352 const FunctionSamples *InlinedSamples) { 353 assert(InlinedSamples && "Expect non-null inlined samples"); 354 LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " 355 << InlinedSamples->getContext().toString() << "\n"); 356 InlinedSamples->getContext().setState(InlinedContext); 357 } 358 359 ContextTrieNode &SampleContextTracker::getRootContext() { return RootContext; } 360 361 void SampleContextTracker::promoteMergeContextSamplesTree( 362 const Instruction &Inst, StringRef CalleeName) { 363 LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n" 364 << Inst << "\n"); 365 // Get the caller context for the call instruction, we don't use callee 366 // name from call because there can be context from indirect calls too. 367 DILocation *DIL = Inst.getDebugLoc(); 368 ContextTrieNode *CallerNode = getContextFor(DIL); 369 if (!CallerNode) 370 return; 371 372 // Get the context that needs to be promoted 373 LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); 374 // For indirect call, CalleeName will be empty, in which case we need to 375 // promote all non-inlined child context profiles. 376 if (CalleeName.empty()) { 377 for (auto &It : CallerNode->getAllChildContext()) { 378 ContextTrieNode *NodeToPromo = &It.second; 379 if (CallSite != NodeToPromo->getCallSiteLoc()) 380 continue; 381 FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples(); 382 if (FromSamples && FromSamples->getContext().hasState(InlinedContext)) 383 continue; 384 promoteMergeContextSamplesTree(*NodeToPromo); 385 } 386 return; 387 } 388 389 // Get the context for the given callee that needs to be promoted 390 ContextTrieNode *NodeToPromo = 391 CallerNode->getChildContext(CallSite, CalleeName); 392 if (!NodeToPromo) 393 return; 394 395 promoteMergeContextSamplesTree(*NodeToPromo); 396 } 397 398 ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( 399 ContextTrieNode &NodeToPromo) { 400 // Promote the input node to be directly under root. This can happen 401 // when we decided to not inline a function under context represented 402 // by the input node. The promote and merge is then needed to reflect 403 // the context profile in the base (context-less) profile. 404 FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); 405 assert(FromSamples && "Shouldn't promote a context without profile"); 406 LLVM_DEBUG(dbgs() << " Found context tree root to promote: " 407 << FromSamples->getContext().toString() << "\n"); 408 409 assert(!FromSamples->getContext().hasState(InlinedContext) && 410 "Shouldn't promote inlined context profile"); 411 uint32_t ContextFramesToRemove = 412 FromSamples->getContext().getContextFrames().size() - 1; 413 return promoteMergeContextSamplesTree(NodeToPromo, RootContext, 414 ContextFramesToRemove); 415 } 416 417 void SampleContextTracker::dump() { RootContext.dumpTree(); } 418 419 ContextTrieNode * 420 SampleContextTracker::getContextFor(const SampleContext &Context) { 421 return getOrCreateContextPath(Context, false); 422 } 423 424 ContextTrieNode * 425 SampleContextTracker::getCalleeContextFor(const DILocation *DIL, 426 StringRef CalleeName) { 427 assert(DIL && "Expect non-null location"); 428 429 ContextTrieNode *CallContext = getContextFor(DIL); 430 if (!CallContext) 431 return nullptr; 432 433 // When CalleeName is empty, the child context profile with max 434 // total samples will be returned. 435 return CallContext->getChildContext( 436 FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); 437 } 438 439 ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { 440 assert(DIL && "Expect non-null location"); 441 SmallVector<std::pair<LineLocation, StringRef>, 10> S; 442 443 // Use C++ linkage name if possible. 444 const DILocation *PrevDIL = DIL; 445 for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { 446 StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName(); 447 if (Name.empty()) 448 Name = PrevDIL->getScope()->getSubprogram()->getName(); 449 S.push_back( 450 std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), Name)); 451 PrevDIL = DIL; 452 } 453 454 // Push root node, note that root node like main may only 455 // a name, but not linkage name. 456 StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName(); 457 if (RootName.empty()) 458 RootName = PrevDIL->getScope()->getSubprogram()->getName(); 459 S.push_back(std::make_pair(LineLocation(0, 0), RootName)); 460 461 ContextTrieNode *ContextNode = &RootContext; 462 int I = S.size(); 463 while (--I >= 0 && ContextNode) { 464 LineLocation &CallSite = S[I].first; 465 StringRef &CalleeName = S[I].second; 466 ContextNode = ContextNode->getChildContext(CallSite, CalleeName); 467 } 468 469 if (I < 0) 470 return ContextNode; 471 472 return nullptr; 473 } 474 475 ContextTrieNode * 476 SampleContextTracker::getOrCreateContextPath(const SampleContext &Context, 477 bool AllowCreate) { 478 ContextTrieNode *ContextNode = &RootContext; 479 LineLocation CallSiteLoc(0, 0); 480 481 for (auto &Callsite : Context.getContextFrames()) { 482 // Create child node at parent line/disc location 483 if (AllowCreate) { 484 ContextNode = ContextNode->getOrCreateChildContext(CallSiteLoc, 485 Callsite.CallerName); 486 } else { 487 ContextNode = 488 ContextNode->getChildContext(CallSiteLoc, Callsite.CallerName); 489 } 490 CallSiteLoc = Callsite.Callsite; 491 } 492 493 assert((!AllowCreate || ContextNode) && 494 "Node must exist if creation is allowed"); 495 return ContextNode; 496 } 497 498 ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) { 499 assert(!FName.empty() && "Top level node query must provide valid name"); 500 return RootContext.getChildContext(LineLocation(0, 0), FName); 501 } 502 503 ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) { 504 assert(!getTopLevelContextNode(FName) && "Node to add must not exist"); 505 return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName); 506 } 507 508 void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, 509 ContextTrieNode &ToNode, 510 uint32_t ContextFramesToRemove) { 511 FunctionSamples *FromSamples = FromNode.getFunctionSamples(); 512 FunctionSamples *ToSamples = ToNode.getFunctionSamples(); 513 if (FromSamples && ToSamples) { 514 // Merge/duplicate FromSamples into ToSamples 515 ToSamples->merge(*FromSamples); 516 ToSamples->getContext().setState(SyntheticContext); 517 FromSamples->getContext().setState(MergedContext); 518 } else if (FromSamples) { 519 // Transfer FromSamples from FromNode to ToNode 520 ToNode.setFunctionSamples(FromSamples); 521 FromSamples->getContext().setState(SyntheticContext); 522 FromSamples->getContext().promoteOnPath(ContextFramesToRemove); 523 FromNode.setFunctionSamples(nullptr); 524 } 525 } 526 527 ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( 528 ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, 529 uint32_t ContextFramesToRemove) { 530 assert(ContextFramesToRemove && "Context to remove can't be empty"); 531 532 // Ignore call site location if destination is top level under root 533 LineLocation NewCallSiteLoc = LineLocation(0, 0); 534 LineLocation OldCallSiteLoc = FromNode.getCallSiteLoc(); 535 ContextTrieNode &FromNodeParent = *FromNode.getParentContext(); 536 ContextTrieNode *ToNode = nullptr; 537 bool MoveToRoot = (&ToNodeParent == &RootContext); 538 if (!MoveToRoot) { 539 NewCallSiteLoc = OldCallSiteLoc; 540 } 541 542 // Locate destination node, create/move if not existing 543 ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncName()); 544 if (!ToNode) { 545 // Do not delete node to move from its parent here because 546 // caller is iterating over children of that parent node. 547 ToNode = &ToNodeParent.moveToChildContext( 548 NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false); 549 } else { 550 // Destination node exists, merge samples for the context tree 551 mergeContextNode(FromNode, *ToNode, ContextFramesToRemove); 552 LLVM_DEBUG({ 553 if (ToNode->getFunctionSamples()) 554 dbgs() << " Context promoted and merged to: " 555 << ToNode->getFunctionSamples()->getContext().toString() << "\n"; 556 }); 557 558 // Recursively promote and merge children 559 for (auto &It : FromNode.getAllChildContext()) { 560 ContextTrieNode &FromChildNode = It.second; 561 promoteMergeContextSamplesTree(FromChildNode, *ToNode, 562 ContextFramesToRemove); 563 } 564 565 // Remove children once they're all merged 566 FromNode.getAllChildContext().clear(); 567 } 568 569 // For root of subtree, remove itself from old parent too 570 if (MoveToRoot) 571 FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncName()); 572 573 return *ToNode; 574 } 575 } // namespace llvm 576