1 //===- InlineFunction.cpp - Code to perform function inlining -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements inlining of a function into a call site, resolving 10 // parameters and the return value as appropriate. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/DenseMap.h" 15 #include "llvm/ADT/None.h" 16 #include "llvm/ADT/Optional.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SetVector.h" 19 #include "llvm/ADT/SmallPtrSet.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/ADT/iterator_range.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/Analysis/AssumptionCache.h" 25 #include "llvm/Analysis/BlockFrequencyInfo.h" 26 #include "llvm/Analysis/CallGraph.h" 27 #include "llvm/Analysis/CaptureTracking.h" 28 #include "llvm/Analysis/EHPersonalities.h" 29 #include "llvm/Analysis/InstructionSimplify.h" 30 #include "llvm/Analysis/ProfileSummaryInfo.h" 31 #include "llvm/Transforms/Utils/Local.h" 32 #include "llvm/Analysis/ValueTracking.h" 33 #include "llvm/Analysis/VectorUtils.h" 34 #include "llvm/IR/Argument.h" 35 #include "llvm/IR/BasicBlock.h" 36 #include "llvm/IR/CFG.h" 37 #include "llvm/IR/Constant.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/DIBuilder.h" 40 #include "llvm/IR/DataLayout.h" 41 #include "llvm/IR/DebugInfoMetadata.h" 42 #include "llvm/IR/DebugLoc.h" 43 #include "llvm/IR/DerivedTypes.h" 44 #include "llvm/IR/Dominators.h" 45 #include "llvm/IR/Function.h" 46 #include "llvm/IR/IRBuilder.h" 47 #include "llvm/IR/InstrTypes.h" 48 #include "llvm/IR/Instruction.h" 49 #include "llvm/IR/Instructions.h" 50 #include "llvm/IR/IntrinsicInst.h" 51 #include "llvm/IR/Intrinsics.h" 52 #include "llvm/IR/LLVMContext.h" 53 #include "llvm/IR/MDBuilder.h" 54 #include "llvm/IR/Metadata.h" 55 #include "llvm/IR/Module.h" 56 #include "llvm/IR/Type.h" 57 #include "llvm/IR/User.h" 58 #include "llvm/IR/Value.h" 59 #include "llvm/Support/Casting.h" 60 #include "llvm/Support/CommandLine.h" 61 #include "llvm/Support/ErrorHandling.h" 62 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 63 #include "llvm/Transforms/Utils/Cloning.h" 64 #include "llvm/Transforms/Utils/ValueMapper.h" 65 #include <algorithm> 66 #include <cassert> 67 #include <cstdint> 68 #include <iterator> 69 #include <limits> 70 #include <string> 71 #include <utility> 72 #include <vector> 73 74 using namespace llvm; 75 using ProfileCount = Function::ProfileCount; 76 77 static cl::opt<bool> 78 EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), 79 cl::Hidden, 80 cl::desc("Convert noalias attributes to metadata during inlining.")); 81 82 static cl::opt<bool> 83 PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", 84 cl::init(true), cl::Hidden, 85 cl::desc("Convert align attributes to assumptions during inlining.")); 86 87 static cl::opt<bool> UpdateReturnAttributes( 88 "update-return-attrs", cl::init(true), cl::Hidden, 89 cl::desc("Update return attributes on calls within inlined body")); 90 91 static cl::opt<bool> UpdateLoadMetadataDuringInlining( 92 "update-load-metadata-during-inlining", cl::init(true), cl::Hidden, 93 cl::desc("Update metadata on 
loads within inlined body")); 94 95 static cl::opt<unsigned> InlinerAttributeWindow( 96 "max-inst-checked-for-throw-during-inlining", cl::Hidden, 97 cl::desc("the maximum number of instructions analyzed for may throw during " 98 "attribute inference in inlined body"), 99 cl::init(4)); 100 101 llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, 102 AAResults *CalleeAAR, 103 bool InsertLifetime) { 104 return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime); 105 } 106 107 namespace { 108 109 /// A class for recording information about inlining a landing pad. 110 class LandingPadInliningInfo { 111 /// Destination of the invoke's unwind. 112 BasicBlock *OuterResumeDest; 113 114 /// Destination for the callee's resume. 115 BasicBlock *InnerResumeDest = nullptr; 116 117 /// LandingPadInst associated with the invoke. 118 LandingPadInst *CallerLPad = nullptr; 119 120 /// PHI for EH values from landingpad insts. 121 PHINode *InnerEHValuesPHI = nullptr; 122 123 SmallVector<Value*, 8> UnwindDestPHIValues; 124 125 public: 126 LandingPadInliningInfo(InvokeInst *II) 127 : OuterResumeDest(II->getUnwindDest()) { 128 // If there are PHI nodes in the unwind destination block, we need to keep 129 // track of which values came into them from the invoke before removing 130 // the edge from this block. 131 BasicBlock *InvokeBB = II->getParent(); 132 BasicBlock::iterator I = OuterResumeDest->begin(); 133 for (; isa<PHINode>(I); ++I) { 134 // Save the value to use for this edge. 135 PHINode *PHI = cast<PHINode>(I); 136 UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); 137 } 138 139 CallerLPad = cast<LandingPadInst>(I); 140 } 141 142 /// The outer unwind destination is the target of 143 /// unwind edges introduced for calls within the inlined function. 144 BasicBlock *getOuterResumeDest() const { 145 return OuterResumeDest; 146 } 147 148 BasicBlock *getInnerResumeDest(); 149 150 LandingPadInst *getLandingPadInst() const { return CallerLPad; } 151 152 /// Forward the 'resume' instruction to the caller's landing pad block. 153 /// When the landing pad block has only one predecessor, this is 154 /// a simple branch. When there is more than one predecessor, we need to 155 /// split the landing pad block after the landingpad instruction and jump 156 /// to there. 157 void forwardResume(ResumeInst *RI, 158 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); 159 160 /// Add incoming-PHI values to the unwind destination block for the given 161 /// basic block, using the values for the original invoke's source block. 162 void addIncomingPHIValuesFor(BasicBlock *BB) const { 163 addIncomingPHIValuesForInto(BB, OuterResumeDest); 164 } 165 166 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { 167 BasicBlock::iterator I = dest->begin(); 168 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { 169 PHINode *phi = cast<PHINode>(I); 170 phi->addIncoming(UnwindDestPHIValues[i], src); 171 } 172 } 173 }; 174 175 } // end anonymous namespace 176 177 /// Get or create a target for the branch from ResumeInsts. 178 BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { 179 if (InnerResumeDest) return InnerResumeDest; 180 181 // Split the landing pad. 182 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); 183 InnerResumeDest = 184 OuterResumeDest->splitBasicBlock(SplitPoint, 185 OuterResumeDest->getName() + ".body"); 186 187 // The number of incoming edges we expect to the inner landing pad. 
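  // (This is only a reservation hint passed to PHINode::Create below: one
  // incoming edge comes from the split-off outer block and each forwarded
  // 'resume' adds another; the PHIs grow beyond it as needed.)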
188 const unsigned PHICapacity = 2; 189 190 // Create corresponding new PHIs for all the PHIs in the outer landing pad. 191 Instruction *InsertPoint = &InnerResumeDest->front(); 192 BasicBlock::iterator I = OuterResumeDest->begin(); 193 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { 194 PHINode *OuterPHI = cast<PHINode>(I); 195 PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, 196 OuterPHI->getName() + ".lpad-body", 197 InsertPoint); 198 OuterPHI->replaceAllUsesWith(InnerPHI); 199 InnerPHI->addIncoming(OuterPHI, OuterResumeDest); 200 } 201 202 // Create a PHI for the exception values. 203 InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, 204 "eh.lpad-body", InsertPoint); 205 CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); 206 InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); 207 208 // All done. 209 return InnerResumeDest; 210 } 211 212 /// Forward the 'resume' instruction to the caller's landing pad block. 213 /// When the landing pad block has only one predecessor, this is a simple 214 /// branch. When there is more than one predecessor, we need to split the 215 /// landing pad block after the landingpad instruction and jump to there. 216 void LandingPadInliningInfo::forwardResume( 217 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { 218 BasicBlock *Dest = getInnerResumeDest(); 219 BasicBlock *Src = RI->getParent(); 220 221 BranchInst::Create(Dest, Src); 222 223 // Update the PHIs in the destination. They were inserted in an order which 224 // makes this work. 225 addIncomingPHIValuesForInto(Src, Dest); 226 227 InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); 228 RI->eraseFromParent(); 229 } 230 231 /// Helper for getUnwindDestToken/getUnwindDestTokenHelper. 232 static Value *getParentPad(Value *EHPad) { 233 if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) 234 return FPI->getParentPad(); 235 return cast<CatchSwitchInst>(EHPad)->getParentPad(); 236 } 237 238 using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; 239 240 /// Helper for getUnwindDestToken that does the descendant-ward part of 241 /// the search. 242 static Value *getUnwindDestTokenHelper(Instruction *EHPad, 243 UnwindDestMemoTy &MemoMap) { 244 SmallVector<Instruction *, 8> Worklist(1, EHPad); 245 246 while (!Worklist.empty()) { 247 Instruction *CurrentPad = Worklist.pop_back_val(); 248 // We only put pads on the worklist that aren't in the MemoMap. When 249 // we find an unwind dest for a pad we may update its ancestors, but 250 // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, 251 // so they should never get updated while queued on the worklist. 252 assert(!MemoMap.count(CurrentPad)); 253 Value *UnwindDestToken = nullptr; 254 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { 255 if (CatchSwitch->hasUnwindDest()) { 256 UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); 257 } else { 258 // Catchswitch doesn't have a 'nounwind' variant, and one might be 259 // annotated as "unwinds to caller" when really it's nounwind (see 260 // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the 261 // parent's unwind dest from this. We can check its catchpads' 262 // descendants, since they might include a cleanuppad with an 263 // "unwinds to caller" cleanupret, which can be trusted. 
264 for (auto HI = CatchSwitch->handler_begin(), 265 HE = CatchSwitch->handler_end(); 266 HI != HE && !UnwindDestToken; ++HI) { 267 BasicBlock *HandlerBlock = *HI; 268 auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); 269 for (User *Child : CatchPad->users()) { 270 // Intentionally ignore invokes here -- since the catchswitch is 271 // marked "unwind to caller", it would be a verifier error if it 272 // contained an invoke which unwinds out of it, so any invoke we'd 273 // encounter must unwind to some child of the catch. 274 if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) 275 continue; 276 277 Instruction *ChildPad = cast<Instruction>(Child); 278 auto Memo = MemoMap.find(ChildPad); 279 if (Memo == MemoMap.end()) { 280 // Haven't figured out this child pad yet; queue it. 281 Worklist.push_back(ChildPad); 282 continue; 283 } 284 // We've already checked this child, but might have found that 285 // it offers no proof either way. 286 Value *ChildUnwindDestToken = Memo->second; 287 if (!ChildUnwindDestToken) 288 continue; 289 // We already know the child's unwind dest, which can either 290 // be ConstantTokenNone to indicate unwind to caller, or can 291 // be another child of the catchpad. Only the former indicates 292 // the unwind dest of the catchswitch. 293 if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { 294 UnwindDestToken = ChildUnwindDestToken; 295 break; 296 } 297 assert(getParentPad(ChildUnwindDestToken) == CatchPad); 298 } 299 } 300 } 301 } else { 302 auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); 303 for (User *U : CleanupPad->users()) { 304 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { 305 if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) 306 UnwindDestToken = RetUnwindDest->getFirstNonPHI(); 307 else 308 UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); 309 break; 310 } 311 Value *ChildUnwindDestToken; 312 if (auto *Invoke = dyn_cast<InvokeInst>(U)) { 313 ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); 314 } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { 315 Instruction *ChildPad = cast<Instruction>(U); 316 auto Memo = MemoMap.find(ChildPad); 317 if (Memo == MemoMap.end()) { 318 // Haven't resolved this child yet; queue it and keep searching. 319 Worklist.push_back(ChildPad); 320 continue; 321 } 322 // We've checked this child, but still need to ignore it if it 323 // had no proof either way. 324 ChildUnwindDestToken = Memo->second; 325 if (!ChildUnwindDestToken) 326 continue; 327 } else { 328 // Not a relevant user of the cleanuppad 329 continue; 330 } 331 // In a well-formed program, the child/invoke must either unwind to 332 // an(other) child of the cleanup, or exit the cleanup. In the 333 // first case, continue searching. 334 if (isa<Instruction>(ChildUnwindDestToken) && 335 getParentPad(ChildUnwindDestToken) == CleanupPad) 336 continue; 337 UnwindDestToken = ChildUnwindDestToken; 338 break; 339 } 340 } 341 // If we haven't found an unwind dest for CurrentPad, we may have queued its 342 // children, so move on to the next in the worklist. 343 if (!UnwindDestToken) 344 continue; 345 346 // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits 347 // any ancestors of CurrentPad up to but not including UnwindDestToken's 348 // parent pad. Record this in the memo map, and check to see if the 349 // original EHPad being queried is one of the ones exited. 
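    // For example, if CurrentPad is a cleanuppad nested two funclets below
    // UnwindDestToken's parent, both CurrentPad and the intermediate pad are
    // memoized to UnwindDestToken here (catchpads are skipped since they just
    // follow their catchswitches).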
    Value *UnwindParent;
    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
      UnwindParent = getParentPad(UnwindPad);
    else
      UnwindParent = nullptr;
    bool ExitedOriginalPad = false;
    for (Instruction *ExitedPad = CurrentPad;
         ExitedPad && ExitedPad != UnwindParent;
         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
      // Skip over catchpads since they just follow their catchswitches.
      if (isa<CatchPadInst>(ExitedPad))
        continue;
      MemoMap[ExitedPad] = UnwindDestToken;
      ExitedOriginalPad |= (ExitedPad == EHPad);
    }

    if (ExitedOriginalPad)
      return UnwindDestToken;

    // Continue the search.
  }

  // No definitive information is contained within this funclet.
  return nullptr;
}

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't provide
/// an answer. Since most funclets will have their unwind dest immediately
/// available as the unwind dest of a catchswitch or cleanupret, this routine
/// searches top-down from the given pad and then up. To avoid worst-case
/// quadratic run-time given that approach, it uses a memo map to avoid
/// re-processing funclet trees. The callers that rewrite the IR as they go
/// take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
    EHPad = CPI->getCatchSwitch();

  // Check if we've already determined the unwind dest for this pad.
  auto Memo = MemoMap.find(EHPad);
  if (Memo != MemoMap.end())
    return Memo->second;

  // Search EHPad and, if necessary, its descendants.
  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
  if (UnwindDestToken)
    return UnwindDestToken;

  // No information is available for this EHPad from itself or any of its
  // descendants. An unwind all the way out to a pad in the caller would
  // need also to agree with the unwind dest of the parent funclet, so
  // search up the chain to try to find a funclet with information. Put
  // null entries in the memo map to avoid re-processing as we go up.
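  // (These null entries are provisional: the walk below overwrites them with
  // the real token if one is found. In debug builds, TempMemos records which
  // entries were added by this invocation so the asserts below can tell them
  // apart from genuine "no information" results.)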
  MemoMap[EHPad] = nullptr;
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 4> TempMemos;
  TempMemos.insert(EHPad);
#endif
  Instruction *LastUselessPad = EHPad;
  Value *AncestorToken;
  for (AncestorToken = getParentPad(EHPad);
       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
       AncestorToken = getParentPad(AncestorToken)) {
    // Skip over catchpads since they just follow their catchswitches.
    if (isa<CatchPadInst>(AncestorPad))
      continue;
    // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
    // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
    // call to getUnwindDestToken, that would mean that AncestorPad had no
    // information in itself, its descendants, or its ancestors. If that
    // were the case, then we should also have recorded the lack of information
    // for the descendant that we're coming from. So assert that we don't
    // find a null entry in the MemoMap for AncestorPad.
    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
    auto AncestorMemo = MemoMap.find(AncestorPad);
    if (AncestorMemo == MemoMap.end()) {
      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
    } else {
      UnwindDestToken = AncestorMemo->second;
    }
    if (UnwindDestToken)
      break;
    LastUselessPad = AncestorPad;
    MemoMap[LastUselessPad] = nullptr;
#ifndef NDEBUG
    TempMemos.insert(LastUselessPad);
#endif
  }

  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
  // returned nullptr (and likewise for EHPad and any of its ancestors up to
  // LastUselessPad), so LastUselessPad has no information from below. Since
  // getUnwindDestTokenHelper must investigate all downward paths through
  // no-information nodes to prove that a node has no information like this,
  // and since any time it finds information it records it in the MemoMap for
  // not just the immediately-containing funclet but also any ancestors also
  // exited, it must be the case that, walking downward from LastUselessPad,
  // visiting just those nodes which have not been mapped to an unwind dest
  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
  // they are just used to keep getUnwindDestTokenHelper from repeating work),
  // any node visited must have been exhaustively searched with no information
  // for it found.
  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
  while (!Worklist.empty()) {
    Instruction *UselessPad = Worklist.pop_back_val();
    auto Memo = MemoMap.find(UselessPad);
    if (Memo != MemoMap.end() && Memo->second) {
      // Here the name 'UselessPad' is a bit of a misnomer, because we've found
      // that it is a funclet that does have information about unwinding to
      // a particular destination; its parent was a useless pad.
      // Since its parent has no information, the unwind edge must not escape
      // the parent, and must target a sibling of this pad. This local unwind
      // gives us no information about EHPad. Leave it and the subtree rooted
      // at it alone.
      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
      continue;
    }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
    MemoMap[UselessPad] = UnwindDestToken;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
        auto *CatchPad = HandlerBlock->getFirstNonPHI();
        for (User *U : CatchPad->users()) {
          assert(
              (!isa<InvokeInst>(U) ||
               (getParentPad(
                    cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                CatchPad)) &&
              "Expected useless pad");
          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
            Worklist.push_back(cast<Instruction>(U));
        }
      }
    } else {
      assert(isa<CleanupPadInst>(UselessPad));
      for (User *U : UselessPad->users()) {
        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
        assert((!isa<InvokeInst>(U) ||
                (getParentPad(
                     cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                 UselessPad)) &&
               "Expected useless pad");
        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
          Worklist.push_back(cast<Instruction>(U));
      }
    }
  }

  return UnwindDestToken;
}

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to UnwindEdge, returning the
/// block (now terminated by the new invoke) so that the caller can add the
/// corresponding incoming values to PHI nodes in the unwind destination.
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
    Instruction *I = &*BBI++;

    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(I);

    if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
      continue;

    // We do not need to (and in fact, cannot) convert possibly throwing calls
    // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
    // invokes. The caller's "segment" of the deoptimization continuation
    // attached to the newly inlined @llvm.experimental_deoptimize
    // (resp. @llvm.experimental.guard) call should contain the exception
    // handling logic, if any.
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
          F->getIntrinsicID() == Intrinsic::experimental_guard)
        continue;

    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
      Value *UnwindDestToken =
          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
        continue;
#ifndef NDEBUG
      Instruction *MemoKey;
      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
        MemoKey = CatchPad->getCatchSwitch();
      else
        MemoKey = FuncletPad;
      assert(FuncletUnwindMap->count(MemoKey) &&
             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
             "must get memoized to avoid confusing later searches");
#endif // NDEBUG
    }

    changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
    return BB;
  }
  return nullptr;
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function; scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  LandingPadInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
       I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
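  // E.g. if an inlined landingpad only has "catch i8* @TidB" and the landing
  // pad of the invoke being inlined has "catch i8* @TidA" plus cleanup, the
  // inlined landingpad ends up with both catch clauses (its own first, the
  // caller's appended) and with the cleanup flag set.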
618 LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); 619 for (LandingPadInst *InlinedLPad : InlinedLPads) { 620 unsigned OuterNum = OuterLPad->getNumClauses(); 621 InlinedLPad->reserveClauses(OuterNum); 622 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) 623 InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); 624 if (OuterLPad->isCleanup()) 625 InlinedLPad->setCleanup(true); 626 } 627 628 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); 629 BB != E; ++BB) { 630 if (InlinedCodeInfo.ContainsCalls) 631 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( 632 &*BB, Invoke.getOuterResumeDest())) 633 // Update any PHI nodes in the exceptional block to indicate that there 634 // is now a new entry in them. 635 Invoke.addIncomingPHIValuesFor(NewBB); 636 637 // Forward any resumes that are remaining here. 638 if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) 639 Invoke.forwardResume(RI, InlinedLPads); 640 } 641 642 // Now that everything is happy, we have one final detail. The PHI nodes in 643 // the exception destination block still have entries due to the original 644 // invoke instruction. Eliminate these entries (which might even delete the 645 // PHI node) now. 646 InvokeDest->removePredecessor(II->getParent()); 647 } 648 649 /// If we inlined an invoke site, we need to convert calls 650 /// in the body of the inlined function into invokes. 651 /// 652 /// II is the invoke instruction being inlined. FirstNewBlock is the first 653 /// block of the inlined code (the last block is the end of the function), 654 /// and InlineCodeInfo is information about the code that got inlined. 655 static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, 656 ClonedCodeInfo &InlinedCodeInfo) { 657 BasicBlock *UnwindDest = II->getUnwindDest(); 658 Function *Caller = FirstNewBlock->getParent(); 659 660 assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); 661 662 // If there are PHI nodes in the unwind destination block, we need to keep 663 // track of which values came into them from the invoke before removing the 664 // edge from this block. 665 SmallVector<Value *, 8> UnwindDestPHIValues; 666 BasicBlock *InvokeBB = II->getParent(); 667 for (Instruction &I : *UnwindDest) { 668 // Save the value to use for this edge. 669 PHINode *PHI = dyn_cast<PHINode>(&I); 670 if (!PHI) 671 break; 672 UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); 673 } 674 675 // Add incoming-PHI values to the unwind destination block for the given basic 676 // block, using the values for the original invoke's source block. 677 auto UpdatePHINodes = [&](BasicBlock *Src) { 678 BasicBlock::iterator I = UnwindDest->begin(); 679 for (Value *V : UnwindDestPHIValues) { 680 PHINode *PHI = cast<PHINode>(I); 681 PHI->addIncoming(V, Src); 682 ++I; 683 } 684 }; 685 686 // This connects all the instructions which 'unwind to caller' to the invoke 687 // destination. 
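  // The map is threaded through getUnwindDestToken so that, as cleanuprets
  // and catchswitches are rewritten below, later queries still see the
  // original callee's view of where each funclet unwound.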
  UnwindDestMemoTy FuncletUnwindMap;
  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
      if (CRI->unwindsToCaller()) {
        auto *CleanupPad = CRI->getCleanupPad();
        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
        CRI->eraseFromParent();
        UpdatePHINodes(&*BB);
        // Finding a cleanupret with an unwind destination would confuse
        // subsequent calls to getUnwindDestToken, so map the cleanuppad
        // to short-circuit any such calls and recognize this as an "unwind
        // to caller" cleanup.
        assert(!FuncletUnwindMap.count(CleanupPad) ||
               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
        FuncletUnwindMap[CleanupPad] =
            ConstantTokenNone::get(Caller->getContext());
      }
    }

    Instruction *I = BB->getFirstNonPHI();
    if (!I->isEHPad())
      continue;

    Instruction *Replacement = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
      if (CatchSwitch->unwindsToCaller()) {
        Value *UnwindDestToken;
        if (auto *ParentPad =
                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a catchswitch,
          // leave it as "unwind to caller".
          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
            continue;
        } else {
          // This catchswitch has no parent to inherit constraints from, and
          // none of its descendants can have an unwind edge that exits it and
          // targets another funclet in the inlinee. It may or may not have a
          // descendant that definitively has an unwind to caller. In either
          // case, we'll have to assume that any unwinds out of it may need to
          // be routed to the caller, so treat it as though it has a definitive
          // unwind to caller.
          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
        }
        auto *NewCatchSwitch = CatchSwitchInst::Create(
            CatchSwitch->getParentPad(), UnwindDest,
            CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
            CatchSwitch);
        for (BasicBlock *PadBB : CatchSwitch->handlers())
          NewCatchSwitch->addHandler(PadBB);
        // Propagate info for the old catchswitch over to the new one in
        // the unwind map. This also serves to short-circuit any subsequent
        // checks for the unwind dest of this catchswitch, which would get
        // confused if they found the outer handler in the callee.
749 FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; 750 Replacement = NewCatchSwitch; 751 } 752 } else if (!isa<FuncletPadInst>(I)) { 753 llvm_unreachable("unexpected EHPad!"); 754 } 755 756 if (Replacement) { 757 Replacement->takeName(I); 758 I->replaceAllUsesWith(Replacement); 759 I->eraseFromParent(); 760 UpdatePHINodes(&*BB); 761 } 762 } 763 764 if (InlinedCodeInfo.ContainsCalls) 765 for (Function::iterator BB = FirstNewBlock->getIterator(), 766 E = Caller->end(); 767 BB != E; ++BB) 768 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( 769 &*BB, UnwindDest, &FuncletUnwindMap)) 770 // Update any PHI nodes in the exceptional block to indicate that there 771 // is now a new entry in them. 772 UpdatePHINodes(NewBB); 773 774 // Now that everything is happy, we have one final detail. The PHI nodes in 775 // the exception destination block still have entries due to the original 776 // invoke instruction. Eliminate these entries (which might even delete the 777 // PHI node) now. 778 UnwindDest->removePredecessor(InvokeBB); 779 } 780 781 /// When inlining a call site that has !llvm.mem.parallel_loop_access or 782 /// llvm.access.group metadata, that metadata should be propagated to all 783 /// memory-accessing cloned instructions. 784 static void PropagateParallelLoopAccessMetadata(CallSite CS, 785 ValueToValueMapTy &VMap) { 786 MDNode *M = 787 CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); 788 MDNode *CallAccessGroup = 789 CS.getInstruction()->getMetadata(LLVMContext::MD_access_group); 790 if (!M && !CallAccessGroup) 791 return; 792 793 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); 794 VMI != VMIE; ++VMI) { 795 if (!VMI->second) 796 continue; 797 798 Instruction *NI = dyn_cast<Instruction>(VMI->second); 799 if (!NI) 800 continue; 801 802 if (M) { 803 if (MDNode *PM = 804 NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { 805 M = MDNode::concatenate(PM, M); 806 NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); 807 } else if (NI->mayReadOrWriteMemory()) { 808 NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); 809 } 810 } 811 812 if (NI->mayReadOrWriteMemory()) { 813 MDNode *UnitedAccGroups = uniteAccessGroups( 814 NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup); 815 NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups); 816 } 817 } 818 } 819 820 /// When inlining a function that contains noalias scope metadata, 821 /// this metadata needs to be cloned so that the inlined blocks 822 /// have different "unique scopes" at every call site. Were this not done, then 823 /// aliasing scopes from a function inlined into a caller multiple times could 824 /// not be differentiated (and this would lead to miscompiles because the 825 /// non-aliasing property communicated by the metadata could have 826 /// call-site-specific control dependencies). 827 static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { 828 const Function *CalledFunc = CS.getCalledFunction(); 829 SetVector<const MDNode *> MD; 830 831 // Note: We could only clone the metadata if it is already used in the 832 // caller. I'm omitting that check here because it might confuse 833 // inter-procedural alias analysis passes. We can revisit this if it becomes 834 // an efficiency or overhead problem. 
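  // The cloning below proceeds in three steps: (1) collect every scope list
  // and scope node reachable from !alias.scope / !noalias metadata in the
  // callee, (2) create temporary nodes and then fresh, distinct replacements
  // for each of them, and (3) retag the cloned instructions, concatenating in
  // any scopes already present on the call site itself.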

  for (const BasicBlock &I : *CalledFunc)
    for (const Instruction &J : I) {
      if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);
    }

  if (MD.empty())
    return;

  // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
  // the set.
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
      if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
        if (MD.insert(M1))
          Queue.push_back(M1);
  }

  // Now we have a complete set of all metadata in the chains used to specify
  // the noalias scopes and the lists of those scopes.
  SmallVector<TempMDTuple, 16> DummyNodes;
  DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  for (const MDNode *I : MD) {
    SmallVector<Metadata *, 4> NewOps;
    for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
      const Metadata *V = I->getOperand(i);
      if (const MDNode *M = dyn_cast<MDNode>(V))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(V));
    }

    MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    TempM->replaceAllUsesWith(NewM);
  }

  // Now replace the metadata in the new inlined instructions with the
  // replacements from the map.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (!VMI->second)
      continue;

    Instruction *NI = dyn_cast<Instruction>(VMI->second);
    if (!NI)
      continue;

    if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
      MDNode *NewMD = MDMap[M];
      // If the call site also had alias scope metadata (a list of scopes to
      // which instructions inside it might belong), propagate those scopes to
      // the inlined instructions.
      if (MDNode *CSM =
              CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
        NewMD = MDNode::concatenate(NewMD, CSM);
      NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
    } else if (NI->mayReadOrWriteMemory()) {
      if (MDNode *M =
              CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
        NI->setMetadata(LLVMContext::MD_alias_scope, M);
    }

    if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
      MDNode *NewMD = MDMap[M];
      // If the call site also had noalias metadata (a list of scopes with
      // which instructions inside it don't alias), propagate those scopes to
      // the inlined instructions.
918 if (MDNode *CSM = 919 CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) 920 NewMD = MDNode::concatenate(NewMD, CSM); 921 NI->setMetadata(LLVMContext::MD_noalias, NewMD); 922 } else if (NI->mayReadOrWriteMemory()) { 923 if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) 924 NI->setMetadata(LLVMContext::MD_noalias, M); 925 } 926 } 927 } 928 929 /// If the inlined function has noalias arguments, 930 /// then add new alias scopes for each noalias argument, tag the mapped noalias 931 /// parameters with noalias metadata specifying the new scope, and tag all 932 /// non-derived loads, stores and memory intrinsics with the new alias scopes. 933 static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, 934 const DataLayout &DL, AAResults *CalleeAAR) { 935 if (!EnableNoAliasConversion) 936 return; 937 938 const Function *CalledFunc = CS.getCalledFunction(); 939 SmallVector<const Argument *, 4> NoAliasArgs; 940 941 for (const Argument &Arg : CalledFunc->args()) 942 if (CS.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) 943 NoAliasArgs.push_back(&Arg); 944 945 if (NoAliasArgs.empty()) 946 return; 947 948 // To do a good job, if a noalias variable is captured, we need to know if 949 // the capture point dominates the particular use we're considering. 950 DominatorTree DT; 951 DT.recalculate(const_cast<Function&>(*CalledFunc)); 952 953 // noalias indicates that pointer values based on the argument do not alias 954 // pointer values which are not based on it. So we add a new "scope" for each 955 // noalias function argument. Accesses using pointers based on that argument 956 // become part of that alias scope, accesses using pointers not based on that 957 // argument are tagged as noalias with that scope. 958 959 DenseMap<const Argument *, MDNode *> NewScopes; 960 MDBuilder MDB(CalledFunc->getContext()); 961 962 // Create a new scope domain for this function. 963 MDNode *NewDomain = 964 MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); 965 for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { 966 const Argument *A = NoAliasArgs[i]; 967 968 std::string Name = std::string(CalledFunc->getName()); 969 if (A->hasName()) { 970 Name += ": %"; 971 Name += A->getName(); 972 } else { 973 Name += ": argument "; 974 Name += utostr(i); 975 } 976 977 // Note: We always create a new anonymous root here. This is true regardless 978 // of the linkage of the callee because the aliasing "scope" is not just a 979 // property of the callee, but also all control dependencies in the caller. 980 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); 981 NewScopes.insert(std::make_pair(A, NewScope)); 982 } 983 984 // Iterate over all new instructions in the map; for all memory-access 985 // instructions, add the alias scope metadata. 
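  // For example (sketch): given a callee "@f(i8* noalias %p, i8* %q)", a
  // cloned store through a pointer based on %p gets !alias.scope naming %p's
  // new scope, while a cloned store through %q (which provably is not derived
  // from %p, assuming %p has not been captured) gets !noalias listing that
  // same scope, so alias analysis can disambiguate the two after inlining.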
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI)
        continue;

      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        PtrArgs.push_back(LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        PtrArgs.push_back(SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
        PtrArgs.push_back(VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
        PtrArgs.push_back(CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
        PtrArgs.push_back(RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
          if (AAResults::onlyAccessesArgPointees(MRB))
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // We need to check the underlying objects of all arguments, not just
          // the pointer arguments, because we might be passing pointers as
          // integers, etc.
          // However, if we know that the call only accesses pointer arguments,
          // then we only need to check the pointer arguments.
          if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, we might still be able to record that it
      // aliases none of the noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but it can
      // be reached through several PHIs, and thus could appear multiple times
      // in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      SmallSetVector<const Argument *, 4> NAPtrArgs;
      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);

        for (const Value *O : Objects)
          ObjSet.insert(O);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
                             isa<ConstantPointerNull>(V) ||
                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(V)) {
          if (!CS.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        // If this is not some identified function-local object (which cannot
        // directly alias a noalias argument), or some other argument (which,
        // by definition, also cannot alias a noalias argument), then we could
        // alias a noalias argument that has been captured.
        if (!isa<Argument>(V) &&
            !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
          CanDeriveViaCapture = true;
      }

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        CanDeriveViaCapture = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
      //   1. The noalias argument is not in the set of objects from which we
      //      definitely derive.
      //   2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
                                 // It might be tempting to skip the
                                 // PointerMayBeCapturedBefore check if
                                 // A->hasNoCaptureAttr() is true, but this is
                                 // incorrect because nocapture only guarantees
                                 // that no copies outlive the function, not
                                 // that the value cannot be locally captured.
                                 !PointerMayBeCapturedBefore(A,
                                     /* ReturnCaptures */ false,
                                     /* StoreCaptures */ false, I, &DT)))
          NoAliases.push_back(NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(LLVMContext::MD_noalias,
                        MDNode::concatenate(
                            NI->getMetadata(LLVMContext::MD_noalias),
                            MDNode::get(CalledFunc->getContext(), NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
1133 bool CanAddScopes = !UsesAliasingPtr; 1134 if (CanAddScopes && IsFuncCall) 1135 CanAddScopes = IsArgMemOnlyCall; 1136 1137 if (CanAddScopes) 1138 for (const Argument *A : NoAliasArgs) { 1139 if (ObjSet.count(A)) 1140 Scopes.push_back(NewScopes[A]); 1141 } 1142 1143 if (!Scopes.empty()) 1144 NI->setMetadata( 1145 LLVMContext::MD_alias_scope, 1146 MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), 1147 MDNode::get(CalledFunc->getContext(), Scopes))); 1148 } 1149 } 1150 } 1151 1152 static bool MayContainThrowingOrExitingCall(Instruction *Begin, 1153 Instruction *End) { 1154 1155 assert(Begin->getParent() == End->getParent() && 1156 "Expected to be in same basic block!"); 1157 unsigned NumInstChecked = 0; 1158 // Check that all instructions in the range [Begin, End) are guaranteed to 1159 // transfer execution to successor. 1160 for (auto &I : make_range(Begin->getIterator(), End->getIterator())) 1161 if (NumInstChecked++ > InlinerAttributeWindow || 1162 !isGuaranteedToTransferExecutionToSuccessor(&I)) 1163 return true; 1164 return false; 1165 } 1166 1167 static AttrBuilder IdentifyValidAttributes(CallSite CS) { 1168 1169 AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex); 1170 if (AB.empty()) 1171 return AB; 1172 AttrBuilder Valid; 1173 // Only allow these white listed attributes to be propagated back to the 1174 // callee. This is because other attributes may only be valid on the call 1175 // itself, i.e. attributes such as signext and zeroext. 1176 if (auto DerefBytes = AB.getDereferenceableBytes()) 1177 Valid.addDereferenceableAttr(DerefBytes); 1178 if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes()) 1179 Valid.addDereferenceableOrNullAttr(DerefOrNullBytes); 1180 if (AB.contains(Attribute::NoAlias)) 1181 Valid.addAttribute(Attribute::NoAlias); 1182 if (AB.contains(Attribute::NonNull)) 1183 Valid.addAttribute(Attribute::NonNull); 1184 return Valid; 1185 } 1186 1187 static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) { 1188 if (!UpdateReturnAttributes && !UpdateLoadMetadataDuringInlining) 1189 return; 1190 1191 AttrBuilder Valid = IdentifyValidAttributes(CS); 1192 if (Valid.empty()) 1193 return; 1194 auto *CalledFunction = CS.getCalledFunction(); 1195 auto &Context = CalledFunction->getContext(); 1196 1197 auto getExpectedRV = [&](Value *V) -> Instruction * { 1198 if (UpdateReturnAttributes && isa<CallBase>(V)) 1199 return dyn_cast_or_null<CallBase>(VMap.lookup(V)); 1200 if (UpdateLoadMetadataDuringInlining && isa<LoadInst>(V)) 1201 return dyn_cast_or_null<LoadInst>(VMap.lookup(V)); 1202 return nullptr; 1203 }; 1204 1205 MDBuilder MDB(Context); 1206 auto CreateMDNode = [&](uint64_t Num) -> MDNode * { 1207 auto *Int = ConstantInt::get(Type::getInt64Ty(Context), Num); 1208 return MDNode::get(Context, MDB.createConstant(Int)); 1209 }; 1210 1211 for (auto &BB : *CalledFunction) { 1212 auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()); 1213 if (!RI) 1214 continue; 1215 // Sanity check that the cloned RetVal exists and is a call, otherwise we 1216 // cannot add the attributes on the cloned RetVal. 1217 // Simplification during inlining could have transformed the cloned 1218 // instruction. 1219 auto *NewRetVal = getExpectedRV(RI->getOperand(0)); 1220 if (!NewRetVal) 1221 continue; 1222 auto *RetVal = cast<Instruction>(RI->getOperand(0)); 1223 // Backward propagation of attributes to the returned value may be incorrect 1224 // if it is control flow dependent. 
1225 // Consider: 1226 // @callee { 1227 // %rv = call @foo() 1228 // %rv2 = call @bar() 1229 // if (%rv2 != null) 1230 // return %rv2 1231 // if (%rv == null) 1232 // exit() 1233 // return %rv 1234 // } 1235 // caller() { 1236 // %val = call nonnull @callee() 1237 // } 1238 // Here we cannot add the nonnull attribute on either foo or bar. So, we 1239 // limit the check to both RetVal and RI are in the same basic block and 1240 // there are no throwing/exiting instructions between these instructions. 1241 if (RI->getParent() != RetVal->getParent() || 1242 MayContainThrowingOrExitingCall(RetVal, RI)) 1243 continue; 1244 // Add to the existing attributes of NewRetVal, i.e. the cloned call 1245 // instruction. 1246 // NB! When we have the same attribute already existing on NewRetVal, but 1247 // with a differing value, the AttributeList's merge API honours the already 1248 // existing attribute value (i.e. attributes such as dereferenceable, 1249 // dereferenceable_or_null etc). See AttrBuilder::merge for more details. 1250 if (auto *CB = dyn_cast<CallBase>(NewRetVal)) { 1251 AttributeList AL = CB->getAttributes(); 1252 AttributeList NewAL = 1253 AL.addAttributes(Context, AttributeList::ReturnIndex, Valid); 1254 CB->setAttributes(NewAL); 1255 } else { 1256 auto *NewLI = cast<LoadInst>(NewRetVal); 1257 if (CS.isReturnNonNull()) 1258 NewLI->setMetadata(LLVMContext::MD_nonnull, CreateMDNode(1)); 1259 // If the load already has a dereferenceable/dereferenceable_or_null 1260 // metadata, we should honour it. 1261 if (uint64_t DerefBytes = Valid.getDereferenceableBytes()) 1262 if(!NewLI->getMetadata(LLVMContext::MD_dereferenceable)) 1263 NewLI->setMetadata(LLVMContext::MD_dereferenceable, 1264 CreateMDNode(DerefBytes)); 1265 if (uint64_t DerefOrNullBytes = Valid.getDereferenceableOrNullBytes()) 1266 if (!NewLI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) 1267 NewLI->setMetadata(LLVMContext::MD_dereferenceable_or_null, 1268 CreateMDNode(DerefOrNullBytes)); 1269 } 1270 1271 } 1272 } 1273 1274 /// If the inlined function has non-byval align arguments, then 1275 /// add @llvm.assume-based alignment assumptions to preserve this information. 1276 static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { 1277 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) 1278 return; 1279 1280 AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller()); 1281 auto &DL = CS.getCaller()->getParent()->getDataLayout(); 1282 1283 // To avoid inserting redundant assumptions, we should check for assumptions 1284 // already in the caller. To do this, we might need a DT of the caller. 1285 DominatorTree DT; 1286 bool DTCalculated = false; 1287 1288 Function *CalledFunc = CS.getCalledFunction(); 1289 for (Argument &Arg : CalledFunc->args()) { 1290 unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; 1291 if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { 1292 if (!DTCalculated) { 1293 DT.recalculate(*CS.getCaller()); 1294 DTCalculated = true; 1295 } 1296 1297 // If we can already prove the asserted alignment in the context of the 1298 // caller, then don't bother inserting the assumption. 
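      // Otherwise an @llvm.assume-based alignment assumption is emitted at
      // the call site (via IRBuilder::CreateAlignmentAssumption) and
      // registered in the caller's AssumptionCache so later queries see it.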
1299 Value *ArgVal = CS.getArgument(Arg.getArgNo()); 1300 if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align) 1301 continue; 1302 1303 CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) 1304 .CreateAlignmentAssumption(DL, ArgVal, Align); 1305 AC->registerAssumption(NewAsmp); 1306 } 1307 } 1308 } 1309 1310 /// Once we have cloned code over from a callee into the caller, 1311 /// update the specified callgraph to reflect the changes we made. 1312 /// Note that it's possible that not all code was copied over, so only 1313 /// some edges of the callgraph may remain. 1314 static void UpdateCallGraphAfterInlining(CallSite CS, 1315 Function::iterator FirstNewBlock, 1316 ValueToValueMapTy &VMap, 1317 InlineFunctionInfo &IFI) { 1318 CallGraph &CG = *IFI.CG; 1319 const Function *Caller = CS.getCaller(); 1320 const Function *Callee = CS.getCalledFunction(); 1321 CallGraphNode *CalleeNode = CG[Callee]; 1322 CallGraphNode *CallerNode = CG[Caller]; 1323 1324 // Since we inlined some uninlined call sites in the callee into the caller, 1325 // add edges from the caller to all of the callees of the callee. 1326 CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); 1327 1328 // Consider the case where CalleeNode == CallerNode. 1329 CallGraphNode::CalledFunctionsVector CallCache; 1330 if (CalleeNode == CallerNode) { 1331 CallCache.assign(I, E); 1332 I = CallCache.begin(); 1333 E = CallCache.end(); 1334 } 1335 1336 for (; I != E; ++I) { 1337 const Value *OrigCall = I->first; 1338 1339 ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); 1340 // Only copy the edge if the call was inlined! 1341 if (VMI == VMap.end() || VMI->second == nullptr) 1342 continue; 1343 1344 // If the call was inlined, but then constant folded, there is no edge to 1345 // add. Check for this case. 1346 auto *NewCall = dyn_cast<CallBase>(VMI->second); 1347 if (!NewCall) 1348 continue; 1349 1350 // We do not treat intrinsic calls like real function calls because we 1351 // expect them to become inline code; do not add an edge for an intrinsic. 1352 if (NewCall->getCalledFunction() && 1353 NewCall->getCalledFunction()->isIntrinsic()) 1354 continue; 1355 1356 // Remember that this call site got inlined for the client of 1357 // InlineFunction. 1358 IFI.InlinedCalls.push_back(NewCall); 1359 1360 // It's possible that inlining the callsite will cause it to go from an 1361 // indirect to a direct call by resolving a function pointer. If this 1362 // happens, set the callee of the new call site to a more precise 1363 // destination. This can also happen if the call graph node of the caller 1364 // was just unnecessarily imprecise. 1365 if (!I->second->getFunction()) 1366 if (Function *F = NewCall->getCalledFunction()) { 1367 // Indirect call site resolved to direct call. 1368 CallerNode->addCalledFunction(NewCall, CG[F]); 1369 1370 continue; 1371 } 1372 1373 CallerNode->addCalledFunction(NewCall, I->second); 1374 } 1375 1376 // Update the call graph by deleting the edge from Callee to Caller. We must 1377 // do this after the loop above in case Caller and Callee are the same. 
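  // (When they are the same node, removing the edge any earlier could perturb
  // or invalidate the edge list being copied above.)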
1378 CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction())); 1379 } 1380 1381 static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, 1382 BasicBlock *InsertBlock, 1383 InlineFunctionInfo &IFI) { 1384 Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); 1385 IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); 1386 1387 Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); 1388 1389 // Always generate a memcpy of alignment 1 here because we don't know 1390 // the alignment of the src pointer. Other optimizations can infer 1391 // better alignment. 1392 Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src, 1393 /*SrcAlign*/ Align(1), Size); 1394 } 1395 1396 /// When inlining a call site that has a byval argument, 1397 /// we have to make the implicit memcpy explicit by adding it. 1398 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, 1399 const Function *CalledFunc, 1400 InlineFunctionInfo &IFI, 1401 unsigned ByValAlignment) { 1402 PointerType *ArgTy = cast<PointerType>(Arg->getType()); 1403 Type *AggTy = ArgTy->getElementType(); 1404 1405 Function *Caller = TheCall->getFunction(); 1406 const DataLayout &DL = Caller->getParent()->getDataLayout(); 1407 1408 // If the called function is readonly, then it could not mutate the caller's 1409 // copy of the byval'd memory. In this case, it is safe to elide the copy and 1410 // temporary. 1411 if (CalledFunc->onlyReadsMemory()) { 1412 // If the byval argument has a specified alignment that is greater than the 1413 // passed in pointer, then we either have to round up the input pointer or 1414 // give up on this transformation. 1415 if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. 1416 return Arg; 1417 1418 AssumptionCache *AC = 1419 IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; 1420 1421 // If the pointer is already known to be sufficiently aligned, or if we can 1422 // round it up to a larger alignment, then we don't need a temporary. 1423 if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >= 1424 ByValAlignment) 1425 return Arg; 1426 1427 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad 1428 // for code quality, but rarely happens and is required for correctness. 1429 } 1430 1431 // Create the alloca. If we have DataLayout, use nice alignment. 1432 Align Alignment(DL.getPrefTypeAlignment(AggTy)); 1433 1434 // If the byval had an alignment specified, we *must* use at least that 1435 // alignment, as it is required by the byval argument (and uses of the 1436 // pointer inside the callee). 1437 Alignment = max(Alignment, MaybeAlign(ByValAlignment)); 1438 1439 Value *NewAlloca = 1440 new AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment, 1441 Arg->getName(), &*Caller->begin()->begin()); 1442 IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); 1443 1444 // Uses of the argument in the function should use our new alloca 1445 // instead. 1446 return NewAlloca; 1447 } 1448 1449 // Check whether this Value is used by a lifetime intrinsic. 1450 static bool isUsedByLifetimeMarker(Value *V) { 1451 for (User *U : V->users()) 1452 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) 1453 if (II->isLifetimeStartOrEnd()) 1454 return true; 1455 return false; 1456 } 1457 1458 // Check whether the given alloca already has 1459 // lifetime.start or lifetime.end intrinsics. 
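// (Sketch of the pattern being detected; the IR below is illustrative.)
// Lifetime markers are normally applied to an i8* view of the alloca:
//   %buf  = alloca [16 x i8], align 1
//   %cast = bitcast [16 x i8]* %buf to i8*
//   call void @llvm.lifetime.start.p0i8(i64 16, i8* %cast)
// so hasLifetimeMarkers() must look through such no-op casts as well as at
// allocas that are already of i8* type.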
1460 static bool hasLifetimeMarkers(AllocaInst *AI) { 1461 Type *Ty = AI->getType(); 1462 Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), 1463 Ty->getPointerAddressSpace()); 1464 if (Ty == Int8PtrTy) 1465 return isUsedByLifetimeMarker(AI); 1466 1467 // Do a scan to find all the casts to i8*. 1468 for (User *U : AI->users()) { 1469 if (U->getType() != Int8PtrTy) continue; 1470 if (U->stripPointerCasts() != AI) continue; 1471 if (isUsedByLifetimeMarker(U)) 1472 return true; 1473 } 1474 return false; 1475 } 1476 1477 /// Return the result of AI->isStaticAlloca() if AI were moved to the entry 1478 /// block. Allocas used in inalloca calls and allocas of dynamic array size 1479 /// cannot be static. 1480 static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) { 1481 return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca(); 1482 } 1483 1484 /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL 1485 /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. 1486 static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, 1487 LLVMContext &Ctx, 1488 DenseMap<const MDNode *, MDNode *> &IANodes) { 1489 auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); 1490 return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(), 1491 IA); 1492 } 1493 1494 /// Update inlined instructions' line numbers to 1495 /// to encode location where these instructions are inlined. 1496 static void fixupLineNumbers(Function *Fn, Function::iterator FI, 1497 Instruction *TheCall, bool CalleeHasDebugInfo) { 1498 const DebugLoc &TheCallDL = TheCall->getDebugLoc(); 1499 if (!TheCallDL) 1500 return; 1501 1502 auto &Ctx = Fn->getContext(); 1503 DILocation *InlinedAtNode = TheCallDL; 1504 1505 // Create a unique call site, not to be confused with any other call from the 1506 // same location. 1507 InlinedAtNode = DILocation::getDistinct( 1508 Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), 1509 InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); 1510 1511 // Cache the inlined-at nodes as they're built so they are reused, without 1512 // this every instruction's inlined-at chain would become distinct from each 1513 // other. 1514 DenseMap<const MDNode *, MDNode *> IANodes; 1515 1516 // Check if we are not generating inline line tables and want to use 1517 // the call site location instead. 1518 bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables"); 1519 1520 for (; FI != Fn->end(); ++FI) { 1521 for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); 1522 BI != BE; ++BI) { 1523 // Loop metadata needs to be updated so that the start and end locs 1524 // reference inlined-at locations. 1525 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes]( 1526 const DILocation &Loc) -> DILocation * { 1527 return inlineDebugLoc(&Loc, InlinedAtNode, Ctx, IANodes).get(); 1528 }; 1529 updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc); 1530 1531 if (!NoInlineLineTables) 1532 if (DebugLoc DL = BI->getDebugLoc()) { 1533 DebugLoc IDL = 1534 inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes); 1535 BI->setDebugLoc(IDL); 1536 continue; 1537 } 1538 1539 if (CalleeHasDebugInfo && !NoInlineLineTables) 1540 continue; 1541 1542 // If the inlined instruction has no line number, or if inline info 1543 // is not being generated, make it look as if it originates from the call 1544 // location. 
This is important for ((__always_inline, __nodebug__)) 1545 // functions which must use caller location for all instructions in their 1546 // function body. 1547 1548 // Don't update static allocas, as they may get moved later. 1549 if (auto *AI = dyn_cast<AllocaInst>(BI)) 1550 if (allocaWouldBeStaticInEntry(AI)) 1551 continue; 1552 1553 BI->setDebugLoc(TheCallDL); 1554 } 1555 1556 // Remove debug info intrinsics if we're not keeping inline info. 1557 if (NoInlineLineTables) { 1558 BasicBlock::iterator BI = FI->begin(); 1559 while (BI != FI->end()) { 1560 if (isa<DbgInfoIntrinsic>(BI)) { 1561 BI = BI->eraseFromParent(); 1562 continue; 1563 } 1564 ++BI; 1565 } 1566 } 1567 1568 } 1569 } 1570 1571 /// Update the block frequencies of the caller after a callee has been inlined. 1572 /// 1573 /// Each block cloned into the caller has its block frequency scaled by the 1574 /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of 1575 /// callee's entry block gets the same frequency as the callsite block and the 1576 /// relative frequencies of all cloned blocks remain the same after cloning. 1577 static void updateCallerBFI(BasicBlock *CallSiteBlock, 1578 const ValueToValueMapTy &VMap, 1579 BlockFrequencyInfo *CallerBFI, 1580 BlockFrequencyInfo *CalleeBFI, 1581 const BasicBlock &CalleeEntryBlock) { 1582 SmallPtrSet<BasicBlock *, 16> ClonedBBs; 1583 for (auto Entry : VMap) { 1584 if (!isa<BasicBlock>(Entry.first) || !Entry.second) 1585 continue; 1586 auto *OrigBB = cast<BasicBlock>(Entry.first); 1587 auto *ClonedBB = cast<BasicBlock>(Entry.second); 1588 uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); 1589 if (!ClonedBBs.insert(ClonedBB).second) { 1590 // Multiple blocks in the callee might get mapped to one cloned block in 1591 // the caller since we prune the callee as we clone it. When that happens, 1592 // we want to use the maximum among the original blocks' frequencies. 1593 uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); 1594 if (NewFreq > Freq) 1595 Freq = NewFreq; 1596 } 1597 CallerBFI->setBlockFreq(ClonedBB, Freq); 1598 } 1599 BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); 1600 CallerBFI->setBlockFreqAndScale( 1601 EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), 1602 ClonedBBs); 1603 } 1604 1605 /// Update the branch metadata for cloned call instructions. 1606 static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, 1607 const ProfileCount &CalleeEntryCount, 1608 const Instruction *TheCall, 1609 ProfileSummaryInfo *PSI, 1610 BlockFrequencyInfo *CallerBFI) { 1611 if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || 1612 CalleeEntryCount.getCount() < 1) 1613 return; 1614 auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; 1615 int64_t CallCount = 1616 std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, 1617 CalleeEntryCount.getCount()); 1618 updateProfileCallee(Callee, -CallCount, &VMap); 1619 } 1620 1621 void llvm::updateProfileCallee( 1622 Function *Callee, int64_t entryDelta, 1623 const ValueMap<const Value *, WeakTrackingVH> *VMap) { 1624 auto CalleeCount = Callee->getEntryCount(); 1625 if (!CalleeCount.hasValue()) 1626 return; 1627 1628 uint64_t priorEntryCount = CalleeCount.getCount(); 1629 uint64_t newEntryCount; 1630 1631 // Since CallSiteCount is an estimate, it could exceed the original callee 1632 // count and has to be set to 0 so guard against underflow. 
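// Worked example (hypothetical numbers): with priorEntryCount == 100 and
// entryDelta == -150, computing priorEntryCount + entryDelta directly would
// wrap around in unsigned arithmetic, so newEntryCount is clamped to 0 below.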
1633 if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount) 1634 newEntryCount = 0; 1635 else 1636 newEntryCount = priorEntryCount + entryDelta; 1637 1638 // During inlining ? 1639 if (VMap) { 1640 uint64_t cloneEntryCount = priorEntryCount - newEntryCount; 1641 for (auto Entry : *VMap) 1642 if (isa<CallInst>(Entry.first)) 1643 if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) 1644 CI->updateProfWeight(cloneEntryCount, priorEntryCount); 1645 } 1646 1647 if (entryDelta) { 1648 Callee->setEntryCount(newEntryCount); 1649 1650 for (BasicBlock &BB : *Callee) 1651 // No need to update the callsite if it is pruned during inlining. 1652 if (!VMap || VMap->count(&BB)) 1653 for (Instruction &I : BB) 1654 if (CallInst *CI = dyn_cast<CallInst>(&I)) 1655 CI->updateProfWeight(newEntryCount, priorEntryCount); 1656 } 1657 } 1658 1659 /// This function inlines the called function into the basic block of the 1660 /// caller. This returns false if it is not possible to inline this call. 1661 /// The program is still in a well defined state if this occurs though. 1662 /// 1663 /// Note that this only does one level of inlining. For example, if the 1664 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now 1665 /// exists in the instruction stream. Similarly this will inline a recursive 1666 /// function by one level. 1667 llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, 1668 AAResults *CalleeAAR, 1669 bool InsertLifetime, 1670 Function *ForwardVarArgsTo) { 1671 Instruction *TheCall = CS.getInstruction(); 1672 assert(TheCall->getParent() && TheCall->getFunction() 1673 && "Instruction not in function!"); 1674 1675 // FIXME: we don't inline callbr yet. 1676 if (isa<CallBrInst>(TheCall)) 1677 return InlineResult::failure("We don't inline callbr yet."); 1678 1679 // If IFI has any state in it, zap it before we fill it in. 1680 IFI.reset(); 1681 1682 Function *CalledFunc = CS.getCalledFunction(); 1683 if (!CalledFunc || // Can't inline external function or indirect 1684 CalledFunc->isDeclaration()) // call! 1685 return InlineResult::failure("external or indirect"); 1686 1687 // The inliner does not know how to inline through calls with operand bundles 1688 // in general ... 1689 if (CS.hasOperandBundles()) { 1690 for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { 1691 uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); 1692 // ... but it knows how to inline through "deopt" operand bundles ... 1693 if (Tag == LLVMContext::OB_deopt) 1694 continue; 1695 // ... and "funclet" operand bundles. 1696 if (Tag == LLVMContext::OB_funclet) 1697 continue; 1698 1699 return InlineResult::failure("unsupported operand bundle"); 1700 } 1701 } 1702 1703 // If the call to the callee cannot throw, set the 'nounwind' flag on any 1704 // calls that we inline. 1705 bool MarkNoUnwind = CS.doesNotThrow(); 1706 1707 BasicBlock *OrigBB = TheCall->getParent(); 1708 Function *Caller = OrigBB->getParent(); 1709 1710 // GC poses two hazards to inlining, which only occur when the callee has GC: 1711 // 1. If the caller has no GC, then the callee's GC must be propagated to the 1712 // caller. 1713 // 2. If the caller has a differing GC, it is invalid to inline. 
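// For example (strategy names are only illustrative): a callee marked
// gc "statepoint-example" can be inlined into a caller with no GC, which then
// inherits that strategy, but not into a caller marked gc "shadow-stack".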
1714 if (CalledFunc->hasGC()) { 1715 if (!Caller->hasGC()) 1716 Caller->setGC(CalledFunc->getGC()); 1717 else if (CalledFunc->getGC() != Caller->getGC()) 1718 return InlineResult::failure("incompatible GC"); 1719 } 1720 1721 // Get the personality function from the callee if it contains a landing pad. 1722 Constant *CalledPersonality = 1723 CalledFunc->hasPersonalityFn() 1724 ? CalledFunc->getPersonalityFn()->stripPointerCasts() 1725 : nullptr; 1726 1727 // Find the personality function used by the landing pads of the caller. If it 1728 // exists, then check to see that it matches the personality function used in 1729 // the callee. 1730 Constant *CallerPersonality = 1731 Caller->hasPersonalityFn() 1732 ? Caller->getPersonalityFn()->stripPointerCasts() 1733 : nullptr; 1734 if (CalledPersonality) { 1735 if (!CallerPersonality) 1736 Caller->setPersonalityFn(CalledPersonality); 1737 // If the personality functions match, then we can perform the 1738 // inlining. Otherwise, we can't inline. 1739 // TODO: This isn't 100% true. Some personality functions are proper 1740 // supersets of others and can be used in place of the other. 1741 else if (CalledPersonality != CallerPersonality) 1742 return InlineResult::failure("incompatible personality"); 1743 } 1744 1745 // We need to figure out which funclet the callsite was in so that we may 1746 // properly nest the callee. 1747 Instruction *CallSiteEHPad = nullptr; 1748 if (CallerPersonality) { 1749 EHPersonality Personality = classifyEHPersonality(CallerPersonality); 1750 if (isScopedEHPersonality(Personality)) { 1751 Optional<OperandBundleUse> ParentFunclet = 1752 CS.getOperandBundle(LLVMContext::OB_funclet); 1753 if (ParentFunclet) 1754 CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); 1755 1756 // OK, the inlining site is legal. What about the target function? 1757 1758 if (CallSiteEHPad) { 1759 if (Personality == EHPersonality::MSVC_CXX) { 1760 // The MSVC personality cannot tolerate catches getting inlined into 1761 // cleanup funclets. 1762 if (isa<CleanupPadInst>(CallSiteEHPad)) { 1763 // Ok, the call site is within a cleanuppad. Let's check the callee 1764 // for catchpads. 1765 for (const BasicBlock &CalledBB : *CalledFunc) { 1766 if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI())) 1767 return InlineResult::failure("catch in cleanup funclet"); 1768 } 1769 } 1770 } else if (isAsynchronousEHPersonality(Personality)) { 1771 // SEH is even less tolerant, there may not be any sort of exceptional 1772 // funclet in the callee. 1773 for (const BasicBlock &CalledBB : *CalledFunc) { 1774 if (CalledBB.isEHPad()) 1775 return InlineResult::failure("SEH in cleanup funclet"); 1776 } 1777 } 1778 } 1779 } 1780 } 1781 1782 // Determine if we are dealing with a call in an EHPad which does not unwind 1783 // to caller. 1784 bool EHPadForCallUnwindsLocally = false; 1785 if (CallSiteEHPad && CS.isCall()) { 1786 UnwindDestMemoTy FuncletUnwindMap; 1787 Value *CallSiteUnwindDestToken = 1788 getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); 1789 1790 EHPadForCallUnwindsLocally = 1791 CallSiteUnwindDestToken && 1792 !isa<ConstantTokenNone>(CallSiteUnwindDestToken); 1793 } 1794 1795 // Get an iterator to the last basic block in the function, which will have 1796 // the new function inlined after it. 1797 Function::iterator LastBlock = --Caller->end(); 1798 1799 // Make sure to capture all of the return instructions from the cloned 1800 // function. 
1801 SmallVector<ReturnInst*, 8> Returns; 1802 ClonedCodeInfo InlinedFunctionInfo; 1803 Function::iterator FirstNewBlock; 1804 1805 { // Scope to destroy VMap after cloning. 1806 ValueToValueMapTy VMap; 1807 // Keep a list of (dst, src) pairs to emit byval initializations. 1808 SmallVector<std::pair<Value*, Value*>, 4> ByValInit; 1809 1810 auto &DL = Caller->getParent()->getDataLayout(); 1811 1812 // Calculate the vector of arguments to pass into the function cloner, which 1813 // matches up the formal to the actual argument values. 1814 CallSite::arg_iterator AI = CS.arg_begin(); 1815 unsigned ArgNo = 0; 1816 for (Function::arg_iterator I = CalledFunc->arg_begin(), 1817 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { 1818 Value *ActualArg = *AI; 1819 1820 // When byval arguments are actually inlined, we need to make the copy 1821 // implied by them explicit. However, we don't do this if the callee is 1822 // readonly or readnone, because the copy would be unneeded: the callee 1823 // doesn't modify the struct. 1824 if (CS.isByValArgument(ArgNo)) { 1825 ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, 1826 CalledFunc->getParamAlignment(ArgNo)); 1827 if (ActualArg != *AI) 1828 ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); 1829 } 1830 1831 VMap[&*I] = ActualArg; 1832 } 1833 1834 // TODO: Remove this when users have been updated to the assume bundles. 1835 // Add alignment assumptions if necessary. We do this before the inlined 1836 // instructions are actually cloned into the caller so that we can easily 1837 // check what will be known at the start of the inlined code. 1838 AddAlignmentAssumptions(CS, IFI); 1839 1840 // Preserve all attributes of the call and its parameters. 1841 if (Instruction *Assume = buildAssumeFromInst(CS.getInstruction())) 1842 Assume->insertBefore(CS.getInstruction()); 1843 1844 // We want the inliner to prune the code as it copies. We would LOVE to 1845 // have no dead or constant instructions leftover after inlining occurs 1846 // (which can happen, e.g., because an argument was constant), but we'll be 1847 // happy with whatever the cloner can do. 1848 CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 1849 /*ModuleLevelChanges=*/false, Returns, ".i", 1850 &InlinedFunctionInfo, TheCall); 1851 // Remember the first block that is newly cloned over. 1852 FirstNewBlock = LastBlock; ++FirstNewBlock; 1853 1854 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) 1855 // Update the BFI of blocks cloned into the caller. 1856 updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, 1857 CalledFunc->front()); 1858 1859 updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, 1860 IFI.PSI, IFI.CallerBFI); 1861 1862 // Inject byval arguments initialization.
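// (Illustrative sketch.) For a byval argument of some %struct.S, the
// initialization emitted by HandleByValArgumentInit copies the caller's value
// into the new temporary at the start of the inlined body, roughly:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %byval.tmp,
//                                        i8* align 1 %orig.arg,
//                                        i64 <sizeof(%struct.S)>, i1 false)
// with the conservative align 1 chosen because the source alignment is
// unknown at this point.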
1863 for (std::pair<Value*, Value*> &Init : ByValInit) 1864 HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), 1865 &*FirstNewBlock, IFI); 1866 1867 Optional<OperandBundleUse> ParentDeopt = 1868 CS.getOperandBundle(LLVMContext::OB_deopt); 1869 if (ParentDeopt) { 1870 SmallVector<OperandBundleDef, 2> OpDefs; 1871 1872 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { 1873 Instruction *I = dyn_cast_or_null<Instruction>(VH); 1874 if (!I) continue; // instruction was DCE'd or RAUW'ed to undef 1875 1876 OpDefs.clear(); 1877 1878 CallSite ICS(I); 1879 OpDefs.reserve(ICS.getNumOperandBundles()); 1880 1881 for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { 1882 auto ChildOB = ICS.getOperandBundleAt(i); 1883 if (ChildOB.getTagID() != LLVMContext::OB_deopt) { 1884 // If the inlined call has other operand bundles, let them be 1885 OpDefs.emplace_back(ChildOB); 1886 continue; 1887 } 1888 1889 // It may be useful to separate this logic (of handling operand 1890 // bundles) out to a separate "policy" component if this gets crowded. 1891 // Prepend the parent's deoptimization continuation to the newly 1892 // inlined call's deoptimization continuation. 1893 std::vector<Value *> MergedDeoptArgs; 1894 MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + 1895 ChildOB.Inputs.size()); 1896 1897 MergedDeoptArgs.insert(MergedDeoptArgs.end(), 1898 ParentDeopt->Inputs.begin(), 1899 ParentDeopt->Inputs.end()); 1900 MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), 1901 ChildOB.Inputs.end()); 1902 1903 OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); 1904 } 1905 1906 Instruction *NewI = nullptr; 1907 if (isa<CallInst>(I)) 1908 NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); 1909 else if (isa<CallBrInst>(I)) 1910 NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I); 1911 else 1912 NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); 1913 1914 // Note: the RAUW does the appropriate fixup in VMap, so we need to do 1915 // this even if the call returns void. 1916 I->replaceAllUsesWith(NewI); 1917 1918 VH = nullptr; 1919 I->eraseFromParent(); 1920 } 1921 } 1922 1923 // Update the callgraph if requested. 1924 if (IFI.CG) 1925 UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); 1926 1927 // For 'nodebug' functions, the associated DISubprogram is always null. 1928 // Conservatively avoid propagating the callsite debug location to 1929 // instructions inlined from a function whose DISubprogram is not null. 1930 fixupLineNumbers(Caller, FirstNewBlock, TheCall, 1931 CalledFunc->getSubprogram() != nullptr); 1932 1933 // Clone existing noalias metadata if necessary. 1934 CloneAliasScopeMetadata(CS, VMap); 1935 1936 // Add noalias metadata if necessary. 1937 AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); 1938 1939 // Clone return attributes on the callsite into the calls within the inlined 1940 // function which feed into its return value. 1941 AddReturnAttributes(CS, VMap); 1942 1943 // Propagate llvm.mem.parallel_loop_access if necessary. 1944 PropagateParallelLoopAccessMetadata(CS, VMap); 1945 1946 // Register any cloned assumptions. 
1947 if (IFI.GetAssumptionCache) 1948 for (BasicBlock &NewBlock : 1949 make_range(FirstNewBlock->getIterator(), Caller->end())) 1950 for (Instruction &I : NewBlock) { 1951 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 1952 if (II->getIntrinsicID() == Intrinsic::assume) 1953 (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II); 1954 } 1955 } 1956 1957 // If there are any alloca instructions in the block that used to be the entry 1958 // block for the callee, move them to the entry block of the caller. First 1959 // calculate which instruction they should be inserted before. We insert the 1960 // instructions at the end of the current alloca list. 1961 { 1962 BasicBlock::iterator InsertPoint = Caller->begin()->begin(); 1963 for (BasicBlock::iterator I = FirstNewBlock->begin(), 1964 E = FirstNewBlock->end(); I != E; ) { 1965 AllocaInst *AI = dyn_cast<AllocaInst>(I++); 1966 if (!AI) continue; 1967 1968 // If the alloca is now dead, remove it. This often occurs due to code 1969 // specialization. 1970 if (AI->use_empty()) { 1971 AI->eraseFromParent(); 1972 continue; 1973 } 1974 1975 if (!allocaWouldBeStaticInEntry(AI)) 1976 continue; 1977 1978 // Keep track of the static allocas that we inline into the caller. 1979 IFI.StaticAllocas.push_back(AI); 1980 1981 // Scan for the block of allocas that we can move over, and move them 1982 // all at once. 1983 while (isa<AllocaInst>(I) && 1984 !cast<AllocaInst>(I)->use_empty() && 1985 allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { 1986 IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); 1987 ++I; 1988 } 1989 1990 // Transfer all of the allocas over in a block. Using splice means 1991 // that the instructions aren't removed from the symbol table, then 1992 // reinserted. 1993 Caller->getEntryBlock().getInstList().splice( 1994 InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); 1995 } 1996 } 1997 1998 SmallVector<Value*,4> VarArgsToForward; 1999 SmallVector<AttributeSet, 4> VarArgsAttrs; 2000 for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); 2001 i < CS.getNumArgOperands(); i++) { 2002 VarArgsToForward.push_back(CS.getArgOperand(i)); 2003 VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); 2004 } 2005 2006 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; 2007 if (InlinedFunctionInfo.ContainsCalls) { 2008 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; 2009 if (CallInst *CI = dyn_cast<CallInst>(TheCall)) 2010 CallSiteTailKind = CI->getTailCallKind(); 2011 2012 // For inlining purposes, the "notail" marker is the same as no marker. 2013 if (CallSiteTailKind == CallInst::TCK_NoTail) 2014 CallSiteTailKind = CallInst::TCK_None; 2015 2016 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; 2017 ++BB) { 2018 for (auto II = BB->begin(); II != BB->end();) { 2019 Instruction &I = *II++; 2020 CallInst *CI = dyn_cast<CallInst>(&I); 2021 if (!CI) 2022 continue; 2023 2024 // Forward varargs from inlined call site to calls to the 2025 // ForwardVarArgsTo function, if requested, and to musttail calls. 2026 if (!VarArgsToForward.empty() && 2027 ((ForwardVarArgsTo && 2028 CI->getCalledFunction() == ForwardVarArgsTo) || 2029 CI->isMustTailCall())) { 2030 // Collect attributes for non-vararg parameters. 
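// (Illustrative note.) Suppose the callee being inlined is variadic, e.g.
// declared 'void @f(i32, ...)', and the call site being inlined was
// 'call void (i32, ...) @f(i32 1, i8* %p)'. Then %p was collected into
// VarArgsToForward above, and here it (plus its parameter attributes) is
// appended after the fixed parameters of the call being rebuilt.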
2031 AttributeList Attrs = CI->getAttributes(); 2032 SmallVector<AttributeSet, 8> ArgAttrs; 2033 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { 2034 for (unsigned ArgNo = 0; 2035 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) 2036 ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); 2037 } 2038 2039 // Add VarArg attributes. 2040 ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); 2041 Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(), 2042 Attrs.getRetAttributes(), ArgAttrs); 2043 // Add VarArgs to existing parameters. 2044 SmallVector<Value *, 6> Params(CI->arg_operands()); 2045 Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); 2046 CallInst *NewCI = CallInst::Create( 2047 CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI); 2048 NewCI->setDebugLoc(CI->getDebugLoc()); 2049 NewCI->setAttributes(Attrs); 2050 NewCI->setCallingConv(CI->getCallingConv()); 2051 CI->replaceAllUsesWith(NewCI); 2052 CI->eraseFromParent(); 2053 CI = NewCI; 2054 } 2055 2056 if (Function *F = CI->getCalledFunction()) 2057 InlinedDeoptimizeCalls |= 2058 F->getIntrinsicID() == Intrinsic::experimental_deoptimize; 2059 2060 // We need to reduce the strength of any inlined tail calls. For 2061 // musttail, we have to avoid introducing potential unbounded stack 2062 // growth. For example, if functions 'f' and 'g' are mutually recursive 2063 // with musttail, we can inline 'g' into 'f' so long as we preserve 2064 // musttail on the cloned call to 'f'. If either the inlined call site 2065 // or the cloned call site is *not* musttail, the program already has 2066 // one frame of stack growth, so it's safe to remove musttail. Here is 2067 // a table of example transformations: 2068 // 2069 // f -> musttail g -> musttail f ==> f -> musttail f 2070 // f -> musttail g -> tail f ==> f -> tail f 2071 // f -> g -> musttail f ==> f -> f 2072 // f -> g -> tail f ==> f -> f 2073 // 2074 // Inlined notail calls should remain notail calls. 2075 CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); 2076 if (ChildTCK != CallInst::TCK_NoTail) 2077 ChildTCK = std::min(CallSiteTailKind, ChildTCK); 2078 CI->setTailCallKind(ChildTCK); 2079 InlinedMustTailCalls |= CI->isMustTailCall(); 2080 2081 // Calls inlined through a 'nounwind' call site should be marked 2082 // 'nounwind'. 2083 if (MarkNoUnwind) 2084 CI->setDoesNotThrow(); 2085 } 2086 } 2087 } 2088 2089 // Leave lifetime markers for the static alloca's, scoping them to the 2090 // function we just inlined. 2091 if (InsertLifetime && !IFI.StaticAllocas.empty()) { 2092 IRBuilder<> builder(&FirstNewBlock->front()); 2093 for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { 2094 AllocaInst *AI = IFI.StaticAllocas[ai]; 2095 // Don't mark swifterror allocas. They can't have bitcast uses. 2096 if (AI->isSwiftError()) 2097 continue; 2098 2099 // If the alloca is already scoped to something smaller than the whole 2100 // function then there's no need to add redundant, less accurate markers. 2101 if (hasLifetimeMarkers(AI)) 2102 continue; 2103 2104 // Try to determine the size of the allocation. 2105 ConstantInt *AllocaSize = nullptr; 2106 if (ConstantInt *AIArraySize = 2107 dyn_cast<ConstantInt>(AI->getArraySize())) { 2108 auto &DL = Caller->getParent()->getDataLayout(); 2109 Type *AllocaType = AI->getAllocatedType(); 2110 uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); 2111 uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); 2112 2113 // Don't add markers for zero-sized allocas. 
2114 if (AllocaArraySize == 0) 2115 continue; 2116 2117 // Check that array size doesn't saturate uint64_t and doesn't 2118 // overflow when it's multiplied by type size. 2119 if (AllocaArraySize != std::numeric_limits<uint64_t>::max() && 2120 std::numeric_limits<uint64_t>::max() / AllocaArraySize >= 2121 AllocaTypeSize) { 2122 AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), 2123 AllocaArraySize * AllocaTypeSize); 2124 } 2125 } 2126 2127 builder.CreateLifetimeStart(AI, AllocaSize); 2128 for (ReturnInst *RI : Returns) { 2129 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize 2130 // call and a return. The return kills all local allocas. 2131 if (InlinedMustTailCalls && 2132 RI->getParent()->getTerminatingMustTailCall()) 2133 continue; 2134 if (InlinedDeoptimizeCalls && 2135 RI->getParent()->getTerminatingDeoptimizeCall()) 2136 continue; 2137 IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); 2138 } 2139 } 2140 } 2141 2142 // If the inlined code contained dynamic alloca instructions, wrap the inlined 2143 // code with llvm.stacksave/llvm.stackrestore intrinsics. 2144 if (InlinedFunctionInfo.ContainsDynamicAllocas) { 2145 Module *M = Caller->getParent(); 2146 // Get the two intrinsics we care about. 2147 Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); 2148 Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); 2149 2150 // Insert the llvm.stacksave. 2151 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) 2152 .CreateCall(StackSave, {}, "savedstack"); 2153 2154 // Insert a call to llvm.stackrestore before any return instructions in the 2155 // inlined function. 2156 for (ReturnInst *RI : Returns) { 2157 // Don't insert llvm.stackrestore calls between a musttail or deoptimize 2158 // call and a return. The return will restore the stack pointer. 2159 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) 2160 continue; 2161 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) 2162 continue; 2163 IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); 2164 } 2165 } 2166 2167 // If we are inlining for an invoke instruction, we must make sure to rewrite 2168 // any call instructions into invoke instructions. This is sensitive to which 2169 // funclet pads were top-level in the inlinee, so must be done before 2170 // rewriting the "parent pad" links. 2171 if (auto *II = dyn_cast<InvokeInst>(TheCall)) { 2172 BasicBlock *UnwindDest = II->getUnwindDest(); 2173 Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); 2174 if (isa<LandingPadInst>(FirstNonPHI)) { 2175 HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); 2176 } else { 2177 HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); 2178 } 2179 } 2180 2181 // Update the lexical scopes of the new funclets and callsites. 2182 // Anything that had 'none' as its parent is now nested inside the callsite's 2183 // EHPad. 2184 2185 if (CallSiteEHPad) { 2186 for (Function::iterator BB = FirstNewBlock->getIterator(), 2187 E = Caller->end(); 2188 BB != E; ++BB) { 2189 // Add bundle operands to any top-level call sites. 2190 SmallVector<OperandBundleDef, 1> OpBundles; 2191 for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { 2192 Instruction *I = &*BBI++; 2193 CallSite CS(I); 2194 if (!CS) 2195 continue; 2196 2197 // Skip call sites which are nounwind intrinsics. 
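// For instance (illustrative examples), calls to @llvm.lifetime.start or
// @llvm.dbg.value are nounwind intrinsic calls and therefore never need a
// "funclet" bundle.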
2198 auto *CalledFn = 2199 dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); 2200 if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) 2201 continue; 2202 2203 // Skip call sites which already have a "funclet" bundle. 2204 if (CS.getOperandBundle(LLVMContext::OB_funclet)) 2205 continue; 2206 2207 CS.getOperandBundlesAsDefs(OpBundles); 2208 OpBundles.emplace_back("funclet", CallSiteEHPad); 2209 2210 Instruction *NewInst; 2211 if (CS.isCall()) 2212 NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); 2213 else if (CS.isCallBr()) 2214 NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I); 2215 else 2216 NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); 2217 NewInst->takeName(I); 2218 I->replaceAllUsesWith(NewInst); 2219 I->eraseFromParent(); 2220 2221 OpBundles.clear(); 2222 } 2223 2224 // It is problematic if the inlinee has a cleanupret which unwinds to 2225 // caller and we inline it into a call site which doesn't unwind but into 2226 // an EH pad that does. Such an edge must be dynamically unreachable. 2227 // As such, we replace the cleanupret with unreachable. 2228 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) 2229 if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) 2230 changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false); 2231 2232 Instruction *I = BB->getFirstNonPHI(); 2233 if (!I->isEHPad()) 2234 continue; 2235 2236 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { 2237 if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) 2238 CatchSwitch->setParentPad(CallSiteEHPad); 2239 } else { 2240 auto *FPI = cast<FuncletPadInst>(I); 2241 if (isa<ConstantTokenNone>(FPI->getParentPad())) 2242 FPI->setParentPad(CallSiteEHPad); 2243 } 2244 } 2245 } 2246 2247 if (InlinedDeoptimizeCalls) { 2248 // We need to at least remove the deoptimizing returns from the Return set, 2249 // so that the control flow from those returns does not get merged into the 2250 // caller (but terminate it instead). If the caller's return type does not 2251 // match the callee's return type, we also need to change the return type of 2252 // the intrinsic. 2253 if (Caller->getReturnType() == TheCall->getType()) { 2254 auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { 2255 return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; 2256 }); 2257 Returns.erase(NewEnd, Returns.end()); 2258 } else { 2259 SmallVector<ReturnInst *, 8> NormalReturns; 2260 Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( 2261 Caller->getParent(), Intrinsic::experimental_deoptimize, 2262 {Caller->getReturnType()}); 2263 2264 for (ReturnInst *RI : Returns) { 2265 CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); 2266 if (!DeoptCall) { 2267 NormalReturns.push_back(RI); 2268 continue; 2269 } 2270 2271 // The calling convention on the deoptimize call itself may be bogus, 2272 // since the code we're inlining may have undefined behavior (and may 2273 // never actually execute at runtime); but all 2274 // @llvm.experimental.deoptimize declarations have to have the same 2275 // calling convention in a well-formed module. 
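// (Illustrative sketch of the rewrite performed below; IR is approximate.)
// A cloned block that ended in
//   %v = call i32 @llvm.experimental.deoptimize.i32(...) [ "deopt"(...) ]
//   ret i32 %v
// is rewritten, when the caller instead returns i64, to call the i64 variant
// of the intrinsic and 'ret' its result directly, so this return never merges
// into the normal return path.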
2276 auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); 2277 NewDeoptIntrinsic->setCallingConv(CallingConv); 2278 auto *CurBB = RI->getParent(); 2279 RI->eraseFromParent(); 2280 2281 SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(), 2282 DeoptCall->arg_end()); 2283 2284 SmallVector<OperandBundleDef, 1> OpBundles; 2285 DeoptCall->getOperandBundlesAsDefs(OpBundles); 2286 DeoptCall->eraseFromParent(); 2287 assert(!OpBundles.empty() && 2288 "Expected at least the deopt operand bundle"); 2289 2290 IRBuilder<> Builder(CurBB); 2291 CallInst *NewDeoptCall = 2292 Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); 2293 NewDeoptCall->setCallingConv(CallingConv); 2294 if (NewDeoptCall->getType()->isVoidTy()) 2295 Builder.CreateRetVoid(); 2296 else 2297 Builder.CreateRet(NewDeoptCall); 2298 } 2299 2300 // Leave behind the normal returns so we can merge control flow. 2301 std::swap(Returns, NormalReturns); 2302 } 2303 } 2304 2305 // Handle any inlined musttail call sites. In order for a new call site to be 2306 // musttail, the source of the clone and the inlined call site must have been 2307 // musttail. Therefore it's safe to return without merging control into the 2308 // phi below. 2309 if (InlinedMustTailCalls) { 2310 // Check if we need to bitcast the result of any musttail calls. 2311 Type *NewRetTy = Caller->getReturnType(); 2312 bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; 2313 2314 // Handle the returns preceded by musttail calls separately. 2315 SmallVector<ReturnInst *, 8> NormalReturns; 2316 for (ReturnInst *RI : Returns) { 2317 CallInst *ReturnedMustTail = 2318 RI->getParent()->getTerminatingMustTailCall(); 2319 if (!ReturnedMustTail) { 2320 NormalReturns.push_back(RI); 2321 continue; 2322 } 2323 if (!NeedBitCast) 2324 continue; 2325 2326 // Delete the old return and any preceding bitcast. 2327 BasicBlock *CurBB = RI->getParent(); 2328 auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); 2329 RI->eraseFromParent(); 2330 if (OldCast) 2331 OldCast->eraseFromParent(); 2332 2333 // Insert a new bitcast and return with the right type. 2334 IRBuilder<> Builder(CurBB); 2335 Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); 2336 } 2337 2338 // Leave behind the normal returns so we can merge control flow. 2339 std::swap(Returns, NormalReturns); 2340 } 2341 2342 // Now that all of the transforms on the inlined code have taken place but 2343 // before we splice the inlined code into the CFG and lose track of which 2344 // blocks were actually inlined, collect the call sites. We only do this if 2345 // call graph updates weren't requested, as those provide value handle based 2346 // tracking of inlined call sites instead. 2347 if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { 2348 // Otherwise just collect the raw call sites that were inlined. 2349 for (BasicBlock &NewBB : 2350 make_range(FirstNewBlock->getIterator(), Caller->end())) 2351 for (Instruction &I : NewBB) 2352 if (auto *CB = dyn_cast<CallBase>(&I)) 2353 IFI.InlinedCallSites.push_back(CB); 2354 } 2355 2356 // If we cloned in _exactly one_ basic block, and if that block ends in a 2357 // return instruction, we splice the body of the inlined callee directly into 2358 // the calling basic block. 2359 if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { 2360 // Move all of the instructions right before the call. 
2361 OrigBB->getInstList().splice(TheCall->getIterator(), 2362 FirstNewBlock->getInstList(), 2363 FirstNewBlock->begin(), FirstNewBlock->end()); 2364 // Remove the cloned basic block. 2365 Caller->getBasicBlockList().pop_back(); 2366 2367 // If the call site was an invoke instruction, add a branch to the normal 2368 // destination. 2369 if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { 2370 BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); 2371 NewBr->setDebugLoc(Returns[0]->getDebugLoc()); 2372 } 2373 2374 // If the return instruction returned a value, replace uses of the call with 2375 // uses of the returned value. 2376 if (!TheCall->use_empty()) { 2377 ReturnInst *R = Returns[0]; 2378 if (TheCall == R->getReturnValue()) 2379 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 2380 else 2381 TheCall->replaceAllUsesWith(R->getReturnValue()); 2382 } 2383 // Since we are now done with the Call/Invoke, we can delete it. 2384 TheCall->eraseFromParent(); 2385 2386 // Since we are now done with the return instruction, delete it also. 2387 Returns[0]->eraseFromParent(); 2388 2389 // We are now done with the inlining. 2390 return InlineResult::success(); 2391 } 2392 2393 // Otherwise, we have the normal case, of more than one block to inline or 2394 // multiple return sites. 2395 2396 // We want to clone the entire callee function into the hole between the 2397 // "starter" and "ender" blocks. How we accomplish this depends on whether 2398 // this is an invoke instruction or a call instruction. 2399 BasicBlock *AfterCallBB; 2400 BranchInst *CreatedBranchToNormalDest = nullptr; 2401 if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { 2402 2403 // Add an unconditional branch to make this look like the CallInst case... 2404 CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); 2405 2406 // Split the basic block. This guarantees that no PHI nodes will have to be 2407 // updated due to new incoming edges, and make the invoke case more 2408 // symmetric to the call case. 2409 AfterCallBB = 2410 OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), 2411 CalledFunc->getName() + ".exit"); 2412 2413 } else { // It's a call 2414 // If this is a call instruction, we need to split the basic block that 2415 // the call lives in. 2416 // 2417 AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), 2418 CalledFunc->getName() + ".exit"); 2419 } 2420 2421 if (IFI.CallerBFI) { 2422 // Copy original BB's block frequency to AfterCallBB 2423 IFI.CallerBFI->setBlockFreq( 2424 AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); 2425 } 2426 2427 // Change the branch that used to go to AfterCallBB to branch to the first 2428 // basic block of the inlined function. 2429 // 2430 Instruction *Br = OrigBB->getTerminator(); 2431 assert(Br && Br->getOpcode() == Instruction::Br && 2432 "splitBasicBlock broken!"); 2433 Br->setOperand(0, &*FirstNewBlock); 2434 2435 // Now that the function is correct, make it a little bit nicer. In 2436 // particular, move the basic blocks inserted from the end of the function 2437 // into the space made by splitting the source basic block. 2438 Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), 2439 Caller->getBasicBlockList(), FirstNewBlock, 2440 Caller->end()); 2441 2442 // Handle all of the return instructions that we just cloned in, and eliminate 2443 // any users of the original call/invoke instruction. 
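// (Illustrative sketch of the multi-return case handled below.) Each cloned
// 'ret' is turned into a branch to the ".exit" block, and the former result
// uses are routed through a PHI, roughly:
//   callee.exit:
//     %phi = phi i32 [ %v0, %inlined.bb0 ], [ %v1, %inlined.bb1 ]
// with every prior use of the call/invoke result now using %phi.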
2444 Type *RTy = CalledFunc->getReturnType(); 2445 2446 PHINode *PHI = nullptr; 2447 if (Returns.size() > 1) { 2448 // The PHI node should go at the front of the new basic block to merge all 2449 // possible incoming values. 2450 if (!TheCall->use_empty()) { 2451 PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), 2452 &AfterCallBB->front()); 2453 // Anything that used the result of the function call should now use the 2454 // PHI node as their operand. 2455 TheCall->replaceAllUsesWith(PHI); 2456 } 2457 2458 // Loop over all of the return instructions adding entries to the PHI node 2459 // as appropriate. 2460 if (PHI) { 2461 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 2462 ReturnInst *RI = Returns[i]; 2463 assert(RI->getReturnValue()->getType() == PHI->getType() && 2464 "Ret value not consistent in function!"); 2465 PHI->addIncoming(RI->getReturnValue(), RI->getParent()); 2466 } 2467 } 2468 2469 // Add a branch to the merge points and remove return instructions. 2470 DebugLoc Loc; 2471 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 2472 ReturnInst *RI = Returns[i]; 2473 BranchInst* BI = BranchInst::Create(AfterCallBB, RI); 2474 Loc = RI->getDebugLoc(); 2475 BI->setDebugLoc(Loc); 2476 RI->eraseFromParent(); 2477 } 2478 // We need to set the debug location to *somewhere* inside the 2479 // inlined function. The line number may be nonsensical, but the 2480 // instruction will at least be associated with the right 2481 // function. 2482 if (CreatedBranchToNormalDest) 2483 CreatedBranchToNormalDest->setDebugLoc(Loc); 2484 } else if (!Returns.empty()) { 2485 // Otherwise, if there is exactly one return value, just replace anything 2486 // using the return value of the call with the computed value. 2487 if (!TheCall->use_empty()) { 2488 if (TheCall == Returns[0]->getReturnValue()) 2489 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 2490 else 2491 TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); 2492 } 2493 2494 // Update PHI nodes that use the ReturnBB to use the AfterCallBB. 2495 BasicBlock *ReturnBB = Returns[0]->getParent(); 2496 ReturnBB->replaceAllUsesWith(AfterCallBB); 2497 2498 // Splice the code from the return block into the block that it will return 2499 // to, which contains the code that was after the call. 2500 AfterCallBB->getInstList().splice(AfterCallBB->begin(), 2501 ReturnBB->getInstList()); 2502 2503 if (CreatedBranchToNormalDest) 2504 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); 2505 2506 // Delete the return instruction now and empty ReturnBB now. 2507 Returns[0]->eraseFromParent(); 2508 ReturnBB->eraseFromParent(); 2509 } else if (!TheCall->use_empty()) { 2510 // No returns, but something is using the return value of the call. Just 2511 // nuke the result. 2512 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 2513 } 2514 2515 // Since we are now done with the Call/Invoke, we can delete it. 2516 TheCall->eraseFromParent(); 2517 2518 // If we inlined any musttail calls and the original return is now 2519 // unreachable, delete it. It can only contain a bitcast and ret. 2520 if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) 2521 AfterCallBB->eraseFromParent(); 2522 2523 // We should always be able to fold the entry block of the function into the 2524 // single predecessor of the block... 
2525 assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); 2526 BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); 2527 2528 // Splice the code entry block into calling block, right before the 2529 // unconditional branch. 2530 CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes 2531 OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); 2532 2533 // Remove the unconditional branch. 2534 OrigBB->getInstList().erase(Br); 2535 2536 // Now we can remove the CalleeEntry block, which is now empty. 2537 Caller->getBasicBlockList().erase(CalleeEntry); 2538 2539 // If we inserted a phi node, check to see if it has a single value (e.g. all 2540 // the entries are the same or undef). If so, remove the PHI so it doesn't 2541 // block other optimizations. 2542 if (PHI) { 2543 AssumptionCache *AC = 2544 IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; 2545 auto &DL = Caller->getParent()->getDataLayout(); 2546 if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { 2547 PHI->replaceAllUsesWith(V); 2548 PHI->eraseFromParent(); 2549 } 2550 } 2551 2552 return InlineResult::success(); 2553 } 2554