//===- InlineFunction.cpp - Code to perform function inlining ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/KnowledgeRetention.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool>
EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
                        cl::Hidden,
                        cl::desc("Convert noalias attributes to metadata during inlining."));

static cl::opt<bool>
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
                             cl::init(true), cl::Hidden,
                             cl::desc("Convert align attributes to assumptions during inlining."));

llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
                                        AAResults *CalleeAAR,
                                        bool InsertLifetime) {
  return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
}

namespace {

/// A class for recording information about inlining a landing pad.
class LandingPadInliningInfo {
  /// Destination of the invoke's unwind.
  BasicBlock *OuterResumeDest;

  /// Destination for the callee's resume.
  BasicBlock *InnerResumeDest = nullptr;

  /// LandingPadInst associated with the invoke.
  LandingPadInst *CallerLPad = nullptr;

  /// PHI for EH values from landingpad insts.
  PHINode *InnerEHValuesPHI = nullptr;

  /// Values coming into the unwind destination's PHI nodes from the original
  /// invoke's block, saved before that edge is removed.
  SmallVector<Value*, 8> UnwindDestPHIValues;

public:
  LandingPadInliningInfo(InvokeInst *II)
      : OuterResumeDest(II->getUnwindDest()) {
    // If there are PHI nodes in the unwind destination block, we need to keep
    // track of which values came into them from the invoke before removing
    // the edge from this block.
    BasicBlock *InvokeBB = II->getParent();
    BasicBlock::iterator I = OuterResumeDest->begin();
    for (; isa<PHINode>(I); ++I) {
      // Save the value to use for this edge.
      PHINode *PHI = cast<PHINode>(I);
      UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
    }

    CallerLPad = cast<LandingPadInst>(I);
  }

  /// The outer unwind destination is the target of
  /// unwind edges introduced for calls within the inlined function.
  BasicBlock *getOuterResumeDest() const {
    return OuterResumeDest;
  }

  BasicBlock *getInnerResumeDest();

  LandingPadInst *getLandingPadInst() const { return CallerLPad; }

  /// Forward the 'resume' instruction to the caller's landing pad block.
  /// When the landing pad block has only one predecessor, this is
  /// a simple branch. When there is more than one predecessor, we need to
  /// split the landing pad block after the landingpad instruction and jump
  /// to there.
  void forwardResume(ResumeInst *RI,
                     SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);

  /// Add incoming-PHI values to the unwind destination block for the given
  /// basic block, using the values for the original invoke's source block.
  void addIncomingPHIValuesFor(BasicBlock *BB) const {
    addIncomingPHIValuesForInto(BB, OuterResumeDest);
  }

  void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
    BasicBlock::iterator I = dest->begin();
    for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
      PHINode *phi = cast<PHINode>(I);
      phi->addIncoming(UnwindDestPHIValues[i], src);
    }
  }
};

} // end anonymous namespace

/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
  if (InnerResumeDest) return InnerResumeDest;

  // Split the landing pad.
  BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
  InnerResumeDest =
    OuterResumeDest->splitBasicBlock(SplitPoint,
                                     OuterResumeDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned PHICapacity = 2;

  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
  Instruction *InsertPoint = &InnerResumeDest->front();
  BasicBlock::iterator I = OuterResumeDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *OuterPHI = cast<PHINode>(I);
    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
                                        OuterPHI->getName() + ".lpad-body",
                                        InsertPoint);
    OuterPHI->replaceAllUsesWith(InnerPHI);
    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
  }

  // Create a PHI for the exception values.
  InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
                                     "eh.lpad-body", InsertPoint);
  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);

  // All done.
  return InnerResumeDest;
}

/// Forward the 'resume' instruction to the caller's landing pad block.
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void LandingPadInliningInfo::forwardResume(
    ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
  BasicBlock *Dest = getInnerResumeDest();
  BasicBlock *Src = RI->getParent();

  BranchInst::Create(Dest, Src);

  // Update the PHIs in the destination. They were inserted in an order which
  // makes this work.
  addIncomingPHIValuesForInto(Src, Dest);

  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
  RI->eraseFromParent();
}

/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
static Value *getParentPad(Value *EHPad) {
  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
    return FPI->getParentPad();
  return cast<CatchSwitchInst>(EHPad)->getParentPad();
}

using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;

/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
                                       UnwindDestMemoTy &MemoMap) {
  SmallVector<Instruction *, 8> Worklist(1, EHPad);

  while (!Worklist.empty()) {
    Instruction *CurrentPad = Worklist.pop_back_val();
    // We only put pads on the worklist that aren't in the MemoMap. When
    // we find an unwind dest for a pad we may update its ancestors, but
    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
    // so they should never get updated while queued on the worklist.
    assert(!MemoMap.count(CurrentPad));
    Value *UnwindDestToken = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
      if (CatchSwitch->hasUnwindDest()) {
        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
      } else {
        // Catchswitch doesn't have a 'nounwind' variant, and one might be
        // annotated as "unwinds to caller" when really it's nounwind (see
        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
        // parent's unwind dest from this. We can check its catchpads'
        // descendants, since they might include a cleanuppad with an
        // "unwinds to caller" cleanupret, which can be trusted.
        for (auto HI = CatchSwitch->handler_begin(),
                  HE = CatchSwitch->handler_end();
             HI != HE && !UnwindDestToken; ++HI) {
          BasicBlock *HandlerBlock = *HI;
          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
          for (User *Child : CatchPad->users()) {
            // Intentionally ignore invokes here -- since the catchswitch is
            // marked "unwind to caller", it would be a verifier error if it
            // contained an invoke which unwinds out of it, so any invoke we'd
            // encounter must unwind to some child of the catch.
            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
              continue;

            Instruction *ChildPad = cast<Instruction>(Child);
            auto Memo = MemoMap.find(ChildPad);
            if (Memo == MemoMap.end()) {
              // Haven't figured out this child pad yet; queue it.
              Worklist.push_back(ChildPad);
              continue;
            }
            // We've already checked this child, but might have found that
            // it offers no proof either way.
            Value *ChildUnwindDestToken = Memo->second;
            if (!ChildUnwindDestToken)
              continue;
            // We already know the child's unwind dest, which can either
            // be ConstantTokenNone to indicate unwind to caller, or can
            // be another child of the catchpad. Only the former indicates
            // the unwind dest of the catchswitch.
            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
              UnwindDestToken = ChildUnwindDestToken;
              break;
            }
            assert(getParentPad(ChildUnwindDestToken) == CatchPad);
          }
        }
      }
    } else {
      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
      for (User *U : CleanupPad->users()) {
        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
            UnwindDestToken = RetUnwindDest->getFirstNonPHI();
          else
            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
          break;
        }
        Value *ChildUnwindDestToken;
        if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
          Instruction *ChildPad = cast<Instruction>(U);
          auto Memo = MemoMap.find(ChildPad);
          if (Memo == MemoMap.end()) {
            // Haven't resolved this child yet; queue it and keep searching.
            Worklist.push_back(ChildPad);
            continue;
          }
          // We've checked this child, but still need to ignore it if it
          // had no proof either way.
          ChildUnwindDestToken = Memo->second;
          if (!ChildUnwindDestToken)
            continue;
        } else {
          // Not a relevant user of the cleanuppad
          continue;
        }
        // In a well-formed program, the child/invoke must either unwind to
        // an(other) child of the cleanup, or exit the cleanup. In the
        // first case, continue searching.
        if (isa<Instruction>(ChildUnwindDestToken) &&
            getParentPad(ChildUnwindDestToken) == CleanupPad)
          continue;
        UnwindDestToken = ChildUnwindDestToken;
        break;
      }
    }
    // If we haven't found an unwind dest for CurrentPad, we may have queued its
    // children, so move on to the next in the worklist.
    if (!UnwindDestToken)
      continue;

    // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
    // any ancestors of CurrentPad up to but not including UnwindDestToken's
    // parent pad. Record this in the memo map, and check to see if the
    // original EHPad being queried is one of the ones exited.
    Value *UnwindParent;
    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
      UnwindParent = getParentPad(UnwindPad);
    else
      UnwindParent = nullptr;
    bool ExitedOriginalPad = false;
    for (Instruction *ExitedPad = CurrentPad;
         ExitedPad && ExitedPad != UnwindParent;
         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
      // Skip over catchpads since they just follow their catchswitches.
      if (isa<CatchPadInst>(ExitedPad))
        continue;
      MemoMap[ExitedPad] = UnwindDestToken;
      ExitedOriginalPad |= (ExitedPad == EHPad);
    }

    if (ExitedOriginalPad)
      return UnwindDestToken;

    // Continue the search.
  }

  // No definitive information is contained within this funclet.
  return nullptr;
}

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't provide
/// an answer. Since most funclets will have their unwind dest immediately
/// available as the unwind dest of a catchswitch or cleanupret, this routine
/// searches top-down from the given pad and then up. To avoid worst-case
/// quadratic run-time given that approach, it uses a memo map to avoid
/// re-processing funclet trees. The callers that rewrite the IR as they go
/// take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
    EHPad = CPI->getCatchSwitch();

  // Check if we've already determined the unwind dest for this pad.
  auto Memo = MemoMap.find(EHPad);
  if (Memo != MemoMap.end())
    return Memo->second;

  // Search EHPad and, if necessary, its descendants.
  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
  if (UnwindDestToken)
    return UnwindDestToken;

  // No information is available for this EHPad from itself or any of its
  // descendants. An unwind all the way out to a pad in the caller would
  // need also to agree with the unwind dest of the parent funclet, so
  // search up the chain to try to find a funclet with information. Put
  // null entries in the memo map to avoid re-processing as we go up.
  MemoMap[EHPad] = nullptr;
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 4> TempMemos;
  TempMemos.insert(EHPad);
#endif
  Instruction *LastUselessPad = EHPad;
  Value *AncestorToken;
  for (AncestorToken = getParentPad(EHPad);
       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
       AncestorToken = getParentPad(AncestorToken)) {
    // Skip over catchpads since they just follow their catchswitches.
    if (isa<CatchPadInst>(AncestorPad))
      continue;
    // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
    // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
    // call to getUnwindDestToken, that would mean that AncestorPad had no
    // information in itself, its descendants, or its ancestors. If that
    // were the case, then we should also have recorded the lack of information
    // for the descendant that we're coming from. So assert that we don't
    // find a null entry in the MemoMap for AncestorPad.
    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
    auto AncestorMemo = MemoMap.find(AncestorPad);
    if (AncestorMemo == MemoMap.end()) {
      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
    } else {
      UnwindDestToken = AncestorMemo->second;
    }
    if (UnwindDestToken)
      break;
    LastUselessPad = AncestorPad;
    MemoMap[LastUselessPad] = nullptr;
#ifndef NDEBUG
    TempMemos.insert(LastUselessPad);
#endif
  }

  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
  // returned nullptr (and likewise for EHPad and any of its ancestors up to
  // LastUselessPad), so LastUselessPad has no information from below. Since
  // getUnwindDestTokenHelper must investigate all downward paths through
  // no-information nodes to prove that a node has no information like this,
  // and since any time it finds information it records it in the MemoMap for
  // not just the immediately-containing funclet but also any ancestors also
  // exited, it must be the case that, walking downward from LastUselessPad,
  // visiting just those nodes which have not been mapped to an unwind dest
  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
  // they are just used to keep getUnwindDestTokenHelper from repeating work),
  // any node visited must have been exhaustively searched with no information
  // for it found.
  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
  while (!Worklist.empty()) {
    Instruction *UselessPad = Worklist.pop_back_val();
    auto Memo = MemoMap.find(UselessPad);
    if (Memo != MemoMap.end() && Memo->second) {
      // Here the name 'UselessPad' is a bit of a misnomer, because we've found
      // that it is a funclet that does have information about unwinding to
      // a particular destination; its parent was a useless pad.
      // Since its parent has no information, the unwind edge must not escape
      // the parent, and must target a sibling of this pad. This local unwind
      // gives us no information about EHPad. Leave it and the subtree rooted
      // at it alone.
      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
      continue;
    }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
    MemoMap[UselessPad] = UnwindDestToken;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
        auto *CatchPad = HandlerBlock->getFirstNonPHI();
        for (User *U : CatchPad->users()) {
          assert(
              (!isa<InvokeInst>(U) ||
               (getParentPad(
                    cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                CatchPad)) &&
              "Expected useless pad");
          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
            Worklist.push_back(cast<Instruction>(U));
        }
      }
    } else {
      assert(isa<CleanupPadInst>(UselessPad));
      for (User *U : UselessPad->users()) {
        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
        assert((!isa<InvokeInst>(U) ||
                (getParentPad(
                     cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                 UselessPad)) &&
               "Expected useless pad");
        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
          Worklist.push_back(cast<Instruction>(U));
      }
    }
  }

  return UnwindDestToken;
}

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
    Instruction *I = &*BBI++;

    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(I);

    if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
      continue;

    // We do not need to (and in fact, cannot) convert possibly throwing calls
    // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
    // invokes. The caller's "segment" of the deoptimization continuation
    // attached to the newly inlined @llvm.experimental.deoptimize
    // (resp. @llvm.experimental.guard) call should contain the exception
    // handling logic, if any.
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
          F->getIntrinsicID() == Intrinsic::experimental_guard)
        continue;

    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
      Value *UnwindDestToken =
          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
        continue;
#ifndef NDEBUG
      Instruction *MemoKey;
      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
        MemoKey = CatchPad->getCatchSwitch();
      else
        MemoKey = FuncletPad;
      assert(FuncletUnwindMap->count(MemoKey) &&
             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
             "must get memoized to avoid confusing later searches");
#endif // NDEBUG
    }

    changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
    return BB;
  }
  return nullptr;
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  LandingPadInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
       I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
  for (LandingPadInst *InlinedLPad : InlinedLPads) {
    unsigned OuterNum = OuterLPad->getNumClauses();
    InlinedLPad->reserveClauses(OuterNum);
    for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    if (OuterLPad->isCleanup())
      InlinedLPad->setCleanup(true);
  }

  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (InlinedCodeInfo.ContainsCalls)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, Invoke.getOuterResumeDest()))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        Invoke.addIncomingPHIValuesFor(NewBB);

    // Forward any resumes that are remaining here.
    if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
      Invoke.forwardResume(RI, InlinedLPads);
  }

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                               ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *UnwindDest = II->getUnwindDest();
  Function *Caller = FirstNewBlock->getParent();

  assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");

  // If there are PHI nodes in the unwind destination block, we need to keep
  // track of which values came into them from the invoke before removing the
  // edge from this block.
  SmallVector<Value *, 8> UnwindDestPHIValues;
  BasicBlock *InvokeBB = II->getParent();
  for (Instruction &I : *UnwindDest) {
    // Save the value to use for this edge.
    PHINode *PHI = dyn_cast<PHINode>(&I);
    if (!PHI)
      break;
    UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
  }

  // Add incoming-PHI values to the unwind destination block for the given basic
  // block, using the values for the original invoke's source block.
  auto UpdatePHINodes = [&](BasicBlock *Src) {
    BasicBlock::iterator I = UnwindDest->begin();
    for (Value *V : UnwindDestPHIValues) {
      PHINode *PHI = cast<PHINode>(I);
      PHI->addIncoming(V, Src);
      ++I;
    }
  };

  // This connects all the instructions which 'unwind to caller' to the invoke
  // destination.
  UnwindDestMemoTy FuncletUnwindMap;
  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
      if (CRI->unwindsToCaller()) {
        auto *CleanupPad = CRI->getCleanupPad();
        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
        CRI->eraseFromParent();
        UpdatePHINodes(&*BB);
        // Finding a cleanupret with an unwind destination would confuse
        // subsequent calls to getUnwindDestToken, so map the cleanuppad
        // to short-circuit any such calls and recognize this as an "unwind
        // to caller" cleanup.
        assert(!FuncletUnwindMap.count(CleanupPad) ||
               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
        FuncletUnwindMap[CleanupPad] =
            ConstantTokenNone::get(Caller->getContext());
      }
    }

    Instruction *I = BB->getFirstNonPHI();
    if (!I->isEHPad())
      continue;

    Instruction *Replacement = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
      if (CatchSwitch->unwindsToCaller()) {
        Value *UnwindDestToken;
        if (auto *ParentPad =
                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a call, leave it
          // as "unwind to caller".
          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
            continue;
        } else {
          // This catchswitch has no parent to inherit constraints from, and
          // none of its descendants can have an unwind edge that exits it and
          // targets another funclet in the inlinee. It may or may not have a
          // descendant that definitively has an unwind to caller. In either
          // case, we'll have to assume that any unwinds out of it may need to
          // be routed to the caller, so treat it as though it has a definitive
          // unwind to caller.
          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
        }
        auto *NewCatchSwitch = CatchSwitchInst::Create(
            CatchSwitch->getParentPad(), UnwindDest,
            CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
            CatchSwitch);
        for (BasicBlock *PadBB : CatchSwitch->handlers())
          NewCatchSwitch->addHandler(PadBB);
        // Propagate info for the old catchswitch over to the new one in
        // the unwind map. This also serves to short-circuit any subsequent
        // checks for the unwind dest of this catchswitch, which would get
        // confused if they found the outer handler in the callee.
        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
        Replacement = NewCatchSwitch;
      }
    } else if (!isa<FuncletPadInst>(I)) {
      llvm_unreachable("unexpected EHPad!");
    }

    if (Replacement) {
      Replacement->takeName(I);
      I->replaceAllUsesWith(Replacement);
      I->eraseFromParent();
      UpdatePHINodes(&*BB);
    }
  }

  if (InlinedCodeInfo.ContainsCalls)
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, UnwindDest, &FuncletUnwindMap))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        UpdatePHINodes(NewBB);

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  UnwindDest->removePredecessor(InvokeBB);
}

/// When inlining a call site that has !llvm.mem.parallel_loop_access or
/// llvm.access.group metadata, that metadata should be propagated to all
/// memory-accessing cloned instructions.
static void PropagateParallelLoopAccessMetadata(CallSite CS,
                                                ValueToValueMapTy &VMap) {
  MDNode *M =
      CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  MDNode *CallAccessGroup =
      CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
  if (!M && !CallAccessGroup)
    return;

  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (!VMI->second)
      continue;

    Instruction *NI = dyn_cast<Instruction>(VMI->second);
    if (!NI)
      continue;

    if (M) {
      if (MDNode *PM =
              NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
        M = MDNode::concatenate(PM, M);
        NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
      } else if (NI->mayReadOrWriteMemory()) {
        NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
      }
    }

    if (NI->mayReadOrWriteMemory()) {
      MDNode *UnitedAccGroups = uniteAccessGroups(
          NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
      NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
    }
  }
}

/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unique scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
/// non-aliasing property communicated by the metadata could have
/// call-site-specific control dependencies).
static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
  const Function *CalledFunc = CS.getCalledFunction();
  SetVector<const MDNode *> MD;

  // Note: We could only clone the metadata if it is already used in the
  // caller. I'm omitting that check here because it might confuse
  // inter-procedural alias analysis passes. We can revisit this if it becomes
  // an efficiency or overhead problem.

  for (const BasicBlock &I : *CalledFunc)
    for (const Instruction &J : I) {
      if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);
    }

  if (MD.empty())
    return;

  // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
  // the set.
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
      if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
        if (MD.insert(M1))
          Queue.push_back(M1);
  }

  // Now we have a complete set of all metadata in the chains used to specify
  // the noalias scopes and the lists of those scopes.
  SmallVector<TempMDTuple, 16> DummyNodes;
  DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  for (const MDNode *I : MD) {
    SmallVector<Metadata *, 4> NewOps;
    for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
      const Metadata *V = I->getOperand(i);
      if (const MDNode *M = dyn_cast<MDNode>(V))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(V));
    }

    MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    TempM->replaceAllUsesWith(NewM);
  }

  // Now replace the metadata in the new inlined instructions with the
  // replacements from the map.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (!VMI->second)
      continue;

    Instruction *NI = dyn_cast<Instruction>(VMI->second);
    if (!NI)
      continue;

    if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
      MDNode *NewMD = MDMap[M];
      // If the call site also had alias scope metadata (a list of scopes to
      // which instructions inside it might belong), propagate those scopes to
      // the inlined instructions.
      if (MDNode *CSM =
              CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
        NewMD = MDNode::concatenate(NewMD, CSM);
      NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
    } else if (NI->mayReadOrWriteMemory()) {
      if (MDNode *M =
              CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
        NI->setMetadata(LLVMContext::MD_alias_scope, M);
    }

    if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
      MDNode *NewMD = MDMap[M];
      // If the call site also had noalias metadata (a list of scopes with
      // which instructions inside it don't alias), propagate those scopes to
      // the inlined instructions.
      if (MDNode *CSM =
              CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
        NewMD = MDNode::concatenate(NewMD, CSM);
      NI->setMetadata(LLVMContext::MD_noalias, NewMD);
    } else if (NI->mayReadOrWriteMemory()) {
      if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
        NI->setMetadata(LLVMContext::MD_noalias, M);
    }
  }
}

/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR) {
  if (!EnableNoAliasConversion)
    return;

  const Function *CalledFunc = CS.getCalledFunction();
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (CS.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.

  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    std::string Name = std::string(CalledFunc->getName());
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(A, NewScope));
  }

  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI)
        continue;

      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        PtrArgs.push_back(LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        PtrArgs.push_back(SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
        PtrArgs.push_back(VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
        PtrArgs.push_back(CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
        PtrArgs.push_back(RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
          if (AAResults::onlyAccessesArgPointees(MRB))
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // We need to check the underlying objects of all arguments, not just
          // the pointer arguments, because we might be passing pointers as
          // integers, etc.
          // However, if we know that the call only accesses pointer arguments,
          // then we only need to check the pointer arguments.
          if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, then we might just alias with none of the
      // noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but you
      // need to go through several PHIs to see it, and thus could be
      // repeated in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      SmallSetVector<const Argument *, 4> NAPtrArgs;
      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);

        for (const Value *O : Objects)
          ObjSet.insert(O);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
                             isa<ConstantPointerNull>(V) ||
                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(V)) {
          if (!CS.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        // If this is not some identified function-local object (which cannot
        // directly alias a noalias argument), or some other argument (which,
        // by definition, also cannot alias a noalias argument), then we could
        // alias a noalias argument that has been captured.
        if (!isa<Argument>(V) &&
            !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
          CanDeriveViaCapture = true;
      }

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        CanDeriveViaCapture = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
      //   1. The noalias argument is not in the set of objects from which we
      //      definitely derive.
      //   2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
                                 // It might be tempting to skip the
                                 // PointerMayBeCapturedBefore check if
                                 // A->hasNoCaptureAttr() is true, but this is
                                 // incorrect because nocapture only guarantees
                                 // that no copies outlive the function, not
                                 // that the value cannot be locally captured.
                                 !PointerMayBeCapturedBefore(A,
                                     /* ReturnCaptures */ false,
                                     /* StoreCaptures */ false, I, &DT)))
          NoAliases.push_back(NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(LLVMContext::MD_noalias,
                        MDNode::concatenate(
                            NI->getMetadata(LLVMContext::MD_noalias),
                            MDNode::get(CalledFunc->getContext(), NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
      bool CanAddScopes = !UsesAliasingPtr;
      if (CanAddScopes && IsFuncCall)
        CanAddScopes = IsArgMemOnlyCall;

      if (CanAddScopes)
        for (const Argument *A : NoAliasArgs) {
          if (ObjSet.count(A))
            Scopes.push_back(NewScopes[A]);
        }

      if (!Scopes.empty())
        NI->setMetadata(
            LLVMContext::MD_alias_scope,
            MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
                                MDNode::get(CalledFunc->getContext(), Scopes)));
    }
  }
}

/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
  if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
    return;

  AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller());
  auto &DL = CS.getCaller()->getParent()->getDataLayout();

  // To avoid inserting redundant assumptions, we should check for assumptions
  // already in the caller. To do this, we might need a DT of the caller.
  DominatorTree DT;
  bool DTCalculated = false;

  Function *CalledFunc = CS.getCalledFunction();
  for (Argument &Arg : CalledFunc->args()) {
    unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
    if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
      if (!DTCalculated) {
        DT.recalculate(*CS.getCaller());
        DTCalculated = true;
      }

      // If we can already prove the asserted alignment in the context of the
      // caller, then don't bother inserting the assumption.
      Value *ArgVal = CS.getArgument(Arg.getArgNo());
      if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)
        continue;

      CallInst *NewAsmp = IRBuilder<>(CS.getInstruction())
                              .CreateAlignmentAssumption(DL, ArgVal, Align);
      AC->registerAssumption(NewAsmp);
    }
  }
}

/// Once we have cloned code over from a callee into the caller,
/// update the specified callgraph to reflect the changes we made.
/// Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
                                         Function::iterator FirstNewBlock,
                                         ValueToValueMapTy &VMap,
                                         InlineFunctionInfo &IFI) {
  CallGraph &CG = *IFI.CG;
  const Function *Caller = CS.getCaller();
  const Function *Callee = CS.getCalledFunction();
  CallGraphNode *CalleeNode = CG[Callee];
  CallGraphNode *CallerNode = CG[Caller];

  // Since we inlined some uninlined call sites in the callee into the caller,
  // add edges from the caller to all of the callees of the callee.
  CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();

  // Consider the case where CalleeNode == CallerNode.
  CallGraphNode::CalledFunctionsVector CallCache;
  if (CalleeNode == CallerNode) {
    CallCache.assign(I, E);
    I = CallCache.begin();
    E = CallCache.end();
  }

  for (; I != E; ++I) {
    const Value *OrigCall = I->first;

    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
    // Only copy the edge if the call was inlined!
    if (VMI == VMap.end() || VMI->second == nullptr)
      continue;

    // If the call was inlined, but then constant folded, there is no edge to
    // add. Check for this case.
    auto *NewCall = dyn_cast<CallBase>(VMI->second);
    if (!NewCall)
      continue;

    // We do not treat intrinsic calls like real function calls because we
    // expect them to become inline code; do not add an edge for an intrinsic.
    if (NewCall->getCalledFunction() &&
        NewCall->getCalledFunction()->isIntrinsic())
      continue;

    // Remember that this call site got inlined for the client of
    // InlineFunction.
    IFI.InlinedCalls.push_back(NewCall);

    // It's possible that inlining the callsite will cause it to go from an
    // indirect to a direct call by resolving a function pointer. If this
    // happens, set the callee of the new call site to a more precise
    // destination. This can also happen if the call graph node of the caller
    // was just unnecessarily imprecise.
    if (!I->second->getFunction())
      if (Function *F = NewCall->getCalledFunction()) {
        // Indirect call site resolved to direct call.
        CallerNode->addCalledFunction(NewCall, CG[F]);

        continue;
      }

    CallerNode->addCalledFunction(NewCall, I->second);
  }

  // Update the call graph by deleting the edge from Callee to Caller. We must
  // do this after the loop above in case Caller and Callee are the same.
  CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
}

static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
                                    BasicBlock *InsertBlock,
                                    InlineFunctionInfo &IFI) {
  Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
  IRBuilder<> Builder(InsertBlock, InsertBlock->begin());

  Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));

  // Always generate a memcpy of alignment 1 here because we don't know
  // the alignment of the src pointer. Other optimizations can infer
  // better alignment.
  Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
                       /*SrcAlign*/ Align(1), Size);
}

/// When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                  const Function *CalledFunc,
                                  InlineFunctionInfo &IFI,
                                  unsigned ByValAlignment) {
  PointerType *ArgTy = cast<PointerType>(Arg->getType());
  Type *AggTy = ArgTy->getElementType();

  Function *Caller = TheCall->getFunction();
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // If the called function is readonly, then it could not mutate the caller's
  // copy of the byval'd memory. In this case, it is safe to elide the copy and
  // temporary.
  if (CalledFunc->onlyReadsMemory()) {
    // If the byval argument has a specified alignment that is greater than the
    // passed in pointer, then we either have to round up the input pointer or
    // give up on this transformation.
    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
      return Arg;

    AssumptionCache *AC =
        IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;

    // If the pointer is already known to be sufficiently aligned, or if we can
    // round it up to a larger alignment, then we don't need a temporary.
    if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
        ByValAlignment)
      return Arg;

    // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
    // for code quality, but rarely happens and is required for correctness.
  }

  // Create the alloca. If we have DataLayout, use nice alignment.
  Align Alignment(DL.getPrefTypeAlignment(AggTy));

  // If the byval had an alignment specified, we *must* use at least that
  // alignment, as it is required by the byval argument (and uses of the
  // pointer inside the callee).
  Alignment = max(Alignment, MaybeAlign(ByValAlignment));

  Value *NewAlloca =
      new AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
                     Arg->getName(), &*Caller->begin()->begin());
  IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));

  // Uses of the argument in the function should use our new alloca
  // instead.
  return NewAlloca;
}

// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
  for (User *U : V->users())
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
      if (II->isLifetimeStartOrEnd())
        return true;
  return false;
}

// Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
  Type *Ty = AI->getType();
  Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
                                       Ty->getPointerAddressSpace());
  if (Ty == Int8PtrTy)
    return isUsedByLifetimeMarker(AI);

  // Do a scan to find all the casts to i8*.
  for (User *U : AI->users()) {
    if (U->getType() != Int8PtrTy) continue;
    if (U->stripPointerCasts() != AI) continue;
    if (isUsedByLifetimeMarker(U))
      return true;
  }
  return false;
}

/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
/// block. Allocas used in inalloca calls and allocas of dynamic array size
/// cannot be static.
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
  return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
}

/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
                               LLVMContext &Ctx,
                               DenseMap<const MDNode *, MDNode *> &IANodes) {
  auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
  return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
                       IA);
}

/// Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                             Instruction *TheCall, bool CalleeHasDebugInfo) {
  const DebugLoc &TheCallDL = TheCall->getDebugLoc();
  if (!TheCallDL)
    return;

  auto &Ctx = Fn->getContext();
  DILocation *InlinedAtNode = TheCallDL;

  // Create a unique call site, not to be confused with any other call from the
  // same location.
  InlinedAtNode = DILocation::getDistinct(
      Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
      InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());

  // Cache the inlined-at nodes as they're built so they are reused, without
  // this every instruction's inlined-at chain would become distinct from each
  // other.
1379 DenseMap<const MDNode *, MDNode *> IANodes; 1380 1381 // Check if we are not generating inline line tables and want to use 1382 // the call site location instead. 1383 bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables"); 1384 1385 for (; FI != Fn->end(); ++FI) { 1386 for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); 1387 BI != BE; ++BI) { 1388 // Loop metadata needs to be updated so that the start and end locs 1389 // reference inlined-at locations. 1390 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes]( 1391 const DILocation &Loc) -> DILocation * { 1392 return inlineDebugLoc(&Loc, InlinedAtNode, Ctx, IANodes).get(); 1393 }; 1394 updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc); 1395 1396 if (!NoInlineLineTables) 1397 if (DebugLoc DL = BI->getDebugLoc()) { 1398 DebugLoc IDL = 1399 inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes); 1400 BI->setDebugLoc(IDL); 1401 continue; 1402 } 1403 1404 if (CalleeHasDebugInfo && !NoInlineLineTables) 1405 continue; 1406 1407 // If the inlined instruction has no line number, or if inline info 1408 // is not being generated, make it look as if it originates from the call 1409 // location. This is important for ((__always_inline, __nodebug__)) 1410 // functions which must use caller location for all instructions in their 1411 // function body. 1412 1413 // Don't update static allocas, as they may get moved later. 1414 if (auto *AI = dyn_cast<AllocaInst>(BI)) 1415 if (allocaWouldBeStaticInEntry(AI)) 1416 continue; 1417 1418 BI->setDebugLoc(TheCallDL); 1419 } 1420 1421 // Remove debug info intrinsics if we're not keeping inline info. 1422 if (NoInlineLineTables) { 1423 BasicBlock::iterator BI = FI->begin(); 1424 while (BI != FI->end()) { 1425 if (isa<DbgInfoIntrinsic>(BI)) { 1426 BI = BI->eraseFromParent(); 1427 continue; 1428 } 1429 ++BI; 1430 } 1431 } 1432 1433 } 1434 } 1435 1436 /// Update the block frequencies of the caller after a callee has been inlined. 1437 /// 1438 /// Each block cloned into the caller has its block frequency scaled by the 1439 /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of 1440 /// callee's entry block gets the same frequency as the callsite block and the 1441 /// relative frequencies of all cloned blocks remain the same after cloning. 1442 static void updateCallerBFI(BasicBlock *CallSiteBlock, 1443 const ValueToValueMapTy &VMap, 1444 BlockFrequencyInfo *CallerBFI, 1445 BlockFrequencyInfo *CalleeBFI, 1446 const BasicBlock &CalleeEntryBlock) { 1447 SmallPtrSet<BasicBlock *, 16> ClonedBBs; 1448 for (auto Entry : VMap) { 1449 if (!isa<BasicBlock>(Entry.first) || !Entry.second) 1450 continue; 1451 auto *OrigBB = cast<BasicBlock>(Entry.first); 1452 auto *ClonedBB = cast<BasicBlock>(Entry.second); 1453 uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); 1454 if (!ClonedBBs.insert(ClonedBB).second) { 1455 // Multiple blocks in the callee might get mapped to one cloned block in 1456 // the caller since we prune the callee as we clone it. When that happens, 1457 // we want to use the maximum among the original blocks' frequencies. 
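      // For example (hypothetical numbers): if callee blocks %a (freq 100)
      // and %b (freq 40) both map to the same cloned block, the cloned block
      // ends up with the larger frequency, 100, before the final scaling.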
1458       uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
1459       if (NewFreq > Freq)
1460         Freq = NewFreq;
1461     }
1462     CallerBFI->setBlockFreq(ClonedBB, Freq);
1463   }
1464   BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
1465   CallerBFI->setBlockFreqAndScale(
1466       EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
1467       ClonedBBs);
1468 }
1469 
1470 /// Update the branch metadata for cloned call instructions.
1471 static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
1472                               const ProfileCount &CalleeEntryCount,
1473                               const Instruction *TheCall,
1474                               ProfileSummaryInfo *PSI,
1475                               BlockFrequencyInfo *CallerBFI) {
1476   if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
1477       CalleeEntryCount.getCount() < 1)
1478     return;
1479   auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
1480   int64_t CallCount =
1481       std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
1482                CalleeEntryCount.getCount());
1483   updateProfileCallee(Callee, -CallCount, &VMap);
1484 }
1485 
1486 void llvm::updateProfileCallee(
1487     Function *Callee, int64_t entryDelta,
1488     const ValueMap<const Value *, WeakTrackingVH> *VMap) {
1489   auto CalleeCount = Callee->getEntryCount();
1490   if (!CalleeCount.hasValue())
1491     return;
1492 
1493   uint64_t priorEntryCount = CalleeCount.getCount();
1494   uint64_t newEntryCount;
1495 
1496   // Since CallSiteCount is an estimate, it could exceed the original callee
1497   // count; clamp the new count to 0 to guard against underflow.
1498   if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
1499     newEntryCount = 0;
1500   else
1501     newEntryCount = priorEntryCount + entryDelta;
1502 
1503   // Is this update happening as part of inlining (i.e. was a VMap provided)?
1504   if (VMap) {
1505     uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
1506     for (auto Entry : *VMap)
1507       if (isa<CallInst>(Entry.first))
1508         if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
1509           CI->updateProfWeight(cloneEntryCount, priorEntryCount);
1510   }
1511 
1512   if (entryDelta) {
1513     Callee->setEntryCount(newEntryCount);
1514 
1515     for (BasicBlock &BB : *Callee)
1516       // No need to update the callsite if it is pruned during inlining.
1517       if (!VMap || VMap->count(&BB))
1518         for (Instruction &I : BB)
1519           if (CallInst *CI = dyn_cast<CallInst>(&I))
1520             CI->updateProfWeight(newEntryCount, priorEntryCount);
1521   }
1522 }
1523 
1524 /// This function inlines the called function into the basic block of the
1525 /// caller. This returns a failure result if it is not possible to inline this
1526 /// call, in which case the program is still in a well-defined state.
1527 ///
1528 /// Note that this only does one level of inlining. For example, if the
1529 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
1530 /// exists in the instruction stream. Similarly this will inline a recursive
1531 /// function by one level.
1532 llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
1533                                         AAResults *CalleeAAR,
1534                                         bool InsertLifetime,
1535                                         Function *ForwardVarArgsTo) {
1536   Instruction *TheCall = CS.getInstruction();
1537   assert(TheCall->getParent() && TheCall->getFunction()
1538          && "Instruction not in function!");
1539 
1540   // FIXME: we don't inline callbr yet.
1541   if (isa<CallBrInst>(TheCall))
1542     return InlineResult::failure("We don't inline callbr yet.");
1543 
1544   // If IFI has any state in it, zap it before we fill it in.
1545 IFI.reset(); 1546 1547 Function *CalledFunc = CS.getCalledFunction(); 1548 if (!CalledFunc || // Can't inline external function or indirect 1549 CalledFunc->isDeclaration()) // call! 1550 return InlineResult::failure("external or indirect"); 1551 1552 // The inliner does not know how to inline through calls with operand bundles 1553 // in general ... 1554 if (CS.hasOperandBundles()) { 1555 for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { 1556 uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); 1557 // ... but it knows how to inline through "deopt" operand bundles ... 1558 if (Tag == LLVMContext::OB_deopt) 1559 continue; 1560 // ... and "funclet" operand bundles. 1561 if (Tag == LLVMContext::OB_funclet) 1562 continue; 1563 1564 return InlineResult::failure("unsupported operand bundle"); 1565 } 1566 } 1567 1568 // If the call to the callee cannot throw, set the 'nounwind' flag on any 1569 // calls that we inline. 1570 bool MarkNoUnwind = CS.doesNotThrow(); 1571 1572 BasicBlock *OrigBB = TheCall->getParent(); 1573 Function *Caller = OrigBB->getParent(); 1574 1575 // GC poses two hazards to inlining, which only occur when the callee has GC: 1576 // 1. If the caller has no GC, then the callee's GC must be propagated to the 1577 // caller. 1578 // 2. If the caller has a differing GC, it is invalid to inline. 1579 if (CalledFunc->hasGC()) { 1580 if (!Caller->hasGC()) 1581 Caller->setGC(CalledFunc->getGC()); 1582 else if (CalledFunc->getGC() != Caller->getGC()) 1583 return InlineResult::failure("incompatible GC"); 1584 } 1585 1586 // Get the personality function from the callee if it contains a landing pad. 1587 Constant *CalledPersonality = 1588 CalledFunc->hasPersonalityFn() 1589 ? CalledFunc->getPersonalityFn()->stripPointerCasts() 1590 : nullptr; 1591 1592 // Find the personality function used by the landing pads of the caller. If it 1593 // exists, then check to see that it matches the personality function used in 1594 // the callee. 1595 Constant *CallerPersonality = 1596 Caller->hasPersonalityFn() 1597 ? Caller->getPersonalityFn()->stripPointerCasts() 1598 : nullptr; 1599 if (CalledPersonality) { 1600 if (!CallerPersonality) 1601 Caller->setPersonalityFn(CalledPersonality); 1602 // If the personality functions match, then we can perform the 1603 // inlining. Otherwise, we can't inline. 1604 // TODO: This isn't 100% true. Some personality functions are proper 1605 // supersets of others and can be used in place of the other. 1606 else if (CalledPersonality != CallerPersonality) 1607 return InlineResult::failure("incompatible personality"); 1608 } 1609 1610 // We need to figure out which funclet the callsite was in so that we may 1611 // properly nest the callee. 1612 Instruction *CallSiteEHPad = nullptr; 1613 if (CallerPersonality) { 1614 EHPersonality Personality = classifyEHPersonality(CallerPersonality); 1615 if (isScopedEHPersonality(Personality)) { 1616 Optional<OperandBundleUse> ParentFunclet = 1617 CS.getOperandBundle(LLVMContext::OB_funclet); 1618 if (ParentFunclet) 1619 CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); 1620 1621 // OK, the inlining site is legal. What about the target function? 1622 1623 if (CallSiteEHPad) { 1624 if (Personality == EHPersonality::MSVC_CXX) { 1625 // The MSVC personality cannot tolerate catches getting inlined into 1626 // cleanup funclets. 1627 if (isa<CleanupPadInst>(CallSiteEHPad)) { 1628 // Ok, the call site is within a cleanuppad. Let's check the callee 1629 // for catchpads. 
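            // (Illustrative: a 'catchswitch within none' in the callee would,
            // after the parent-pad rewrite performed later in this function,
            // end up nested under this cleanuppad, which the MSVC personality
            // cannot tolerate, so we refuse to inline below.)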
1630             for (const BasicBlock &CalledBB : *CalledFunc) {
1631               if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
1632                 return InlineResult::failure("catch in cleanup funclet");
1633             }
1634           }
1635         } else if (isAsynchronousEHPersonality(Personality)) {
1636           // SEH is even less tolerant; there may not be any sort of exceptional
1637           // funclet in the callee.
1638           for (const BasicBlock &CalledBB : *CalledFunc) {
1639             if (CalledBB.isEHPad())
1640               return InlineResult::failure("SEH in cleanup funclet");
1641           }
1642         }
1643       }
1644     }
1645   }
1646 
1647   // Determine if we are dealing with a call in an EHPad which does not unwind
1648   // to caller.
1649   bool EHPadForCallUnwindsLocally = false;
1650   if (CallSiteEHPad && CS.isCall()) {
1651     UnwindDestMemoTy FuncletUnwindMap;
1652     Value *CallSiteUnwindDestToken =
1653         getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
1654 
1655     EHPadForCallUnwindsLocally =
1656         CallSiteUnwindDestToken &&
1657         !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
1658   }
1659 
1660   // Get an iterator to the last basic block in the function, which will have
1661   // the new function inlined after it.
1662   Function::iterator LastBlock = --Caller->end();
1663 
1664   // Make sure to capture all of the return instructions from the cloned
1665   // function.
1666   SmallVector<ReturnInst*, 8> Returns;
1667   ClonedCodeInfo InlinedFunctionInfo;
1668   Function::iterator FirstNewBlock;
1669 
1670   { // Scope to destroy VMap after cloning.
1671     ValueToValueMapTy VMap;
1672     // Keep a list of pairs (dst, src) to emit byval initializations.
1673     SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
1674 
1675     auto &DL = Caller->getParent()->getDataLayout();
1676 
1677     // Calculate the vector of arguments to pass into the function cloner, which
1678     // matches up the formal to the actual argument values.
1679     CallSite::arg_iterator AI = CS.arg_begin();
1680     unsigned ArgNo = 0;
1681     for (Function::arg_iterator I = CalledFunc->arg_begin(),
1682          E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
1683       Value *ActualArg = *AI;
1684 
1685       // When byval arguments are actually inlined, we need to make the copy
1686       // implied by them explicit. However, we don't do this if the callee is
1687       // readonly or readnone, because the copy would be unneeded: the callee
1688       // doesn't modify the struct.
1689       if (CS.isByValArgument(ArgNo)) {
1690         ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
1691                                         CalledFunc->getParamAlignment(ArgNo));
1692         if (ActualArg != *AI)
1693           ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
1694       }
1695 
1696       VMap[&*I] = ActualArg;
1697     }
1698 
1699     // TODO: Remove this when users have been updated to use the assume bundles.
1700     // Add alignment assumptions if necessary. We do this before the inlined
1701     // instructions are actually cloned into the caller so that we can easily
1702     // check what will be known at the start of the inlined code.
1703     AddAlignmentAssumptions(CS, IFI);
1704 
1705     /// Preserve all attributes of the call and its parameters.
1706     if (Instruction *Assume = BuildAssumeFromInst(CS.getInstruction()))
1707       Assume->insertBefore(CS.getInstruction());
1708 
1709     // We want the inliner to prune the code as it copies. We would LOVE to
1710     // have no dead or constant instructions leftover after inlining occurs
1711     // (which can happen, e.g., because an argument was constant), but we'll be
1712     // happy with whatever the cloner can do.
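    // For example (illustrative): if the call site passed the constant 'true'
    // for an i1 parameter, VMap maps that parameter to the constant, so the
    // cloner can fold a 'br i1' on it in the callee into an unconditional
    // branch and skip cloning the untaken block altogether.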
1713 CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 1714 /*ModuleLevelChanges=*/false, Returns, ".i", 1715 &InlinedFunctionInfo, TheCall); 1716 // Remember the first block that is newly cloned over. 1717 FirstNewBlock = LastBlock; ++FirstNewBlock; 1718 1719 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) 1720 // Update the BFI of blocks cloned into the caller. 1721 updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, 1722 CalledFunc->front()); 1723 1724 updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, 1725 IFI.PSI, IFI.CallerBFI); 1726 1727 // Inject byval arguments initialization. 1728 for (std::pair<Value*, Value*> &Init : ByValInit) 1729 HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), 1730 &*FirstNewBlock, IFI); 1731 1732 Optional<OperandBundleUse> ParentDeopt = 1733 CS.getOperandBundle(LLVMContext::OB_deopt); 1734 if (ParentDeopt) { 1735 SmallVector<OperandBundleDef, 2> OpDefs; 1736 1737 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { 1738 Instruction *I = dyn_cast_or_null<Instruction>(VH); 1739 if (!I) continue; // instruction was DCE'd or RAUW'ed to undef 1740 1741 OpDefs.clear(); 1742 1743 CallSite ICS(I); 1744 OpDefs.reserve(ICS.getNumOperandBundles()); 1745 1746 for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { 1747 auto ChildOB = ICS.getOperandBundleAt(i); 1748 if (ChildOB.getTagID() != LLVMContext::OB_deopt) { 1749 // If the inlined call has other operand bundles, let them be 1750 OpDefs.emplace_back(ChildOB); 1751 continue; 1752 } 1753 1754 // It may be useful to separate this logic (of handling operand 1755 // bundles) out to a separate "policy" component if this gets crowded. 1756 // Prepend the parent's deoptimization continuation to the newly 1757 // inlined call's deoptimization continuation. 1758 std::vector<Value *> MergedDeoptArgs; 1759 MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + 1760 ChildOB.Inputs.size()); 1761 1762 MergedDeoptArgs.insert(MergedDeoptArgs.end(), 1763 ParentDeopt->Inputs.begin(), 1764 ParentDeopt->Inputs.end()); 1765 MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), 1766 ChildOB.Inputs.end()); 1767 1768 OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); 1769 } 1770 1771 Instruction *NewI = nullptr; 1772 if (isa<CallInst>(I)) 1773 NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); 1774 else if (isa<CallBrInst>(I)) 1775 NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I); 1776 else 1777 NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); 1778 1779 // Note: the RAUW does the appropriate fixup in VMap, so we need to do 1780 // this even if the call returns void. 1781 I->replaceAllUsesWith(NewI); 1782 1783 VH = nullptr; 1784 I->eraseFromParent(); 1785 } 1786 } 1787 1788 // Update the callgraph if requested. 1789 if (IFI.CG) 1790 UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); 1791 1792 // For 'nodebug' functions, the associated DISubprogram is always null. 1793 // Conservatively avoid propagating the callsite debug location to 1794 // instructions inlined from a function whose DISubprogram is not null. 1795 fixupLineNumbers(Caller, FirstNewBlock, TheCall, 1796 CalledFunc->getSubprogram() != nullptr); 1797 1798 // Clone existing noalias metadata if necessary. 1799 CloneAliasScopeMetadata(CS, VMap); 1800 1801 // Add noalias metadata if necessary. 1802 AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); 1803 1804 // Propagate llvm.mem.parallel_loop_access if necessary. 
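    // (Illustrative: if the call site itself was annotated as part of a
    // parallel loop, the cloned loads and stores need the same annotation,
    // or the enclosing loop would presumably no longer look parallel to
    // later passes.)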
1805 PropagateParallelLoopAccessMetadata(CS, VMap); 1806 1807 // Register any cloned assumptions. 1808 if (IFI.GetAssumptionCache) 1809 for (BasicBlock &NewBlock : 1810 make_range(FirstNewBlock->getIterator(), Caller->end())) 1811 for (Instruction &I : NewBlock) { 1812 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 1813 if (II->getIntrinsicID() == Intrinsic::assume) 1814 (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II); 1815 } 1816 } 1817 1818 // If there are any alloca instructions in the block that used to be the entry 1819 // block for the callee, move them to the entry block of the caller. First 1820 // calculate which instruction they should be inserted before. We insert the 1821 // instructions at the end of the current alloca list. 1822 { 1823 BasicBlock::iterator InsertPoint = Caller->begin()->begin(); 1824 for (BasicBlock::iterator I = FirstNewBlock->begin(), 1825 E = FirstNewBlock->end(); I != E; ) { 1826 AllocaInst *AI = dyn_cast<AllocaInst>(I++); 1827 if (!AI) continue; 1828 1829 // If the alloca is now dead, remove it. This often occurs due to code 1830 // specialization. 1831 if (AI->use_empty()) { 1832 AI->eraseFromParent(); 1833 continue; 1834 } 1835 1836 if (!allocaWouldBeStaticInEntry(AI)) 1837 continue; 1838 1839 // Keep track of the static allocas that we inline into the caller. 1840 IFI.StaticAllocas.push_back(AI); 1841 1842 // Scan for the block of allocas that we can move over, and move them 1843 // all at once. 1844 while (isa<AllocaInst>(I) && 1845 !cast<AllocaInst>(I)->use_empty() && 1846 allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { 1847 IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); 1848 ++I; 1849 } 1850 1851 // Transfer all of the allocas over in a block. Using splice means 1852 // that the instructions aren't removed from the symbol table, then 1853 // reinserted. 1854 Caller->getEntryBlock().getInstList().splice( 1855 InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); 1856 } 1857 } 1858 1859 SmallVector<Value*,4> VarArgsToForward; 1860 SmallVector<AttributeSet, 4> VarArgsAttrs; 1861 for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); 1862 i < CS.getNumArgOperands(); i++) { 1863 VarArgsToForward.push_back(CS.getArgOperand(i)); 1864 VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); 1865 } 1866 1867 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; 1868 if (InlinedFunctionInfo.ContainsCalls) { 1869 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; 1870 if (CallInst *CI = dyn_cast<CallInst>(TheCall)) 1871 CallSiteTailKind = CI->getTailCallKind(); 1872 1873 // For inlining purposes, the "notail" marker is the same as no marker. 1874 if (CallSiteTailKind == CallInst::TCK_NoTail) 1875 CallSiteTailKind = CallInst::TCK_None; 1876 1877 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; 1878 ++BB) { 1879 for (auto II = BB->begin(); II != BB->end();) { 1880 Instruction &I = *II++; 1881 CallInst *CI = dyn_cast<CallInst>(&I); 1882 if (!CI) 1883 continue; 1884 1885 // Forward varargs from inlined call site to calls to the 1886 // ForwardVarArgsTo function, if requested, and to musttail calls. 1887 if (!VarArgsToForward.empty() && 1888 ((ForwardVarArgsTo && 1889 CI->getCalledFunction() == ForwardVarArgsTo) || 1890 CI->isMustTailCall())) { 1891 // Collect attributes for non-vararg parameters. 
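          // (Illustrative: if the outer call site was
          //   call void (i32, ...) @f(i32 1, i64 %x)
          // where @f and %x are hypothetical names, then %x and its parameter
          // attributes are appended to this inner call's argument and
          // attribute lists below.)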
1892 AttributeList Attrs = CI->getAttributes(); 1893 SmallVector<AttributeSet, 8> ArgAttrs; 1894 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { 1895 for (unsigned ArgNo = 0; 1896 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) 1897 ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); 1898 } 1899 1900 // Add VarArg attributes. 1901 ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); 1902 Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(), 1903 Attrs.getRetAttributes(), ArgAttrs); 1904 // Add VarArgs to existing parameters. 1905 SmallVector<Value *, 6> Params(CI->arg_operands()); 1906 Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); 1907 CallInst *NewCI = CallInst::Create( 1908 CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI); 1909 NewCI->setDebugLoc(CI->getDebugLoc()); 1910 NewCI->setAttributes(Attrs); 1911 NewCI->setCallingConv(CI->getCallingConv()); 1912 CI->replaceAllUsesWith(NewCI); 1913 CI->eraseFromParent(); 1914 CI = NewCI; 1915 } 1916 1917 if (Function *F = CI->getCalledFunction()) 1918 InlinedDeoptimizeCalls |= 1919 F->getIntrinsicID() == Intrinsic::experimental_deoptimize; 1920 1921 // We need to reduce the strength of any inlined tail calls. For 1922 // musttail, we have to avoid introducing potential unbounded stack 1923 // growth. For example, if functions 'f' and 'g' are mutually recursive 1924 // with musttail, we can inline 'g' into 'f' so long as we preserve 1925 // musttail on the cloned call to 'f'. If either the inlined call site 1926 // or the cloned call site is *not* musttail, the program already has 1927 // one frame of stack growth, so it's safe to remove musttail. Here is 1928 // a table of example transformations: 1929 // 1930 // f -> musttail g -> musttail f ==> f -> musttail f 1931 // f -> musttail g -> tail f ==> f -> tail f 1932 // f -> g -> musttail f ==> f -> f 1933 // f -> g -> tail f ==> f -> f 1934 // 1935 // Inlined notail calls should remain notail calls. 1936 CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); 1937 if (ChildTCK != CallInst::TCK_NoTail) 1938 ChildTCK = std::min(CallSiteTailKind, ChildTCK); 1939 CI->setTailCallKind(ChildTCK); 1940 InlinedMustTailCalls |= CI->isMustTailCall(); 1941 1942 // Calls inlined through a 'nounwind' call site should be marked 1943 // 'nounwind'. 1944 if (MarkNoUnwind) 1945 CI->setDoesNotThrow(); 1946 } 1947 } 1948 } 1949 1950 // Leave lifetime markers for the static alloca's, scoping them to the 1951 // function we just inlined. 1952 if (InsertLifetime && !IFI.StaticAllocas.empty()) { 1953 IRBuilder<> builder(&FirstNewBlock->front()); 1954 for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { 1955 AllocaInst *AI = IFI.StaticAllocas[ai]; 1956 // Don't mark swifterror allocas. They can't have bitcast uses. 1957 if (AI->isSwiftError()) 1958 continue; 1959 1960 // If the alloca is already scoped to something smaller than the whole 1961 // function then there's no need to add redundant, less accurate markers. 1962 if (hasLifetimeMarkers(AI)) 1963 continue; 1964 1965 // Try to determine the size of the allocation. 1966 ConstantInt *AllocaSize = nullptr; 1967 if (ConstantInt *AIArraySize = 1968 dyn_cast<ConstantInt>(AI->getArraySize())) { 1969 auto &DL = Caller->getParent()->getDataLayout(); 1970 Type *AllocaType = AI->getAllocatedType(); 1971 uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); 1972 uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); 1973 1974 // Don't add markers for zero-sized allocas. 
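        // (Illustrative: an 'alloca [4 x i32]' with a constant array size of 1
        // yields AllocaSize = i64 16 below. Zero-sized allocas get no markers
        // at all, and if the size cannot be computed or would overflow,
        // AllocaSize stays null and the markers carry an unspecified size.)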
1975 if (AllocaArraySize == 0) 1976 continue; 1977 1978 // Check that array size doesn't saturate uint64_t and doesn't 1979 // overflow when it's multiplied by type size. 1980 if (AllocaArraySize != std::numeric_limits<uint64_t>::max() && 1981 std::numeric_limits<uint64_t>::max() / AllocaArraySize >= 1982 AllocaTypeSize) { 1983 AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), 1984 AllocaArraySize * AllocaTypeSize); 1985 } 1986 } 1987 1988 builder.CreateLifetimeStart(AI, AllocaSize); 1989 for (ReturnInst *RI : Returns) { 1990 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize 1991 // call and a return. The return kills all local allocas. 1992 if (InlinedMustTailCalls && 1993 RI->getParent()->getTerminatingMustTailCall()) 1994 continue; 1995 if (InlinedDeoptimizeCalls && 1996 RI->getParent()->getTerminatingDeoptimizeCall()) 1997 continue; 1998 IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); 1999 } 2000 } 2001 } 2002 2003 // If the inlined code contained dynamic alloca instructions, wrap the inlined 2004 // code with llvm.stacksave/llvm.stackrestore intrinsics. 2005 if (InlinedFunctionInfo.ContainsDynamicAllocas) { 2006 Module *M = Caller->getParent(); 2007 // Get the two intrinsics we care about. 2008 Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); 2009 Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); 2010 2011 // Insert the llvm.stacksave. 2012 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) 2013 .CreateCall(StackSave, {}, "savedstack"); 2014 2015 // Insert a call to llvm.stackrestore before any return instructions in the 2016 // inlined function. 2017 for (ReturnInst *RI : Returns) { 2018 // Don't insert llvm.stackrestore calls between a musttail or deoptimize 2019 // call and a return. The return will restore the stack pointer. 2020 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) 2021 continue; 2022 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) 2023 continue; 2024 IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); 2025 } 2026 } 2027 2028 // If we are inlining for an invoke instruction, we must make sure to rewrite 2029 // any call instructions into invoke instructions. This is sensitive to which 2030 // funclet pads were top-level in the inlinee, so must be done before 2031 // rewriting the "parent pad" links. 2032 if (auto *II = dyn_cast<InvokeInst>(TheCall)) { 2033 BasicBlock *UnwindDest = II->getUnwindDest(); 2034 Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); 2035 if (isa<LandingPadInst>(FirstNonPHI)) { 2036 HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); 2037 } else { 2038 HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); 2039 } 2040 } 2041 2042 // Update the lexical scopes of the new funclets and callsites. 2043 // Anything that had 'none' as its parent is now nested inside the callsite's 2044 // EHPad. 2045 2046 if (CallSiteEHPad) { 2047 for (Function::iterator BB = FirstNewBlock->getIterator(), 2048 E = Caller->end(); 2049 BB != E; ++BB) { 2050 // Add bundle operands to any top-level call sites. 2051 SmallVector<OperandBundleDef, 1> OpBundles; 2052 for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { 2053 Instruction *I = &*BBI++; 2054 CallSite CS(I); 2055 if (!CS) 2056 continue; 2057 2058 // Skip call sites which are nounwind intrinsics. 
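        // (Illustrative: after the skips below, a cloned top-level
        //   call void @g()
        // from the callee is recreated as
        //   call void @g() [ "funclet"(token %callsite.pad) ]
        // where @g and %callsite.pad are hypothetical names standing in for
        // the callee's call target and CallSiteEHPad.)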
2059 auto *CalledFn = 2060 dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); 2061 if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) 2062 continue; 2063 2064 // Skip call sites which already have a "funclet" bundle. 2065 if (CS.getOperandBundle(LLVMContext::OB_funclet)) 2066 continue; 2067 2068 CS.getOperandBundlesAsDefs(OpBundles); 2069 OpBundles.emplace_back("funclet", CallSiteEHPad); 2070 2071 Instruction *NewInst; 2072 if (CS.isCall()) 2073 NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); 2074 else if (CS.isCallBr()) 2075 NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I); 2076 else 2077 NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); 2078 NewInst->takeName(I); 2079 I->replaceAllUsesWith(NewInst); 2080 I->eraseFromParent(); 2081 2082 OpBundles.clear(); 2083 } 2084 2085 // It is problematic if the inlinee has a cleanupret which unwinds to 2086 // caller and we inline it into a call site which doesn't unwind but into 2087 // an EH pad that does. Such an edge must be dynamically unreachable. 2088 // As such, we replace the cleanupret with unreachable. 2089 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) 2090 if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) 2091 changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false); 2092 2093 Instruction *I = BB->getFirstNonPHI(); 2094 if (!I->isEHPad()) 2095 continue; 2096 2097 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { 2098 if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) 2099 CatchSwitch->setParentPad(CallSiteEHPad); 2100 } else { 2101 auto *FPI = cast<FuncletPadInst>(I); 2102 if (isa<ConstantTokenNone>(FPI->getParentPad())) 2103 FPI->setParentPad(CallSiteEHPad); 2104 } 2105 } 2106 } 2107 2108 if (InlinedDeoptimizeCalls) { 2109 // We need to at least remove the deoptimizing returns from the Return set, 2110 // so that the control flow from those returns does not get merged into the 2111 // caller (but terminate it instead). If the caller's return type does not 2112 // match the callee's return type, we also need to change the return type of 2113 // the intrinsic. 2114 if (Caller->getReturnType() == TheCall->getType()) { 2115 auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { 2116 return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; 2117 }); 2118 Returns.erase(NewEnd, Returns.end()); 2119 } else { 2120 SmallVector<ReturnInst *, 8> NormalReturns; 2121 Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( 2122 Caller->getParent(), Intrinsic::experimental_deoptimize, 2123 {Caller->getReturnType()}); 2124 2125 for (ReturnInst *RI : Returns) { 2126 CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); 2127 if (!DeoptCall) { 2128 NormalReturns.push_back(RI); 2129 continue; 2130 } 2131 2132 // The calling convention on the deoptimize call itself may be bogus, 2133 // since the code we're inlining may have undefined behavior (and may 2134 // never actually execute at runtime); but all 2135 // @llvm.experimental.deoptimize declarations have to have the same 2136 // calling convention in a well-formed module. 
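        // (Illustrative: a cloned block ending in a call to the void-returning
        // deoptimize intrinsic followed by 'ret void' is rebuilt below as a
        // call to the variant overloaded on the caller's return type, e.g.
        // '%v = call i32 @llvm.experimental.deoptimize.i32(...)' followed by
        // 'ret i32 %v'; the '...' stands for whatever arguments and "deopt"
        // bundle the original call carried.)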
2137 auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); 2138 NewDeoptIntrinsic->setCallingConv(CallingConv); 2139 auto *CurBB = RI->getParent(); 2140 RI->eraseFromParent(); 2141 2142 SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(), 2143 DeoptCall->arg_end()); 2144 2145 SmallVector<OperandBundleDef, 1> OpBundles; 2146 DeoptCall->getOperandBundlesAsDefs(OpBundles); 2147 DeoptCall->eraseFromParent(); 2148 assert(!OpBundles.empty() && 2149 "Expected at least the deopt operand bundle"); 2150 2151 IRBuilder<> Builder(CurBB); 2152 CallInst *NewDeoptCall = 2153 Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); 2154 NewDeoptCall->setCallingConv(CallingConv); 2155 if (NewDeoptCall->getType()->isVoidTy()) 2156 Builder.CreateRetVoid(); 2157 else 2158 Builder.CreateRet(NewDeoptCall); 2159 } 2160 2161 // Leave behind the normal returns so we can merge control flow. 2162 std::swap(Returns, NormalReturns); 2163 } 2164 } 2165 2166 // Handle any inlined musttail call sites. In order for a new call site to be 2167 // musttail, the source of the clone and the inlined call site must have been 2168 // musttail. Therefore it's safe to return without merging control into the 2169 // phi below. 2170 if (InlinedMustTailCalls) { 2171 // Check if we need to bitcast the result of any musttail calls. 2172 Type *NewRetTy = Caller->getReturnType(); 2173 bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; 2174 2175 // Handle the returns preceded by musttail calls separately. 2176 SmallVector<ReturnInst *, 8> NormalReturns; 2177 for (ReturnInst *RI : Returns) { 2178 CallInst *ReturnedMustTail = 2179 RI->getParent()->getTerminatingMustTailCall(); 2180 if (!ReturnedMustTail) { 2181 NormalReturns.push_back(RI); 2182 continue; 2183 } 2184 if (!NeedBitCast) 2185 continue; 2186 2187 // Delete the old return and any preceding bitcast. 2188 BasicBlock *CurBB = RI->getParent(); 2189 auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); 2190 RI->eraseFromParent(); 2191 if (OldCast) 2192 OldCast->eraseFromParent(); 2193 2194 // Insert a new bitcast and return with the right type. 2195 IRBuilder<> Builder(CurBB); 2196 Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); 2197 } 2198 2199 // Leave behind the normal returns so we can merge control flow. 2200 std::swap(Returns, NormalReturns); 2201 } 2202 2203 // Now that all of the transforms on the inlined code have taken place but 2204 // before we splice the inlined code into the CFG and lose track of which 2205 // blocks were actually inlined, collect the call sites. We only do this if 2206 // call graph updates weren't requested, as those provide value handle based 2207 // tracking of inlined call sites instead. 2208 if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { 2209 // Otherwise just collect the raw call sites that were inlined. 2210 for (BasicBlock &NewBB : 2211 make_range(FirstNewBlock->getIterator(), Caller->end())) 2212 for (Instruction &I : NewBB) 2213 if (auto CS = CallSite(&I)) 2214 IFI.InlinedCallSites.push_back(CS); 2215 } 2216 2217 // If we cloned in _exactly one_ basic block, and if that block ends in a 2218 // return instruction, we splice the body of the inlined callee directly into 2219 // the calling basic block. 2220 if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { 2221 // Move all of the instructions right before the call. 
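    // (Illustrative: for a callee body that is just '%r = add i32 %a, %b'
    // followed by 'ret i32 %r', the add is spliced in ahead of the call, the
    // call's uses are redirected to %r, and both the call and the cloned ret
    // are erased, so no extra basic block survives; %r, %a and %b are
    // hypothetical names.)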
2222 OrigBB->getInstList().splice(TheCall->getIterator(), 2223 FirstNewBlock->getInstList(), 2224 FirstNewBlock->begin(), FirstNewBlock->end()); 2225 // Remove the cloned basic block. 2226 Caller->getBasicBlockList().pop_back(); 2227 2228 // If the call site was an invoke instruction, add a branch to the normal 2229 // destination. 2230 if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { 2231 BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); 2232 NewBr->setDebugLoc(Returns[0]->getDebugLoc()); 2233 } 2234 2235 // If the return instruction returned a value, replace uses of the call with 2236 // uses of the returned value. 2237 if (!TheCall->use_empty()) { 2238 ReturnInst *R = Returns[0]; 2239 if (TheCall == R->getReturnValue()) 2240 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 2241 else 2242 TheCall->replaceAllUsesWith(R->getReturnValue()); 2243 } 2244 // Since we are now done with the Call/Invoke, we can delete it. 2245 TheCall->eraseFromParent(); 2246 2247 // Since we are now done with the return instruction, delete it also. 2248 Returns[0]->eraseFromParent(); 2249 2250 // We are now done with the inlining. 2251 return InlineResult::success(); 2252 } 2253 2254 // Otherwise, we have the normal case, of more than one block to inline or 2255 // multiple return sites. 2256 2257 // We want to clone the entire callee function into the hole between the 2258 // "starter" and "ender" blocks. How we accomplish this depends on whether 2259 // this is an invoke instruction or a call instruction. 2260 BasicBlock *AfterCallBB; 2261 BranchInst *CreatedBranchToNormalDest = nullptr; 2262 if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { 2263 2264 // Add an unconditional branch to make this look like the CallInst case... 2265 CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); 2266 2267 // Split the basic block. This guarantees that no PHI nodes will have to be 2268 // updated due to new incoming edges, and make the invoke case more 2269 // symmetric to the call case. 2270 AfterCallBB = 2271 OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), 2272 CalledFunc->getName() + ".exit"); 2273 2274 } else { // It's a call 2275 // If this is a call instruction, we need to split the basic block that 2276 // the call lives in. 2277 // 2278 AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), 2279 CalledFunc->getName() + ".exit"); 2280 } 2281 2282 if (IFI.CallerBFI) { 2283 // Copy original BB's block frequency to AfterCallBB 2284 IFI.CallerBFI->setBlockFreq( 2285 AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); 2286 } 2287 2288 // Change the branch that used to go to AfterCallBB to branch to the first 2289 // basic block of the inlined function. 2290 // 2291 Instruction *Br = OrigBB->getTerminator(); 2292 assert(Br && Br->getOpcode() == Instruction::Br && 2293 "splitBasicBlock broken!"); 2294 Br->setOperand(0, &*FirstNewBlock); 2295 2296 // Now that the function is correct, make it a little bit nicer. In 2297 // particular, move the basic blocks inserted from the end of the function 2298 // into the space made by splitting the source basic block. 2299 Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), 2300 Caller->getBasicBlockList(), FirstNewBlock, 2301 Caller->end()); 2302 2303 // Handle all of the return instructions that we just cloned in, and eliminate 2304 // any users of the original call/invoke instruction. 
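  // (Illustrative: with two cloned returns 'ret i32 %a' and 'ret i32 %b', the
  // code below creates 'phi i32 [ %a, ... ], [ %b, ... ]' at the front of the
  // ".exit" block, redirects the call's users to that PHI, and replaces each
  // cloned ret with a branch to the ".exit" block; %a and %b are hypothetical
  // names.)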
2305 Type *RTy = CalledFunc->getReturnType(); 2306 2307 PHINode *PHI = nullptr; 2308 if (Returns.size() > 1) { 2309 // The PHI node should go at the front of the new basic block to merge all 2310 // possible incoming values. 2311 if (!TheCall->use_empty()) { 2312 PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), 2313 &AfterCallBB->front()); 2314 // Anything that used the result of the function call should now use the 2315 // PHI node as their operand. 2316 TheCall->replaceAllUsesWith(PHI); 2317 } 2318 2319 // Loop over all of the return instructions adding entries to the PHI node 2320 // as appropriate. 2321 if (PHI) { 2322 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 2323 ReturnInst *RI = Returns[i]; 2324 assert(RI->getReturnValue()->getType() == PHI->getType() && 2325 "Ret value not consistent in function!"); 2326 PHI->addIncoming(RI->getReturnValue(), RI->getParent()); 2327 } 2328 } 2329 2330 // Add a branch to the merge points and remove return instructions. 2331 DebugLoc Loc; 2332 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 2333 ReturnInst *RI = Returns[i]; 2334 BranchInst* BI = BranchInst::Create(AfterCallBB, RI); 2335 Loc = RI->getDebugLoc(); 2336 BI->setDebugLoc(Loc); 2337 RI->eraseFromParent(); 2338 } 2339 // We need to set the debug location to *somewhere* inside the 2340 // inlined function. The line number may be nonsensical, but the 2341 // instruction will at least be associated with the right 2342 // function. 2343 if (CreatedBranchToNormalDest) 2344 CreatedBranchToNormalDest->setDebugLoc(Loc); 2345 } else if (!Returns.empty()) { 2346 // Otherwise, if there is exactly one return value, just replace anything 2347 // using the return value of the call with the computed value. 2348 if (!TheCall->use_empty()) { 2349 if (TheCall == Returns[0]->getReturnValue()) 2350 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 2351 else 2352 TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); 2353 } 2354 2355 // Update PHI nodes that use the ReturnBB to use the AfterCallBB. 2356 BasicBlock *ReturnBB = Returns[0]->getParent(); 2357 ReturnBB->replaceAllUsesWith(AfterCallBB); 2358 2359 // Splice the code from the return block into the block that it will return 2360 // to, which contains the code that was after the call. 2361 AfterCallBB->getInstList().splice(AfterCallBB->begin(), 2362 ReturnBB->getInstList()); 2363 2364 if (CreatedBranchToNormalDest) 2365 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); 2366 2367 // Delete the return instruction now and empty ReturnBB now. 2368 Returns[0]->eraseFromParent(); 2369 ReturnBB->eraseFromParent(); 2370 } else if (!TheCall->use_empty()) { 2371 // No returns, but something is using the return value of the call. Just 2372 // nuke the result. 2373 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 2374 } 2375 2376 // Since we are now done with the Call/Invoke, we can delete it. 2377 TheCall->eraseFromParent(); 2378 2379 // If we inlined any musttail calls and the original return is now 2380 // unreachable, delete it. It can only contain a bitcast and ret. 2381 if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) 2382 AfterCallBB->eraseFromParent(); 2383 2384 // We should always be able to fold the entry block of the function into the 2385 // single predecessor of the block... 
2386 assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); 2387 BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); 2388 2389 // Splice the code entry block into calling block, right before the 2390 // unconditional branch. 2391 CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes 2392 OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); 2393 2394 // Remove the unconditional branch. 2395 OrigBB->getInstList().erase(Br); 2396 2397 // Now we can remove the CalleeEntry block, which is now empty. 2398 Caller->getBasicBlockList().erase(CalleeEntry); 2399 2400 // If we inserted a phi node, check to see if it has a single value (e.g. all 2401 // the entries are the same or undef). If so, remove the PHI so it doesn't 2402 // block other optimizations. 2403 if (PHI) { 2404 AssumptionCache *AC = 2405 IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; 2406 auto &DL = Caller->getParent()->getDataLayout(); 2407 if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { 2408 PHI->replaceAllUsesWith(V); 2409 PHI->eraseFromParent(); 2410 } 2411 } 2412 2413 return InlineResult::success(); 2414 } 2415