//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but it does not do so currently.
// This case would be best handled when and if LLVM begins supporting multiple
// return values from functions.
28 // 29 //===----------------------------------------------------------------------===// 30 31 #include "llvm/Transforms/IPO/ArgumentPromotion.h" 32 33 #include "llvm/ADT/DepthFirstIterator.h" 34 #include "llvm/ADT/None.h" 35 #include "llvm/ADT/Optional.h" 36 #include "llvm/ADT/STLExtras.h" 37 #include "llvm/ADT/ScopeExit.h" 38 #include "llvm/ADT/SmallPtrSet.h" 39 #include "llvm/ADT/SmallVector.h" 40 #include "llvm/ADT/Statistic.h" 41 #include "llvm/ADT/Twine.h" 42 #include "llvm/Analysis/AssumptionCache.h" 43 #include "llvm/Analysis/BasicAliasAnalysis.h" 44 #include "llvm/Analysis/CGSCCPassManager.h" 45 #include "llvm/Analysis/CallGraph.h" 46 #include "llvm/Analysis/CallGraphSCCPass.h" 47 #include "llvm/Analysis/LazyCallGraph.h" 48 #include "llvm/Analysis/Loads.h" 49 #include "llvm/Analysis/MemoryLocation.h" 50 #include "llvm/Analysis/TargetLibraryInfo.h" 51 #include "llvm/Analysis/TargetTransformInfo.h" 52 #include "llvm/Analysis/ValueTracking.h" 53 #include "llvm/IR/Argument.h" 54 #include "llvm/IR/Attributes.h" 55 #include "llvm/IR/BasicBlock.h" 56 #include "llvm/IR/CFG.h" 57 #include "llvm/IR/Constants.h" 58 #include "llvm/IR/DataLayout.h" 59 #include "llvm/IR/DerivedTypes.h" 60 #include "llvm/IR/Dominators.h" 61 #include "llvm/IR/Function.h" 62 #include "llvm/IR/IRBuilder.h" 63 #include "llvm/IR/InstrTypes.h" 64 #include "llvm/IR/Instruction.h" 65 #include "llvm/IR/Instructions.h" 66 #include "llvm/IR/Metadata.h" 67 #include "llvm/IR/Module.h" 68 #include "llvm/IR/NoFolder.h" 69 #include "llvm/IR/PassManager.h" 70 #include "llvm/IR/Type.h" 71 #include "llvm/IR/Use.h" 72 #include "llvm/IR/User.h" 73 #include "llvm/IR/Value.h" 74 #include "llvm/InitializePasses.h" 75 #include "llvm/Pass.h" 76 #include "llvm/Support/Casting.h" 77 #include "llvm/Support/Debug.h" 78 #include "llvm/Support/raw_ostream.h" 79 #include "llvm/Transforms/IPO.h" 80 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 81 #include <algorithm> 82 #include <cassert> 83 #include <cstdint> 84 
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

namespace {

/// A single promotable piece of an aggregate pointer argument: the scalar
/// type accessed at some byte offset, together with the required alignment.
struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};

/// (byte offset from the argument base, part description) — the unit in which
/// an argument's promotable accesses are collected and sorted.
using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace

/// Create a pointer of type \p ResElemTy* that points \p Offset bytes past
/// \p Ptr. For non-opaque pointers, try to create a "nice" (typed) GEP if
/// possible; otherwise fall back to an i8 GEP to the raw byte offset.
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise
  // fall back to an i8 GEP to a specific offset.
  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
  if (!Ptr->getType()->isOpaquePointerTy()) {
    Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
    if (OrigOffset == 0 && OrigElemTy == ResElemTy)
      return Ptr;

    if (OrigElemTy->isSized()) {
      APInt TmpOffset = OrigOffset;
      Type *TmpTy = OrigElemTy;
      SmallVector<APInt> IntIndices =
          DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
      // Only usable if the whole offset was consumed by typed indices.
      if (TmpOffset == 0) {
        // Try to add trailing zero indices to reach the right type.
        while (TmpTy != ResElemTy) {
          Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
          if (!NextTy)
            break;

          // Struct indices must be i32; array/vector indices use the pointer
          // index width.
          IntIndices.push_back(APInt::getZero(
              isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
          TmpTy = NextTy;
        }

        SmallVector<Value *> Indices;
        for (const APInt &Index : IntIndices)
          Indices.push_back(IRB.getInt(Index));

        if (OrigOffset != 0 || TmpTy == ResElemTy) {
          Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
          return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
        }
      }
    }
  }

  // Fallback: raw byte addressing through an i8*.
  if (OrigOffset != 0) {
    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
  }
  return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
}

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
///
/// \p DTGetter / \p ACGetter supply the DominatorTree and AssumptionCache for
/// the *new* function (needed by mem2reg). \p ArgsToPromote maps each argument
/// to its sorted promotable parts; an argument with no remaining uses is
/// simply dropped. \p ReplaceCallSite, if present, is invoked for every
/// rewritten call so legacy call-graph bookkeeping can be updated.
static Function *doPromotion(
    Function *F, function_ref<DominatorTree &(Function &F)> DTGetter,
    function_ref<AssumptionCache *(Function &F)> ACGetter,
    const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> &ArgsToPromote,
    Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
        ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument.
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable).
      ++NumArgumentsDead;
    } else {
      // One new scalar parameter per promoted part, in offset order.
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  // Each iteration rewrites one call site; erasing it drops a use of F, so
  // the loop terminates when all callers have been rewritten.
  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument.
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        // Promoted argument: load each part at its offset in the caller.
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          // Transfer metadata from a guaranteed-executed access in the callee;
          // it is only valid on the new load because that access always ran.
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                              LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_align, LLVMContext::MD_noundef});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      }
      // Dead promoted arguments pass nothing at all.
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CB, *NewCS);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, which gives the old code
    // (especially the store instructions, if they are allowed) a chance to
    // work as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, so parts from ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }

    // Map a (constant-offset) pointer derived from Arg back to the alloca
    // created for that offset.
    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Cleanup the code from the dead instructions: GEPs and BitCasts in between
    // the original argument and its users: loads and stores. Retarget every
    // user to the new created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      // findArgParts() already rejected any other kind of user.
      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // And we are able to call the `promoteMemoryToRegister()` function.
    // Our earlier checks have ensured that PromoteMemToReg() will
    // succeed.
    PromoteMemToReg(Allocas, DTGetter(*NF), ACGetter(*NF));
  }

  return NF;
}

/// Return true if we can prove that all callees pass in a valid pointer for the
/// specified function argument.
///
/// "Valid" means dereferenceable for \p NeededDerefBytes bytes at alignment
/// \p NeededAlign, either as a property of the argument itself or at every
/// (necessarily direct) call site.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only have
  // direct callees.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                              NeededAlign, Bytes, DL);
  });
}

/// Determine that this argument is safe to promote, and find the argument
/// parts it can be promoted into.
///
/// On success, \p ArgPartsVec holds the non-overlapping parts sorted by
/// offset. Returns true for an unused (dead) argument with \p ArgPartsVec
/// left empty.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway, in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // And if this is a byval argument we also allow to have store instructions.
  // Only handle in such way arguments with specified alignment;
  // if it's unspecified, the actual alignment of the argument is
  // target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns None if this load or store is not based on the argument. Return
  // true if we can promote the instruction, false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> Optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return None;

    // Offsets are tracked as int64_t; reject anything that wouldn't fit.
    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements of
    // the aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    Optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    // Once execution might not reach the next instruction, later accesses are
    // no longer guaranteed to execute.
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      // Variable-index GEPs would make the access offset unknown.
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      // HandleEndUser cannot return None here: the pointer is known to be
      // derived from Arg via constant offsets only.
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
      // Only stores TO the argument are allowed; all the other stores are
      // unknown users.
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove a required deref / aligned requirement.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec,
       [](const auto &A, const auto &B) { return A.first < B.first; });

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }

  // If store instructions are allowed, the path from the entry of the function
  // to each load may be not free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Return true if \p Ty has no padding: its in-memory representation covers
/// every bit of its alloc size, recursively through arrays, vectors, and
/// struct members.
bool ArgumentPromotionPass::isDenselyPacked(Type *Ty, const DataLayout &DL) {
  // There is no size information, so be conservative.
  if (!Ty->isSized())
    return false;

  // If the alloc size is not equal to the storage size, then there are padding
  // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
  if (DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty))
    return false;

  // FIXME: This isn't the right way to check for padding in vectors with
  // non-byte-size elements.
  if (VectorType *SeqTy = dyn_cast<VectorType>(Ty))
    return isDenselyPacked(SeqTy->getElementType(), DL);

  // For array types, check for padding within members.
  if (ArrayType *SeqTy = dyn_cast<ArrayType>(Ty))
    return isDenselyPacked(SeqTy->getElementType(), DL);

  if (!isa<StructType>(Ty))
    return true;

  // Check for padding within and between elements of a struct.
  StructType *StructTy = cast<StructType>(Ty);
  const StructLayout *Layout = DL.getStructLayout(StructTy);
  uint64_t StartPos = 0;
  for (unsigned I = 0, E = StructTy->getNumElements(); I < E; ++I) {
    Type *ElTy = StructTy->getElementType(I);
    if (!isDenselyPacked(ElTy, DL))
      return false;
    // Any gap between the expected and actual element offset is padding.
    if (StartPos != Layout->getElementOffsetInBits(I))
      return false;
    StartPos += DL.getTypeAllocSizeInBits(ElTy);
  }

  return true;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}

/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
///
/// Returns the replacement function, or nullptr if nothing was promoted.
static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
                 function_ref<DominatorTree &(Function &F)> DTGetter,
                 function_ref<AssumptionCache *(Function &F)> ACGetter,
                 unsigned MaxElements,
                 Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
                     ReplaceCallSite,
                 const TargetTransformInfo &TTI, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation may
  // not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change signature of musttail callee.
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change signature of musttail caller.
  // FIXME: Support promoting whole chain of musttail functions.
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();

  AAResults &AAR = AARGetter(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  for (Argument *PtrArg : PointerArgs) {
    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // If we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  return doPromotion(F, DTGetter, ACGetter, ArgsToPromote, ReplaceCallSite);
}

PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    // A multi-node SCC is mutually recursive by construction.
    bool IsRecursive = C.size() > 1;
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();

      // FIXME: This lambda must only be used with this function. We should
      // skip the lambda and just get the AA results directly.
      auto AARGetter = [&](Function &F) -> AAResults & {
        assert(&F == &OldF && "Called with an unexpected function!");
        return FAM.getResult<AAManager>(F);
      };

      // DT/AC are only ever requested for the replacement function, after
      // OldF's body has been spliced out — hence the inverted asserts.
      auto DTGetter = [&](Function &F) -> DominatorTree & {
        assert(&F != &OldF && "Called with the obsolete function!");
        return FAM.getResult<DominatorTreeAnalysis>(F);
      };

      auto ACGetter = [&](Function &F) -> AssumptionCache * {
        assert(&F != &OldF && "Called with the obsolete function!");
        return &FAM.getResult<AssumptionAnalysis>(F);
      };

      const auto &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
      Function *NewF = promoteArguments(&OldF, AARGetter, DTGetter, ACGetter,
                                        MaxElements, None, TTI, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      // Callers now contain new loads/GEPs; invalidate their non-CFG analyses.
      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}