1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass promotes "by reference" arguments to be "by value" arguments. In 10 // practice, this means looking for internal functions that have pointer 11 // arguments. If it can prove, through the use of alias analysis, that an 12 // argument is *only* loaded, then it can pass the value into the function 13 // instead of the address of the value. This can cause recursive simplification 14 // of code and lead to the elimination of allocas (especially in C++ template 15 // code like the STL). 16 // 17 // This pass also handles aggregate arguments that are passed into a function, 18 // scalarizing them if the elements of the aggregate are only loaded. Note that 19 // by default it refuses to scalarize aggregates which would require passing in 20 // more than three operands to the function, because passing thousands of 21 // operands for a large array or structure is unprofitable! This limit can be 22 // configured or disabled, however. 23 // 24 // Note that this transformation could also be done for arguments that are only 25 // stored to (returning the value instead), but does not currently. This case 26 // would be best handled when and if LLVM begins supporting multiple return 27 // values from functions. 
28 // 29 //===----------------------------------------------------------------------===// 30 31 #include "llvm/Transforms/IPO/ArgumentPromotion.h" 32 #include "llvm/ADT/DepthFirstIterator.h" 33 #include "llvm/ADT/None.h" 34 #include "llvm/ADT/Optional.h" 35 #include "llvm/ADT/STLExtras.h" 36 #include "llvm/ADT/ScopeExit.h" 37 #include "llvm/ADT/SmallPtrSet.h" 38 #include "llvm/ADT/SmallVector.h" 39 #include "llvm/ADT/Statistic.h" 40 #include "llvm/ADT/Twine.h" 41 #include "llvm/Analysis/AssumptionCache.h" 42 #include "llvm/Analysis/BasicAliasAnalysis.h" 43 #include "llvm/Analysis/CGSCCPassManager.h" 44 #include "llvm/Analysis/CallGraph.h" 45 #include "llvm/Analysis/CallGraphSCCPass.h" 46 #include "llvm/Analysis/LazyCallGraph.h" 47 #include "llvm/Analysis/Loads.h" 48 #include "llvm/Analysis/MemoryLocation.h" 49 #include "llvm/Analysis/ValueTracking.h" 50 #include "llvm/Analysis/TargetLibraryInfo.h" 51 #include "llvm/Analysis/TargetTransformInfo.h" 52 #include "llvm/IR/Argument.h" 53 #include "llvm/IR/Attributes.h" 54 #include "llvm/IR/BasicBlock.h" 55 #include "llvm/IR/CFG.h" 56 #include "llvm/IR/Constants.h" 57 #include "llvm/IR/DataLayout.h" 58 #include "llvm/IR/DerivedTypes.h" 59 #include "llvm/IR/Function.h" 60 #include "llvm/IR/IRBuilder.h" 61 #include "llvm/IR/InstrTypes.h" 62 #include "llvm/IR/Instruction.h" 63 #include "llvm/IR/Instructions.h" 64 #include "llvm/IR/Metadata.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/IR/NoFolder.h" 67 #include "llvm/IR/PassManager.h" 68 #include "llvm/IR/Type.h" 69 #include "llvm/IR/Use.h" 70 #include "llvm/IR/User.h" 71 #include "llvm/IR/Value.h" 72 #include "llvm/InitializePasses.h" 73 #include "llvm/Pass.h" 74 #include "llvm/Support/Casting.h" 75 #include "llvm/Support/Debug.h" 76 #include "llvm/Support/FormatVariadic.h" 77 #include "llvm/Support/raw_ostream.h" 78 #include "llvm/Transforms/IPO.h" 79 #include <algorithm> 80 #include <cassert> 81 #include <cstdint> 82 #include <functional> 83 #include <iterator> 84 
#include <map>
#include <set>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

/// A single promotable fragment of a pointer argument: the type loaded at a
/// particular byte offset from the argument, together with the load alignment.
struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load instruction for use by
  /// metadata transfer.
  LoadInst *MustExecLoad;
};
// Maps a byte offset from the argument base to the part loaded there.
using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

/// Build a pointer to \p Offset bytes into \p Ptr, typed as a pointer to
/// \p ResElemTy. For typed (non-opaque) pointers this prefers a structured
/// GEP derived from the pointee type; otherwise it emits an i8 GEP plus a
/// bitcast.
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  // For non-opaque pointers, try create a "nice" GEP if possible, otherwise
  // fall back to an i8 GEP to a specific offset.
  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
  if (!Ptr->getType()->isOpaquePointerTy()) {
    Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
    if (OrigOffset == 0 && OrigElemTy == ResElemTy)
      return Ptr;

    if (OrigElemTy->isSized()) {
      APInt TmpOffset = OrigOffset;
      Type *TmpTy = OrigElemTy;
      SmallVector<APInt> IntIndices =
          DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
      if (TmpOffset == 0) {
        // Try to add trailing zero indices to reach the right type.
        while (TmpTy != ResElemTy) {
          Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
          if (!NextTy)
            break;

          // Struct indices must be i32; array/vector indices use the index
          // type width of the original offset.
          IntIndices.push_back(APInt::getZero(
              isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
          TmpTy = NextTy;
        }

        SmallVector<Value *> Indices;
        for (const APInt &Index : IntIndices)
          Indices.push_back(IRB.getInt(Index));

        if (OrigOffset != 0 || TmpTy == ResElemTy) {
          Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
          return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
        }
      }
    }
  }

  // Fallback: byte-addressed GEP off an i8* view of the pointer.
  if (OrigOffset != 0) {
    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
  }
  return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
}

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
///
/// \p ArgsToPromote maps each promotable pointer argument to its sorted list
/// of (offset, part) loads; \p ByValArgsToTransform holds byval struct
/// arguments to be scalarized element-by-element. \p ReplaceCallSite, if
/// present, is invoked for every rewritten call so the legacy call graph can
/// be updated.
static Function *doPromotion(
    Function *F,
    const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> &ArgsToPromote,
    SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
    Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
        ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (ByValArgsToTransform.count(&*I)) {
      // Simple byval argument? Just add all the struct element types.
      Type *AgTy = I->getParamByValType();
      StructType *STy = cast<StructType>(AgTy);
      llvm::append_range(Params, STy->elements());
      ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
                        AttributeSet());
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;
    } else {
      // One new scalar parameter per promoted part, in offset order.
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  // Each iteration erases one call site, so F's use list shrinks to empty.
  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    auto AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo)
      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (ByValArgsToTransform.count(&*I)) {
        // Emit a GEP and load for each element of the struct.
        Type *AgTy = I->getParamByValType();
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
        const StructLayout *SL = DL.getStructLayout(STy);
        Align StructAlign = *I->getParamAlign();
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          auto *Idx =
              IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
          // TODO: Tell AA about the new values?
          Align Alignment =
              commonAlignment(StructAlign, SL->getElementOffset(i));
          Args.push_back(IRB.CreateAlignedLoad(
              STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val"));
          ArgAttrVec.push_back(AttributeSet());
        }
      } else if (!I->use_empty()) {
        // Promoted argument: load each part in the caller and pass the values.
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecLoad) {
            // TODO: Transfer other metadata like !nonnull here.
            LI->setAAMetadata(Pair.second.MustExecLoad->getAAMetadata());
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      }
      // else: dead promoted argument — pass nothing.

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CB, *NewCS);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(&Arg)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = &NF->begin()->front();

      // Just add all the struct element types.
      Type *AgTy = Arg.getParamByValType();
      Align StructAlign = *Arg.getParamAlign();
      Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
                                        StructAlign, "", InsertPt);
      StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
                        nullptr};
      const StructLayout *SL = DL.getStructLayout(STy);

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = GetElementPtrInst::Create(
            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
            InsertPt);
        I2->setName(Arg.getName() + "." + Twine(i));
        Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
        new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      Arg.replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(&Arg);
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Map each promoted byte offset to its new scalar argument so the loads
    // below can be rewired.
    SmallDenseMap<int64_t, Argument *> OffsetToArg;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      Argument &NewArg = *I2++;
      NewArg.setName(Arg.getName() + "." + Twine(Pair.first) + ".val");
      OffsetToArg.insert({Pair.first, &NewArg});
    }

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (with possible casts and GEPs in between).
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
        Ptr =
            Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
        assert(Ptr == &Arg && "Not constant offset from arg?");
        LI->replaceAllUsesWith(OffsetToArg[Offset.getSExtValue()]);
        DeadInsts.push_back(LI);
        continue;
      }

      // findArgParts() already rejected any other kind of user.
      llvm_unreachable("Unexpected user");
    }

    // Erase in a second pass so we never delete a value that is still on the
    // worklist or referenced by a not-yet-visited user.
    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
      I->eraseFromParent();
    }
  }

  return NF;
}

/// Return true if we can prove that all callees pass in a valid pointer for the
/// specified function argument.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only have
  // direct callees.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(
        CB.getArgOperand(Arg->getArgNo()), NeededAlign, Bytes, DL);
  });
}

/// Determine that this argument is safe to promote, and find the argument
/// parts it can be promoted into.
///
/// On success, appends the (offset, part) pairs to \p ArgPartsVec sorted by
/// offset. Returns true for dead arguments as well (with an empty part list).
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsSelfRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway, in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // Returns None if this load is not based on the argument. Return true if
  // we can promote the load, false otherwise.
  auto HandleLoad = [&](LoadInst *LI,
                        bool GuaranteedToExecute) -> Optional<bool> {
    // Don't promote volatile or atomic loads.
    if (!LI->isSimple())
      return false;

    Value *Ptr = LI->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return None;

    // Offsets are stored as int64_t below; reject anything that wouldn't fit.
    if (Offset.getSignificantBits() >= 64)
      return false;

    Type *Ty = LI->getType();
    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a self-recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsSelfRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, LI->getAlign(), GuaranteedToExecute ? LI : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements of
    // the aggregate.
    if (MaxElements > 0 && ArgParts.size() >= MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading one specific type at a given offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "loaded via both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this load is not guaranteed to execute and we haven't seen a load at
    // this offset before (or it had lower alignment), then we need to remember
    // that requirement.
    // Note that skipping loads of previously seen offsets is only correct
    // because we only allow a single type for a given offset, which also means
    // that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < LI->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(LI->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, LI->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, LI->getAlign());
    return true;
  };

  // Look for loads that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      if (Optional<bool> Res = HandleLoad(LI, /* GuaranteedToExecute */ true))
        if (!*Res)
          return false;

    // Past the first instruction that might not transfer execution, loads are
    // no longer guaranteed to execute.
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUsers = [&](Value *V) {
    for (User *U : V->users())
      if (Visited.insert(U).second)
        Worklist.push_back(U);
  };
  AppendUsers(Arg);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (isa<BitCastInst>(V)) {
      AppendUsers(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUsers(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      // The dereference is safe: this load was reached through the argument's
      // own use chain (bitcasts and all-constant GEPs only), so stripping
      // offsets always leads back to Arg and HandleLoad cannot return None.
      if (!*HandleLoad(LI, /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove a required deref / aligned requirement.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec,
       [](const auto &A, const auto &B) { return A.first < B.first; });

  // Make sure the parts are non-overlapping.
  // TODO: As we're doing pure load promotion here, overlap should be fine from
  // a correctness perspective. Profitability is less obvious though.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }

  // Okay, now we know that the argument is only used by load instructions and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Return true if \p type has no padding bytes anywhere (including between
/// struct elements), so passing it element-by-element loses no information.
bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
  // There is no size information, so be conservative.
  if (!type->isSized())
    return false;

  // If the alloc size is not equal to the storage size, then there are padding
  // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
  if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
    return false;

  // FIXME: This isn't the right way to check for padding in vectors with
  // non-byte-size elements.
  if (VectorType *seqTy = dyn_cast<VectorType>(type))
    return isDenselyPacked(seqTy->getElementType(), DL);

  // For array types, check for padding within members.
  if (ArrayType *seqTy = dyn_cast<ArrayType>(type))
    return isDenselyPacked(seqTy->getElementType(), DL);

  if (!isa<StructType>(type))
    return true;

  // Check for padding within and between elements of a struct.
  StructType *StructTy = cast<StructType>(type);
  const StructLayout *Layout = DL.getStructLayout(StructTy);
  uint64_t StartPos = 0;
  for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
    Type *ElTy = StructTy->getElementType(i);
    if (!isDenselyPacked(ElTy, DL))
      return false;
    if (StartPos != Layout->getElementOffsetInBits(i))
      return false;
    StartPos += DL.getTypeAllocSizeInBits(ElTy);
  }

  return true;
}

/// Checks if the padding bytes of an argument could be accessed.
static bool canPaddingBeAccessed(Argument *arg) {
  assert(arg->hasByValAttr());

  // Track all the pointers to the argument to make sure they are not captured.
  SmallPtrSet<Value *, 16> PtrValues;
  PtrValues.insert(arg);

  // Track all of the stores.
  SmallVector<StoreInst *, 16> Stores;

  // Scan through the uses recursively to make sure the pointer is always used
  // sanely.
  SmallVector<Value *, 16> WorkList(arg->users());
  while (!WorkList.empty()) {
    Value *V = WorkList.pop_back_val();
    if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
      if (PtrValues.insert(V).second)
        llvm::append_range(WorkList, V->users());
    } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
      Stores.push_back(Store);
    } else if (!isa<LoadInst>(V)) {
      // Any other user (call, compare, ...) may expose the padding.
      return true;
    }
  }

  // Check to make sure the pointers aren't captured
  for (StoreInst *Store : Stores)
    if (PtrValues.count(Store->getValueOperand()))
      return true;

  return false;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
737 static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, 738 const TargetTransformInfo &TTI) { 739 return all_of(F.uses(), [&](const Use &U) { 740 CallBase *CB = dyn_cast<CallBase>(U.getUser()); 741 if (!CB) 742 return false; 743 744 const Function *Caller = CB->getCaller(); 745 const Function *Callee = CB->getCalledFunction(); 746 return TTI.areTypesABICompatible(Caller, Callee, Types); 747 }); 748 } 749 750 /// PromoteArguments - This method checks the specified function to see if there 751 /// are any promotable arguments and if it is safe to promote the function (for 752 /// example, all callers are direct). If safe to promote some arguments, it 753 /// calls the DoPromotion method. 754 static Function * 755 promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, 756 unsigned MaxElements, 757 Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> 758 ReplaceCallSite, 759 const TargetTransformInfo &TTI) { 760 // Don't perform argument promotion for naked functions; otherwise we can end 761 // up removing parameters that are seemingly 'not used' as they are referred 762 // to in the assembly. 763 if(F->hasFnAttribute(Attribute::Naked)) 764 return nullptr; 765 766 // Make sure that it is local to this module. 767 if (!F->hasLocalLinkage()) 768 return nullptr; 769 770 // Don't promote arguments for variadic functions. Adding, removing, or 771 // changing non-pack parameters can change the classification of pack 772 // parameters. Frontends encode that classification at the call site in the 773 // IR, while in the callee the classification is determined dynamically based 774 // on the number of registers consumed so far. 775 if (F->isVarArg()) 776 return nullptr; 777 778 // Don't transform functions that receive inallocas, as the transformation may 779 // not be safe depending on calling convention. 
780 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca)) 781 return nullptr; 782 783 // First check: see if there are any pointer arguments! If not, quick exit. 784 SmallVector<Argument *, 16> PointerArgs; 785 for (Argument &I : F->args()) 786 if (I.getType()->isPointerTy()) 787 PointerArgs.push_back(&I); 788 if (PointerArgs.empty()) 789 return nullptr; 790 791 // Second check: make sure that all callers are direct callers. We can't 792 // transform functions that have indirect callers. Also see if the function 793 // is self-recursive and check that target features are compatible. 794 bool isSelfRecursive = false; 795 for (Use &U : F->uses()) { 796 CallBase *CB = dyn_cast<CallBase>(U.getUser()); 797 // Must be a direct call. 798 if (CB == nullptr || !CB->isCallee(&U)) 799 return nullptr; 800 801 // Can't change signature of musttail callee 802 if (CB->isMustTailCall()) 803 return nullptr; 804 805 if (CB->getParent()->getParent() == F) 806 isSelfRecursive = true; 807 } 808 809 // Can't change signature of musttail caller 810 // FIXME: Support promoting whole chain of musttail functions 811 for (BasicBlock &BB : *F) 812 if (BB.getTerminatingMustTailCall()) 813 return nullptr; 814 815 const DataLayout &DL = F->getParent()->getDataLayout(); 816 817 AAResults &AAR = AARGetter(*F); 818 819 // Check to see which arguments are promotable. If an argument is promotable, 820 // add it to ArgsToPromote. 821 DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; 822 SmallPtrSet<Argument *, 8> ByValArgsToTransform; 823 for (Argument *PtrArg : PointerArgs) { 824 // Replace sret attribute with noalias. This reduces register pressure by 825 // avoiding a register copy. 
826 if (PtrArg->hasStructRetAttr()) { 827 unsigned ArgNo = PtrArg->getArgNo(); 828 F->removeParamAttr(ArgNo, Attribute::StructRet); 829 F->addParamAttr(ArgNo, Attribute::NoAlias); 830 for (Use &U : F->uses()) { 831 CallBase &CB = cast<CallBase>(*U.getUser()); 832 CB.removeParamAttr(ArgNo, Attribute::StructRet); 833 CB.addParamAttr(ArgNo, Attribute::NoAlias); 834 } 835 } 836 837 // If this is a byval argument, and if the aggregate type is small, just 838 // pass the elements, which is always safe, if the passed value is densely 839 // packed or if we can prove the padding bytes are never accessed. 840 // 841 // Only handle arguments with specified alignment; if it's unspecified, the 842 // actual alignment of the argument is target-specific. 843 Type *ByValTy = PtrArg->getParamByValType(); 844 bool isSafeToPromote = 845 ByValTy && PtrArg->getParamAlign() && 846 (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) || 847 !canPaddingBeAccessed(PtrArg)); 848 if (isSafeToPromote) { 849 if (StructType *STy = dyn_cast<StructType>(ByValTy)) { 850 if (MaxElements > 0 && STy->getNumElements() > MaxElements) { 851 LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '" 852 << PtrArg->getName() 853 << "' because it would require adding more" 854 << " than " << MaxElements 855 << " arguments to the function.\n"); 856 continue; 857 } 858 859 SmallVector<Type *, 4> Types; 860 append_range(Types, STy->elements()); 861 862 // If all the elements are single-value types, we can promote it. 863 bool AllSimple = 864 all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); }); 865 866 // Safe to transform, don't even bother trying to "promote" it. 867 // Passing the elements as a scalar will allow sroa to hack on 868 // the new alloca we introduce. 869 if (AllSimple && areTypesABICompatible(Types, *F, TTI)) { 870 ByValArgsToTransform.insert(PtrArg); 871 continue; 872 } 873 } 874 } 875 876 // Otherwise, see if we can promote the pointer to its value. 
    // Collect the (offset, part) pieces that loads of this argument touch; if
    // the analysis succeeds and the part types are ABI-compatible, record the
    // argument for promotion.
    SmallVector<OffsetAndArgPart, 4> ArgParts;
    if (findArgParts(PtrArg, DL, AAR, MaxElements, isSelfRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI))
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return nullptr;

  return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
}

/// New pass manager (CGSCC) driver: repeatedly attempts argument promotion on
/// every function in the SCC until a fixed point is reached, keeping the lazy
/// call graph and the function analysis manager consistent as functions are
/// replaced.
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();

      // FIXME: This lambda must only be used with this function. We should
      // skip the lambda and just get the AA results directly.
      auto AARGetter = [&](Function &F) -> AAResults & {
        assert(&F == &OldF && "Called with an unexpected function!");
        return FAM.getResult<AAManager>(F);
      };

      const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
      // No call-site replacement callback (None): the lazy call graph is
      // updated explicitly below instead.
      Function *NewF =
          promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      // Drop cached analyses for OldF before erasing it so the FAM holds no
      // dangling references to the deleted function.
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      // Every caller of NewF had its call sites rewritten; invalidate their
      // non-CFG analyses (promotion does not change caller CFGs).
      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

namespace {

/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
/// Legacy pass manager wrapper around the promotion logic above.
struct ArgPromotion : public CallGraphSCCPass {
  // Pass identification, replacement for typeid
  static char ID;

  explicit ArgPromotion(unsigned MaxElements = 3)
      : CallGraphSCCPass(ID), MaxElements(MaxElements) {
    initializeArgPromotionPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    getAAResultsAnalysisUsage(AU);
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool runOnSCC(CallGraphSCC &SCC) override;

private:
  using llvm::Pass::doInitialization;

  bool doInitialization(CallGraph &CG) override;

  /// The maximum number of elements to expand, or 0 for unlimited.
984 unsigned MaxElements; 985 }; 986 987 } // end anonymous namespace 988 989 char ArgPromotion::ID = 0; 990 991 INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", 992 "Promote 'by reference' arguments to scalars", false, 993 false) 994 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 995 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 996 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 997 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 998 INITIALIZE_PASS_END(ArgPromotion, "argpromotion", 999 "Promote 'by reference' arguments to scalars", false, false) 1000 1001 Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) { 1002 return new ArgPromotion(MaxElements); 1003 } 1004 1005 bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { 1006 if (skipSCC(SCC)) 1007 return false; 1008 1009 // Get the callgraph information that we need to update to reflect our 1010 // changes. 1011 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 1012 1013 LegacyAARGetter AARGetter(*this); 1014 1015 bool Changed = false, LocalChange; 1016 1017 // Iterate until we stop promoting from this SCC. 1018 do { 1019 LocalChange = false; 1020 // Attempt to promote arguments from all functions in this SCC. 
1021 for (CallGraphNode *OldNode : SCC) { 1022 Function *OldF = OldNode->getFunction(); 1023 if (!OldF) 1024 continue; 1025 1026 auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) { 1027 Function *Caller = OldCS.getParent()->getParent(); 1028 CallGraphNode *NewCalleeNode = 1029 CG.getOrInsertFunction(NewCS.getCalledFunction()); 1030 CallGraphNode *CallerNode = CG[Caller]; 1031 CallerNode->replaceCallEdge(cast<CallBase>(OldCS), 1032 cast<CallBase>(NewCS), NewCalleeNode); 1033 }; 1034 1035 const TargetTransformInfo &TTI = 1036 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF); 1037 if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements, 1038 {ReplaceCallSite}, TTI)) { 1039 LocalChange = true; 1040 1041 // Update the call graph for the newly promoted function. 1042 CallGraphNode *NewNode = CG.getOrInsertFunction(NewF); 1043 NewNode->stealCalledFunctionsFrom(OldNode); 1044 if (OldNode->getNumReferences() == 0) 1045 delete CG.removeFunctionFromModule(OldNode); 1046 else 1047 OldF->setLinkage(Function::ExternalLinkage); 1048 1049 // And updat ethe SCC we're iterating as well. 1050 SCC.ReplaceNode(OldNode, NewNode); 1051 } 1052 } 1053 // Remember that we changed something. 1054 Changed |= LocalChange; 1055 } while (LocalChange); 1056 1057 return Changed; 1058 } 1059 1060 bool ArgPromotion::doInitialization(CallGraph &CG) { 1061 return CallGraphSCCPass::doInitialization(CG); 1062 } 1063