1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass promotes "by reference" arguments to be "by value" arguments. In 11 // practice, this means looking for internal functions that have pointer 12 // arguments. If it can prove, through the use of alias analysis, that an 13 // argument is *only* loaded, then it can pass the value into the function 14 // instead of the address of the value. This can cause recursive simplification 15 // of code and lead to the elimination of allocas (especially in C++ template 16 // code like the STL). 17 // 18 // This pass also handles aggregate arguments that are passed into a function, 19 // scalarizing them if the elements of the aggregate are only loaded. Note that 20 // by default it refuses to scalarize aggregates which would require passing in 21 // more than three operands to the function, because passing thousands of 22 // operands for a large array or structure is unprofitable! This limit can be 23 // configured or disabled, however. 24 // 25 // Note that this transformation could also be done for arguments that are only 26 // stored to (returning the value instead), but does not currently. This case 27 // would be best handled when and if LLVM begins supporting multiple return 28 // values from functions. 
29 // 30 //===----------------------------------------------------------------------===// 31 32 #include "llvm/Transforms/IPO/ArgumentPromotion.h" 33 #include "llvm/ADT/DepthFirstIterator.h" 34 #include "llvm/ADT/None.h" 35 #include "llvm/ADT/Optional.h" 36 #include "llvm/ADT/STLExtras.h" 37 #include "llvm/ADT/SmallPtrSet.h" 38 #include "llvm/ADT/SmallVector.h" 39 #include "llvm/ADT/Statistic.h" 40 #include "llvm/ADT/StringExtras.h" 41 #include "llvm/ADT/Twine.h" 42 #include "llvm/Analysis/AliasAnalysis.h" 43 #include "llvm/Analysis/AssumptionCache.h" 44 #include "llvm/Analysis/BasicAliasAnalysis.h" 45 #include "llvm/Analysis/CGSCCPassManager.h" 46 #include "llvm/Analysis/CallGraph.h" 47 #include "llvm/Analysis/CallGraphSCCPass.h" 48 #include "llvm/Analysis/LazyCallGraph.h" 49 #include "llvm/Analysis/Loads.h" 50 #include "llvm/Analysis/MemoryLocation.h" 51 #include "llvm/Analysis/TargetLibraryInfo.h" 52 #include "llvm/IR/Argument.h" 53 #include "llvm/IR/Attributes.h" 54 #include "llvm/IR/BasicBlock.h" 55 #include "llvm/IR/CFG.h" 56 #include "llvm/IR/CallSite.h" 57 #include "llvm/IR/Constants.h" 58 #include "llvm/IR/DataLayout.h" 59 #include "llvm/IR/DerivedTypes.h" 60 #include "llvm/IR/Function.h" 61 #include "llvm/IR/InstrTypes.h" 62 #include "llvm/IR/Instruction.h" 63 #include "llvm/IR/Instructions.h" 64 #include "llvm/IR/Metadata.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/IR/PassManager.h" 67 #include "llvm/IR/Type.h" 68 #include "llvm/IR/Use.h" 69 #include "llvm/IR/User.h" 70 #include "llvm/IR/Value.h" 71 #include "llvm/Pass.h" 72 #include "llvm/Support/Casting.h" 73 #include "llvm/Support/Debug.h" 74 #include "llvm/Support/raw_ostream.h" 75 #include "llvm/Transforms/IPO.h" 76 #include <algorithm> 77 #include <cassert> 78 #include <cstdint> 79 #include <functional> 80 #include <iterator> 81 #include <map> 82 #include <set> 83 #include <string> 84 #include <utility> 85 #include <vector> 86 87 using namespace llvm; 88 89 #define DEBUG_TYPE "argpromotion" 
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

/// A vector used to hold the indices of a single GEP instruction
using IndicesVector = std::vector<uint64_t>;

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
///
/// The rewrite proceeds in three phases, all visible below:
///   1. Build the new parameter list (and per-argument ScalarizeTable).
///   2. Rewrite every call site of F to call the new function NF, inserting
///      GEPs/loads in the caller for promoted arguments.
///   3. Splice F's body into NF and rewire the old arguments' uses onto the
///      new arguments.
static Function *
doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
            SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
            Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
                ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Each entry pairs the source element type of the access with the constant
  // GEP indices used; std::set keeps the entries in a deterministic order,
  // which phase 3 relies on when matching GEPs back to new arguments.
  using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  std::map<Argument *, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  // We need to keep the original loads for each argument and the elements
  // of the argument that are accessed.
  std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (ByValArgsToTransform.count(&*I)) {
      // Simple byval argument? Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      StructType *STy = cast<StructType>(AgTy);
      Params.insert(Params.end(), STy->element_begin(), STy->element_end());
      ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
                        AttributeSet());
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument.
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable): it simply
      // disappears from the new signature.
      ++NumArgumentsDead;

      // There may be remaining metadata uses of the argument for things like
      // llvm.dbg.value. Replace them with undef.
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads.

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
      for (User *U : I->users()) {
        Instruction *UI = cast<Instruction>(U);
        Type *SrcTy;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          SrcTy = L->getType();
        else
          SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
        IndicesVector Indices;
        Indices.reserve(UI->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads.
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(std::make_pair(SrcTy, Indices));
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          OrigLoad = L;
        else
          // Take any load, we will use it only to update Alias Analysis.
          OrigLoad = cast<LoadInst>(UI->user_back());
        OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (const auto &ArgIndex : ArgIndices) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(
            cast<PointerType>(I->getType()->getScalarType())->getElementType(),
            ArgIndex.second));
        ArgAttrVec.push_back(AttributeSet());
        assert(Params.back());
      }

      // A single entry with empty indices means the pointer itself was only
      // loaded directly; anything else counts as aggregate promotion.
      if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);

  // Patch the pointer to LLVM function in debug info descriptor.
  NF->setSubprogram(F->getSubprogram());
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
                                       PAL.getRetAttributes(), ArgAttrVec));
  ArgAttrVec.clear();

  // Insert NF right before F in the module and steal F's name so external
  // references by name resolve to the new function.
  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  // Note: each iteration erases one call, so F->use_empty() eventually holds.
  SmallVector<Value *, 16> Args;
  while (!F->use_empty()) {
    CallSite CS(F->user_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttributeList &CallPAL = CS.getAttributes();

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo)
      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
      } else if (ByValArgsToTransform.count(&*I)) {
        // Emit a GEP and load for each element of the struct.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(
              STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
          ArgAttrVec.push_back(AttributeSet());
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value *> Ops;
        for (const auto &ArgIndex : ArgIndices) {
          Value *V = *AI;
          LoadInst *OrigLoad =
              OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
          if (!ArgIndex.second.empty()) {
            Ops.reserve(ArgIndex.second.size());
            Type *ElTy = V->getType();
            for (auto II : ArgIndex.second) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              Type *IdxTy =
                  (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
                                      : Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, II));
              // Keep track of the type we're currently indexing.
              if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
                ElTy = ElPTy->getElementType();
              else
                ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
                                          V->getName() + ".idx", Call);
            Ops.clear();
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          // Transfer the AA info too.
          AAMDNodes AAInfo;
          OrigLoad->getAAMetadata(AAInfo);
          newLoad->setAAMetadata(AAInfo);

          Args.push_back(newLoad);
          ArgAttrVec.push_back(AttributeSet());
        }
      }
      // else: dead promoted argument — nothing is passed for it.

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
    }

    // Preserve operand bundles on the rewritten call.
    SmallVector<OperandBundleDef, 1> OpBundles;
    CS.getOperandBundlesAsDefs(OpBundles);

    CallSite NewCS;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", Call);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
      NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS.setCallingConv(CS.getCallingConv());
    NewCS.setAttributes(
        AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
                           CallPAL.getRetAttributes(), ArgAttrVec));
    NewCS->setDebugLoc(Call->getDebugLoc());
    // Carry over profile metadata (total weight) if present.
    uint64_t W;
    if (Call->extractProfTotalWeight(W))
      NewCS->setProfWeight(W);
    Args.clear();
    ArgAttrVec.clear();

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CS, NewCS);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(NewCS.getInstruction());
      NewCS->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  const DataLayout &DL = F->getParent()->getDataLayout();

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  // I walks F's (old) arguments, I2 walks NF's (new) arguments; I2 is advanced
  // manually because one old argument may map to several new ones.
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
                              I2 = NF->arg_begin();
       I != E; ++I) {
    if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(&*I2);
      I2->takeName(&*I);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(&*I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = &NF->begin()->front();

      // Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
                                        I->getParamAlignment(), "", InsertPt);
      StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
                        nullptr};

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = GetElementPtrInst::Create(
            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
            InsertPt);
        I2->setName(I->getName() + "." + Twine(i));
        // Consumes one new argument (I2) per struct element.
        new StoreInst(&*I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(&*I);

      // If the alloca is used in a call, we must clear the tail flag since
      // the callee now uses an alloca from the caller.
      for (User *U : TheAlloca->users()) {
        CallInst *Call = dyn_cast<CallInst>(U);
        if (!Call)
          continue;
        Call->setTailCall(false);
      }
      continue;
    }

    if (I->use_empty())
      continue;

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[&*I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
        // The empty-indices entry sorts first in the std::set, so the first
        // new argument is the direct-load value.
        assert(ArgIndices.begin()->second.empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName() + ".val");
        LI->replaceAllUsesWith(&*I2);
        LI->eraseFromParent();
        LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
                          << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads.
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        // Find the new argument corresponding to this GEP's indices by
        // walking the (deterministically ordered) ScalarizeTable in lockstep
        // with the new argument list.
        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             It->second != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
          NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
                          << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions. Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->user_back());
          L->replaceAllUsesWith(&*TheArg);
          L->eraseFromParent();
        }
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    std::advance(I2, ArgIndices.size());
  }

  return NF;
}

/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callers pass in a valid (dereferenceable) pointer for the specified
/// function argument.
static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();

  unsigned ArgNo = Arg->getArgNo();

  // Look at all call sites of the function. At this point we know we only have
  // direct callees.
  for (User *U : Callee->users()) {
    CallSite CS(U);
    assert(CS && "Should only have direct calls!");

    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
      return false;
  }
  return true;
}

/// Returns true if Prefix is a prefix of Longer. That means, Longer has a size
/// that is greater than or equal to the size of Prefix, and each of the
/// elements in Prefix is the same as the corresponding elements in Longer.
///
/// This means it also returns true when Prefix and Longer are equal!
static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
  if (Prefix.size() > Longer.size())
    return false;
  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}

/// Checks if Indices, or a prefix of Indices, is in Set.
static bool prefixIn(const IndicesVector &Indices,
                     std::set<IndicesVector> &Set) {
  std::set<IndicesVector>::iterator Low;
  Low = Set.upper_bound(Indices);
  // upper_bound returns the first element strictly greater than Indices;
  // stepping back one gives the last element <= Indices (if any).
  if (Low != Set.begin())
    Low--;
  // Low is now the last element smaller than or equal to Indices. This means
  // it points to a prefix of Indices (possibly Indices itself), if such
  // prefix exists.
  //
  // This load is safe if any prefix of its operands is safe to load.
  return Low != Set.end() && isPrefix(*Low, Indices);
}

/// Mark the given indices (ToMark) as safe in the given set of indices
/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
/// is already a prefix of Indices in Safe, Indices are implicitly marked safe
/// already. Furthermore, any indices that Indices is itself a prefix of, are
/// removed from Safe (since they are implicitly safe because of Indices now).
static void markIndicesSafe(const IndicesVector &ToMark,
                            std::set<IndicesVector> &Safe) {
  std::set<IndicesVector>::iterator Low;
  Low = Safe.upper_bound(ToMark);
  // Guard against the case where Safe is empty.
  if (Low != Safe.begin())
    Low--;
  // Low is now the last element smaller than or equal to Indices. This
  // means it points to a prefix of Indices (possibly Indices itself), if
  // such prefix exists.
  if (Low != Safe.end()) {
    if (isPrefix(*Low, ToMark))
      // If there is already a prefix of these indices (or exactly these
      // indices) marked as safe, don't bother adding these indices.
      return;

    // Increment Low, so we can use it as a "insert before" hint.
    ++Low;
  }
  // Insert ToMark; the hint keeps the insertion O(1) amortized.
  Low = Safe.insert(Low, ToMark);
  ++Low;
  // If ToMark were a prefix of longer index list(s), remove those — they are
  // now implied by ToMark. Entries with ToMark as a prefix sort immediately
  // after it, so a forward scan suffices.
  std::set<IndicesVector>::iterator End = Safe.end();
  while (Low != End && isPrefix(ToMark, *Low)) {
    std::set<IndicesVector>::iterator Remove = Low;
    ++Low;
    Safe.erase(Remove);
  }
}

/// isSafeToPromoteArgument - As you might guess from the name of this method,
/// it checks to see if it is both safe and useful to promote the argument.
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
                                    AAResults &AAR, unsigned MaxElements) {
  using GEPIndicesSet = std::set<IndicesVector>;

  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of the uses are loads, or are GEP
  // instructions (with constant indices) that are subsequently loaded.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway, in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
  //
  // This set will contain all sets of indices that are loaded in the entry
  // block, and thus are safe to unconditionally load in the caller.
  //
  // This optimization is also safe for InAlloca parameters, because it verifies
  // that the address isn't captured.
  GEPIndicesSet SafeToUnconditionallyLoad;

  // This set contains all the sets of indices that we are planning to promote.
  // This makes it possible to limit the number of arguments added.
  GEPIndicesSet ToPromote;

  // If the pointer is always valid, any load with first index 0 is valid.
  if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));

  // First, iterate the entry block and mark loads of (geps of) arguments as
  // safe. Loads in the entry block execute unconditionally, so duplicating
  // them in the caller cannot introduce a trap that wasn't already there.
  BasicBlock &EntryBlock = Arg->getParent()->front();
  // Declare this here so we can reuse it.
  IndicesVector Indices;
  for (Instruction &I : EntryBlock)
    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
      Value *V = LI->getPointerOperand();
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
        V = GEP->getPointerOperand();
        if (V == Arg) {
          // This load actually loads (part of) Arg? Check the indices then.
          Indices.reserve(GEP->getNumIndices());
          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
               II != IE; ++II)
            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
              Indices.push_back(CI->getSExtValue());
            else
              // We found a non-constant GEP index for this argument? Bail out
              // right away, can't promote this argument at all.
              return false;

          // Indices checked out, mark them as safe.
          markIndicesSafe(Indices, SafeToUnconditionallyLoad);
          Indices.clear();
        }
      } else if (V == Arg) {
        // Direct loads are equivalent to a GEP with a single 0 index.
        markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
      }
    }

  // Now, iterate all uses of the argument to see if there are any uses that are
  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
  SmallVector<LoadInst *, 16> Loads;
  IndicesVector Operands;
  for (Use &U : Arg->uses()) {
    User *UR = U.getUser();
    Operands.clear();
    if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
      // Don't hack volatile/atomic loads.
      if (!LI->isSimple())
        return false;
      Loads.push_back(LI);
      // Direct loads are equivalent to a GEP with a zero index and then a load.
      Operands.push_back(0);
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
      if (GEP->use_empty()) {
        // Dead GEP's cause trouble later. Just remove them if we run into
        // them.
        GEP->eraseFromParent();
        // TODO: This runs the above loop over and over again for dead GEPs
        // Couldn't we just do increment the UI iterator earlier and erase the
        // use?
        return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
                                       MaxElements);
      }

      // Ensure that all of the indices are constants.
      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
           ++i)
        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
          Operands.push_back(C->getSExtValue());
        else
          return false; // Not a constant operand GEP!

      // Ensure that the only users of the GEP are load instructions.
      for (User *GEPU : GEP->users())
        if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
          // Don't hack volatile/atomic loads.
          if (!LI->isSimple())
            return false;
          Loads.push_back(LI);
        } else {
          // Other uses than load?
          return false;
        }
    } else {
      return false; // Not a load or a GEP.
    }

    // Now, see if it is safe to promote this load / loads of this GEP. Loading
    // is safe if Operands, or a prefix of Operands, is marked as safe.
    if (!prefixIn(Operands, SafeToUnconditionallyLoad))
      return false;

    // See if we are already promoting a load with these indices. If not, check
    // to make sure that we aren't promoting too many elements. If so, nothing
    // to do.
    if (ToPromote.find(Operands) == ToPromote.end()) {
      // MaxElements == 0 means "no limit".
      if (MaxElements > 0 && ToPromote.size() == MaxElements) {
        LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '"
                          << Arg->getName()
                          << "' because it would require adding more "
                          << "than " << MaxElements
                          << " arguments to the function.\n");
        // We limit aggregate promotion to only promoting up to a fixed number
        // of elements of the aggregate.
        return false;
      }
      ToPromote.insert(std::move(Operands));
    }
  }

  if (Loads.empty())
    return true; // No users, this is a dead argument.

  // Okay, now we know that the argument is only used by load instructions and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block. TranspBlocks is shared across loads, so blocks already
    // proven transparent are not re-scanned.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Checks if a type could have padding bytes.
/// Returns true only when every byte of the type's alloc size is provably
/// covered by data (recursively, for aggregates).
static bool isDenselyPacked(Type *type, const DataLayout &DL) {
  // There is no size information, so be conservative.
  if (!type->isSized())
    return false;

  // If the alloc size is not equal to the storage size, then there are padding
  // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
  if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
    return false;

  // Scalars (non-aggregates) that passed the size check have no padding.
  if (!isa<CompositeType>(type))
    return true;

  // For homogenous sequential types, check for padding within members.
  if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
    return isDenselyPacked(seqTy->getElementType(), DL);

  // Check for padding within and between elements of a struct.
  StructType *StructTy = cast<StructType>(type);
  const StructLayout *Layout = DL.getStructLayout(StructTy);
  uint64_t StartPos = 0;
  for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
    Type *ElTy = StructTy->getElementType(i);
    if (!isDenselyPacked(ElTy, DL))
      return false;
    // A gap between the running offset and this element's offset is padding.
    if (StartPos != Layout->getElementOffsetInBits(i))
      return false;
    StartPos += DL.getTypeAllocSizeInBits(ElTy);
  }

  return true;
}

/// Checks if the padding bytes of an argument could be accessed.
static bool canPaddingBeAccessed(Argument *arg) {
  assert(arg->hasByValAttr());

  // Track all the pointers to the argument to make sure they are not captured.
781 SmallPtrSet<Value *, 16> PtrValues; 782 PtrValues.insert(arg); 783 784 // Track all of the stores. 785 SmallVector<StoreInst *, 16> Stores; 786 787 // Scan through the uses recursively to make sure the pointer is always used 788 // sanely. 789 SmallVector<Value *, 16> WorkList; 790 WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); 791 while (!WorkList.empty()) { 792 Value *V = WorkList.back(); 793 WorkList.pop_back(); 794 if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) { 795 if (PtrValues.insert(V).second) 796 WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); 797 } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) { 798 Stores.push_back(Store); 799 } else if (!isa<LoadInst>(V)) { 800 return true; 801 } 802 } 803 804 // Check to make sure the pointers aren't captured 805 for (StoreInst *Store : Stores) 806 if (PtrValues.count(Store->getValueOperand())) 807 return true; 808 809 return false; 810 } 811 812 /// PromoteArguments - This method checks the specified function to see if there 813 /// are any promotable arguments and if it is safe to promote the function (for 814 /// example, all callers are direct). If safe to promote some arguments, it 815 /// calls the DoPromotion method. 816 static Function * 817 promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, 818 unsigned MaxElements, 819 Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> 820 ReplaceCallSite) { 821 // Don't perform argument promotion for naked functions; otherwise we can end 822 // up removing parameters that are seemingly 'not used' as they are referred 823 // to in the assembly. 824 if(F->hasFnAttribute(Attribute::Naked)) 825 return nullptr; 826 827 // Make sure that it is local to this module. 828 if (!F->hasLocalLinkage()) 829 return nullptr; 830 831 // Don't promote arguments for variadic functions. Adding, removing, or 832 // changing non-pack parameters can change the classification of pack 833 // parameters. 
  // Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallSite CS(U.getUser());
    // Must be a direct call.
    if (CS.getInstruction() == nullptr || !CS.isCallee(&U))
      return nullptr;

    // Can't change signature of musttail callee
    if (CS.isMustTailCall())
      return nullptr;

    // A call site inside F itself marks F self-recursive; recursive
    // aggregate types are then skipped during promotion below.
    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }

  // Can't change signature of musttail caller
  // FIXME: Support promoting whole chain of musttail functions
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();

  AAResults &AAR = AARGetter(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument *, 8> ArgsToPromote;
  SmallPtrSet<Argument *, 8> ByValArgsToTransform;
  for (Argument *PtrArg : PointerArgs) {
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      // Mirror the attribute swap on every (direct) call site so declaration
      // and calls stay consistent.
      for (Use &U : F->uses()) {
        CallSite CS(U.getUser());
        CS.removeParamAttr(ArgNo, Attribute::StructRet);
        CS.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed. This does
    // not apply to inalloca.
    bool isSafeToPromote =
        PtrArg->hasByValAttr() &&
        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        // Honor the expansion limit (MaxElements == 0 means unlimited).
        if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
          LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '"
                            << PtrArg->getName()
                            << "' because it would require adding more"
                            << " than " << MaxElements
                            << " arguments to the function.\n");
          continue;
        }

        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (const auto *EltTy : STy->elements()) {
          if (!EltTy->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow sroa to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        // A struct element that is a pointer to this same argument type makes
        // the type recursive: skip it to avoid infinite peeling.
        bool RecursiveType = false;
        for (const auto *EltTy : STy->elements()) {
          if (EltTy == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
                                MaxElements))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return nullptr;

  return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
}

// New-PM entry point: repeatedly promote arguments of functions in the SCC
// until a fixed point is reached.
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();

      FunctionAnalysisManager &FAM =
          AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
      // FIXME: This lambda must only be used with this function. We should
      // skip the lambda and just get the AA results directly.
      auto AARGetter = [&](Function &F) -> AAResults & {
        assert(&F == &OldF && "Called with an unexpected function!");
        return FAM.getResult<AAManager>(F);
      };

      Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      OldF.eraseFromParent();
    }

    Changed |= LocalChange;
  } while (LocalChange);

  // Conservatively report all analyses invalidated when anything changed.
  if (!Changed)
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}

namespace {

/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
/// Legacy-PM wrapper around the promotion logic above.
struct ArgPromotion : public CallGraphSCCPass {
  // Pass identification, replacement for typeid
  static char ID;

  explicit ArgPromotion(unsigned MaxElements = 3)
      : CallGraphSCCPass(ID), MaxElements(MaxElements) {
    initializeArgPromotionPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    getAAResultsAnalysisUsage(AU);
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool runOnSCC(CallGraphSCC &SCC) override;

private:
  using llvm::Pass::doInitialization;

  bool doInitialization(CallGraph &CG) override;

  /// The maximum number of elements to expand, or 0 for unlimited.
  unsigned MaxElements;
};

} // end anonymous namespace

char ArgPromotion::ID = 0;

INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
                      "Promote 'by reference' arguments to scalars", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
                    "Promote 'by reference' arguments to scalars", false, false)

Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
  return new ArgPromotion(MaxElements);
}

bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;

  // Get the callgraph information that we need to update to reflect our
  // changes.
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();

  LegacyAARGetter AARGetter(*this);

  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;
    // Attempt to promote arguments from all functions in this SCC.
    for (CallGraphNode *OldNode : SCC) {
      Function *OldF = OldNode->getFunction();
      if (!OldF)
        continue;

      // Callback invoked for each rewritten call site so the legacy call
      // graph's edges stay in sync with the new calls.
      auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) {
        Function *Caller = OldCS.getInstruction()->getParent()->getParent();
        CallGraphNode *NewCalleeNode =
            CG.getOrInsertFunction(NewCS.getCalledFunction());
        CallGraphNode *CallerNode = CG[Caller];
        CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
      };

      if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
                                            {ReplaceCallSite})) {
        LocalChange = true;

        // Update the call graph for the newly promoted function.
        CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
        NewNode->stealCalledFunctionsFrom(OldNode);
        // If nothing references the old node any more, remove the dead
        // function from the module entirely; otherwise externalize it so the
        // module stays valid while references remain.
        if (OldNode->getNumReferences() == 0)
          delete CG.removeFunctionFromModule(OldNode);
        else
          OldF->setLinkage(Function::ExternalLinkage);

        // And update the SCC we're iterating as well.
        SCC.ReplaceNode(OldNode, NewNode);
      }
    }
    // Remember that we changed something.
    Changed |= LocalChange;
  } while (LocalChange);

  return Changed;
}

// Defer to the base class; kept explicit because the private `using`
// declaration above hides the other Pass::doInitialization overloads.
bool ArgPromotion::doInitialization(CallGraph &CG) {
  return CallGraphSCCPass::doInitialization(CG);
}