1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass promotes "by reference" arguments to be "by value" arguments.  In
10 // practice, this means looking for internal functions that have pointer
11 // arguments.  If it can prove, through the use of alias analysis, that an
12 // argument is *only* loaded, then it can pass the value into the function
13 // instead of the address of the value.  This can cause recursive simplification
14 // of code and lead to the elimination of allocas (especially in C++ template
15 // code like the STL).
16 //
17 // This pass also handles aggregate arguments that are passed into a function,
18 // scalarizing them if the elements of the aggregate are only loaded.  Note that
19 // by default it refuses to scalarize aggregates which would require passing in
20 // more than three operands to the function, because passing thousands of
21 // operands for a large array or structure is unprofitable! This limit can be
22 // configured or disabled, however.
23 //
24 // Note that this transformation could also be done for arguments that are only
25 // stored to (returning the value instead), but does not currently.  This case
26 // would be best handled when and if LLVM begins supporting multiple return
27 // values from functions.
28 //
29 //===----------------------------------------------------------------------===//
30 
31 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
32 
33 #include "llvm/ADT/DepthFirstIterator.h"
34 #include "llvm/ADT/None.h"
35 #include "llvm/ADT/Optional.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/ScopeExit.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
41 #include "llvm/ADT/Twine.h"
42 #include "llvm/Analysis/AssumptionCache.h"
43 #include "llvm/Analysis/BasicAliasAnalysis.h"
44 #include "llvm/Analysis/CGSCCPassManager.h"
45 #include "llvm/Analysis/CallGraph.h"
46 #include "llvm/Analysis/CallGraphSCCPass.h"
47 #include "llvm/Analysis/LazyCallGraph.h"
48 #include "llvm/Analysis/Loads.h"
49 #include "llvm/Analysis/MemoryLocation.h"
50 #include "llvm/Analysis/TargetLibraryInfo.h"
51 #include "llvm/Analysis/TargetTransformInfo.h"
52 #include "llvm/Analysis/ValueTracking.h"
53 #include "llvm/IR/Argument.h"
54 #include "llvm/IR/Attributes.h"
55 #include "llvm/IR/BasicBlock.h"
56 #include "llvm/IR/CFG.h"
57 #include "llvm/IR/Constants.h"
58 #include "llvm/IR/DataLayout.h"
59 #include "llvm/IR/DerivedTypes.h"
60 #include "llvm/IR/Dominators.h"
61 #include "llvm/IR/Function.h"
62 #include "llvm/IR/IRBuilder.h"
63 #include "llvm/IR/InstrTypes.h"
64 #include "llvm/IR/Instruction.h"
65 #include "llvm/IR/Instructions.h"
66 #include "llvm/IR/Metadata.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/NoFolder.h"
69 #include "llvm/IR/PassManager.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/User.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/InitializePasses.h"
75 #include "llvm/Pass.h"
76 #include "llvm/Support/Casting.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Transforms/IPO.h"
80 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <cstdint>
84 #include <utility>
85 #include <vector>
86 
using namespace llvm;

#define DEBUG_TYPE "argpromotion"

// Pass-wide counters, reported when LLVM is run with -stats.
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
93 
namespace {

/// Describes one promotable piece of a pointer argument: the type that is
/// loaded (or, for byval arguments, stored) at a particular byte offset.
struct ArgPart {
  Type *Ty;
  // The strictest alignment seen across all accesses of this part.
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};

// Pairs a byte offset into the original pointer argument with the part
// accessed at that offset.
using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace
107 
/// Compute a pointer to \p Offset bytes past \p Ptr, returned with pointee
/// type \p ResElemTy. For typed (non-opaque) pointers this first tries to
/// express the offset with structured GEP indices so the emitted IR looks
/// natural; otherwise it falls back to an i8 GEP plus bitcasts.
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise
  // fall back to an i8 GEP to a specific offset.
  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
  if (!Ptr->getType()->isOpaquePointerTy()) {
    Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
    // Fast path: no offset and the pointee type already matches.
    if (OrigOffset == 0 && OrigElemTy == ResElemTy)
      return Ptr;

    if (OrigElemTy->isSized()) {
      // Let DataLayout decompose the byte offset into GEP indices; any
      // remainder it cannot express is left in TmpOffset.
      APInt TmpOffset = OrigOffset;
      Type *TmpTy = OrigElemTy;
      SmallVector<APInt> IntIndices =
          DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
      if (TmpOffset == 0) {
        // Try to add trailing zero indices to reach the right type.
        while (TmpTy != ResElemTy) {
          Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
          if (!NextTy)
            break;

          // Struct member indices must be i32; other indices use the pointer
          // index width.
          IntIndices.push_back(APInt::getZero(
              isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
          TmpTy = NextTy;
        }

        SmallVector<Value *> Indices;
        for (const APInt &Index : IntIndices)
          Indices.push_back(IRB.getInt(Index));

        // Only emit the structured GEP when it is useful: either it consumes
        // a non-zero offset, or the zero-index walk reached the target type.
        if (OrigOffset != 0 || TmpTy == ResElemTy) {
          Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
          return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
        }
      }
    }
  }

  // Fallback: byte-address through an i8 pointer, then cast to the result.
  if (OrigOffset != 0) {
    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
  }
  return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
}
154 
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
///
/// \param F the function whose arguments are promoted; it is destroyed
///        (body spliced out, then left as a declaration awaiting deletion).
/// \param DTGetter / ACGetter lazily provide the DominatorTree and
///        AssumptionCache needed by mem2reg on the new function.
/// \param ArgsToPromote maps each promotable argument to its sorted list of
///        (offset, part) pairs, as computed by findArgParts.
/// \param ReplaceCallSite optional callback so the legacy call-graph pass can
///        observe each rewritten call site.
/// \returns the newly created function that replaces \p F.
static Function *doPromotion(
    Function *F, function_ref<DominatorTree &(Function &F)> DTGetter,
    function_ref<AssumptionCache *(Function &F)> ACGetter,
    const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> &ArgsToPromote,
    Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
        ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;
    } else {
      // One new scalar parameter per promoted part, in offset order.
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  // Track the widest vector among the new parameters so the
  // "min-legal-vector-width" attribute can be kept accurate below.
  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  // Each iteration erases one call site, shrinking F's use list to empty.
  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        // Promoted argument: emit one load per part at the call site. Dead
        // arguments (use_empty) are simply dropped.
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          // Transfer metadata from a guaranteed-executed access in the callee;
          // it is valid here because the load now executes unconditionally.
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                              LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_align, LLVMContext::MD_noundef});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    // Preserve operand bundles (e.g. "deopt") on the rewritten call.
    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CB, *NewCS);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // We will collect all the new created allocas to promote them into registers
  // after the following loop
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, what lets the old code (the
    // store instructions if they are allowed especially) a chance to work as
    // before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    // Insert allocas and stores at the very top of the (new) entry block.
    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, so parts from ArgsToPromote
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      // Consume the next new argument; parts were emitted in this same order
      // when the prototype was built above.
      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to
      OffsetToAlloca.insert({Offset, NewAlloca});
    }

    // Given a pointer operand rooted at Arg, recover the alloca created for
    // the constant offset it accesses.
    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Cleanup the code from the dead instructions: GEPs and BitCasts in between
    // the original argument and its users: loads and stores. Retarget every
    // user to the new created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      // findArgParts already rejected any other user kind.
      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // And we are able to call the `promoteMemoryToRegister()` function.
    // Our earlier checks have ensured that PromoteMemToReg() will
    // succeed.
    PromoteMemToReg(Allocas, DTGetter(*NF), ACGetter(*NF));
  }

  return NF;
}
437 
438 /// Return true if we can prove that all callees pass in a valid pointer for the
439 /// specified function argument.
440 static bool allCallersPassValidPointerForArgument(Argument *Arg,
441                                                   Align NeededAlign,
442                                                   uint64_t NeededDerefBytes) {
443   Function *Callee = Arg->getParent();
444   const DataLayout &DL = Callee->getParent()->getDataLayout();
445   APInt Bytes(64, NeededDerefBytes);
446 
447   // Check if the argument itself is marked dereferenceable and aligned.
448   if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
449     return true;
450 
451   // Look at all call sites of the function.  At this point we know we only have
452   // direct callees.
453   return all_of(Callee->users(), [&](User *U) {
454     CallBase &CB = cast<CallBase>(*U);
455     return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
456                                               NeededAlign, Bytes, DL);
457   });
458 }
459 
/// Determine that this argument is safe to promote, and find the argument
/// parts it can be promoted into.
///
/// \param Arg the pointer argument under consideration.
/// \param DL the module data layout (used for offsets and store sizes).
/// \param AAR alias analysis, used to prove loads are not clobbered.
/// \param MaxElements cap on the number of promoted parts (0 = unlimited).
/// \param IsRecursive true if the function is part of a recursive SCC, in
///        which case promoting pointer-typed parts is refused.
/// \param ArgPartsVec [out] the promotable parts, sorted by offset.
/// \returns true if the argument can be promoted (possibly with no parts,
///          meaning it is dead).
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway, in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  // Dereferenceability/alignment that must hold at every call site for parts
  // that are not guaranteed to execute; accumulated by HandleEndUser below.
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // And if this is a byval argument we also allow to have store instructions.
  // Only handle in such way arguments with specified alignment;
  // if it's unspecified, the actual alignment of the argument is
  // target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns None if this load or store is not based on the argument. Return
  // true if we can promote the instruction, false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> Optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return None;

    // Offsets must fit the int64_t used as the ArgParts key.
    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements of
    // the aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    Optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    // Stop scanning once an instruction may not transfer execution onward;
    // later accesses are no longer guaranteed to execute.
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      // Only constant-offset GEPs can be mapped to a fixed part.
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      // Dereferencing *HandleEndUser is safe here: the pointer operand is
      // rooted at Arg by construction, so it never returns None.
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
      // Only stores TO the argument is allowed, all the other stores are
      // unknown users
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove a required deref / aligned requirement.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec,
       [](const auto &A, const auto &B) { return A.first < B.first; });

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }

  // If store instructions are allowed, the path from the entry of the function
  // to each load may be not free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}
700 
701 bool ArgumentPromotionPass::isDenselyPacked(Type *Ty, const DataLayout &DL) {
702   // There is no size information, so be conservative.
703   if (!Ty->isSized())
704     return false;
705 
706   // If the alloc size is not equal to the storage size, then there are padding
707   // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
708   if (DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty))
709     return false;
710 
711   // FIXME: This isn't the right way to check for padding in vectors with
712   // non-byte-size elements.
713   if (VectorType *SeqTy = dyn_cast<VectorType>(Ty))
714     return isDenselyPacked(SeqTy->getElementType(), DL);
715 
716   // For array types, check for padding within members.
717   if (ArrayType *SeqTy = dyn_cast<ArrayType>(Ty))
718     return isDenselyPacked(SeqTy->getElementType(), DL);
719 
720   if (!isa<StructType>(Ty))
721     return true;
722 
723   // Check for padding within and between elements of a struct.
724   StructType *StructTy = cast<StructType>(Ty);
725   const StructLayout *Layout = DL.getStructLayout(StructTy);
726   uint64_t StartPos = 0;
727   for (unsigned I = 0, E = StructTy->getNumElements(); I < E; ++I) {
728     Type *ElTy = StructTy->getElementType(I);
729     if (!isDenselyPacked(ElTy, DL))
730       return false;
731     if (StartPos != Layout->getElementOffsetInBits(I))
732       return false;
733     StartPos += DL.getTypeAllocSizeInBits(ElTy);
734   }
735 
736   return true;
737 }
738 
739 /// Check if callers and callee agree on how promoted arguments would be
740 /// passed.
741 static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
742                                   const TargetTransformInfo &TTI) {
743   return all_of(F.uses(), [&](const Use &U) {
744     CallBase *CB = dyn_cast<CallBase>(U.getUser());
745     if (!CB)
746       return false;
747 
748     const Function *Caller = CB->getCaller();
749     const Function *Callee = CB->getCalledFunction();
750     return TTI.areTypesABICompatible(Caller, Callee, Types);
751   });
752 }
753 
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct).  If safe to promote some arguments, it
/// calls the DoPromotion method.
///
/// \returns the new (promoted) function, or nullptr if nothing was promoted.
/// NOTE: even when this returns nullptr, the sret -> noalias attribute
/// rewrite below may already have modified \p F and its call sites.
static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
                 function_ref<DominatorTree &(Function &F)> DTGetter,
                 function_ref<AssumptionCache *(Function &F)> ACGetter,
                 unsigned MaxElements,
                 Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
                     ReplaceCallSite,
                 const TargetTransformInfo &TTI, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module: changing the signature is only
  // safe when every caller is visible and can be rewritten.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation may
  // not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments!  If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers.  We can't
  // transform functions that have indirect callers.  Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call: the use has to be the callee operand (not an
    // argument or an escaping address), and the call's type must match F's
    // exactly (rules out bitcast/mismatched-prototype calls).
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change signature of musttail callee: musttail requires the
    // caller's and callee's prototypes to stay in lockstep.
    if (CB->isMustTailCall())
      return nullptr;

    // A direct self-call makes this function recursive even if its SCC has
    // only one node; findArgParts below must know this.
    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change signature of musttail caller
  // FIXME: Support promoting whole chain of musttail functions
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();

  AAResults &AAR = AARGetter(*F);

  // Check to see which arguments are promotable.  If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  for (Argument *PtrArg : PointerArgs) {
    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    // NOTE(review): this rewrite of F and all its call sites happens
    // unconditionally, even if the argument is not promoted below.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      // All uses are direct calls (verified above), so the cast is safe.
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // If we can promote the pointer to its value: collect the (offset, type)
    // parts that loads of this argument would need.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      // Only promote if every call site's ABI agrees on how the scalar
      // parts would be passed.
      if (areTypesABICompatible(Types, *F, TTI)) {
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  return doPromotion(F, DTGetter, ACGetter, ArgsToPromote, ReplaceCallSite);
}
862 
863 PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
864                                              CGSCCAnalysisManager &AM,
865                                              LazyCallGraph &CG,
866                                              CGSCCUpdateResult &UR) {
867   bool Changed = false, LocalChange;
868 
869   // Iterate until we stop promoting from this SCC.
870   do {
871     LocalChange = false;
872 
873     FunctionAnalysisManager &FAM =
874         AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
875 
876     bool IsRecursive = C.size() > 1;
877     for (LazyCallGraph::Node &N : C) {
878       Function &OldF = N.getFunction();
879 
880       // FIXME: This lambda must only be used with this function. We should
881       // skip the lambda and just get the AA results directly.
882       auto AARGetter = [&](Function &F) -> AAResults & {
883         assert(&F == &OldF && "Called with an unexpected function!");
884         return FAM.getResult<AAManager>(F);
885       };
886 
887       auto DTGetter = [&](Function &F) -> DominatorTree & {
888         assert(&F != &OldF && "Called with the obsolete function!");
889         return FAM.getResult<DominatorTreeAnalysis>(F);
890       };
891 
892       auto ACGetter = [&](Function &F) -> AssumptionCache * {
893         assert(&F != &OldF && "Called with the obsolete function!");
894         return &FAM.getResult<AssumptionAnalysis>(F);
895       };
896 
897       const auto &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
898       Function *NewF = promoteArguments(&OldF, AARGetter, DTGetter, ACGetter,
899                                         MaxElements, None, TTI, IsRecursive);
900       if (!NewF)
901         continue;
902       LocalChange = true;
903 
904       // Directly substitute the functions in the call graph. Note that this
905       // requires the old function to be completely dead and completely
906       // replaced by the new function. It does no call graph updates, it merely
907       // swaps out the particular function mapped to a particular node in the
908       // graph.
909       C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
910       FAM.clear(OldF, OldF.getName());
911       OldF.eraseFromParent();
912 
913       PreservedAnalyses FuncPA;
914       FuncPA.preserveSet<CFGAnalyses>();
915       for (auto *U : NewF->users()) {
916         auto *UserF = cast<CallBase>(U)->getFunction();
917         FAM.invalidate(*UserF, FuncPA);
918       }
919     }
920 
921     Changed |= LocalChange;
922   } while (LocalChange);
923 
924   if (!Changed)
925     return PreservedAnalyses::all();
926 
927   PreservedAnalyses PA;
928   // We've cleared out analyses for deleted functions.
929   PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
930   // We've manually invalidated analyses for functions we've modified.
931   PA.preserveSet<AllAnalysesOn<Function>>();
932   return PA;
933 }
934