1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visit functions for load, store and alloca.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/ConstantRange.h"
20 #include "llvm/IR/DataLayout.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 #include "llvm/Transforms/Utils/Local.h"
28 using namespace llvm;
29 using namespace PatternMatch;
30 
31 #define DEBUG_TYPE "instcombine"
32 
33 STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
34 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
35 
36 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
37 /// some part of a constant global variable.  This intentionally only accepts
38 /// constant expressions because we can't rewrite arbitrary instructions.
39 static bool pointsToConstantGlobal(Value *V) {
40   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
41     return GV->isConstant();
42 
43   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
44     if (CE->getOpcode() == Instruction::BitCast ||
45         CE->getOpcode() == Instruction::AddrSpaceCast ||
46         CE->getOpcode() == Instruction::GetElementPtr)
47       return pointsToConstantGlobal(CE->getOperand(0));
48   }
49   return false;
50 }
51 
52 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
53 /// pointer to an alloca.  Ignore any reads of the pointer, return false if we
54 /// see any stores or other unknown uses.  If we see pointer arithmetic, keep
55 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove whose destination is an unoffset
/// pointer to the alloca, and whose source is a pointer to a constant global,
/// we can optimize this.
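///
/// For example (an illustrative IR sketch, not taken from a particular test):
///   @G = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
///   %A = alloca [4 x i32]
///   ; ... a memcpy of 16 bytes from @G into %A ...
///   ; ... loads from %A ...
/// Every use of %A other than the memcpy is a read, so the caller may replace
/// uses of %A with @G and delete the copy.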
59 static bool
60 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
61                                SmallVectorImpl<Instruction *> &ToDelete) {
62   // We track lifetime intrinsics as we encounter them.  If we decide to go
63   // ahead and replace the value with the global, this lets the caller quickly
64   // eliminate the markers.
65 
66   SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
67   ValuesToInspect.emplace_back(V, false);
68   while (!ValuesToInspect.empty()) {
69     auto ValuePair = ValuesToInspect.pop_back_val();
70     const bool IsOffset = ValuePair.second;
71     for (auto &U : ValuePair.first->uses()) {
72       auto *I = cast<Instruction>(U.getUser());
73 
74       if (auto *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads; they are always ok.
76         if (!LI->isSimple()) return false;
77         continue;
78       }
79 
80       if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
81         // If uses of the bitcast are ok, we are ok.
82         ValuesToInspect.emplace_back(I, IsOffset);
83         continue;
84       }
85       if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer;
        // otherwise, it does.
88         ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
89         continue;
90       }
91 
92       if (auto CS = CallSite(I)) {
93         // If this is the function being called then we treat it like a load and
94         // ignore it.
95         if (CS.isCallee(&U))
96           continue;
97 
98         unsigned DataOpNo = CS.getDataOperandNo(&U);
99         bool IsArgOperand = CS.isArgOperand(&U);
100 
101         // Inalloca arguments are clobbered by the call.
102         if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
103           return false;
104 
105         // If this is a readonly/readnone call site, then we know it is just a
106         // load (but one that potentially returns the value itself), so we can
107         // ignore it if we know that the value isn't captured.
108         if (CS.onlyReadsMemory() &&
109             (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
110           continue;
111 
112         // If this is being passed as a byval argument, the caller is making a
113         // copy, so it is only a read of the alloca.
114         if (IsArgOperand && CS.isByValArgument(DataOpNo))
115           continue;
116       }
117 
118       // Lifetime intrinsics can be handled by the caller.
119       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
120         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
121             II->getIntrinsicID() == Intrinsic::lifetime_end) {
122           assert(II->use_empty() && "Lifetime markers have no result to use!");
123           ToDelete.push_back(II);
124           continue;
125         }
126       }
127 
      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
130       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
131       if (!MI)
132         return false;
133 
      // If the transfer is using the alloca as its source, then it is only a
      // read of the alloca; ignore it (unless the transfer is volatile).
136       if (U.getOperandNo() == 1) {
137         if (MI->isVolatile()) return false;
138         continue;
139       }
140 
      // If we have already seen a copy, reject the second one.
142       if (TheCopy) return false;
143 
144       // If the pointer has been offset from the start of the alloca, we can't
145       // safely handle this.
146       if (IsOffset) return false;
147 
148       // If the memintrinsic isn't using the alloca as the dest, reject it.
149       if (U.getOperandNo() != 0) return false;
150 
151       // If the source of the memcpy/move is not a constant global, reject it.
152       if (!pointsToConstantGlobal(MI->getSource()))
153         return false;
154 
155       // Otherwise, the transform is safe.  Remember the copy instruction.
156       TheCopy = MI;
157     }
158   }
159   return true;
160 }
161 
162 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
163 /// modified by a copy from a constant global.  If we can prove this, we can
164 /// replace any uses of the alloca with uses of the global directly.
165 static MemTransferInst *
166 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
167                                SmallVectorImpl<Instruction *> &ToDelete) {
168   MemTransferInst *TheCopy = nullptr;
169   if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
170     return TheCopy;
171   return nullptr;
172 }
173 
/// Returns true if V is dereferenceable for the size of the alloca.
175 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
176                                            const DataLayout &DL) {
177   if (AI->isArrayAllocation())
178     return false;
179   uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
180   if (!AllocaSize)
181     return false;
182   return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
183                                             APInt(64, AllocaSize), DL);
184 }
185 
186 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
187   // Check for array size of 1 (scalar allocation).
188   if (!AI.isArrayAllocation()) {
189     // i32 1 is the canonical array size for scalar allocations.
190     if (AI.getArraySize()->getType()->isIntegerTy(32))
191       return nullptr;
192 
193     // Canonicalize it.
194     Value *V = IC.Builder.getInt32(1);
195     AI.setOperand(0, V);
196     return &AI;
197   }
198 
199   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
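  // For example (illustrative): "%p = alloca i32, i32 4" becomes
  //   %p1 = alloca [4 x i32]
  //   %p1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %p1, i64 0, i64 0
  // (assuming a 64-bit index type), and all uses of %p are then rewritten to
  // use %p1.sub.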
200   if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
201     Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
202     AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
203     New->setAlignment(AI.getAlignment());
204 
    // Scan to the end of the allocation instructions to skip over a block of
    // allocas if possible; also skip interleaved debug info.
    //
208     BasicBlock::iterator It(New);
209     while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
210       ++It;
211 
    // Now that It points to the first non-allocation instruction in the block,
    // insert our getelementptr instruction...
214     //
215     Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
216     Value *NullIdx = Constant::getNullValue(IdxTy);
217     Value *Idx[2] = {NullIdx, NullIdx};
218     Instruction *GEP =
219         GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
220     IC.InsertNewInstBefore(GEP, *It);
221 
222     // Now make everything use the getelementptr instead of the original
223     // allocation.
224     return IC.replaceInstUsesWith(AI, GEP);
225   }
226 
227   if (isa<UndefValue>(AI.getArraySize()))
228     return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
229 
230   // Ensure that the alloca array size argument has type intptr_t, so that
231   // any casting is exposed early.
232   Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
233   if (AI.getArraySize()->getType() != IntPtrTy) {
234     Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
235     AI.setOperand(0, V);
236     return &AI;
237   }
238 
239   return nullptr;
240 }
241 
242 namespace {
// If I and V are pointers in different address spaces, it is not allowed to
// use replaceAllUsesWith since I and V have different types. A
// non-target-specific transformation should not use addrspacecast on V since
// the two address spaces may be disjoint depending on the target.
247 //
248 // This class chases down uses of the old pointer until reaching the load
249 // instructions, then replaces the old pointer in the load instructions with
250 // the new pointer. If during the chasing it sees bitcast or GEP, it will
251 // create new bitcast or GEP with the new pointer and use them in the load
252 // instruction.
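//
// For example (illustrative): if an alloca %a in address space 0 is known to
// be a copy of a constant global @g in address space 4, a use chain such as
//   %b = bitcast i8* %a to i32*
//   %v = load i32, i32* %b
// is rebuilt as
//   %b1 = bitcast i8 addrspace(4)* @g to i32 addrspace(4)*
//   %v  = load i32, i32 addrspace(4)* %b1
// without ever introducing an addrspacecast.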
253 class PointerReplacer {
254 public:
255   PointerReplacer(InstCombiner &IC) : IC(IC) {}
256   void replacePointer(Instruction &I, Value *V);
257 
258 private:
259   void findLoadAndReplace(Instruction &I);
260   void replace(Instruction *I);
261   Value *getReplacement(Value *I);
262 
263   SmallVector<Instruction *, 4> Path;
264   MapVector<Value *, Value *> WorkMap;
265   InstCombiner &IC;
266 };
267 } // end anonymous namespace
268 
269 void PointerReplacer::findLoadAndReplace(Instruction &I) {
270   for (auto U : I.users()) {
271     auto *Inst = dyn_cast<Instruction>(&*U);
272     if (!Inst)
273       return;
274     DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
275     if (isa<LoadInst>(Inst)) {
276       for (auto P : Path)
277         replace(P);
278       replace(Inst);
279     } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
280       Path.push_back(Inst);
281       findLoadAndReplace(*Inst);
282       Path.pop_back();
283     } else {
284       return;
285     }
286   }
287 }
288 
289 Value *PointerReplacer::getReplacement(Value *V) {
290   auto Loc = WorkMap.find(V);
291   if (Loc != WorkMap.end())
292     return Loc->second;
293   return nullptr;
294 }
295 
296 void PointerReplacer::replace(Instruction *I) {
297   if (getReplacement(I))
298     return;
299 
300   if (auto *LT = dyn_cast<LoadInst>(I)) {
301     auto *V = getReplacement(LT->getPointerOperand());
302     assert(V && "Operand not replaced");
303     auto *NewI = new LoadInst(V);
304     NewI->takeName(LT);
305     IC.InsertNewInstWith(NewI, *LT);
306     IC.replaceInstUsesWith(*LT, NewI);
307     WorkMap[LT] = NewI;
308   } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
309     auto *V = getReplacement(GEP->getPointerOperand());
310     assert(V && "Operand not replaced");
311     SmallVector<Value *, 8> Indices;
312     Indices.append(GEP->idx_begin(), GEP->idx_end());
313     auto *NewI = GetElementPtrInst::Create(
314         V->getType()->getPointerElementType(), V, Indices);
315     IC.InsertNewInstWith(NewI, *GEP);
316     NewI->takeName(GEP);
317     WorkMap[GEP] = NewI;
318   } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
319     auto *V = getReplacement(BC->getOperand(0));
320     assert(V && "Operand not replaced");
321     auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
322                                   V->getType()->getPointerAddressSpace());
323     auto *NewI = new BitCastInst(V, NewT);
324     IC.InsertNewInstWith(NewI, *BC);
325     NewI->takeName(BC);
326     WorkMap[BC] = NewI;
327   } else {
328     llvm_unreachable("should never reach here");
329   }
330 }
331 
332 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
333 #ifndef NDEBUG
334   auto *PT = cast<PointerType>(I.getType());
335   auto *NT = cast<PointerType>(V->getType());
336   assert(PT != NT && PT->getElementType() == NT->getElementType() &&
337          "Invalid usage");
338 #endif
339   WorkMap[&I] = V;
340   findLoadAndReplace(I);
341 }
342 
343 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
344   if (auto *I = simplifyAllocaArraySize(*this, AI))
345     return I;
346 
347   if (AI.getAllocatedType()->isSized()) {
348     // If the alignment is 0 (unspecified), assign it the preferred alignment.
349     if (AI.getAlignment() == 0)
350       AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
351 
    // Move all allocas of zero-byte objects to the entry block and merge them
    // together.  Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero-byte allocation.
355     if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
356       // For a zero sized alloca there is no point in doing an array allocation.
357       // This is helpful if the array size is a complicated expression not used
358       // elsewhere.
359       if (AI.isArrayAllocation()) {
360         AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
361         return &AI;
362       }
363 
364       // Get the first instruction in the entry block.
365       BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
366       Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
367       if (FirstInst != &AI) {
368         // If the entry block doesn't start with a zero-size alloca then move
369         // this one to the start of the entry block.  There is no problem with
370         // dominance as the array size was forced to a constant earlier already.
371         AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
372         if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
373             DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
374           AI.moveBefore(FirstInst);
375           return &AI;
376         }
377 
378         // If the alignment of the entry block alloca is 0 (unspecified),
379         // assign it the preferred alignment.
380         if (EntryAI->getAlignment() == 0)
381           EntryAI->setAlignment(
382               DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
383         // Replace this zero-sized alloca with the one at the start of the entry
384         // block after ensuring that the address will be aligned enough for both
385         // types.
386         unsigned MaxAlign = std::max(EntryAI->getAlignment(),
387                                      AI.getAlignment());
388         EntryAI->setAlignment(MaxAlign);
389         if (AI.getType() != EntryAI->getType())
390           return new BitCastInst(EntryAI, AI.getType());
391         return replaceInstUsesWith(AI, EntryAI);
392       }
393     }
394   }
395 
396   if (AI.getAlignment()) {
397     // Check to see if this allocation is only modified by a memcpy/memmove from
398     // a constant global whose alignment is equal to or exceeds that of the
399     // allocation.  If this is the case, we can change all users to use
400     // the constant global instead.  This is commonly produced by the CFE by
401     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
402     // is only subsequently read.
403     SmallVector<Instruction *, 4> ToDelete;
404     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
405       unsigned SourceAlign = getOrEnforceKnownAlignment(
406           Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
407       if (AI.getAlignment() <= SourceAlign &&
408           isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
409         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
410         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
411         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
412           eraseInstFromFunction(*ToDelete[i]);
413         Constant *TheSrc = cast<Constant>(Copy->getSource());
414         auto *SrcTy = TheSrc->getType();
415         auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
416                                         SrcTy->getPointerAddressSpace());
417         Constant *Cast =
418             ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
419         if (AI.getType()->getPointerAddressSpace() ==
420             SrcTy->getPointerAddressSpace()) {
421           Instruction *NewI = replaceInstUsesWith(AI, Cast);
422           eraseInstFromFunction(*Copy);
423           ++NumGlobalCopies;
424           return NewI;
425         } else {
426           PointerReplacer PtrReplacer(*this);
427           PtrReplacer.replacePointer(AI, Cast);
428           ++NumGlobalCopies;
429         }
430       }
431     }
432   }
433 
434   // At last, use the generic allocation site handler to aggressively remove
435   // unused allocas.
436   return visitAllocSite(AI);
437 }
438 
// Are we allowed to form an atomic load or store of this type?
440 static bool isSupportedAtomicType(Type *Ty) {
441   return Ty->isIntegerTy() || Ty->isPointerTy() || Ty->isFloatingPointTy();
442 }
443 
444 /// \brief Helper to combine a load to a new type.
445 ///
446 /// This just does the work of combining a load to a new type. It handles
447 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
448 /// loaded *value* type. This will convert it to a pointer, cast the operand to
449 /// that pointer type, load it, etc.
450 ///
451 /// Note that this will create all of the instructions with whatever insert
452 /// point the \c InstCombiner currently is using.
453 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
454                                       const Twine &Suffix = "") {
455   assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
456          "can't fold an atomic load to requested type");
457 
458   Value *Ptr = LI.getPointerOperand();
459   unsigned AS = LI.getPointerAddressSpace();
460   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
461   LI.getAllMetadata(MD);
462 
463   LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
464       IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
465       LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
466   NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
467   MDBuilder MDB(NewLoad->getContext());
468   for (const auto &MDPair : MD) {
469     unsigned ID = MDPair.first;
470     MDNode *N = MDPair.second;
471     // Note, essentially every kind of metadata should be preserved here! This
472     // routine is supposed to clone a load instruction changing *only its type*.
473     // The only metadata it makes sense to drop is metadata which is invalidated
474     // when the pointer type changes. This should essentially never be the case
475     // in LLVM, but we explicitly switch over only known metadata to be
476     // conservatively correct. If you are adding metadata to LLVM which pertains
477     // to loads, you almost certainly want to add it here.
478     switch (ID) {
479     case LLVMContext::MD_dbg:
480     case LLVMContext::MD_tbaa:
481     case LLVMContext::MD_prof:
482     case LLVMContext::MD_fpmath:
483     case LLVMContext::MD_tbaa_struct:
484     case LLVMContext::MD_invariant_load:
485     case LLVMContext::MD_alias_scope:
486     case LLVMContext::MD_noalias:
487     case LLVMContext::MD_nontemporal:
488     case LLVMContext::MD_mem_parallel_loop_access:
489       // All of these directly apply.
490       NewLoad->setMetadata(ID, N);
491       break;
492 
493     case LLVMContext::MD_nonnull:
494       copyNonnullMetadata(LI, N, *NewLoad);
495       break;
496     case LLVMContext::MD_align:
497     case LLVMContext::MD_dereferenceable:
498     case LLVMContext::MD_dereferenceable_or_null:
499       // These only directly apply if the new type is also a pointer.
500       if (NewTy->isPointerTy())
501         NewLoad->setMetadata(ID, N);
502       break;
503     case LLVMContext::MD_range:
504       copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
505       break;
506     }
507   }
508   return NewLoad;
509 }
510 
511 /// \brief Combine a store to a new type.
512 ///
513 /// Returns the newly created store instruction.
514 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
515   assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
516          "can't fold an atomic store of requested type");
517 
518   Value *Ptr = SI.getPointerOperand();
519   unsigned AS = SI.getPointerAddressSpace();
520   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
521   SI.getAllMetadata(MD);
522 
523   StoreInst *NewStore = IC.Builder.CreateAlignedStore(
524       V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
525       SI.getAlignment(), SI.isVolatile());
526   NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
527   for (const auto &MDPair : MD) {
528     unsigned ID = MDPair.first;
529     MDNode *N = MDPair.second;
530     // Note, essentially every kind of metadata should be preserved here! This
531     // routine is supposed to clone a store instruction changing *only its
532     // type*. The only metadata it makes sense to drop is metadata which is
533     // invalidated when the pointer type changes. This should essentially
534     // never be the case in LLVM, but we explicitly switch over only known
535     // metadata to be conservatively correct. If you are adding metadata to
536     // LLVM which pertains to stores, you almost certainly want to add it
537     // here.
538     switch (ID) {
539     case LLVMContext::MD_dbg:
540     case LLVMContext::MD_tbaa:
541     case LLVMContext::MD_prof:
542     case LLVMContext::MD_fpmath:
543     case LLVMContext::MD_tbaa_struct:
544     case LLVMContext::MD_alias_scope:
545     case LLVMContext::MD_noalias:
546     case LLVMContext::MD_nontemporal:
547     case LLVMContext::MD_mem_parallel_loop_access:
548       // All of these directly apply.
549       NewStore->setMetadata(ID, N);
550       break;
551 
552     case LLVMContext::MD_invariant_load:
553     case LLVMContext::MD_nonnull:
554     case LLVMContext::MD_range:
555     case LLVMContext::MD_align:
556     case LLVMContext::MD_dereferenceable:
557     case LLVMContext::MD_dereferenceable_or_null:
558       // These don't apply for stores.
559       break;
560     }
561   }
562 
563   return NewStore;
564 }
565 
/// Returns true if the instruction represents a minmax pattern like:
567 ///   select ((cmp load V1, load V2), V1, V2).
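/// For example (illustrative IR):
///   %l1  = load i32, i32* %p1
///   %l2  = load i32, i32* %p2
///   %cmp = icmp slt i32 %l1, %l2
///   %sel = select i1 %cmp, i32* %p1, i32* %p2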
568 static bool isMinMaxWithLoads(Value *V) {
569   assert(V->getType()->isPointerTy() && "Expected pointer type.");
570   // Ignore possible ty* to ixx* bitcast.
571   V = peekThroughBitcast(V);
572   // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
573   // pattern.
574   CmpInst::Predicate Pred;
575   Instruction *L1;
576   Instruction *L2;
577   Value *LHS;
578   Value *RHS;
579   if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
580                          m_Value(LHS), m_Value(RHS))))
581     return false;
582   return (match(L1, m_Load(m_Specific(LHS))) &&
583           match(L2, m_Load(m_Specific(RHS)))) ||
584          (match(L1, m_Load(m_Specific(RHS))) &&
585           match(L2, m_Load(m_Specific(LHS))));
586 }
587 
588 /// \brief Combine loads to match the type of their uses' value after looking
589 /// through intervening bitcasts.
590 ///
591 /// The core idea here is that if the result of a load is used in an operation,
592 /// we should load the type most conducive to that operation. For example, when
593 /// loading an integer and converting that immediately to a pointer, we should
594 /// instead directly load a pointer.
595 ///
596 /// However, this routine must never change the width of a load or the number of
597 /// loads as that would introduce a semantic change. This combine is expected to
598 /// be a semantic no-op which just allows loads to more closely model the types
599 /// of their consuming operations.
600 ///
601 /// Currently, we also refuse to change the precise type used for an atomic load
602 /// or a volatile load. This is debatable, and might be reasonable to change
603 /// later. However, it is risky in case some backend or other part of LLVM is
604 /// relying on the exact type loaded to select appropriate atomic operations.
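///
/// For example (illustrative, assuming 64-bit pointers):
///   %x = load i64, i64* %p
///   %q = inttoptr i64 %x to i8*
/// is canonicalized to
///   %pp = bitcast i64* %p to i8**
///   %q  = load i8*, i8** %pp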
605 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
606   // FIXME: We could probably with some care handle both volatile and ordered
607   // atomic loads here but it isn't clear that this is important.
608   if (!LI.isUnordered())
609     return nullptr;
610 
611   if (LI.use_empty())
612     return nullptr;
613 
614   // swifterror values can't be bitcasted.
615   if (LI.getPointerOperand()->isSwiftError())
616     return nullptr;
617 
618   Type *Ty = LI.getType();
619   const DataLayout &DL = IC.getDataLayout();
620 
621   // Try to canonicalize loads which are only ever stored to operate over
622   // integers instead of any other type. We only do this when the loaded type
623   // is sized and has a size exactly the same as its store size and the store
624   // size is a legal integer type.
  // Do not perform this canonicalization if the minmax pattern is found (to
  // avoid an infinite loop).
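  // For example (illustrative, assuming i32 is a legal integer type):
  //   %v = load float, float* %p
  //   store float %v, float* %q
  // becomes
  //   %pc = bitcast float* %p to i32*
  //   %v1 = load i32, i32* %pc
  //   %qc = bitcast float* %q to i32*
  //   store i32 %v1, i32* %qc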
627   if (!Ty->isIntegerTy() && Ty->isSized() &&
628       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
629       DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
630       !DL.isNonIntegralPointerType(Ty) &&
631       !isMinMaxWithLoads(
632           peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
633     if (all_of(LI.users(), [&LI](User *U) {
634           auto *SI = dyn_cast<StoreInst>(U);
635           return SI && SI->getPointerOperand() != &LI &&
636                  !SI->getPointerOperand()->isSwiftError();
637         })) {
638       LoadInst *NewLoad = combineLoadToNewType(
639           IC, LI,
640           Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
641       // Replace all the stores with stores of the newly loaded value.
642       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
643         auto *SI = cast<StoreInst>(*UI++);
644         IC.Builder.SetInsertPoint(SI);
645         combineStoreToNewValue(IC, *SI, NewLoad);
646         IC.eraseInstFromFunction(*SI);
647       }
648       assert(LI.use_empty() && "Failed to remove all users of the load!");
649       // Return the old load so the combiner can delete it safely.
650       return &LI;
651     }
652   }
653 
654   // Fold away bit casts of the loaded value by loading the desired type.
655   // We can do this for BitCastInsts as well as casts from and to pointer types,
  // as long as those are noops (i.e., the source or dest type has the same
657   // bitwidth as the target's pointers).
658   if (LI.hasOneUse())
659     if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
660       if (CI->isNoopCast(DL))
661         if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
662           LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
663           CI->replaceAllUsesWith(NewLoad);
664           IC.eraseInstFromFunction(*CI);
665           return &LI;
666         }
667 
668   // FIXME: We should also canonicalize loads of vectors when their elements are
669   // cast to other types.
670   return nullptr;
671 }
672 
673 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
  // FIXME: We could probably, with some care, handle both volatile and atomic
  // loads here, but it isn't clear that this is important.
676   if (!LI.isSimple())
677     return nullptr;
678 
679   Type *T = LI.getType();
680   if (!T->isAggregateType())
681     return nullptr;
682 
683   StringRef Name = LI.getName();
684   assert(LI.getAlignment() && "Alignment must be set at this point");
685 
686   if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack it.
688     auto NumElements = ST->getNumElements();
689     if (NumElements == 1) {
690       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
691                                                ".unpack");
692       AAMDNodes AAMD;
693       LI.getAAMetadata(AAMD);
694       NewLoad->setAAMetadata(AAMD);
695       return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
696         UndefValue::get(T), NewLoad, 0, Name));
697     }
698 
    // We don't want to break loads with padding here as we'd lose
700     // the knowledge that padding exists for the rest of the pipeline.
701     const DataLayout &DL = IC.getDataLayout();
702     auto *SL = DL.getStructLayout(ST);
703     if (SL->hasPadding())
704       return nullptr;
705 
706     auto Align = LI.getAlignment();
707     if (!Align)
708       Align = DL.getABITypeAlignment(ST);
709 
710     auto *Addr = LI.getPointerOperand();
711     auto *IdxType = Type::getInt32Ty(T->getContext());
712     auto *Zero = ConstantInt::get(IdxType, 0);
713 
714     Value *V = UndefValue::get(T);
715     for (unsigned i = 0; i < NumElements; i++) {
716       Value *Indices[2] = {
717         Zero,
718         ConstantInt::get(IdxType, i),
719       };
720       auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
721                                                Name + ".elt");
722       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
723       auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
724       // Propagate AA metadata. It'll still be valid on the narrowed load.
725       AAMDNodes AAMD;
726       LI.getAAMetadata(AAMD);
727       L->setAAMetadata(AAMD);
728       V = IC.Builder.CreateInsertValue(V, L, i);
729     }
730 
731     V->setName(Name);
732     return IC.replaceInstUsesWith(LI, V);
733   }
734 
735   if (auto *AT = dyn_cast<ArrayType>(T)) {
736     auto *ET = AT->getElementType();
737     auto NumElements = AT->getNumElements();
738     if (NumElements == 1) {
739       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
740       AAMDNodes AAMD;
741       LI.getAAMetadata(AAMD);
742       NewLoad->setAAMetadata(AAMD);
743       return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
744         UndefValue::get(T), NewLoad, 0, Name));
745     }
746 
747     // Bail out if the array is too large. Ideally we would like to optimize
748     // arrays of arbitrary size but this has a terrible impact on compile time.
749     // The threshold here is chosen arbitrarily, maybe needs a little bit of
750     // tuning.
751     if (NumElements > IC.MaxArraySizeForCombine)
752       return nullptr;
753 
754     const DataLayout &DL = IC.getDataLayout();
755     auto EltSize = DL.getTypeAllocSize(ET);
756     auto Align = LI.getAlignment();
757     if (!Align)
758       Align = DL.getABITypeAlignment(T);
759 
760     auto *Addr = LI.getPointerOperand();
761     auto *IdxType = Type::getInt64Ty(T->getContext());
762     auto *Zero = ConstantInt::get(IdxType, 0);
763 
764     Value *V = UndefValue::get(T);
765     uint64_t Offset = 0;
766     for (uint64_t i = 0; i < NumElements; i++) {
767       Value *Indices[2] = {
768         Zero,
769         ConstantInt::get(IdxType, i),
770       };
771       auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
772                                                Name + ".elt");
773       auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
774                                              Name + ".unpack");
775       AAMDNodes AAMD;
776       LI.getAAMetadata(AAMD);
777       L->setAAMetadata(AAMD);
778       V = IC.Builder.CreateInsertValue(V, L, i);
779       Offset += EltSize;
780     }
781 
782     V->setName(Name);
783     return IC.replaceInstUsesWith(LI, V);
784   }
785 
786   return nullptr;
787 }
788 
789 // If we can determine that all possible objects pointed to by the provided
// pointer value are not only dereferenceable but also definitively less than
// or equal to the provided maximum size, then return true. Otherwise, return
// false. (Constant global values and constant-sized allocas are examples of
// objects whose size can be determined this way.)
793 //
794 // FIXME: This should probably live in ValueTracking (or similar).
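//
// For example (illustrative): given "@g = internal constant [4 x i32] ...",
// the pointed-to object has a known alloc size of 16 bytes, so this returns
// true for any MaxSize >= 16; it returns false for a pointer of unknown
// provenance such as a plain function argument.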
795 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
796                                      const DataLayout &DL) {
797   SmallPtrSet<Value *, 4> Visited;
798   SmallVector<Value *, 4> Worklist(1, V);
799 
800   do {
801     Value *P = Worklist.pop_back_val();
802     P = P->stripPointerCasts();
803 
804     if (!Visited.insert(P).second)
805       continue;
806 
807     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
808       Worklist.push_back(SI->getTrueValue());
809       Worklist.push_back(SI->getFalseValue());
810       continue;
811     }
812 
813     if (PHINode *PN = dyn_cast<PHINode>(P)) {
814       for (Value *IncValue : PN->incoming_values())
815         Worklist.push_back(IncValue);
816       continue;
817     }
818 
819     if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
820       if (GA->isInterposable())
821         return false;
822       Worklist.push_back(GA->getAliasee());
823       continue;
824     }
825 
826     // If we know how big this object is, and it is less than MaxSize, continue
827     // searching. Otherwise, return false.
828     if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
829       if (!AI->getAllocatedType()->isSized())
830         return false;
831 
832       ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
833       if (!CS)
834         return false;
835 
836       uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
837       // Make sure that, even if the multiplication below would wrap as an
838       // uint64_t, we still do the right thing.
839       if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
840         return false;
841       continue;
842     }
843 
844     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
845       if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
846         return false;
847 
848       uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
849       if (InitSize > MaxSize)
850         return false;
851       continue;
852     }
853 
854     return false;
855   } while (!Worklist.empty());
856 
857   return true;
858 }
859 
860 // If we're indexing into an object of a known size, and the outer index is
861 // not a constant, but having any value but zero would lead to undefined
862 // behavior, replace it with zero.
863 //
864 // For example, if we have:
865 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
866 // ...
867 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
868 // ... = load i32* %arrayidx, align 4
869 // Then we know that we can replace %x in the GEP with i64 0.
870 //
871 // FIXME: We could fold any GEP index to zero that would cause UB if it were
// not zero. Currently, we only handle the first such index. We could also
// search through non-zero constant indices if we kept track of the offsets
// those indices implied.
875 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
876                                      Instruction *MemI, unsigned &Idx) {
877   if (GEPI->getNumOperands() < 2)
878     return false;
879 
880   // Find the first non-zero index of a GEP. If all indices are zero, return
881   // one past the last index.
882   auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
883     unsigned I = 1;
884     for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
885       Value *V = GEPI->getOperand(I);
886       if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
887         if (CI->isZero())
888           continue;
889 
890       break;
891     }
892 
893     return I;
894   };
895 
896   // Skip through initial 'zero' indices, and find the corresponding pointer
897   // type. See if the next index is not a constant.
898   Idx = FirstNZIdx(GEPI);
899   if (Idx == GEPI->getNumOperands())
900     return false;
901   if (isa<Constant>(GEPI->getOperand(Idx)))
902     return false;
903 
904   SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
905   Type *AllocTy =
906     GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
907   if (!AllocTy || !AllocTy->isSized())
908     return false;
909   const DataLayout &DL = IC.getDataLayout();
910   uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
911 
912   // If there are more indices after the one we might replace with a zero, make
913   // sure they're all non-negative. If any of them are negative, the overall
914   // address being computed might be before the base address determined by the
915   // first non-zero index.
916   auto IsAllNonNegative = [&]() {
917     for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
918       KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
919       if (Known.isNonNegative())
920         continue;
921       return false;
922     }
923 
924     return true;
925   };
926 
927   // FIXME: If the GEP is not inbounds, and there are extra indices after the
928   // one we'll replace, those could cause the address computation to wrap
929   // (rendering the IsAllNonNegative() check below insufficient). We can do
930   // better, ignoring zero indices (and other indices we can prove small
931   // enough not to wrap).
932   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
933     return false;
934 
935   // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
936   // also known to be dereferenceable.
937   return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
938          IsAllNonNegative();
939 }
940 
941 // If we're indexing into an object with a variable index for the memory
942 // access, but the object has only one element, we can assume that the index
943 // will always be zero. If we replace the GEP, return it.
944 template <typename T>
945 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
946                                           T &MemI) {
947   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
948     unsigned Idx;
949     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
950       Instruction *NewGEPI = GEPI->clone();
951       NewGEPI->setOperand(Idx,
952         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
953       NewGEPI->insertBefore(GEPI);
954       MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
955       return NewGEPI;
956     }
957   }
958 
959   return nullptr;
960 }
961 
962 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
963   if (SI.getPointerAddressSpace() != 0)
964     return false;
965 
966   auto *Ptr = SI.getPointerOperand();
967   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
968     Ptr = GEPI->getOperand(0);
969   return isa<ConstantPointerNull>(Ptr);
970 }
971 
972 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
973   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
974     const Value *GEPI0 = GEPI->getOperand(0);
975     if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0)
976       return true;
977   }
978   if (isa<UndefValue>(Op) ||
979       (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0))
980     return true;
981   return false;
982 }
983 
984 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
985   Value *Op = LI.getOperand(0);
986 
987   // Try to canonicalize the loaded type.
988   if (Instruction *Res = combineLoadToOperationType(*this, LI))
989     return Res;
990 
991   // Attempt to improve the alignment.
992   unsigned KnownAlign = getOrEnforceKnownAlignment(
993       Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
994   unsigned LoadAlign = LI.getAlignment();
995   unsigned EffectiveLoadAlign =
996       LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
997 
998   if (KnownAlign > EffectiveLoadAlign)
999     LI.setAlignment(KnownAlign);
1000   else if (LoadAlign == 0)
1001     LI.setAlignment(EffectiveLoadAlign);
1002 
1003   // Replace GEP indices if possible.
1004   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
1005       Worklist.Add(NewGEPI);
1006       return &LI;
1007   }
1008 
1009   if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1010     return Res;
1011 
1012   // Do really simple store-to-load forwarding and load CSE, to catch cases
1013   // where there are several consecutive memory accesses to the same location,
1014   // separated by a few arithmetic operations.
1015   BasicBlock::iterator BBI(LI);
1016   bool IsLoadCSE = false;
1017   if (Value *AvailableVal = FindAvailableLoadedValue(
1018           &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1019     if (IsLoadCSE)
1020       combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
1021 
1022     return replaceInstUsesWith(
1023         LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1024                                            LI.getName() + ".cast"));
1025   }
1026 
1027   // None of the following transforms are legal for volatile/ordered atomic
1028   // loads.  Most of them do apply for unordered atomics.
1029   if (!LI.isUnordered()) return nullptr;
1030 
1031   // load(gep null, ...) -> unreachable
1032   // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for these transforms.
1034   if (canSimplifyNullLoadOrGEP(LI, Op)) {
1035     // Insert a new store to null instruction before the load to indicate
1036     // that this code is not reachable.  We do this instead of inserting
1037     // an unreachable instruction directly because we cannot modify the
1038     // CFG.
1039     StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1040                                   Constant::getNullValue(Op->getType()), &LI);
1041     SI->setDebugLoc(LI.getDebugLoc());
1042     return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1043   }
1044 
1045   if (Op->hasOneUse()) {
1046     // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
1048     // exposes redundancy in the code.
1049     //
1050     // Note that we cannot do the transformation unless we know that the
1051     // introduced loads cannot trap!  Something like this is valid as long as
1052     // the condition is always false: load (select bool %C, int* null, int* %G),
1053     // but it would not be valid if we transformed it to load from null
1054     // unconditionally.
1055     //
1056     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1057       // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
1058       unsigned Align = LI.getAlignment();
1059       if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
1060           isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
1061         LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
1062                                           SI->getOperand(1)->getName()+".val");
1063         LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
1064                                           SI->getOperand(2)->getName()+".val");
1065         assert(LI.isUnordered() && "implied by above");
1066         V1->setAlignment(Align);
1067         V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1068         V2->setAlignment(Align);
1069         V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1070         return SelectInst::Create(SI->getCondition(), V1, V2);
1071       }
1072 
1073       // load (select (cond, null, P)) -> load P
1074       if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1075           LI.getPointerAddressSpace() == 0) {
1076         LI.setOperand(0, SI->getOperand(2));
1077         return &LI;
1078       }
1079 
1080       // load (select (cond, P, null)) -> load P
1081       if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1082           LI.getPointerAddressSpace() == 0) {
1083         LI.setOperand(0, SI->getOperand(1));
1084         return &LI;
1085       }
1086     }
1087   }
1088   return nullptr;
1089 }
1090 
/// \brief Look for an extractelement/insertvalue sequence that acts like a bitcast.
1092 ///
1093 /// \returns underlying value that was "cast", or nullptr otherwise.
1094 ///
1095 /// For example, if we have:
1096 ///
1097 ///     %E0 = extractelement <2 x double> %U, i32 0
1098 ///     %V0 = insertvalue [2 x double] undef, double %E0, 0
1099 ///     %E1 = extractelement <2 x double> %U, i32 1
1100 ///     %V1 = insertvalue [2 x double] %V0, double %E1, 1
1101 ///
1102 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1103 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1104 /// Note that %U may contain non-undef values where %V1 has undef.
1105 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1106   Value *U = nullptr;
1107   while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1108     auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1109     if (!E)
1110       return nullptr;
1111     auto *W = E->getVectorOperand();
1112     if (!U)
1113       U = W;
1114     else if (U != W)
1115       return nullptr;
1116     auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1117     if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1118       return nullptr;
1119     V = IV->getAggregateOperand();
1120   }
  if (!isa<UndefValue>(V) || !U)
1122     return nullptr;
1123 
1124   auto *UT = cast<VectorType>(U->getType());
1125   auto *VT = V->getType();
1126   // Check that types UT and VT are bitwise isomorphic.
1127   const auto &DL = IC.getDataLayout();
1128   if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1129     return nullptr;
1130   }
1131   if (auto *AT = dyn_cast<ArrayType>(VT)) {
1132     if (AT->getNumElements() != UT->getNumElements())
1133       return nullptr;
1134   } else {
1135     auto *ST = cast<StructType>(VT);
1136     if (ST->getNumElements() != UT->getNumElements())
1137       return nullptr;
1138     for (const auto *EltT : ST->elements()) {
1139       if (EltT != UT->getElementType())
1140         return nullptr;
1141     }
1142   }
1143   return U;
1144 }
1145 
1146 /// \brief Combine stores to match the type of value being stored.
1147 ///
/// The core idea here is that the memory does not have any intrinsic type and,
/// where we can, we should match the type of a store to the type of the value
/// being stored.
1151 ///
1152 /// However, this routine must never change the width of a store or the number of
1153 /// stores as that would introduce a semantic change. This combine is expected to
1154 /// be a semantic no-op which just allows stores to more closely model the types
1155 /// of their incoming values.
1156 ///
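/// For example (illustrative):
///   %i = bitcast float %f to i32
///   store i32 %i, i32* %p
/// becomes
///   %pc = bitcast i32* %p to float*
///   store float %f, float* %pc
///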
1157 /// Currently, we also refuse to change the precise type used for an atomic or
1158 /// volatile store. This is debatable, and might be reasonable to change later.
1159 /// However, it is risky in case some backend or other part of LLVM is relying
1160 /// on the exact type stored to select appropriate atomic operations.
1161 ///
1162 /// \returns true if the store was successfully combined away. This indicates
1163 /// the caller must erase the store instruction. We have to let the caller erase
1164 /// the store instruction as otherwise there is no way to signal whether it was
1165 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
1166 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1167   // FIXME: We could probably with some care handle both volatile and ordered
1168   // atomic stores here but it isn't clear that this is important.
1169   if (!SI.isUnordered())
1170     return false;
1171 
1172   // swifterror values can't be bitcasted.
1173   if (SI.getPointerOperand()->isSwiftError())
1174     return false;
1175 
1176   Value *V = SI.getValueOperand();
1177 
1178   // Fold away bit casts of the stored value by storing the original type.
1179   if (auto *BC = dyn_cast<BitCastInst>(V)) {
1180     V = BC->getOperand(0);
1181     if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1182       combineStoreToNewValue(IC, SI, V);
1183       return true;
1184     }
1185   }
1186 
1187   if (Value *U = likeBitCastFromVector(IC, V))
1188     if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1189       combineStoreToNewValue(IC, SI, U);
1190       return true;
1191     }
1192 
1193   // FIXME: We should also canonicalize stores of vectors when their elements
1194   // are cast to other types.
1195   return false;
1196 }
1197 
1198 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1199   // FIXME: We could probably with some care handle both volatile and atomic
1200   // stores here but it isn't clear that this is important.
1201   if (!SI.isSimple())
1202     return false;
1203 
1204   Value *V = SI.getValueOperand();
1205   Type *T = V->getType();
1206 
1207   if (!T->isAggregateType())
1208     return false;
1209 
1210   if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack it.
1212     unsigned Count = ST->getNumElements();
1213     if (Count == 1) {
1214       V = IC.Builder.CreateExtractValue(V, 0);
1215       combineStoreToNewValue(IC, SI, V);
1216       return true;
1217     }
1218 
    // We don't want to break stores with padding here as we'd lose
1220     // the knowledge that padding exists for the rest of the pipeline.
1221     const DataLayout &DL = IC.getDataLayout();
1222     auto *SL = DL.getStructLayout(ST);
1223     if (SL->hasPadding())
1224       return false;
1225 
1226     auto Align = SI.getAlignment();
1227     if (!Align)
1228       Align = DL.getABITypeAlignment(ST);
1229 
1230     SmallString<16> EltName = V->getName();
1231     EltName += ".elt";
1232     auto *Addr = SI.getPointerOperand();
1233     SmallString<16> AddrName = Addr->getName();
1234     AddrName += ".repack";
1235 
1236     auto *IdxType = Type::getInt32Ty(ST->getContext());
1237     auto *Zero = ConstantInt::get(IdxType, 0);
1238     for (unsigned i = 0; i < Count; i++) {
1239       Value *Indices[2] = {
1240         Zero,
1241         ConstantInt::get(IdxType, i),
1242       };
1243       auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1244                                                AddrName);
1245       auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1246       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1247       llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1248       AAMDNodes AAMD;
1249       SI.getAAMetadata(AAMD);
1250       NS->setAAMetadata(AAMD);
1251     }
1252 
1253     return true;
1254   }
1255 
1256   if (auto *AT = dyn_cast<ArrayType>(T)) {
    // If the array has only one element, we unpack it.
1258     auto NumElements = AT->getNumElements();
1259     if (NumElements == 1) {
1260       V = IC.Builder.CreateExtractValue(V, 0);
1261       combineStoreToNewValue(IC, SI, V);
1262       return true;
1263     }
1264 
1265     // Bail out if the array is too large. Ideally we would like to optimize
1266     // arrays of arbitrary size but this has a terrible impact on compile time.
1267     // The threshold here is chosen arbitrarily, maybe needs a little bit of
1268     // tuning.
1269     if (NumElements > IC.MaxArraySizeForCombine)
1270       return false;
1271 
1272     const DataLayout &DL = IC.getDataLayout();
1273     auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1274     auto Align = SI.getAlignment();
1275     if (!Align)
1276       Align = DL.getABITypeAlignment(T);
1277 
1278     SmallString<16> EltName = V->getName();
1279     EltName += ".elt";
1280     auto *Addr = SI.getPointerOperand();
1281     SmallString<16> AddrName = Addr->getName();
1282     AddrName += ".repack";
1283 
1284     auto *IdxType = Type::getInt64Ty(T->getContext());
1285     auto *Zero = ConstantInt::get(IdxType, 0);
1286 
1287     uint64_t Offset = 0;
1288     for (uint64_t i = 0; i < NumElements; i++) {
1289       Value *Indices[2] = {
1290         Zero,
1291         ConstantInt::get(IdxType, i),
1292       };
1293       auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1294                                                AddrName);
1295       auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1296       auto EltAlign = MinAlign(Align, Offset);
1297       Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1298       AAMDNodes AAMD;
1299       SI.getAAMetadata(AAMD);
1300       NS->setAAMetadata(AAMD);
1301       Offset += EltSize;
1302     }
1303 
1304     return true;
1305   }
1306 
1307   return false;
1308 }
1309 
1310 /// equivalentAddressValues - Test if A and B will obviously have the same
1311 /// value. This includes recognizing that %t0 and %t1 will have the same
1312 /// value in code like this:
1313 ///   %t0 = getelementptr \@a, 0, 3
1314 ///   store i32 0, i32* %t0
1315 ///   %t1 = getelementptr \@a, 0, 3
1316 ///   %t2 = load i32* %t1
1317 ///
1318 static bool equivalentAddressValues(Value *A, Value *B) {
1319   // Test if the values are trivially equivalent.
1320   if (A == B) return true;
1321 
  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
1325   // means that they'll always either have the same value or one of them
1326   // will have an undefined value.
1327   if (isa<BinaryOperator>(A) ||
1328       isa<CastInst>(A) ||
1329       isa<PHINode>(A) ||
1330       isa<GetElementPtrInst>(A))
1331     if (Instruction *BI = dyn_cast<Instruction>(B))
1332       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1333         return true;
1334 
1335   // Otherwise they may not be equivalent.
1336   return false;
1337 }
1338 
1339 /// Converts store (bitcast (load (bitcast (select ...)))) to
1340 /// store (load (select ...)), where select is minmax:
1341 /// select ((cmp load V1, load V2), V1, V2).
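/// For example (illustrative IR, where %sel is the minmax select above):
///   %selc = bitcast float* %sel to i32*
///   %v    = load i32, i32* %selc
///   %dstc = bitcast float* %dst to i32*
///   store i32 %v, i32* %dstc
/// is rewritten so that both the load and the store operate on float; the
/// leftover bitcasts are then cleaned up by later combines.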
1342 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1343                                                 StoreInst &SI) {
1344   // bitcast?
1345   if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1346     return false;
1347   // load? integer?
1348   Value *LoadAddr;
1349   if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1350     return false;
1351   auto *LI = cast<LoadInst>(SI.getValueOperand());
1352   if (!LI->getType()->isIntegerTy())
1353     return false;
1354   if (!isMinMaxWithLoads(LoadAddr))
1355     return false;
1356 
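  // Only rewrite when every user of the integer load is a store that neither
  // uses the loaded value as its pointer operand, nor stores back to the
  // min/max address itself (looking through bitcasts), nor targets a
  // swifterror pointer.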
1357   if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1358         auto *SI = dyn_cast<StoreInst>(U);
1359         return SI && SI->getPointerOperand() != LI &&
1360                peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1361                !SI->getPointerOperand()->isSwiftError();
1362       }))
1363     return false;
1364 
1365   IC.Builder.SetInsertPoint(LI);
1366   LoadInst *NewLI = combineLoadToNewType(
1367       IC, *LI, LoadAddr->getType()->getPointerElementType());
1368   // Replace all the stores with stores of the newly loaded value.
1369   for (auto *UI : LI->users()) {
1370     auto *USI = cast<StoreInst>(UI);
1371     IC.Builder.SetInsertPoint(USI);
1372     combineStoreToNewValue(IC, *USI, NewLI);
1373   }
1374   IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1375   IC.eraseInstFromFunction(*LI);
1376   return true;
1377 }
1378 
1379 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1380   Value *Val = SI.getOperand(0);
1381   Value *Ptr = SI.getOperand(1);
1382 
1383   // Try to canonicalize the stored type.
1384   if (combineStoreToValueType(*this, SI))
1385     return eraseInstFromFunction(SI);
1386 
1387   // Attempt to improve the alignment.
1388   unsigned KnownAlign = getOrEnforceKnownAlignment(
1389       Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1390   unsigned StoreAlign = SI.getAlignment();
1391   unsigned EffectiveStoreAlign =
1392       StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1393 
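  // Prefer the proven alignment when it is larger; otherwise, if the store had
  // no explicit alignment, record the ABI alignment explicitly.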
1394   if (KnownAlign > EffectiveStoreAlign)
1395     SI.setAlignment(KnownAlign);
1396   else if (StoreAlign == 0)
1397     SI.setAlignment(EffectiveStoreAlign);
1398 
1399   // Try to break up an aggregate store into stores of its elements.
1400   if (unpackStoreToAggregate(*this, SI))
1401     return eraseInstFromFunction(SI);
1402 
1403   if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1404     return eraseInstFromFunction(SI);
1405 
1406   // Replace GEP indices if possible.
1407   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1408     Worklist.Add(NewGEPI);
1409     return &SI;
1410   }
1411 
1412   // Don't hack volatile/ordered stores.
1413   // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1414   if (!SI.isUnordered()) return nullptr;
1415 
1416   // If the pointer operand is an alloca with a single use, delete the store,
1417   // making the alloca dead.
1418   if (Ptr->hasOneUse()) {
1419     if (isa<AllocaInst>(Ptr))
1420       return eraseInstFromFunction(SI);
1421     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1422       if (isa<AllocaInst>(GEP->getOperand(0))) {
1423         if (GEP->getOperand(0)->hasOneUse())
1424           return eraseInstFromFunction(SI);
1425       }
1426     }
1427   }
1428 
1429   // Do really simple DSE, to catch cases where there are several consecutive
1430   // stores to the same location, separated by a few arithmetic operations. This
1431   // situation often occurs with bitfield accesses.
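  // For example (illustrative IR), when visiting the second store below, the
  // first store is dead and is erased because nothing reads %p in between:
  //   store i32 %a, i32* %p
  //   %b = or i32 %a, 16
  //   store i32 %b, i32* %p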
1432   BasicBlock::iterator BBI(SI);
1433   for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1434        --ScanInsts) {
1435     --BBI;
1436     // Don't count debug info directives, lest they affect codegen, and skip
1437     // pointer-to-pointer bitcasts, which are NOPs.
1438     if (isa<DbgInfoIntrinsic>(BBI) ||
1439         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1440       ScanInsts++;
1441       continue;
1442     }
1443 
1444     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1445       // Prev store isn't volatile, and stores to the same location?
1446       if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1447                                                         SI.getOperand(1))) {
1448         ++NumDeadStore;
1449         ++BBI;
1450         eraseInstFromFunction(*PrevSI);
1451         continue;
1452       }
1453       break;
1454     }
1455 
1456     // If this is a load, we have to stop.  However, if the loaded value is
1457     // the value being stored and it was loaded from the same pointer we are
1458     // storing to, then *this* store is dead (X = load P; store X -> P).
1459     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1460       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1461         assert(SI.isUnordered() && "can't eliminate ordering operation");
1462         return eraseInstFromFunction(SI);
1463       }
1464 
1465       // Otherwise, this is a load from some other location.  Stores before it
1466       // may not be dead.
1467       break;
1468     }
1469 
1470     // Don't skip over loads, throws or things that can modify memory.
1471     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1472       break;
1473   }
1474 
1475   // store X, null    -> turns into 'unreachable' in SimplifyCFG
1476   // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1477   if (canSimplifyNullStoreOrGEP(SI)) {
1478     if (!isa<UndefValue>(Val)) {
1479       SI.setOperand(0, UndefValue::get(Val->getType()));
1480       if (Instruction *U = dyn_cast<Instruction>(Val))
1481         Worklist.Add(U);  // Dropped a use.
1482     }
1483     return nullptr;  // Do not modify these!
1484   }
1485 
1486   // store undef, Ptr -> noop
1487   if (isa<UndefValue>(Val))
1488     return eraseInstFromFunction(SI);
1489 
1490   // If this store is the last instruction in the basic block (possibly
1491   // excepting debug info instructions), and if the block ends with an
1492   // unconditional branch, try to move it to the successor block.
1493   BBI = SI.getIterator();
1494   do {
1495     ++BBI;
1496   } while (isa<DbgInfoIntrinsic>(BBI) ||
1497            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1498   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1499     if (BI->isUnconditional())
1500       if (SimplifyStoreAtEndOfBlock(SI))
1501         return nullptr;  // xform done!
1502 
1503   return nullptr;
1504 }
1505 
1506 /// SimplifyStoreAtEndOfBlock - Turn things like:
1507 ///   if () { *P = v1; } else { *P = v2; }
1508 /// into a phi node with a store in the successor.
1509 ///
1510 /// Simplify things like:
1511 ///   *P = v1; if () { *P = v2; }
1512 /// into a phi node with a store in the successor.
1513 ///
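/// For illustration (block and value names below are hypothetical), the
/// if/else form corresponds to IR roughly like:
///   then:
///     store i32 1, i32* %P
///     br label %merge
///   else:
///     store i32 2, i32* %P
///     br label %merge
/// and is rewritten so the successor performs a single store:
///   merge:
///     %storemerge = phi i32 [ 1, %then ], [ 2, %else ]
///     store i32 %storemerge, i32* %P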
1514 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
1515   assert(SI.isUnordered() &&
1516          "this code has not been auditted for volatile or ordered store case");
1517 
1518   BasicBlock *StoreBB = SI.getParent();
1519 
1520   // Check to see if the successor block has exactly two incoming edges.  If
1521   // so, see if the other predecessor contains a store to the same location.
1522   // If so, insert a PHI node (if needed) and move the stores down.
1523   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1524 
1525   // Determine whether Dest has exactly two predecessors and, if so, compute
1526   // the other predecessor.
1527   pred_iterator PI = pred_begin(DestBB);
1528   BasicBlock *P = *PI;
1529   BasicBlock *OtherBB = nullptr;
1530 
1531   if (P != StoreBB)
1532     OtherBB = P;
1533 
1534   if (++PI == pred_end(DestBB))
1535     return false;
1536 
1537   P = *PI;
1538   if (P != StoreBB) {
1539     if (OtherBB)
1540       return false;
1541     OtherBB = P;
1542   }
1543   if (++PI != pred_end(DestBB))
1544     return false;
1545 
1546   // Bail out if the relevant blocks aren't all distinct (this can happen,
1547   // for example, if SI is in an infinite loop).
1548   if (StoreBB == DestBB || OtherBB == DestBB)
1549     return false;
1550 
1551   // Verify that the other block ends in a branch and is not otherwise empty.
1552   BasicBlock::iterator BBI(OtherBB->getTerminator());
1553   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1554   if (!OtherBr || BBI == OtherBB->begin())
1555     return false;
1556 
1557   // If the other block ends in an unconditional branch, check for the 'if then
1558   // else' case.  We know there is an instruction before the branch.
1559   StoreInst *OtherStore = nullptr;
1560   if (OtherBr->isUnconditional()) {
1561     --BBI;
1562     // Skip over debugging info.
1563     while (isa<DbgInfoIntrinsic>(BBI) ||
1564            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1565       if (BBI==OtherBB->begin())
1566         return false;
1567       --BBI;
1568     }
1569     // If this isn't a store, isn't a store to the same location, or is not the
1570     // right kind of store, bail out.
1571     OtherStore = dyn_cast<StoreInst>(BBI);
1572     if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1573         !SI.isSameOperationAs(OtherStore))
1574       return false;
1575   } else {
1576     // Otherwise, the other block ended with a conditional branch. If one of the
1577     // destinations is StoreBB, then we have the if/then case.
1578     if (OtherBr->getSuccessor(0) != StoreBB &&
1579         OtherBr->getSuccessor(1) != StoreBB)
1580       return false;
1581 
1582     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1583     // if/then triangle.  See if there is a store to the same ptr as SI that
1584     // lives in OtherBB.
1585     for (;; --BBI) {
1586       // Check to see if we find the matching store.
1587       if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1588         if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1589             !SI.isSameOperationAs(OtherStore))
1590           return false;
1591         break;
1592       }
1593       // If we find something that may be using or overwriting the stored
1594       // value, or if we run out of instructions, we can't do the xform.
1595       if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1596           BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1597         return false;
1598     }
1599 
1600     // In order to eliminate the store in OtherBr, we have to
1601     // make sure nothing reads or overwrites the stored value in
1602     // StoreBB.
1603     for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1604       // FIXME: This should really be AA driven.
1605       if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1606         return false;
1607     }
1608   }
1609 
1610   // Insert a PHI node now if we need it.
1611   Value *MergedVal = OtherStore->getOperand(0);
1612   if (MergedVal != SI.getOperand(0)) {
1613     PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1614     PN->addIncoming(SI.getOperand(0), SI.getParent());
1615     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1616     MergedVal = InsertNewInstBefore(PN, DestBB->front());
1617   }
1618 
1619   // Advance to a place where it is safe to insert the new store and
1620   // insert it.
1621   BBI = DestBB->getFirstInsertionPt();
1622   StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1623                                    SI.isVolatile(),
1624                                    SI.getAlignment(),
1625                                    SI.getOrdering(),
1626                                    SI.getSyncScopeID());
1627   InsertNewInstBefore(NewSI, *BBI);
1628   // The debug locations of the original instructions might differ; merge them.
1629   NewSI->applyMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
1630 
1631   // If the two stores had AA tags, merge them.
1632   AAMDNodes AATags;
1633   SI.getAAMetadata(AATags);
1634   if (AATags) {
1635     OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1636     NewSI->setAAMetadata(AATags);
1637   }
1638 
1639   // Nuke the old stores.
1640   eraseInstFromFunction(SI);
1641   eraseInstFromFunction(*OtherStore);
1642   return true;
1643 }
1644