1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a trivial dead store elimination that only considers
10 // basic-block local redundant stores.
11 //
12 // FIXME: This should eventually be extended to be a post-dominator tree
13 // traversal.  Doing so would be pretty trivial.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/MapVector.h"
21 #include "llvm/ADT/PostOrderIterator.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/Analysis/CaptureTracking.h"
29 #include "llvm/Analysis/GlobalsModRef.h"
30 #include "llvm/Analysis/MemoryBuiltins.h"
31 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
32 #include "llvm/Analysis/MemoryLocation.h"
33 #include "llvm/Analysis/MemorySSA.h"
34 #include "llvm/Analysis/MemorySSAUpdater.h"
35 #include "llvm/Analysis/PostDominators.h"
36 #include "llvm/Analysis/TargetLibraryInfo.h"
37 #include "llvm/Analysis/ValueTracking.h"
38 #include "llvm/IR/Argument.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/CallSite.h"
41 #include "llvm/IR/Constant.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/Intrinsics.h"
51 #include "llvm/IR/LLVMContext.h"
52 #include "llvm/IR/Module.h"
53 #include "llvm/IR/PassManager.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/InitializePasses.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/DebugCounter.h"
61 #include "llvm/Support/ErrorHandling.h"
62 #include "llvm/Support/MathExtras.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/Scalar.h"
65 #include "llvm/Transforms/Utils/Local.h"
66 #include <algorithm>
67 #include <cassert>
68 #include <cstddef>
69 #include <cstdint>
70 #include <iterator>
71 #include <map>
72 #include <utility>
73 
74 using namespace llvm;
75 
76 #define DEBUG_TYPE "dse"
77 
78 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
79 STATISTIC(NumFastStores, "Number of stores deleted");
80 STATISTIC(NumFastOther, "Number of other instrs removed");
81 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
82 STATISTIC(NumModifiedStores, "Number of stores modified");
83 
84 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
85               "Controls which MemoryDefs are eliminated.");
86 
87 static cl::opt<bool>
88 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
89   cl::init(true), cl::Hidden,
90   cl::desc("Enable partial-overwrite tracking in DSE"));
91 
92 static cl::opt<bool>
93 EnablePartialStoreMerging("enable-dse-partial-store-merging",
94   cl::init(true), cl::Hidden,
95   cl::desc("Enable partial store merging in DSE"));
96 
97 static cl::opt<bool>
98     EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden,
99                     cl::desc("Use the new MemorySSA-backed DSE."));
100 
101 static cl::opt<unsigned>
102     MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden,
103                        cl::desc("The number of memory instructions to scan for "
104                                 "dead store elimination (default = 100)"));
105 
106 static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
107     "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
108     cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
109              "other stores per basic block (default = 5000)"));
110 
111 //===----------------------------------------------------------------------===//
112 // Helper functions
113 //===----------------------------------------------------------------------===//
114 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
115 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
116 
117 /// Delete this instruction.  Before we do, go through and zero out all the
118 /// operands of this instruction.  If any of them become dead, delete them and
119 /// the computation tree that feeds them.
120 /// If ValueSet is non-null, remove any deleted instructions from it as well.
121 static void
122 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
123                       MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
124                       InstOverlapIntervalsTy &IOL,
125                       MapVector<Instruction *, bool> &ThrowableInst,
126                       SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
127   SmallVector<Instruction*, 32> NowDeadInsts;
128 
129   NowDeadInsts.push_back(I);
130   --NumFastOther;
131 
132   // Keeping the iterator straight is a pain, so we let this routine tell the
133   // caller what the next instruction is after we're done mucking about.
134   BasicBlock::iterator NewIter = *BBI;
135 
136   // Before we touch this instruction, remove it from memdep!
137   do {
138     Instruction *DeadInst = NowDeadInsts.pop_back_val();
139     // Mark the DeadInst as dead in the list of throwable instructions.
140     auto It = ThrowableInst.find(DeadInst);
141     if (It != ThrowableInst.end())
142       ThrowableInst[It->first] = false;
143     ++NumFastOther;
144 
145     // Try to preserve debug information attached to the dead instruction.
146     salvageDebugInfoOrMarkUndef(*DeadInst);
147 
148     // This instruction is dead, zap it, in stages.  Start by removing it from
149     // MemDep, which needs to know the operands and needs it to be in the
150     // function.
151     MD.removeInstruction(DeadInst);
152 
153     for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
154       Value *Op = DeadInst->getOperand(op);
155       DeadInst->setOperand(op, nullptr);
156 
157       // If this operand just became dead, add it to the NowDeadInsts list.
158       if (!Op->use_empty()) continue;
159 
160       if (Instruction *OpI = dyn_cast<Instruction>(Op))
161         if (isInstructionTriviallyDead(OpI, &TLI))
162           NowDeadInsts.push_back(OpI);
163     }
164 
165     if (ValueSet) ValueSet->remove(DeadInst);
166     IOL.erase(DeadInst);
167 
168     if (NewIter == DeadInst->getIterator())
169       NewIter = DeadInst->eraseFromParent();
170     else
171       DeadInst->eraseFromParent();
172   } while (!NowDeadInsts.empty());
173   *BBI = NewIter;
174   // Pop dead entries from back of ThrowableInst till we find an alive entry.
175   while (!ThrowableInst.empty() && !ThrowableInst.back().second)
176     ThrowableInst.pop_back();
177 }
178 
179 /// Does this instruction write some memory?  This only returns true for things
180 /// that we can analyze with other helpers below.
181 static bool hasAnalyzableMemoryWrite(Instruction *I,
182                                      const TargetLibraryInfo &TLI) {
183   if (isa<StoreInst>(I))
184     return true;
185   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
186     switch (II->getIntrinsicID()) {
187     default:
188       return false;
189     case Intrinsic::memset:
190     case Intrinsic::memmove:
191     case Intrinsic::memcpy:
192     case Intrinsic::memcpy_element_unordered_atomic:
193     case Intrinsic::memmove_element_unordered_atomic:
194     case Intrinsic::memset_element_unordered_atomic:
195     case Intrinsic::init_trampoline:
196     case Intrinsic::lifetime_end:
197       return true;
198     }
199   }
200   if (auto CS = CallSite(I)) {
201     if (Function *F = CS.getCalledFunction()) {
202       LibFunc LF;
203       if (TLI.getLibFunc(*F, LF) && TLI.has(LF)) {
204         switch (LF) {
205         case LibFunc_strcpy:
206         case LibFunc_strncpy:
207         case LibFunc_strcat:
208         case LibFunc_strncat:
209           return true;
210         default:
211           return false;
212         }
213       }
214     }
215   }
216   return false;
217 }
218 
219 /// Return a Location stored to by the specified instruction. If isRemovable
220 /// returns true, this function and getLocForRead completely describe the memory
221 /// operations for this instruction.
222 static MemoryLocation getLocForWrite(Instruction *Inst) {
223 
224   if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
225     return MemoryLocation::get(SI);
226 
227   if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
228     // memcpy/memmove/memset.
229     MemoryLocation Loc = MemoryLocation::getForDest(MI);
230     return Loc;
231   }
232 
233   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
234     switch (II->getIntrinsicID()) {
235     default:
236       return MemoryLocation(); // Unhandled intrinsic.
237     case Intrinsic::init_trampoline:
238       return MemoryLocation(II->getArgOperand(0));
239     case Intrinsic::lifetime_end: {
240       uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
241       return MemoryLocation(II->getArgOperand(1), Len);
242     }
243     }
244   }
245   if (auto CS = CallSite(Inst))
246     // All the supported TLI functions so far happen to have dest as their
247     // first argument.
248     return MemoryLocation(CS.getArgument(0));
249   return MemoryLocation();
250 }
251 
252 /// Return the location read by the specified "hasAnalyzableMemoryWrite"
253 /// instruction if any.
254 static MemoryLocation getLocForRead(Instruction *Inst,
255                                     const TargetLibraryInfo &TLI) {
256   assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case");
257 
258   // The only instructions that both read and write are the mem transfer
259   // instructions (memcpy/memmove).
260   if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst))
261     return MemoryLocation::getForSource(MTI);
262   return MemoryLocation();
263 }
264 
265 /// If the value of this instruction and the memory it writes to is unused, may
266 /// we delete this instruction?
267 static bool isRemovable(Instruction *I) {
268   // Don't remove volatile/atomic stores.
269   if (StoreInst *SI = dyn_cast<StoreInst>(I))
270     return SI->isUnordered();
271 
272   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
273     switch (II->getIntrinsicID()) {
274     default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate");
275     case Intrinsic::lifetime_end:
276       // Never remove dead lifetime_end's, e.g. because it is followed by a
277       // free.
278       return false;
279     case Intrinsic::init_trampoline:
280       // Always safe to remove init_trampoline.
281       return true;
282     case Intrinsic::memset:
283     case Intrinsic::memmove:
284     case Intrinsic::memcpy:
285       // Don't remove volatile memory intrinsics.
286       return !cast<MemIntrinsic>(II)->isVolatile();
287     case Intrinsic::memcpy_element_unordered_atomic:
288     case Intrinsic::memmove_element_unordered_atomic:
289     case Intrinsic::memset_element_unordered_atomic:
290       return true;
291     }
292   }
293 
294   // note: only get here for calls with analyzable writes - i.e. libcalls
295   if (auto CS = CallSite(I))
296     return CS.getInstruction()->use_empty();
297 
298   return false;
299 }
300 
301 /// Returns true if the end of this instruction can be safely shortened in
302 /// length.
303 static bool isShortenableAtTheEnd(Instruction *I) {
304   // Don't shorten stores for now
305   if (isa<StoreInst>(I))
306     return false;
307 
308   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
309     switch (II->getIntrinsicID()) {
310       default: return false;
311       case Intrinsic::memset:
312       case Intrinsic::memcpy:
313       case Intrinsic::memcpy_element_unordered_atomic:
314       case Intrinsic::memset_element_unordered_atomic:
315         // Do shorten memory intrinsics.
316         // FIXME: Add memmove if it's also safe to transform.
317         return true;
318     }
319   }
320 
321   // Don't shorten libcalls calls for now.
322 
323   return false;
324 }
325 
326 /// Returns true if the beginning of this instruction can be safely shortened
327 /// in length.
328 static bool isShortenableAtTheBeginning(Instruction *I) {
329   // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
330   // easily done by offsetting the source address.
331   return isa<AnyMemSetInst>(I);
332 }
333 
334 /// Return the pointer that is being written to.
335 static Value *getStoredPointerOperand(Instruction *I) {
336   //TODO: factor this to reuse getLocForWrite
337   MemoryLocation Loc = getLocForWrite(I);
338   assert(Loc.Ptr &&
339          "unable to find pointer written for analyzable instruction?");
340   // TODO: most APIs don't expect const Value *
341   return const_cast<Value*>(Loc.Ptr);
342 }
343 
344 static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
345                                const TargetLibraryInfo &TLI,
346                                const Function *F) {
347   uint64_t Size;
348   ObjectSizeOpts Opts;
349   Opts.NullIsUnknownSize = NullPointerIsDefined(F);
350 
351   if (getObjectSize(V, Size, DL, &TLI, Opts))
352     return Size;
353   return MemoryLocation::UnknownSize;
354 }
355 
356 namespace {
357 
358 enum OverwriteResult {
359   OW_Begin,
360   OW_Complete,
361   OW_End,
362   OW_PartialEarlierWithFullLater,
363   OW_Unknown
364 };
365 
366 } // end anonymous namespace
367 
368 /// Return 'OW_Complete' if a store to the 'Later' location completely
369 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
370 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
371 /// beginning of the 'Earlier' location is overwritten by 'Later'.
372 /// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
373 /// overwritten by a latter (smaller) store which doesn't write outside the big
374 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
375 static OverwriteResult isOverwrite(const MemoryLocation &Later,
376                                    const MemoryLocation &Earlier,
377                                    const DataLayout &DL,
378                                    const TargetLibraryInfo &TLI,
379                                    int64_t &EarlierOff, int64_t &LaterOff,
380                                    Instruction *DepWrite,
381                                    InstOverlapIntervalsTy &IOL,
382                                    AliasAnalysis &AA,
383                                    const Function *F) {
384   // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
385   // get imprecise values here, though (except for unknown sizes).
386   if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise())
387     return OW_Unknown;
388 
389   const uint64_t LaterSize = Later.Size.getValue();
390   const uint64_t EarlierSize = Earlier.Size.getValue();
391 
392   const Value *P1 = Earlier.Ptr->stripPointerCasts();
393   const Value *P2 = Later.Ptr->stripPointerCasts();
394 
395   // If the start pointers are the same, we just have to compare sizes to see if
396   // the later store was larger than the earlier store.
397   if (P1 == P2 || AA.isMustAlias(P1, P2)) {
398     // Make sure that the Later size is >= the Earlier size.
399     if (LaterSize >= EarlierSize)
400       return OW_Complete;
401   }
402 
403   // Check to see if the later store is to the entire object (either a global,
404   // an alloca, or a byval/inalloca argument).  If so, then it clearly
405   // overwrites any other store to the same object.
406   const Value *UO1 = GetUnderlyingObject(P1, DL),
407               *UO2 = GetUnderlyingObject(P2, DL);
408 
409   // If we can't resolve the same pointers to the same object, then we can't
410   // analyze them at all.
411   if (UO1 != UO2)
412     return OW_Unknown;
413 
414   // If the "Later" store is to a recognizable object, get its size.
415   uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
416   if (ObjectSize != MemoryLocation::UnknownSize)
417     if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
418       return OW_Complete;
419 
420   // Okay, we have stores to two completely different pointers.  Try to
421   // decompose the pointer into a "base + constant_offset" form.  If the base
422   // pointers are equal, then we can reason about the two stores.
423   EarlierOff = 0;
424   LaterOff = 0;
425   const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
426   const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
427 
428   // If the base pointers still differ, we have two completely different stores.
429   if (BP1 != BP2)
430     return OW_Unknown;
431 
432   // The later store completely overlaps the earlier store if:
433   //
434   // 1. Both start at the same offset and the later one's size is greater than
435   //    or equal to the earlier one's, or
436   //
437   //      |--earlier--|
438   //      |--   later   --|
439   //
440   // 2. The earlier store has an offset greater than the later offset, but which
441   //    still lies completely within the later store.
442   //
443   //        |--earlier--|
444   //    |-----  later  ------|
445   //
446   // We have to be careful here as *Off is signed while *.Size is unsigned.
447   if (EarlierOff >= LaterOff &&
448       LaterSize >= EarlierSize &&
449       uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
450     return OW_Complete;
451 
452   // We may now overlap, although the overlap is not complete. There might also
453   // be other incomplete overlaps, and together, they might cover the complete
454   // earlier write.
455   // Note: The correctness of this logic depends on the fact that this function
456   // is not even called providing DepWrite when there are any intervening reads.
457   if (EnablePartialOverwriteTracking &&
458       LaterOff < int64_t(EarlierOff + EarlierSize) &&
459       int64_t(LaterOff + LaterSize) >= EarlierOff) {
460 
461     // Insert our part of the overlap into the map.
462     auto &IM = IOL[DepWrite];
463     LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
464                       << ", " << int64_t(EarlierOff + EarlierSize)
465                       << ") Later [" << LaterOff << ", "
466                       << int64_t(LaterOff + LaterSize) << ")\n");
467 
468     // Make sure that we only insert non-overlapping intervals and combine
469     // adjacent intervals. The intervals are stored in the map with the ending
470     // offset as the key (in the half-open sense) and the starting offset as
471     // the value.
472     int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
473 
474     // Find any intervals ending at, or after, LaterIntStart which start
475     // before LaterIntEnd.
476     auto ILI = IM.lower_bound(LaterIntStart);
477     if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
478       // This existing interval is overlapped with the current store somewhere
479       // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
480       // intervals and adjusting our start and end.
481       LaterIntStart = std::min(LaterIntStart, ILI->second);
482       LaterIntEnd = std::max(LaterIntEnd, ILI->first);
483       ILI = IM.erase(ILI);
484 
485       // Continue erasing and adjusting our end in case other previous
486       // intervals are also overlapped with the current store.
487       //
488       // |--- ealier 1 ---|  |--- ealier 2 ---|
489       //     |------- later---------|
490       //
491       while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
492         assert(ILI->second > LaterIntStart && "Unexpected interval");
493         LaterIntEnd = std::max(LaterIntEnd, ILI->first);
494         ILI = IM.erase(ILI);
495       }
496     }
497 
498     IM[LaterIntEnd] = LaterIntStart;
499 
500     ILI = IM.begin();
501     if (ILI->second <= EarlierOff &&
502         ILI->first >= int64_t(EarlierOff + EarlierSize)) {
503       LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
504                         << EarlierOff << ", "
505                         << int64_t(EarlierOff + EarlierSize)
506                         << ") Composite Later [" << ILI->second << ", "
507                         << ILI->first << ")\n");
508       ++NumCompletePartials;
509       return OW_Complete;
510     }
511   }
512 
513   // Check for an earlier store which writes to all the memory locations that
514   // the later store writes to.
515   if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
516       int64_t(EarlierOff + EarlierSize) > LaterOff &&
517       uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
518     LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
519                       << EarlierOff << ", "
520                       << int64_t(EarlierOff + EarlierSize)
521                       << ") by a later store [" << LaterOff << ", "
522                       << int64_t(LaterOff + LaterSize) << ")\n");
523     // TODO: Maybe come up with a better name?
524     return OW_PartialEarlierWithFullLater;
525   }
526 
527   // Another interesting case is if the later store overwrites the end of the
528   // earlier store.
529   //
530   //      |--earlier--|
531   //                |--   later   --|
532   //
533   // In this case we may want to trim the size of earlier to avoid generating
534   // writes to addresses which will definitely be overwritten later
535   if (!EnablePartialOverwriteTracking &&
536       (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
537        int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
538     return OW_End;
539 
540   // Finally, we also need to check if the later store overwrites the beginning
541   // of the earlier store.
542   //
543   //                |--earlier--|
544   //      |--   later   --|
545   //
546   // In this case we may want to move the destination address and trim the size
547   // of earlier to avoid generating writes to addresses which will definitely
548   // be overwritten later.
549   if (!EnablePartialOverwriteTracking &&
550       (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
551     assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
552            "Expect to be handled as OW_Complete");
553     return OW_Begin;
554   }
555   // Otherwise, they don't completely overlap.
556   return OW_Unknown;
557 }
558 
559 /// If 'Inst' might be a self read (i.e. a noop copy of a
560 /// memory region into an identical pointer) then it doesn't actually make its
561 /// input dead in the traditional sense.  Consider this case:
562 ///
563 ///   memmove(A <- B)
564 ///   memmove(A <- A)
565 ///
566 /// In this case, the second store to A does not make the first store to A dead.
567 /// The usual situation isn't an explicit A<-A store like this (which can be
568 /// trivially removed) but a case where two pointers may alias.
569 ///
570 /// This function detects when it is unsafe to remove a dependent instruction
571 /// because the DSE inducing instruction may be a self-read.
572 static bool isPossibleSelfRead(Instruction *Inst,
573                                const MemoryLocation &InstStoreLoc,
574                                Instruction *DepWrite,
575                                const TargetLibraryInfo &TLI,
576                                AliasAnalysis &AA) {
577   // Self reads can only happen for instructions that read memory.  Get the
578   // location read.
579   MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
580   if (!InstReadLoc.Ptr)
581     return false; // Not a reading instruction.
582 
583   // If the read and written loc obviously don't alias, it isn't a read.
584   if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
585     return false;
586 
587   if (isa<AnyMemCpyInst>(Inst)) {
588     // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
589     // but in practice memcpy(A <- B) either means that A and B are disjoint or
590     // are equal (i.e. there are not partial overlaps).  Given that, if we have:
591     //
592     //   memcpy/memmove(A <- B)  // DepWrite
593     //   memcpy(A <- B)  // Inst
594     //
595     // with Inst reading/writing a >= size than DepWrite, we can reason as
596     // follows:
597     //
598     //   - If A == B then both the copies are no-ops, so the DepWrite can be
599     //     removed.
600     //   - If A != B then A and B are disjoint locations in Inst.  Since
601     //     Inst.size >= DepWrite.size A and B are disjoint in DepWrite too.
602     //     Therefore DepWrite can be removed.
603     MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
604 
605     if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
606       return false;
607   }
608 
609   // If DepWrite doesn't read memory or if we can't prove it is a must alias,
610   // then it can't be considered dead.
611   return true;
612 }
613 
614 /// Returns true if the memory which is accessed by the second instruction is not
615 /// modified between the first and the second instruction.
616 /// Precondition: Second instruction must be dominated by the first
617 /// instruction.
618 static bool memoryIsNotModifiedBetween(Instruction *FirstI,
619                                        Instruction *SecondI,
620                                        AliasAnalysis *AA,
621                                        const DataLayout &DL,
622                                        DominatorTree *DT) {
623   // Do a backwards scan through the CFG from SecondI to FirstI. Look for
624   // instructions which can modify the memory location accessed by SecondI.
625   //
626   // While doing the walk keep track of the address to check. It might be
627   // different in different basic blocks due to PHI translation.
628   using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
629   SmallVector<BlockAddressPair, 16> WorkList;
630   // Keep track of the address we visited each block with. Bail out if we
631   // visit a block with different addresses.
632   DenseMap<BasicBlock *, Value *> Visited;
633 
634   BasicBlock::iterator FirstBBI(FirstI);
635   ++FirstBBI;
636   BasicBlock::iterator SecondBBI(SecondI);
637   BasicBlock *FirstBB = FirstI->getParent();
638   BasicBlock *SecondBB = SecondI->getParent();
639   MemoryLocation MemLoc = MemoryLocation::get(SecondI);
640   auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
641 
642   // Start checking the SecondBB.
643   WorkList.push_back(
644       std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
645   bool isFirstBlock = true;
646 
647   // Check all blocks going backward until we reach the FirstBB.
648   while (!WorkList.empty()) {
649     BlockAddressPair Current = WorkList.pop_back_val();
650     BasicBlock *B = Current.first;
651     PHITransAddr &Addr = Current.second;
652     Value *Ptr = Addr.getAddr();
653 
654     // Ignore instructions before FirstI if this is the FirstBB.
655     BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
656 
657     BasicBlock::iterator EI;
658     if (isFirstBlock) {
659       // Ignore instructions after SecondI if this is the first visit of SecondBB.
660       assert(B == SecondBB && "first block is not the store block");
661       EI = SecondBBI;
662       isFirstBlock = false;
663     } else {
664       // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
665       // In this case we also have to look at instructions after SecondI.
666       EI = B->end();
667     }
668     for (; BI != EI; ++BI) {
669       Instruction *I = &*BI;
670       if (I->mayWriteToMemory() && I != SecondI)
671         if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
672           return false;
673     }
674     if (B != FirstBB) {
675       assert(B != &FirstBB->getParent()->getEntryBlock() &&
676           "Should not hit the entry block because SI must be dominated by LI");
677       for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
678         PHITransAddr PredAddr = Addr;
679         if (PredAddr.NeedsPHITranslationFromBlock(B)) {
680           if (!PredAddr.IsPotentiallyPHITranslatable())
681             return false;
682           if (PredAddr.PHITranslateValue(B, *PredI, DT, false))
683             return false;
684         }
685         Value *TranslatedPtr = PredAddr.getAddr();
686         auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr));
687         if (!Inserted.second) {
688           // We already visited this block before. If it was with a different
689           // address - bail out!
690           if (TranslatedPtr != Inserted.first->second)
691             return false;
692           // ... otherwise just skip it.
693           continue;
694         }
695         WorkList.push_back(std::make_pair(*PredI, PredAddr));
696       }
697     }
698   }
699   return true;
700 }
701 
702 /// Find all blocks that will unconditionally lead to the block BB and append
703 /// them to F.
704 static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
705                                    BasicBlock *BB, DominatorTree *DT) {
706   for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
707     BasicBlock *Pred = *I;
708     if (Pred == BB) continue;
709     Instruction *PredTI = Pred->getTerminator();
710     if (PredTI->getNumSuccessors() != 1)
711       continue;
712 
713     if (DT->isReachableFromEntry(Pred))
714       Blocks.push_back(Pred);
715   }
716 }
717 
718 /// Handle frees of entire structures whose dependency is a store
719 /// to a field of that structure.
720 static bool handleFree(CallInst *F, AliasAnalysis *AA,
721                        MemoryDependenceResults *MD, DominatorTree *DT,
722                        const TargetLibraryInfo *TLI,
723                        InstOverlapIntervalsTy &IOL,
724                        MapVector<Instruction *, bool> &ThrowableInst) {
725   bool MadeChange = false;
726 
727   MemoryLocation Loc = MemoryLocation(F->getOperand(0));
728   SmallVector<BasicBlock *, 16> Blocks;
729   Blocks.push_back(F->getParent());
730   const DataLayout &DL = F->getModule()->getDataLayout();
731 
732   while (!Blocks.empty()) {
733     BasicBlock *BB = Blocks.pop_back_val();
734     Instruction *InstPt = BB->getTerminator();
735     if (BB == F->getParent()) InstPt = F;
736 
737     MemDepResult Dep =
738         MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB);
739     while (Dep.isDef() || Dep.isClobber()) {
740       Instruction *Dependency = Dep.getInst();
741       if (!hasAnalyzableMemoryWrite(Dependency, *TLI) ||
742           !isRemovable(Dependency))
743         break;
744 
745       Value *DepPointer =
746           GetUnderlyingObject(getStoredPointerOperand(Dependency), DL);
747 
748       // Check for aliasing.
749       if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
750         break;
751 
752       LLVM_DEBUG(
753           dbgs() << "DSE: Dead Store to soon to be freed memory:\n  DEAD: "
754                  << *Dependency << '\n');
755 
756       // DCE instructions only used to calculate that store.
757       BasicBlock::iterator BBI(Dependency);
758       deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL,
759                             ThrowableInst);
760       ++NumFastStores;
761       MadeChange = true;
762 
763       // Inst's old Dependency is now deleted. Compute the next dependency,
764       // which may also be dead, as in
765       //    s[0] = 0;
766       //    s[1] = 0; // This has just been deleted.
767       //    free(s);
768       Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB);
769     }
770 
771     if (Dep.isNonLocal())
772       findUnconditionalPreds(Blocks, BB, DT);
773   }
774 
775   return MadeChange;
776 }
777 
778 /// Check to see if the specified location may alias any of the stack objects in
779 /// the DeadStackObjects set. If so, they become live because the location is
780 /// being loaded.
781 static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
782                                   SmallSetVector<const Value *, 16> &DeadStackObjects,
783                                   const DataLayout &DL, AliasAnalysis *AA,
784                                   const TargetLibraryInfo *TLI,
785                                   const Function *F) {
786   const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
787 
788   // A constant can't be in the dead pointer set.
789   if (isa<Constant>(UnderlyingPointer))
790     return;
791 
792   // If the kill pointer can be easily reduced to an alloca, don't bother doing
793   // extraneous AA queries.
794   if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
795     DeadStackObjects.remove(UnderlyingPointer);
796     return;
797   }
798 
799   // Remove objects that could alias LoadedLoc.
800   DeadStackObjects.remove_if([&](const Value *I) {
801     // See if the loaded location could alias the stack location.
802     MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F));
803     return !AA->isNoAlias(StackLoc, LoadedLoc);
804   });
805 }
806 
807 /// Remove dead stores to stack-allocated locations in the function end block.
808 /// Ex:
809 /// %A = alloca i32
810 /// ...
811 /// store i32 1, i32* %A
812 /// ret void
813 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
814                            MemoryDependenceResults *MD,
815                            const TargetLibraryInfo *TLI,
816                            InstOverlapIntervalsTy &IOL,
817                            MapVector<Instruction *, bool> &ThrowableInst) {
818   bool MadeChange = false;
819 
820   // Keep track of all of the stack objects that are dead at the end of the
821   // function.
822   SmallSetVector<const Value*, 16> DeadStackObjects;
823 
824   // Find all of the alloca'd pointers in the entry block.
825   BasicBlock &Entry = BB.getParent()->front();
826   for (Instruction &I : Entry) {
827     if (isa<AllocaInst>(&I))
828       DeadStackObjects.insert(&I);
829 
830     // Okay, so these are dead heap objects, but if the pointer never escapes
831     // then it's leaked by this function anyways.
832     else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
833       DeadStackObjects.insert(&I);
834   }
835 
836   // Treat byval or inalloca arguments the same, stores to them are dead at the
837   // end of the function.
838   for (Argument &AI : BB.getParent()->args())
839     if (AI.hasByValOrInAllocaAttr())
840       DeadStackObjects.insert(&AI);
841 
842   const DataLayout &DL = BB.getModule()->getDataLayout();
843 
844   // Scan the basic block backwards
845   for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
846     --BBI;
847 
848     // If we find a store, check to see if it points into a dead stack value.
849     if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
850       // See through pointer-to-pointer bitcasts
851       SmallVector<const Value *, 4> Pointers;
852       GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
853 
854       // Stores to stack values are valid candidates for removal.
855       bool AllDead = true;
856       for (const Value *Pointer : Pointers)
857         if (!DeadStackObjects.count(Pointer)) {
858           AllDead = false;
859           break;
860         }
861 
862       if (AllDead) {
863         Instruction *Dead = &*BBI;
864 
865         LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
866                           << *Dead << "\n  Objects: ";
867                    for (SmallVectorImpl<const Value *>::iterator I =
868                             Pointers.begin(),
869                         E = Pointers.end();
870                         I != E; ++I) {
871                      dbgs() << **I;
872                      if (std::next(I) != E)
873                        dbgs() << ", ";
874                    } dbgs()
875                    << '\n');
876 
877         // DCE instructions only used to calculate that store.
878         deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, ThrowableInst,
879                               &DeadStackObjects);
880         ++NumFastStores;
881         MadeChange = true;
882         continue;
883       }
884     }
885 
886     // Remove any dead non-memory-mutating instructions.
887     if (isInstructionTriviallyDead(&*BBI, TLI)) {
888       LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n  DEAD: "
889                         << *&*BBI << '\n');
890       deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, ThrowableInst,
891                             &DeadStackObjects);
892       ++NumFastOther;
893       MadeChange = true;
894       continue;
895     }
896 
897     if (isa<AllocaInst>(BBI)) {
898       // Remove allocas from the list of dead stack objects; there can't be
899       // any references before the definition.
900       DeadStackObjects.remove(&*BBI);
901       continue;
902     }
903 
904     if (auto *Call = dyn_cast<CallBase>(&*BBI)) {
905       // Remove allocation function calls from the list of dead stack objects;
906       // there can't be any references before the definition.
907       if (isAllocLikeFn(&*BBI, TLI))
908         DeadStackObjects.remove(&*BBI);
909 
910       // If this call does not access memory, it can't be loading any of our
911       // pointers.
912       if (AA->doesNotAccessMemory(Call))
913         continue;
914 
915       // If the call might load from any of our allocas, then any store above
916       // the call is live.
917       DeadStackObjects.remove_if([&](const Value *I) {
918         // See if the call site touches the value.
919         return isRefSet(AA->getModRefInfo(
920             Call, I, getPointerSize(I, DL, *TLI, BB.getParent())));
921       });
922 
923       // If all of the allocas were clobbered by the call then we're not going
924       // to find anything else to process.
925       if (DeadStackObjects.empty())
926         break;
927 
928       continue;
929     }
930 
931     // We can remove the dead stores, irrespective of the fence and its ordering
932     // (release/acquire/seq_cst). Fences only constraints the ordering of
933     // already visible stores, it does not make a store visible to other
934     // threads. So, skipping over a fence does not change a store from being
935     // dead.
936     if (isa<FenceInst>(*BBI))
937       continue;
938 
939     MemoryLocation LoadedLoc;
940 
941     // If we encounter a use of the pointer, it is no longer considered dead
942     if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
943       if (!L->isUnordered()) // Be conservative with atomic/volatile load
944         break;
945       LoadedLoc = MemoryLocation::get(L);
946     } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
947       LoadedLoc = MemoryLocation::get(V);
948     } else if (!BBI->mayReadFromMemory()) {
949       // Instruction doesn't read memory.  Note that stores that weren't removed
950       // above will hit this case.
951       continue;
952     } else {
953       // Unknown inst; assume it clobbers everything.
954       break;
955     }
956 
957     // Remove any allocas from the DeadPointer set that are loaded, as this
958     // makes any stores above the access live.
959     removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI, BB.getParent());
960 
961     // If all of the allocas were clobbered by the access then we're not going
962     // to find anything else to process.
963     if (DeadStackObjects.empty())
964       break;
965   }
966 
967   return MadeChange;
968 }
969 
970 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
971                          int64_t &EarlierSize, int64_t LaterOffset,
972                          int64_t LaterSize, bool IsOverwriteEnd) {
973   // TODO: base this on the target vector size so that if the earlier
974   // store was too small to get vector writes anyway then its likely
975   // a good idea to shorten it
976   // Power of 2 vector writes are probably always a bad idea to optimize
977   // as any store/memset/memcpy is likely using vector instructions so
978   // shortening it to not vector size is likely to be slower
979   auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
980   unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment();
981   if (!IsOverwriteEnd)
982     LaterOffset = int64_t(LaterOffset + LaterSize);
983 
984   if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
985       !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
986     return false;
987 
988   int64_t NewLength = IsOverwriteEnd
989                           ? LaterOffset - EarlierOffset
990                           : EarlierSize - (LaterOffset - EarlierOffset);
991 
992   if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
993     // When shortening an atomic memory intrinsic, the newly shortened
994     // length must remain an integer multiple of the element size.
995     const uint32_t ElementSize = AMI->getElementSizeInBytes();
996     if (0 != NewLength % ElementSize)
997       return false;
998   }
999 
1000   LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
1001                     << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
1002                     << *EarlierWrite << "\n  KILLER (offset " << LaterOffset
1003                     << ", " << EarlierSize << ")\n");
1004 
1005   Value *EarlierWriteLength = EarlierIntrinsic->getLength();
1006   Value *TrimmedLength =
1007       ConstantInt::get(EarlierWriteLength->getType(), NewLength);
1008   EarlierIntrinsic->setLength(TrimmedLength);
1009 
1010   EarlierSize = NewLength;
1011   if (!IsOverwriteEnd) {
1012     int64_t OffsetMoved = (LaterOffset - EarlierOffset);
1013     Value *Indices[1] = {
1014         ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
1015     GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
1016         EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(),
1017         EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
1018     NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
1019     EarlierIntrinsic->setDest(NewDestGEP);
1020     EarlierOffset = EarlierOffset + OffsetMoved;
1021   }
1022   return true;
1023 }
1024 
1025 static bool tryToShortenEnd(Instruction *EarlierWrite,
1026                             OverlapIntervalsTy &IntervalMap,
1027                             int64_t &EarlierStart, int64_t &EarlierSize) {
1028   if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
1029     return false;
1030 
1031   OverlapIntervalsTy::iterator OII = --IntervalMap.end();
1032   int64_t LaterStart = OII->second;
1033   int64_t LaterSize = OII->first - LaterStart;
1034 
1035   if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
1036       LaterStart + LaterSize >= EarlierStart + EarlierSize) {
1037     if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
1038                      LaterSize, true)) {
1039       IntervalMap.erase(OII);
1040       return true;
1041     }
1042   }
1043   return false;
1044 }
1045 
1046 static bool tryToShortenBegin(Instruction *EarlierWrite,
1047                               OverlapIntervalsTy &IntervalMap,
1048                               int64_t &EarlierStart, int64_t &EarlierSize) {
1049   if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
1050     return false;
1051 
1052   OverlapIntervalsTy::iterator OII = IntervalMap.begin();
1053   int64_t LaterStart = OII->second;
1054   int64_t LaterSize = OII->first - LaterStart;
1055 
1056   if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
1057     assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
1058            "Should have been handled as OW_Complete");
1059     if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
1060                      LaterSize, false)) {
1061       IntervalMap.erase(OII);
1062       return true;
1063     }
1064   }
1065   return false;
1066 }
1067 
1068 static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
1069                                             const DataLayout &DL,
1070                                             InstOverlapIntervalsTy &IOL) {
1071   bool Changed = false;
1072   for (auto OI : IOL) {
1073     Instruction *EarlierWrite = OI.first;
1074     MemoryLocation Loc = getLocForWrite(EarlierWrite);
1075     assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
1076 
1077     const Value *Ptr = Loc.Ptr->stripPointerCasts();
1078     int64_t EarlierStart = 0;
1079     int64_t EarlierSize = int64_t(Loc.Size.getValue());
1080     GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
1081     OverlapIntervalsTy &IntervalMap = OI.second;
1082     Changed |=
1083         tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
1084     if (IntervalMap.empty())
1085       continue;
1086     Changed |=
1087         tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
1088   }
1089   return Changed;
1090 }
1091 
1092 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
1093                                AliasAnalysis *AA, MemoryDependenceResults *MD,
1094                                const DataLayout &DL,
1095                                const TargetLibraryInfo *TLI,
1096                                InstOverlapIntervalsTy &IOL,
1097                                MapVector<Instruction *, bool> &ThrowableInst,
1098                                DominatorTree *DT) {
1099   // Must be a store instruction.
1100   StoreInst *SI = dyn_cast<StoreInst>(Inst);
1101   if (!SI)
1102     return false;
1103 
1104   // If we're storing the same value back to a pointer that we just loaded from,
1105   // then the store can be removed.
1106   if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
1107     if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
1108         isRemovable(SI) &&
1109         memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) {
1110 
1111       LLVM_DEBUG(
1112           dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
1113                  << *DepLoad << "\n  STORE: " << *SI << '\n');
1114 
1115       deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst);
1116       ++NumRedundantStores;
1117       return true;
1118     }
1119   }
1120 
1121   // Remove null stores into the calloc'ed objects
1122   Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
1123   if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
1124     Instruction *UnderlyingPointer =
1125         dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
1126 
1127     if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
1128         memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) {
1129       LLVM_DEBUG(
1130           dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
1131                  << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
1132 
1133       deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst);
1134       ++NumRedundantStores;
1135       return true;
1136     }
1137   }
1138   return false;
1139 }
1140 
1141 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
1142                                 MemoryDependenceResults *MD, DominatorTree *DT,
1143                                 const TargetLibraryInfo *TLI) {
1144   const DataLayout &DL = BB.getModule()->getDataLayout();
1145   bool MadeChange = false;
1146 
1147   MapVector<Instruction *, bool> ThrowableInst;
1148 
1149   // A map of interval maps representing partially-overwritten value parts.
1150   InstOverlapIntervalsTy IOL;
1151 
1152   // Do a top-down walk on the BB.
1153   for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
1154     // Handle 'free' calls specially.
1155     if (CallInst *F = isFreeCall(&*BBI, TLI)) {
1156       MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, ThrowableInst);
1157       // Increment BBI after handleFree has potentially deleted instructions.
1158       // This ensures we maintain a valid iterator.
1159       ++BBI;
1160       continue;
1161     }
1162 
1163     Instruction *Inst = &*BBI++;
1164 
1165     if (Inst->mayThrow()) {
1166       ThrowableInst[Inst] = true;
1167       continue;
1168     }
1169 
1170     // Check to see if Inst writes to memory.  If not, continue.
1171     if (!hasAnalyzableMemoryWrite(Inst, *TLI))
1172       continue;
1173 
1174     // eliminateNoopStore will update in iterator, if necessary.
1175     if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL,
1176                            ThrowableInst, DT)) {
1177       MadeChange = true;
1178       continue;
1179     }
1180 
1181     // If we find something that writes memory, get its memory dependence.
1182     MemDepResult InstDep = MD->getDependency(Inst);
1183 
1184     // Ignore any store where we can't find a local dependence.
1185     // FIXME: cross-block DSE would be fun. :)
1186     if (!InstDep.isDef() && !InstDep.isClobber())
1187       continue;
1188 
1189     // Figure out what location is being stored to.
1190     MemoryLocation Loc = getLocForWrite(Inst);
1191 
1192     // If we didn't get a useful location, fail.
1193     if (!Loc.Ptr)
1194       continue;
1195 
1196     // Loop until we find a store we can eliminate or a load that
1197     // invalidates the analysis. Without an upper bound on the number of
1198     // instructions examined, this analysis can become very time-consuming.
1199     // However, the potential gain diminishes as we process more instructions
1200     // without eliminating any of them. Therefore, we limit the number of
1201     // instructions we look at.
1202     auto Limit = MD->getDefaultBlockScanLimit();
1203     while (InstDep.isDef() || InstDep.isClobber()) {
1204       // Get the memory clobbered by the instruction we depend on.  MemDep will
1205       // skip any instructions that 'Loc' clearly doesn't interact with.  If we
1206       // end up depending on a may- or must-aliased load, then we can't optimize
1207       // away the store and we bail out.  However, if we depend on something
1208       // that overwrites the memory location we *can* potentially optimize it.
1209       //
1210       // Find out what memory location the dependent instruction stores.
1211       Instruction *DepWrite = InstDep.getInst();
1212       if (!hasAnalyzableMemoryWrite(DepWrite, *TLI))
1213         break;
1214       MemoryLocation DepLoc = getLocForWrite(DepWrite);
1215       // If we didn't get a useful location, or if it isn't a size, bail out.
1216       if (!DepLoc.Ptr)
1217         break;
1218 
1219       // Find the last throwable instruction not removed by call to
1220       // deleteDeadInstruction.
1221       Instruction *LastThrowing = nullptr;
1222       if (!ThrowableInst.empty())
1223         LastThrowing = ThrowableInst.back().first;
1224 
1225       // Make sure we don't look past a call which might throw. This is an
1226       // issue because MemoryDependenceAnalysis works in the wrong direction:
1227       // it finds instructions which dominate the current instruction, rather than
1228       // instructions which are post-dominated by the current instruction.
1229       //
1230       // If the underlying object is a non-escaping memory allocation, any store
1231       // to it is dead along the unwind edge. Otherwise, we need to preserve
1232       // the store.
1233       if (LastThrowing && DepWrite->comesBefore(LastThrowing)) {
1234         const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL);
1235         bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
1236         if (!IsStoreDeadOnUnwind) {
1237             // We're looking for a call to an allocation function
1238             // where the allocation doesn't escape before the last
1239             // throwing instruction; PointerMayBeCaptured
1240             // reasonably fast approximation.
1241             IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) &&
1242                 !PointerMayBeCaptured(Underlying, false, true);
1243         }
1244         if (!IsStoreDeadOnUnwind)
1245           break;
1246       }
1247 
1248       // If we find a write that is a) removable (i.e., non-volatile), b) is
1249       // completely obliterated by the store to 'Loc', and c) which we know that
1250       // 'Inst' doesn't load from, then we can remove it.
1251       // Also try to merge two stores if a later one only touches memory written
1252       // to by the earlier one.
1253       if (isRemovable(DepWrite) &&
1254           !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
1255         int64_t InstWriteOffset, DepWriteOffset;
1256         OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset,
1257                                          InstWriteOffset, DepWrite, IOL, *AA,
1258                                          BB.getParent());
1259         if (OR == OW_Complete) {
1260           LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *DepWrite
1261                             << "\n  KILLER: " << *Inst << '\n');
1262 
1263           // Delete the store and now-dead instructions that feed it.
1264           deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
1265                                 ThrowableInst);
1266           ++NumFastStores;
1267           MadeChange = true;
1268 
1269           // We erased DepWrite; start over.
1270           InstDep = MD->getDependency(Inst);
1271           continue;
1272         } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) ||
1273                    ((OR == OW_Begin &&
1274                      isShortenableAtTheBeginning(DepWrite)))) {
1275           assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
1276                                                     "when partial-overwrite "
1277                                                     "tracking is enabled");
1278           // The overwrite result is known, so these must be known, too.
1279           int64_t EarlierSize = DepLoc.Size.getValue();
1280           int64_t LaterSize = Loc.Size.getValue();
1281           bool IsOverwriteEnd = (OR == OW_End);
1282           MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
1283                                     InstWriteOffset, LaterSize, IsOverwriteEnd);
1284         } else if (EnablePartialStoreMerging &&
1285                    OR == OW_PartialEarlierWithFullLater) {
1286           auto *Earlier = dyn_cast<StoreInst>(DepWrite);
1287           auto *Later = dyn_cast<StoreInst>(Inst);
1288           if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
1289               DL.typeSizeEqualsStoreSize(
1290                   Earlier->getValueOperand()->getType()) &&
1291               Later && isa<ConstantInt>(Later->getValueOperand()) &&
1292               DL.typeSizeEqualsStoreSize(
1293                   Later->getValueOperand()->getType()) &&
1294               memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
1295             // If the store we find is:
1296             //   a) partially overwritten by the store to 'Loc'
1297             //   b) the later store is fully contained in the earlier one and
1298             //   c) they both have a constant value
1299             //   d) none of the two stores need padding
1300             // Merge the two stores, replacing the earlier store's value with a
1301             // merge of both values.
1302             // TODO: Deal with other constant types (vectors, etc), and probably
1303             // some mem intrinsics (if needed)
1304 
1305             APInt EarlierValue =
1306                 cast<ConstantInt>(Earlier->getValueOperand())->getValue();
1307             APInt LaterValue =
1308                 cast<ConstantInt>(Later->getValueOperand())->getValue();
1309             unsigned LaterBits = LaterValue.getBitWidth();
1310             assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
1311             LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
1312 
1313             // Offset of the smaller store inside the larger store
1314             unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
1315             unsigned LShiftAmount =
1316                 DL.isBigEndian()
1317                     ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits
1318                     : BitOffsetDiff;
1319             APInt Mask =
1320                 APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
1321                                   LShiftAmount + LaterBits);
1322             // Clear the bits we'll be replacing, then OR with the smaller
1323             // store, shifted appropriately.
1324             APInt Merged =
1325                 (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
1326             LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Earlier: " << *DepWrite
1327                               << "\n  Later: " << *Inst
1328                               << "\n  Merged Value: " << Merged << '\n');
1329 
1330             auto *SI = new StoreInst(
1331                 ConstantInt::get(Earlier->getValueOperand()->getType(), Merged),
1332                 Earlier->getPointerOperand(), false,
1333                 MaybeAlign(Earlier->getAlignment()), Earlier->getOrdering(),
1334                 Earlier->getSyncScopeID(), DepWrite);
1335 
1336             unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa,
1337                                    LLVMContext::MD_alias_scope,
1338                                    LLVMContext::MD_noalias,
1339                                    LLVMContext::MD_nontemporal};
1340             SI->copyMetadata(*DepWrite, MDToKeep);
1341             ++NumModifiedStores;
1342 
1343             // Delete the old stores and now-dead instructions that feed them.
1344             deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL,
1345                                   ThrowableInst);
1346             deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
1347                                   ThrowableInst);
1348             MadeChange = true;
1349 
1350             // We erased DepWrite and Inst (Loc); start over.
1351             break;
1352           }
1353         }
1354       }
1355 
1356       // If this is a may-aliased store that is clobbering the store value, we
1357       // can keep searching past it for another must-aliased pointer that stores
1358       // to the same location.  For example, in:
1359       //   store -> P
1360       //   store -> Q
1361       //   store -> P
1362       // we can remove the first store to P even though we don't know if P and Q
1363       // alias.
1364       if (DepWrite == &BB.front()) break;
1365 
1366       // Can't look past this instruction if it might read 'Loc'.
1367       if (isRefSet(AA->getModRefInfo(DepWrite, Loc)))
1368         break;
1369 
1370       InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
1371                                              DepWrite->getIterator(), &BB,
1372                                              /*QueryInst=*/ nullptr, &Limit);
1373     }
1374   }
1375 
1376   if (EnablePartialOverwriteTracking)
1377     MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
1378 
1379   // If this block ends in a return, unwind, or unreachable, all allocas are
1380   // dead at its end, which means stores to them are also dead.
1381   if (BB.getTerminator()->getNumSuccessors() == 0)
1382     MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, ThrowableInst);
1383 
1384   return MadeChange;
1385 }
1386 
1387 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
1388                                 MemoryDependenceResults *MD, DominatorTree *DT,
1389                                 const TargetLibraryInfo *TLI) {
1390   bool MadeChange = false;
1391   for (BasicBlock &BB : F)
1392     // Only check non-dead blocks.  Dead blocks may have strange pointer
1393     // cycles that will confuse alias analysis.
1394     if (DT->isReachableFromEntry(&BB))
1395       MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
1396 
1397   return MadeChange;
1398 }
1399 
1400 namespace {
1401 //=============================================================================
1402 // MemorySSA backed dead store elimination.
1403 //
1404 // The code below implements dead store elimination using MemorySSA. It uses
1405 // the following general approach: given a MemoryDef, walk upwards to find
1406 // clobbering MemoryDefs that may be killed by the starting def. Then check
1407 // that there are no uses that may read the location of the original MemoryDef
1408 // in between both MemoryDefs. A bit more concretely:
1409 //
1410 // For all MemoryDefs StartDef:
1411 // 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking
1412 //    upwards.
1413 // 2. Check that there are no reads between DomAccess and the StartDef by
1414 //    checking all uses starting at DomAccess and walking until we see StartDef.
1415 // 3. For each found DomDef, check that:
1416 //   1. There are no barrier instructions between DomDef and StartDef (like
1417 //       throws or stores with ordering constraints).
1418 //   2. StartDef is executed whenever DomDef is executed.
1419 //   3. StartDef completely overwrites DomDef.
1420 // 4. Erase DomDef from the function and MemorySSA.
1421 
1422 // Returns true if \p M is an intrisnic that does not read or write memory.
1423 bool isNoopIntrinsic(MemoryUseOrDef *M) {
1424   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) {
1425     switch (II->getIntrinsicID()) {
1426     case Intrinsic::lifetime_start:
1427     case Intrinsic::lifetime_end:
1428     case Intrinsic::invariant_end:
1429     case Intrinsic::launder_invariant_group:
1430     case Intrinsic::assume:
1431       return true;
1432     case Intrinsic::dbg_addr:
1433     case Intrinsic::dbg_declare:
1434     case Intrinsic::dbg_label:
1435     case Intrinsic::dbg_value:
1436       llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
1437     default:
1438       return false;
1439     }
1440   }
1441   return false;
1442 }
1443 
1444 // Check if we can ignore \p D for DSE.
1445 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
1446   Instruction *DI = D->getMemoryInst();
1447   // Calls that only access inaccessible memory cannot read or write any memory
1448   // locations we consider for elimination.
1449   if (auto CS = CallSite(DI))
1450     if (CS.onlyAccessesInaccessibleMemory())
1451       return true;
1452 
1453   // We can eliminate stores to locations not visible to the caller across
1454   // throwing instructions.
1455   if (DI->mayThrow() && !DefVisibleToCaller)
1456     return true;
1457 
1458   // We can remove the dead stores, irrespective of the fence and its ordering
1459   // (release/acquire/seq_cst). Fences only constraints the ordering of
1460   // already visible stores, it does not make a store visible to other
1461   // threads. So, skipping over a fence does not change a store from being
1462   // dead.
1463   if (isa<FenceInst>(DI))
1464     return true;
1465 
1466   // Skip intrinsics that do not really read or modify memory.
1467   if (isNoopIntrinsic(D))
1468     return true;
1469 
1470   return false;
1471 }
1472 
1473 struct DSEState {
1474   Function &F;
1475   AliasAnalysis &AA;
1476   MemorySSA &MSSA;
1477   DominatorTree &DT;
1478   PostDominatorTree &PDT;
1479   const TargetLibraryInfo &TLI;
1480 
1481   // All MemoryDefs that potentially could kill other MemDefs.
1482   SmallVector<MemoryDef *, 64> MemDefs;
1483   // Any that should be skipped as they are already deleted
1484   SmallPtrSet<MemoryAccess *, 4> SkipStores;
1485   // Keep track of all of the objects that are invisible to the caller until the
1486   // function returns.
1487   SmallPtrSet<const Value *, 16> InvisibleToCaller;
1488   // Keep track of blocks with throwing instructions not modeled in MemorySSA.
1489   SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
1490   // Post-order numbers for each basic block. Used to figure out if memory
1491   // accesses are executed before another access.
1492   DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
1493 
1494   /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
1495   /// basic block.
1496   DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
1497 
1498   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
1499            PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
1500       : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {}
1501 
1502   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1503                       DominatorTree &DT, PostDominatorTree &PDT,
1504                       const TargetLibraryInfo &TLI) {
1505     DSEState State(F, AA, MSSA, DT, PDT, TLI);
1506     // Collect blocks with throwing instructions not modeled in MemorySSA and
1507     // alloc-like objects.
1508     unsigned PO = 0;
1509     for (BasicBlock *BB : post_order(&F)) {
1510       State.PostOrderNumbers[BB] = PO++;
1511       for (Instruction &I : *BB) {
1512         if (I.mayThrow() && !MSSA.getMemoryAccess(&I))
1513           State.ThrowingBlocks.insert(I.getParent());
1514 
1515         auto *MD = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(&I));
1516         if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
1517             hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
1518           State.MemDefs.push_back(MD);
1519 
1520         // Track alloca and alloca-like objects. Here we care about objects not
1521         // visible to the caller during function execution. Alloca objects are
1522         // invalid in the caller, for alloca-like objects we ensure that they
1523         // are not captured throughout the function.
1524         if (isa<AllocaInst>(&I) ||
1525             (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true)))
1526           State.InvisibleToCaller.insert(&I);
1527       }
1528     }
1529 
1530     // Treat byval or inalloca arguments the same as Allocas, stores to them are
1531     // dead at the end of the function.
1532     for (Argument &AI : F.args())
1533       if (AI.hasByValOrInAllocaAttr())
1534         State.InvisibleToCaller.insert(&AI);
1535     return State;
1536   }
1537 
1538   Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
1539     if (!I->mayWriteToMemory())
1540       return None;
1541 
1542     if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
1543       return {MemoryLocation::getForDest(MTI)};
1544 
1545     if (auto CS = CallSite(I)) {
1546       if (Function *F = CS.getCalledFunction()) {
1547         StringRef FnName = F->getName();
1548         if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
1549           return {MemoryLocation(CS.getArgument(0))};
1550         if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
1551           return {MemoryLocation(CS.getArgument(0))};
1552         if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
1553           return {MemoryLocation(CS.getArgument(0))};
1554         if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
1555           return {MemoryLocation(CS.getArgument(0))};
1556       }
1557       return None;
1558     }
1559 
1560     return MemoryLocation::getOrNone(I);
1561   }
1562 
1563   /// Returns true if \p Use completely overwrites \p DefLoc.
1564   bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const {
1565     // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1566     // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1567     // MemoryDef.
1568     if (!UseInst->mayWriteToMemory())
1569       return false;
1570 
1571     if (auto CS = CallSite(UseInst))
1572       if (CS.onlyAccessesInaccessibleMemory())
1573         return false;
1574 
1575     ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
1576     // If necessary, perform additional analysis.
1577     if (isModSet(MR))
1578       MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
1579 
1580     Optional<MemoryLocation> UseLoc = getLocForWriteEx(UseInst);
1581     return isModSet(MR) && isMustSet(MR) &&
1582            UseLoc->Size.getValue() >= DefLoc.Size.getValue();
1583   }
1584 
1585   /// Returns true if \p Use may read from \p DefLoc.
1586   bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
1587     if (!UseInst->mayReadFromMemory())
1588       return false;
1589 
1590     if (auto CS = CallSite(UseInst))
1591       if (CS.onlyAccessesInaccessibleMemory())
1592         return false;
1593 
1594     ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
1595     // If necessary, perform additional analysis.
1596     if (isRefSet(MR))
1597       MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
1598     return isRefSet(MR);
1599   }
1600 
1601   // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no
1602   // read access in between or return None otherwise. The returned value may not
1603   // (completely) overwrite \p DefLoc. Currently we bail out when we encounter
1604   // an aliasing MemoryUse (read).
1605   Optional<MemoryAccess *> getDomMemoryDef(MemoryDef *KillingDef,
1606                                            MemoryAccess *Current,
1607                                            MemoryLocation DefLoc,
1608                                            bool DefVisibleToCaller,
1609                                            int &ScanLimit) const {
1610     MemoryAccess *DomAccess;
1611     bool StepAgain;
1612     LLVM_DEBUG(dbgs() << "  trying to get dominating access for " << *Current
1613                       << "\n");
1614     // Find the next clobbering Mod access for DefLoc, starting at Current.
1615     do {
1616       StepAgain = false;
1617       // Reached TOP.
1618       if (MSSA.isLiveOnEntryDef(Current))
1619         return None;
1620 
1621       if (isa<MemoryPhi>(Current)) {
1622         DomAccess = Current;
1623         break;
1624       }
1625       MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current);
1626       // Look for access that clobber DefLoc.
1627       DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD,
1628                                                                       DefLoc);
1629       if (MSSA.isLiveOnEntryDef(DomAccess))
1630         return None;
1631 
1632       if (isa<MemoryPhi>(DomAccess))
1633         break;
1634 
1635       // Check if we can skip DomDef for DSE. We also require the KillingDef
1636       // execute whenever DomDef executes and use post-dominance to ensure that.
1637 
1638       MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess);
1639       if ((DomDef && canSkipDef(DomDef, DefVisibleToCaller)) ||
1640           !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) {
1641         StepAgain = true;
1642         Current = DomDef->getDefiningAccess();
1643       }
1644 
1645     } while (StepAgain);
1646 
1647     LLVM_DEBUG({
1648       dbgs() << "  Checking for reads of " << *DomAccess;
1649       if (isa<MemoryDef>(DomAccess))
1650         dbgs() << " (" << *cast<MemoryDef>(DomAccess)->getMemoryInst() << ")\n";
1651     });
1652 
1653     SmallSetVector<MemoryAccess *, 32> WorkList;
1654     auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1655       for (Use &U : Acc->uses())
1656         WorkList.insert(cast<MemoryAccess>(U.getUser()));
1657     };
1658     PushMemUses(DomAccess);
1659 
1660     // Check if DomDef may be read.
1661     for (unsigned I = 0; I < WorkList.size(); I++) {
1662       MemoryAccess *UseAccess = WorkList[I];
1663 
1664       LLVM_DEBUG(dbgs() << "   Checking use " << *UseAccess);
1665       if (--ScanLimit == 0) {
1666         LLVM_DEBUG(dbgs() << "  ...  hit scan limit\n");
1667         return None;
1668       }
1669 
1670       if (isa<MemoryPhi>(UseAccess)) {
1671         PushMemUses(UseAccess);
1672         continue;
1673       }
1674 
1675       Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1676       LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1677 
1678       if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) {
1679         PushMemUses(UseAccess);
1680         continue;
1681       }
1682 
1683       // Uses which may read the original MemoryDef mean we cannot eliminate the
1684       // original MD. Stop walk.
1685       if (isReadClobber(DefLoc, UseInst)) {
1686         LLVM_DEBUG(dbgs() << "  ... found read clobber\n");
1687         return None;
1688       }
1689 
1690       // For the KillingDef we only have to check if it reads the memory
1691       // location.
1692       // TODO: It would probably be better to check for self-reads before
1693       // calling the function.
1694       if (KillingDef == UseAccess)
1695         continue;
1696 
1697       // Check all uses for MemoryDefs, except for defs completely overwriting
1698       // the original location. Otherwise we have to check uses of *all*
1699       // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1700       // miss cases like the following
1701       //   1 = Def(LoE) ; <----- DomDef stores [0,1]
1702       //   2 = Def(1)   ; (2, 1) = NoAlias,   stores [2,3]
1703       //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].
1704       //                  (The Use points to the *first* Def it may alias)
1705       //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
1706       //                  stores [0,1]
1707       if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1708         if (!isCompleteOverwrite(DefLoc, UseInst))
1709           PushMemUses(UseDef);
1710       }
1711     }
1712 
1713     // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead.
1714     return {DomAccess};
1715   }
1716 
1717   // Delete dead memory defs
1718   void deleteDeadInstruction(Instruction *SI) {
1719     MemorySSAUpdater Updater(&MSSA);
1720     SmallVector<Instruction *, 32> NowDeadInsts;
1721     NowDeadInsts.push_back(SI);
1722     --NumFastOther;
1723 
1724     while (!NowDeadInsts.empty()) {
1725       Instruction *DeadInst = NowDeadInsts.pop_back_val();
1726       ++NumFastOther;
1727 
1728       // Try to preserve debug information attached to the dead instruction.
1729       salvageDebugInfo(*DeadInst);
1730 
1731       // Remove the Instruction from MSSA.
1732       if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1733         if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1734           SkipStores.insert(MD);
1735         }
1736         Updater.removeMemoryAccess(MA);
1737       }
1738 
1739       auto I = IOLs.find(DeadInst->getParent());
1740       if (I != IOLs.end())
1741         I->second.erase(DeadInst);
1742       // Remove its operands
1743       for (Use &O : DeadInst->operands())
1744         if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1745           O = nullptr;
1746           if (isInstructionTriviallyDead(OpI, &TLI))
1747             NowDeadInsts.push_back(OpI);
1748         }
1749 
1750       DeadInst->eraseFromParent();
1751     }
1752   }
1753 
1754   // Check for any extra throws between SI and NI that block DSE.  This only
1755   // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
1756   // throw are handled during the walk from one def to the next.
1757   bool mayThrowBetween(Instruction *SI, Instruction *NI,
1758                        const Value *SILocUnd) const {
1759     // First see if we can ignore it by using the fact that SI is an
1760     // alloca/alloca like object that is not visible to the caller during
1761     // execution of the function.
1762     if (SILocUnd && InvisibleToCaller.count(SILocUnd))
1763       return false;
1764 
1765     if (SI->getParent() == NI->getParent())
1766       return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end();
1767     return !ThrowingBlocks.empty();
1768   }
1769 
1770   // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
1771   // act as barriers:
1772   //  * A memory instruction that may throw and \p SI accesses a non-stack
1773   //  object.
1774   //  * Atomic stores stronger that monotonic.
1775   bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc,
1776                     const Value *SILocUnd, Instruction *NI,
1777                     MemoryLocation &NILoc) const {
1778     // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
1779     // like object that does not escape.
1780     if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd))
1781       return true;
1782 
1783     if (NI->isAtomic()) {
1784       if (auto *NSI = dyn_cast<StoreInst>(NI)) {
1785         if (isStrongerThanMonotonic(NSI->getOrdering()))
1786           return true;
1787       } else
1788         llvm_unreachable(
1789             "Other instructions should be modeled/skipped in MemorySSA");
1790     }
1791 
1792     return false;
1793   }
1794 };
1795 
1796 bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
1797                                   MemorySSA &MSSA, DominatorTree &DT,
1798                                   PostDominatorTree &PDT,
1799                                   const TargetLibraryInfo &TLI) {
1800   const DataLayout &DL = F.getParent()->getDataLayout();
1801   bool MadeChange = false;
1802 
1803   DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
1804   // For each store:
1805   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
1806     MemoryDef *KillingDef = State.MemDefs[I];
1807     if (State.SkipStores.count(KillingDef))
1808       continue;
1809     Instruction *SI = KillingDef->getMemoryInst();
1810     auto MaybeSILoc = State.getLocForWriteEx(SI);
1811     if (!MaybeSILoc) {
1812       LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
1813                         << *SI << "\n");
1814       continue;
1815     }
1816     MemoryLocation SILoc = *MaybeSILoc;
1817     assert(SILoc.Ptr && "SILoc should not be null");
1818     const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
1819     Instruction *DefObj =
1820         const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
1821     bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd);
1822     if (DefObj && ((isAllocLikeFn(DefObj, &TLI) &&
1823                     !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT))))
1824       DefVisibleToCaller = false;
1825 
1826     MemoryAccess *Current = KillingDef;
1827     LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
1828                       << *KillingDef << " (" << *SI << ")\n");
1829 
1830     int ScanLimit = MemorySSAScanLimit;
1831     // Worklist of MemoryAccesses that may be killed by KillingDef.
1832     SetVector<MemoryAccess *> ToCheck;
1833     ToCheck.insert(KillingDef->getDefiningAccess());
1834 
1835     // Check if MemoryAccesses in the worklist are killed by KillingDef.
1836     for (unsigned I = 0; I < ToCheck.size(); I++) {
1837       Current = ToCheck[I];
1838       if (State.SkipStores.count(Current))
1839         continue;
1840 
1841       Optional<MemoryAccess *> Next = State.getDomMemoryDef(
1842           KillingDef, Current, SILoc, DefVisibleToCaller, ScanLimit);
1843 
1844       if (!Next) {
1845         LLVM_DEBUG(dbgs() << "  finished walk\n");
1846         continue;
1847       }
1848 
1849       MemoryAccess *DomAccess = *Next;
1850       LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess << "\n");
1851       if (isa<MemoryPhi>(DomAccess)) {
1852         for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) {
1853           MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
1854           BasicBlock *IncomingBlock = IncomingAccess->getBlock();
1855           BasicBlock *PhiBlock = DomAccess->getBlock();
1856 
1857           // We only consider incoming MemoryAccesses that come before the
1858           // MemoryPhi. Otherwise we could discover candidates that do not
1859           // strictly dominate our starting def.
1860           if (State.PostOrderNumbers[IncomingBlock] >
1861               State.PostOrderNumbers[PhiBlock])
1862             ToCheck.insert(IncomingAccess);
1863         }
1864         continue;
1865       }
1866       MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess);
1867       Instruction *NI = NextDef->getMemoryInst();
1868       LLVM_DEBUG(dbgs() << "  def " << *NI << "\n");
1869 
1870       if (!hasAnalyzableMemoryWrite(NI, TLI)) {
1871         LLVM_DEBUG(dbgs() << " skip, cannot analyze def\n");
1872         continue;
1873       }
1874 
1875       if (!isRemovable(NI)) {
1876         LLVM_DEBUG(dbgs() << " skip, cannot remove def\n");
1877         continue;
1878       }
1879 
1880       MemoryLocation NILoc = *State.getLocForWriteEx(NI);
1881       // Check for anything that looks like it will be a barrier to further
1882       // removal
1883       if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) {
1884         LLVM_DEBUG(dbgs() << "  skip, barrier\n");
1885         continue;
1886       }
1887 
1888       // Before we try to remove anything, check for any extra throwing
1889       // instructions that block us from DSEing
1890       if (State.mayThrowBetween(SI, NI, SILocUnd)) {
1891         LLVM_DEBUG(dbgs() << " skip, may throw!\n");
1892         break;
1893       }
1894 
1895       if (!DebugCounter::shouldExecute(MemorySSACounter))
1896         break;
1897 
1898       // Check if NI overwrites SI.
1899       int64_t InstWriteOffset, DepWriteOffset;
1900       auto Iter = State.IOLs.insert(
1901           std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
1902               NI->getParent(), InstOverlapIntervalsTy()));
1903       auto &IOL = Iter.first->second;
1904       OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset,
1905                                        InstWriteOffset, NI, IOL, AA, &F);
1906 
1907       ToCheck.insert(NextDef->getDefiningAccess());
1908       if (OR == OW_Complete) {
1909         LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *NI
1910                           << "\n  KILLER: " << *SI << '\n');
1911         State.deleteDeadInstruction(NI);
1912         ++NumFastStores;
1913         MadeChange = true;
1914       }
1915     }
1916   }
1917 
1918   if (EnablePartialOverwriteTracking)
1919     for (auto &KV : State.IOLs)
1920       MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second);
1921 
1922   return MadeChange;
1923 }
1924 } // end anonymous namespace
1925 
1926 //===----------------------------------------------------------------------===//
1927 // DSE Pass
1928 //===----------------------------------------------------------------------===//
1929 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
1930   AliasAnalysis &AA = AM.getResult<AAManager>(F);
1931   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1932   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
1933 
1934   if (EnableMemorySSA) {
1935     MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
1936     PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1937 
1938     if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI))
1939       return PreservedAnalyses::all();
1940   } else {
1941     MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
1942 
1943     if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI))
1944       return PreservedAnalyses::all();
1945   }
1946 
1947   PreservedAnalyses PA;
1948   PA.preserveSet<CFGAnalyses>();
1949   PA.preserve<GlobalsAA>();
1950   if (EnableMemorySSA)
1951     PA.preserve<MemorySSAAnalysis>();
1952   else
1953     PA.preserve<MemoryDependenceAnalysis>();
1954   return PA;
1955 }
1956 
1957 namespace {
1958 
1959 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
1960 class DSELegacyPass : public FunctionPass {
1961 public:
1962   static char ID; // Pass identification, replacement for typeid
1963 
1964   DSELegacyPass() : FunctionPass(ID) {
1965     initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
1966   }
1967 
1968   bool runOnFunction(Function &F) override {
1969     if (skipFunction(F))
1970       return false;
1971 
1972     AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1973     DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1974     const TargetLibraryInfo &TLI =
1975         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1976 
1977     if (EnableMemorySSA) {
1978       MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
1979       PostDominatorTree &PDT =
1980           getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1981 
1982       return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
1983     } else {
1984       MemoryDependenceResults &MD =
1985           getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
1986 
1987       return eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
1988     }
1989   }
1990 
1991   void getAnalysisUsage(AnalysisUsage &AU) const override {
1992     AU.setPreservesCFG();
1993     AU.addRequired<AAResultsWrapperPass>();
1994     AU.addRequired<TargetLibraryInfoWrapperPass>();
1995     AU.addPreserved<GlobalsAAWrapperPass>();
1996     AU.addRequired<DominatorTreeWrapperPass>();
1997     AU.addPreserved<DominatorTreeWrapperPass>();
1998 
1999     if (EnableMemorySSA) {
2000       AU.addRequired<PostDominatorTreeWrapperPass>();
2001       AU.addRequired<MemorySSAWrapperPass>();
2002       AU.addPreserved<PostDominatorTreeWrapperPass>();
2003       AU.addPreserved<MemorySSAWrapperPass>();
2004     } else {
2005       AU.addRequired<MemoryDependenceWrapperPass>();
2006       AU.addPreserved<MemoryDependenceWrapperPass>();
2007     }
2008   }
2009 };
2010 
2011 } // end anonymous namespace
2012 
2013 char DSELegacyPass::ID = 0;
2014 
2015 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2016                       false)
2017 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2018 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
2019 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2020 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2021 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2022 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2023 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2024 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2025                     false)
2026 
2027 FunctionPass *llvm::createDeadStoreEliminationPass() {
2028   return new DSELegacyPass();
2029 }
2030