1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs global value numbering to eliminate fully redundant
11 // instructions.  It also performs simple dead load elimination.
12 //
13 // Note that this pass does the value numbering itself; it does not use the
14 // ValueNumbering analysis passes.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Transforms/Scalar/GVN.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/DepthFirstIterator.h"
21 #include "llvm/ADT/Hashing.h"
22 #include "llvm/ADT/MapVector.h"
23 #include "llvm/ADT/PointerIntPair.h"
24 #include "llvm/ADT/PostOrderIterator.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/Analysis/AliasAnalysis.h"
31 #include "llvm/Analysis/AssumptionCache.h"
32 #include "llvm/Analysis/CFG.h"
33 #include "llvm/Analysis/GlobalsModRef.h"
34 #include "llvm/Analysis/InstructionSimplify.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/MemoryBuiltins.h"
37 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
38 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
39 #include "llvm/Analysis/PHITransAddr.h"
40 #include "llvm/Analysis/TargetLibraryInfo.h"
41 #include "llvm/Analysis/Utils/Local.h"
42 #include "llvm/Analysis/ValueTracking.h"
43 #include "llvm/IR/Attributes.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/CallSite.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DebugLoc.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/LLVMContext.h"
58 #include "llvm/IR/Metadata.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/Operator.h"
61 #include "llvm/IR/PassManager.h"
62 #include "llvm/IR/PatternMatch.h"
63 #include "llvm/IR/Type.h"
64 #include "llvm/IR/Use.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/Pass.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/raw_ostream.h"
72 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
73 #include "llvm/Transforms/Utils/SSAUpdater.h"
74 #include "llvm/Transforms/Utils/VNCoercion.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <utility>
79 #include <vector>
80 
81 using namespace llvm;
82 using namespace llvm::gvn;
83 using namespace llvm::VNCoercion;
84 using namespace PatternMatch;
85 
86 #define DEBUG_TYPE "gvn"
87 
88 STATISTIC(NumGVNInstr,  "Number of instructions deleted");
89 STATISTIC(NumGVNLoad,   "Number of loads deleted");
90 STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
91 STATISTIC(NumGVNBlocks, "Number of blocks merged");
92 STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
93 STATISTIC(NumGVNEqProp, "Number of equalities propagated");
94 STATISTIC(NumPRELoad,   "Number of loads PRE'd");
95 
96 static cl::opt<bool> EnablePRE("enable-pre",
97                                cl::init(true), cl::Hidden);
98 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
99 
100 // Maximum allowed recursion depth.
101 static cl::opt<uint32_t>
102 MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
103                 cl::desc("Max recurse depth (default = 1000)"));
104 
105 struct llvm::GVN::Expression {
106   uint32_t opcode;
107   Type *type;
108   bool commutative = false;
109   SmallVector<uint32_t, 4> varargs;
110 
111   Expression(uint32_t o = ~2U) : opcode(o) {}
112 
113   bool operator==(const Expression &other) const {
114     if (opcode != other.opcode)
115       return false;
116     if (opcode == ~0U || opcode == ~1U)
117       return true;
118     if (type != other.type)
119       return false;
120     if (varargs != other.varargs)
121       return false;
122     return true;
123   }
124 
125   friend hash_code hash_value(const Expression &Value) {
126     return hash_combine(
127         Value.opcode, Value.type,
128         hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
129   }
130 };
131 
132 namespace llvm {
133 
134 template <> struct DenseMapInfo<GVN::Expression> {
135   static inline GVN::Expression getEmptyKey() { return ~0U; }
136   static inline GVN::Expression getTombstoneKey() { return ~1U; }
137 
138   static unsigned getHashValue(const GVN::Expression &e) {
139     using llvm::hash_value;
140 
141     return static_cast<unsigned>(hash_value(e));
142   }
143 
144   static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
145     return LHS == RHS;
146   }
147 };
148 
149 } // end namespace llvm
150 
151 /// Represents a particular available value that we know how to materialize.
152 /// Materialization of an AvailableValue never fails.  An AvailableValue is
153 /// implicitly associated with a rematerialization point which is the
154 /// location of the instruction from which it was formed.
155 struct llvm::gvn::AvailableValue {
156   enum ValType {
157     SimpleVal, // A simple offsetted value that is accessed.
158     LoadVal,   // A value produced by a load.
159     MemIntrin, // A memory intrinsic which is loaded from.
160     UndefVal   // An UndefValue representing a value from a dead block (which
161                // has not yet been physically removed from the CFG).
162   };
163 
164   /// V - The value that is live out of the block.
165   PointerIntPair<Value *, 2, ValType> Val;
166 
167   /// Offset - The byte offset in Val that is interesting for the load query.
168   unsigned Offset;
169 
170   static AvailableValue get(Value *V, unsigned Offset = 0) {
171     AvailableValue Res;
172     Res.Val.setPointer(V);
173     Res.Val.setInt(SimpleVal);
174     Res.Offset = Offset;
175     return Res;
176   }
177 
178   static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
179     AvailableValue Res;
180     Res.Val.setPointer(MI);
181     Res.Val.setInt(MemIntrin);
182     Res.Offset = Offset;
183     return Res;
184   }
185 
186   static AvailableValue getLoad(LoadInst *LI, unsigned Offset = 0) {
187     AvailableValue Res;
188     Res.Val.setPointer(LI);
189     Res.Val.setInt(LoadVal);
190     Res.Offset = Offset;
191     return Res;
192   }
193 
194   static AvailableValue getUndef() {
195     AvailableValue Res;
196     Res.Val.setPointer(nullptr);
197     Res.Val.setInt(UndefVal);
198     Res.Offset = 0;
199     return Res;
200   }
201 
202   bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
203   bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
204   bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
205   bool isUndefValue() const { return Val.getInt() == UndefVal; }
206 
207   Value *getSimpleValue() const {
208     assert(isSimpleValue() && "Wrong accessor");
209     return Val.getPointer();
210   }
211 
212   LoadInst *getCoercedLoadValue() const {
213     assert(isCoercedLoadValue() && "Wrong accessor");
214     return cast<LoadInst>(Val.getPointer());
215   }
216 
217   MemIntrinsic *getMemIntrinValue() const {
218     assert(isMemIntrinValue() && "Wrong accessor");
219     return cast<MemIntrinsic>(Val.getPointer());
220   }
221 
222   /// Emit code at the specified insertion point to adjust the value defined
223   /// here to the specified type. This handles various coercion cases.
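  ///
  /// Hedged illustration: if the available value is the i32 operand of a
  /// store and the query is an i16 load of the same address at offset 0, the
  /// materialized value is roughly "trunc i32 %stored to i16" emitted at the
  /// insertion point (see VNCoercion for the exact rules).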
224   Value *MaterializeAdjustedValue(LoadInst *LI, Instruction *InsertPt,
225                                   GVN &gvn) const;
226 };
227 
228 /// Represents an AvailableValue which can be rematerialized at the end of
229 /// the associated BasicBlock.
230 struct llvm::gvn::AvailableValueInBlock {
231   /// BB - The basic block in question.
232   BasicBlock *BB;
233 
234   /// AV - The actual available value.
235   AvailableValue AV;
236 
237   static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
238     AvailableValueInBlock Res;
239     Res.BB = BB;
240     Res.AV = std::move(AV);
241     return Res;
242   }
243 
244   static AvailableValueInBlock get(BasicBlock *BB, Value *V,
245                                    unsigned Offset = 0) {
246     return get(BB, AvailableValue::get(V, Offset));
247   }
248 
249   static AvailableValueInBlock getUndef(BasicBlock *BB) {
250     return get(BB, AvailableValue::getUndef());
251   }
252 
253   /// Emit code at the end of this block to adjust the value defined here to
254   /// the specified type. This handles various coercion cases.
255   Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const {
256     return AV.MaterializeAdjustedValue(LI, BB->getTerminator(), gvn);
257   }
258 };
259 
260 //===----------------------------------------------------------------------===//
261 //                     ValueTable Internal Functions
262 //===----------------------------------------------------------------------===//
263 
264 GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
265   Expression e;
266   e.type = I->getType();
267   e.opcode = I->getOpcode();
268   for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
269        OI != OE; ++OI)
270     e.varargs.push_back(lookupOrAdd(*OI));
271   if (I->isCommutative()) {
272     // Ensure that commutative instructions that only differ by a permutation
273     // of their operands get the same value number by sorting the operand value
274     // numbers.  Since all commutative instructions have two operands it is more
275     // efficient to sort by hand rather than using, say, std::sort.
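    //
    // Illustrative sketch (not from this file): assuming %a and %b already
    // have value numbers 1 and 2, both of these receive identical expressions
    // and hence the same value number:
    //   %x = add i32 %a, %b    ; varargs = {1, 2}
    //   %y = add i32 %b, %a    ; varargs = {2, 1}, sorted back to {1, 2}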
276     assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
277     if (e.varargs[0] > e.varargs[1])
278       std::swap(e.varargs[0], e.varargs[1]);
279     e.commutative = true;
280   }
281 
282   if (CmpInst *C = dyn_cast<CmpInst>(I)) {
283     // Sort the operand value numbers so x<y and y>x get the same value number.
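    // For instance (hedged example), "%c = icmp slt i32 %a, %b" and
    // "%d = icmp sgt i32 %b, %a" end up with the same sorted operands and,
    // after getSwappedPredicate, the same predicate, so they share a number.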
284     CmpInst::Predicate Predicate = C->getPredicate();
285     if (e.varargs[0] > e.varargs[1]) {
286       std::swap(e.varargs[0], e.varargs[1]);
287       Predicate = CmpInst::getSwappedPredicate(Predicate);
288     }
289     e.opcode = (C->getOpcode() << 8) | Predicate;
290     e.commutative = true;
291   } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
292     for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
293          II != IE; ++II)
294       e.varargs.push_back(*II);
295   }
296 
297   return e;
298 }
299 
300 GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
301                                                CmpInst::Predicate Predicate,
302                                                Value *LHS, Value *RHS) {
303   assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
304          "Not a comparison!");
305   Expression e;
306   e.type = CmpInst::makeCmpResultType(LHS->getType());
307   e.varargs.push_back(lookupOrAdd(LHS));
308   e.varargs.push_back(lookupOrAdd(RHS));
309 
310   // Sort the operand value numbers so x<y and y>x get the same value number.
311   if (e.varargs[0] > e.varargs[1]) {
312     std::swap(e.varargs[0], e.varargs[1]);
313     Predicate = CmpInst::getSwappedPredicate(Predicate);
314   }
315   e.opcode = (Opcode << 8) | Predicate;
316   e.commutative = true;
317   return e;
318 }
319 
320 GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
321   assert(EI && "Not an ExtractValueInst?");
322   Expression e;
323   e.type = EI->getType();
324   e.opcode = 0;
325 
326   IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
327   if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
328     // EI might be an extract from one of our recognised intrinsics. If it
329     // is, we'll synthesize a semantically equivalent expression instead of
330     // an extract value expression.
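    //
    // Hedged illustration: given
    //   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    //   %v = extractvalue { i32, i1 } %s, 0
    // %v is numbered as if it were "add i32 %a, %b", so it can share a value
    // number with a plain add of the same operands.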
331     switch (I->getIntrinsicID()) {
332       case Intrinsic::sadd_with_overflow:
333       case Intrinsic::uadd_with_overflow:
334         e.opcode = Instruction::Add;
335         break;
336       case Intrinsic::ssub_with_overflow:
337       case Intrinsic::usub_with_overflow:
338         e.opcode = Instruction::Sub;
339         break;
340       case Intrinsic::smul_with_overflow:
341       case Intrinsic::umul_with_overflow:
342         e.opcode = Instruction::Mul;
343         break;
344       default:
345         break;
346     }
347 
348     if (e.opcode != 0) {
349       // Intrinsic recognized. Grab its args to finish building the expression.
350       assert(I->getNumArgOperands() == 2 &&
351              "Expect two args for recognised intrinsics.");
352       e.varargs.push_back(lookupOrAdd(I->getArgOperand(0)));
353       e.varargs.push_back(lookupOrAdd(I->getArgOperand(1)));
354       return e;
355     }
356   }
357 
358   // Not a recognised intrinsic. Fall back to producing an extract value
359   // expression.
360   e.opcode = EI->getOpcode();
361   for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
362        OI != OE; ++OI)
363     e.varargs.push_back(lookupOrAdd(*OI));
364 
365   for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
366          II != IE; ++II)
367     e.varargs.push_back(*II);
368 
369   return e;
370 }
371 
372 //===----------------------------------------------------------------------===//
373 //                     ValueTable External Functions
374 //===----------------------------------------------------------------------===//
375 
376 GVN::ValueTable::ValueTable() = default;
377 GVN::ValueTable::ValueTable(const ValueTable &) = default;
378 GVN::ValueTable::ValueTable(ValueTable &&) = default;
379 GVN::ValueTable::~ValueTable() = default;
380 
381 /// add - Insert a value into the table with a specified value number.
382 void GVN::ValueTable::add(Value *V, uint32_t num) {
383   valueNumbering.insert(std::make_pair(V, num));
384   if (PHINode *PN = dyn_cast<PHINode>(V))
385     NumberingPhi[num] = PN;
386 }
387 
388 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
389   if (AA->doesNotAccessMemory(C)) {
390     Expression exp = createExpr(C);
391     uint32_t e = assignExpNewValueNum(exp).first;
392     valueNumbering[C] = e;
393     return e;
394   } else if (AA->onlyReadsMemory(C)) {
395     Expression exp = createExpr(C);
396     auto ValNum = assignExpNewValueNum(exp);
397     if (ValNum.second) {
398       valueNumbering[C] = ValNum.first;
399       return ValNum.first;
400     }
401     if (!MD) {
402       uint32_t e = assignExpNewValueNum(exp).first;
403       valueNumbering[C] = e;
404       return e;
405     }
406 
407     MemDepResult local_dep = MD->getDependency(C);
408 
409     if (!local_dep.isDef() && !local_dep.isNonLocal()) {
410       valueNumbering[C] =  nextValueNumber;
411       return nextValueNumber++;
412     }
413 
414     if (local_dep.isDef()) {
415       CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
416 
417       if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
418         valueNumbering[C] = nextValueNumber;
419         return nextValueNumber++;
420       }
421 
422       for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
423         uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
424         uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
425         if (c_vn != cd_vn) {
426           valueNumbering[C] = nextValueNumber;
427           return nextValueNumber++;
428         }
429       }
430 
431       uint32_t v = lookupOrAdd(local_cdep);
432       valueNumbering[C] = v;
433       return v;
434     }
435 
436     // Non-local case.
437     const MemoryDependenceResults::NonLocalDepInfo &deps =
438       MD->getNonLocalCallDependency(CallSite(C));
439     // FIXME: Move the checking logic to MemDep!
440     CallInst* cdep = nullptr;
441 
442     // Check to see if we have a single dominating call instruction that is
443     // identical to C.
444     for (unsigned i = 0, e = deps.size(); i != e; ++i) {
445       const NonLocalDepEntry *I = &deps[i];
446       if (I->getResult().isNonLocal())
447         continue;
448 
449       // We don't handle non-definitions.  If we already have a call, reject
450       // instruction dependencies.
451       if (!I->getResult().isDef() || cdep != nullptr) {
452         cdep = nullptr;
453         break;
454       }
455 
456       CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
457       // FIXME: All of this duplicates the local-dependence case above.
458       if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
459         cdep = NonLocalDepCall;
460         continue;
461       }
462 
463       cdep = nullptr;
464       break;
465     }
466 
467     if (!cdep) {
468       valueNumbering[C] = nextValueNumber;
469       return nextValueNumber++;
470     }
471 
472     if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
473       valueNumbering[C] = nextValueNumber;
474       return nextValueNumber++;
475     }
476     for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
477       uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
478       uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
479       if (c_vn != cd_vn) {
480         valueNumbering[C] = nextValueNumber;
481         return nextValueNumber++;
482       }
483     }
484 
485     uint32_t v = lookupOrAdd(cdep);
486     valueNumbering[C] = v;
487     return v;
488   } else {
489     valueNumbering[C] = nextValueNumber;
490     return nextValueNumber++;
491   }
492 }
493 
494 /// Returns true if a value number exists for the specified value.
495 bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
496 
497 /// lookupOrAdd - Returns the value number for the specified value, assigning
498 /// it a new number if it did not have one before.
499 uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
500   DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
501   if (VI != valueNumbering.end())
502     return VI->second;
503 
504   if (!isa<Instruction>(V)) {
505     valueNumbering[V] = nextValueNumber;
506     return nextValueNumber++;
507   }
508 
509   Instruction* I = cast<Instruction>(V);
510   Expression exp;
511   switch (I->getOpcode()) {
512     case Instruction::Call:
513       return lookupOrAddCall(cast<CallInst>(I));
514     case Instruction::Add:
515     case Instruction::FAdd:
516     case Instruction::Sub:
517     case Instruction::FSub:
518     case Instruction::Mul:
519     case Instruction::FMul:
520     case Instruction::UDiv:
521     case Instruction::SDiv:
522     case Instruction::FDiv:
523     case Instruction::URem:
524     case Instruction::SRem:
525     case Instruction::FRem:
526     case Instruction::Shl:
527     case Instruction::LShr:
528     case Instruction::AShr:
529     case Instruction::And:
530     case Instruction::Or:
531     case Instruction::Xor:
532     case Instruction::ICmp:
533     case Instruction::FCmp:
534     case Instruction::Trunc:
535     case Instruction::ZExt:
536     case Instruction::SExt:
537     case Instruction::FPToUI:
538     case Instruction::FPToSI:
539     case Instruction::UIToFP:
540     case Instruction::SIToFP:
541     case Instruction::FPTrunc:
542     case Instruction::FPExt:
543     case Instruction::PtrToInt:
544     case Instruction::IntToPtr:
545     case Instruction::BitCast:
546     case Instruction::Select:
547     case Instruction::ExtractElement:
548     case Instruction::InsertElement:
549     case Instruction::ShuffleVector:
550     case Instruction::InsertValue:
551     case Instruction::GetElementPtr:
552       exp = createExpr(I);
553       break;
554     case Instruction::ExtractValue:
555       exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
556       break;
557     case Instruction::PHI:
558       valueNumbering[V] = nextValueNumber;
559       NumberingPhi[nextValueNumber] = cast<PHINode>(V);
560       return nextValueNumber++;
561     default:
562       valueNumbering[V] = nextValueNumber;
563       return nextValueNumber++;
564   }
565 
566   uint32_t e = assignExpNewValueNum(exp).first;
567   valueNumbering[V] = e;
568   return e;
569 }
570 
571 /// Returns the value number of the specified value.  If Verify is true, this
572 /// asserts when the value has not been numbered; otherwise it returns 0.
573 uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
574   DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
575   if (Verify) {
576     assert(VI != valueNumbering.end() && "Value not numbered?");
577     return VI->second;
578   }
579   return (VI != valueNumbering.end()) ? VI->second : 0;
580 }
581 
582 /// Returns the value number of the given comparison,
583 /// assigning it a new number if it did not have one before.  Useful when
584 /// we deduced the result of a comparison, but don't immediately have an
585 /// instruction realizing that comparison to hand.
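/// (Hedged illustration: when equality propagation learns that "%x == %y"
/// holds along an edge, it can ask for the number of the corresponding
/// "icmp eq %x, %y" through this helper even though no such icmp instruction
/// exists in the IR.)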
586 uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
587                                          CmpInst::Predicate Predicate,
588                                          Value *LHS, Value *RHS) {
589   Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
590   return assignExpNewValueNum(exp).first;
591 }
592 
593 /// Remove all entries from the ValueTable.
594 void GVN::ValueTable::clear() {
595   valueNumbering.clear();
596   expressionNumbering.clear();
597   NumberingPhi.clear();
598   PhiTranslateTable.clear();
599   nextValueNumber = 1;
600   Expressions.clear();
601   ExprIdx.clear();
602   nextExprNumber = 0;
603 }
604 
605 /// Remove a value from the value numbering.
606 void GVN::ValueTable::erase(Value *V) {
607   uint32_t Num = valueNumbering.lookup(V);
608   valueNumbering.erase(V);
609   // If V is a PHINode, the V <--> value number mapping is one-to-one.
610   if (isa<PHINode>(V))
611     NumberingPhi.erase(Num);
612 }
613 
614 /// verifyRemoved - Verify that the value is removed from all internal data
615 /// structures.
616 void GVN::ValueTable::verifyRemoved(const Value *V) const {
617   for (DenseMap<Value*, uint32_t>::const_iterator
618          I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
619     assert(I->first != V && "Inst still occurs in value numbering map!");
620   }
621 }
622 
623 //===----------------------------------------------------------------------===//
624 //                                GVN Pass
625 //===----------------------------------------------------------------------===//
626 
627 PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
628   // FIXME: The order of evaluation of these 'getResult' calls is very
629   // significant! Re-ordering these variables will cause GVN when run alone to
630   // be less effective! We should fix memdep and basic-aa to not exhibit this
631   // behavior, but until then don't change the order here.
632   auto &AC = AM.getResult<AssumptionAnalysis>(F);
633   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
634   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
635   auto &AA = AM.getResult<AAManager>(F);
636   auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F);
637   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
638   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
639   bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep, LI, &ORE);
640   if (!Changed)
641     return PreservedAnalyses::all();
642   PreservedAnalyses PA;
643   PA.preserve<DominatorTreeAnalysis>();
644   PA.preserve<GlobalsAA>();
645   PA.preserve<TargetLibraryAnalysis>();
646   return PA;
647 }
648 
649 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
650 LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
651   errs() << "{\n";
652   for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
653        E = d.end(); I != E; ++I) {
654       errs() << I->first << "\n";
655       I->second->dump();
656   }
657   errs() << "}\n";
658 }
659 #endif
660 
661 /// Return true if we can prove that the value
662 /// we're analyzing is fully available in the specified block.  As we go, keep
663 /// track of which blocks we know are fully available in FullyAvailableBlocks.
664 /// This map has four states:
665 ///   0) we know the block *is not* fully available.
666 ///   1) we know the block *is* fully available.
667 ///   2) we do not know whether the block is fully available or not, but we are
668 ///      currently speculating that it will be.
669 ///   3) we are speculating for this block and have used that to speculate for
670 ///      other blocks.
671 static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
672                             DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
673                             uint32_t RecurseDepth) {
674   if (RecurseDepth > MaxRecurseDepth)
675     return false;
676 
677   // Optimistically assume that the block is fully available and check to see
678   // if we already know about this block in one lookup.
679   std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
680     FullyAvailableBlocks.insert(std::make_pair(BB, 2));
681 
682   // If the entry already existed for this block, return the precomputed value.
683   if (!IV.second) {
684     // If this is a speculative "available" value, mark it as being used for
685     // speculation of other blocks.
686     if (IV.first->second == 2)
687       IV.first->second = 3;
688     return IV.first->second != 0;
689   }
690 
691   // Otherwise, see if it is fully available in all predecessors.
692   pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
693 
694   // If this block has no predecessors, it isn't live-in here.
695   if (PI == PE)
696     goto SpeculationFailure;
697 
698   for (; PI != PE; ++PI)
699     // If the value isn't fully available in one of our predecessors, then it
700     // isn't fully available in this block either.  Undo our previous
701     // optimistic assumption and bail out.
702     if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
703       goto SpeculationFailure;
704 
705   return true;
706 
707 // If we get here, we found out that this is not, after
708 // all, a fully-available block.  We have a problem if we speculated on this and
709 // used the speculation to mark other blocks as available.
710 SpeculationFailure:
711   char &BBVal = FullyAvailableBlocks[BB];
712 
713   // If we didn't speculate on this, just return with it set to false.
714   if (BBVal == 2) {
715     BBVal = 0;
716     return false;
717   }
718 
719   // If we did speculate on this value, we could have blocks set to 1 that are
720   // incorrect.  Walk the (transitive) successors of this block and mark them
721   // as 0 if they were set to 1.
722   SmallVector<BasicBlock*, 32> BBWorklist;
723   BBWorklist.push_back(BB);
724 
725   do {
726     BasicBlock *Entry = BBWorklist.pop_back_val();
727     // Note that this sets blocks to 0 (unavailable) if they happen to not
728     // already be in FullyAvailableBlocks.  This is safe.
729     char &EntryVal = FullyAvailableBlocks[Entry];
730     if (EntryVal == 0) continue;  // Already unavailable.
731 
732     // Mark as unavailable.
733     EntryVal = 0;
734 
735     BBWorklist.append(succ_begin(Entry), succ_end(Entry));
736   } while (!BBWorklist.empty());
737 
738   return false;
739 }
740 
741 /// Given a set of loads specified by ValuesPerBlock,
742 /// construct SSA form, allowing us to eliminate LI.  This returns the value
743 /// that should be used at LI's definition site.
744 static Value *ConstructSSAForLoadSet(LoadInst *LI,
745                          SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
746                                      GVN &gvn) {
747   // Check for the fully redundant, dominating load case.  In this case, we can
748   // just use the dominating value directly.
749   if (ValuesPerBlock.size() == 1 &&
750       gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
751                                                LI->getParent())) {
752     assert(!ValuesPerBlock[0].AV.isUndefValue() &&
753            "Dead BB dominates this block");
754     return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
755   }
756 
757   // Otherwise, we have to construct SSA form.
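  //
  // Sketch of the result (illustrative only): if the value is available as
  // %v1 at the end of pred %BB1 and as %v2 at the end of pred %BB2, the
  // SSAUpdater places
  //   %v = phi i32 [ %v1, %BB1 ], [ %v2, %BB2 ]
  // at the start of LI's block, and that PHI is what this function returns.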
758   SmallVector<PHINode*, 8> NewPHIs;
759   SSAUpdater SSAUpdate(&NewPHIs);
760   SSAUpdate.Initialize(LI->getType(), LI->getName());
761 
762   for (const AvailableValueInBlock &AV : ValuesPerBlock) {
763     BasicBlock *BB = AV.BB;
764 
765     if (SSAUpdate.HasValueForBlock(BB))
766       continue;
767 
768     SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn));
769   }
770 
771   // Perform PHI construction.
772   return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
773 }
774 
775 Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
776                                                 Instruction *InsertPt,
777                                                 GVN &gvn) const {
778   Value *Res;
779   Type *LoadTy = LI->getType();
780   const DataLayout &DL = LI->getModule()->getDataLayout();
781   if (isSimpleValue()) {
782     Res = getSimpleValue();
783     if (Res->getType() != LoadTy) {
784       Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
785 
786       DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
787                    << *getSimpleValue() << '\n'
788                    << *Res << '\n' << "\n\n\n");
789     }
790   } else if (isCoercedLoadValue()) {
791     LoadInst *Load = getCoercedLoadValue();
792     if (Load->getType() == LoadTy && Offset == 0) {
793       Res = Load;
794     } else {
795       Res = getLoadValueForLoad(Load, Offset, LoadTy, InsertPt, DL);
796       // We would like to use gvn.markInstructionForDeletion here, but we can't
797       // because the load is already memoized into the leader map table that GVN
798       // tracks.  It is potentially possible to remove the load from the table,
799       // but then all of the operations based on it would need to be
800       // rehashed.  Just leave the dead load around.
801       gvn.getMemDep().removeInstruction(Load);
802       DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
803                    << *getCoercedLoadValue() << '\n'
804                    << *Res << '\n'
805                    << "\n\n\n");
806     }
807   } else if (isMemIntrinValue()) {
808     Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
809                                  InsertPt, DL);
810     DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
811                  << "  " << *getMemIntrinValue() << '\n'
812                  << *Res << '\n' << "\n\n\n");
813   } else {
814     assert(isUndefValue() && "Should be UndefVal");
815     DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
816     return UndefValue::get(LoadTy);
817   }
818   assert(Res && "failed to materialize?");
819   return Res;
820 }
821 
822 static bool isLifetimeStart(const Instruction *Inst) {
823   if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
824     return II->getIntrinsicID() == Intrinsic::lifetime_start;
825   return false;
826 }
827 
828 /// \brief Try to locate the three instructions involved in a missed
829 /// load-elimination case that is due to an intervening store.
830 static void reportMayClobberedLoad(LoadInst *LI, MemDepResult DepInfo,
831                                    DominatorTree *DT,
832                                    OptimizationRemarkEmitter *ORE) {
833   using namespace ore;
834 
835   User *OtherAccess = nullptr;
836 
837   OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", LI);
838   R << "load of type " << NV("Type", LI->getType()) << " not eliminated"
839     << setExtraArgs();
840 
841   for (auto *U : LI->getPointerOperand()->users())
842     if (U != LI && (isa<LoadInst>(U) || isa<StoreInst>(U)) &&
843         DT->dominates(cast<Instruction>(U), LI)) {
844       // FIXME: for now give up if there are multiple memory accesses that
845       // dominate the load.  We need further analysis to decide which one we
846       // are forwarding from.
847       if (OtherAccess)
848         OtherAccess = nullptr;
849       else
850         OtherAccess = U;
851     }
852 
853   if (OtherAccess)
854     R << " in favor of " << NV("OtherAccess", OtherAccess);
855 
856   R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
857 
858   ORE->emit(R);
859 }
860 
861 bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
862                                   Value *Address, AvailableValue &Res) {
863   assert((DepInfo.isDef() || DepInfo.isClobber()) &&
864          "expected a local dependence");
865   assert(LI->isUnordered() && "rules below are incorrect for ordered access");
866 
867   const DataLayout &DL = LI->getModule()->getDataLayout();
868 
869   if (DepInfo.isClobber()) {
870     // If the dependence is to a store that writes to a superset of the bits
871     // read by the load, we can extract the bits we need for the load from the
872     // stored value.
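    //
    // Hedged example: an i64 store followed by an i32 load of the same base
    // address at byte offset 4 can be satisfied from the stored i64;
    // analyzeLoadFromClobberingStore reports that offset, and the bits are
    // extracted later by MaterializeAdjustedValue.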
873     if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
874       // Can't forward from non-atomic to atomic without violating memory model.
875       if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
876         int Offset =
877           analyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, DL);
878         if (Offset != -1) {
879           Res = AvailableValue::get(DepSI->getValueOperand(), Offset);
880           return true;
881         }
882       }
883     }
884 
885     // Check to see if we have something like this:
886     //    load i32* P
887     //    load i8* (P+1)
888     // if we have this, replace the later with an extraction from the former.
889     if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
890       // If this is a clobber and L is the first instruction in its block, then
891       // we have the first instruction in the entry block.
892       // Can't forward from non-atomic to atomic without violating memory model.
893       if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
894         int Offset =
895           analyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
896 
897         if (Offset != -1) {
898           Res = AvailableValue::getLoad(DepLI, Offset);
899           return true;
900         }
901       }
902     }
903 
904     // If the clobbering value is a memset/memcpy/memmove, see if we can
905     // forward a value on from it.
906     if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
907       if (Address && !LI->isAtomic()) {
908         int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
909                                                       DepMI, DL);
910         if (Offset != -1) {
911           Res = AvailableValue::getMI(DepMI, Offset);
912           return true;
913         }
914       }
915     }
916     // Nothing is known about this clobber; we have to be conservative.
917     DEBUG(
918       // fast print dep, using operator<< on instruction is too slow.
919       dbgs() << "GVN: load ";
920       LI->printAsOperand(dbgs());
921       Instruction *I = DepInfo.getInst();
922       dbgs() << " is clobbered by " << *I << '\n';
923     );
924     if (ORE->allowExtraAnalysis(DEBUG_TYPE))
925       reportMayClobberedLoad(LI, DepInfo, DT, ORE);
926 
927     return false;
928   }
929   assert(DepInfo.isDef() && "follows from above");
930 
931   Instruction *DepInst = DepInfo.getInst();
932 
933   // Loading the allocation -> undef.
934   if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
935       // Loading immediately after lifetime begin -> undef.
936       isLifetimeStart(DepInst)) {
937     Res = AvailableValue::get(UndefValue::get(LI->getType()));
938     return true;
939   }
940 
941   // Loading from calloc (which zero initializes memory) -> zero
942   if (isCallocLikeFn(DepInst, TLI)) {
943     Res = AvailableValue::get(Constant::getNullValue(LI->getType()));
944     return true;
945   }
946 
947   if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
948     // Reject loads and stores that are to the same address but are of
949     // different types if we have to. If the stored value is larger than or
950     // equal to the loaded value, we can reuse it.
951     if (S->getValueOperand()->getType() != LI->getType() &&
952         !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
953                                          LI->getType(), DL))
954       return false;
955 
956     // Can't forward from non-atomic to atomic without violating memory model.
957     if (S->isAtomic() < LI->isAtomic())
958       return false;
959 
960     Res = AvailableValue::get(S->getValueOperand());
961     return true;
962   }
963 
964   if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
965     // If the types mismatch and we can't handle it, reject reuse of the load.
966     // If the value produced by the earlier load is at least as large as the
967     // one we need, we can reuse it.
968     if (LD->getType() != LI->getType() &&
969         !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
970       return false;
971 
972     // Can't forward from non-atomic to atomic without violating memory model.
973     if (LD->isAtomic() < LI->isAtomic())
974       return false;
975 
976     Res = AvailableValue::getLoad(LD);
977     return true;
978   }
979 
980   // Unknown def - must be conservative
981   DEBUG(
982     // fast print dep, using operator<< on instruction is too slow.
983     dbgs() << "GVN: load ";
984     LI->printAsOperand(dbgs());
985     dbgs() << " has unknown def " << *DepInst << '\n';
986   );
987   return false;
988 }
989 
990 void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
991                                   AvailValInBlkVect &ValuesPerBlock,
992                                   UnavailBlkVect &UnavailableBlocks) {
993   // Filter out useless results (non-locals, etc).  Keep track of the blocks
994   // where we have an available value, and also of whether we see dependencies
995   // that produce an unknown value for the load (such as a call that could
996   // potentially clobber the load).
997   unsigned NumDeps = Deps.size();
998   for (unsigned i = 0, e = NumDeps; i != e; ++i) {
999     BasicBlock *DepBB = Deps[i].getBB();
1000     MemDepResult DepInfo = Deps[i].getResult();
1001 
1002     if (DeadBlocks.count(DepBB)) {
1003       // A dependent mem-op in a dead block is treated as a load that evaluates
1004       // to the same value as the load in question.
1005       ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1006       continue;
1007     }
1008 
1009     if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1010       UnavailableBlocks.push_back(DepBB);
1011       continue;
1012     }
1013 
1014     // The address being loaded in this non-local block may not be the same as
1015     // the pointer operand of the load if PHI translation occurs.  Make sure
1016     // to consider the right address.
1017     Value *Address = Deps[i].getAddress();
1018 
1019     AvailableValue AV;
1020     if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) {
1021       // subtlety: because we know this was a non-local dependency, we know
1022       // it's safe to materialize anywhere between the instruction within
1023       // DepInfo and the end of its block.
1024       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1025                                                           std::move(AV)));
1026     } else {
1027       UnavailableBlocks.push_back(DepBB);
1028     }
1029   }
1030 
1031   assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1032          "post condition violation");
1033 }
1034 
1035 bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
1036                          UnavailBlkVect &UnavailableBlocks) {
1037   // Okay, we have *some* definitions of the value.  This means that the value
1038   // is available in some of our (transitive) predecessors.  Let's think about
1039   // doing PRE of this load.  This will involve inserting a new load into the
1040   // predecessor when it's not available.  We could do this in general, but
1041   // prefer to not increase code size.  As such, we only do this when we know
1042   // that we only have to insert *one* load (which means we're basically moving
1043   // the load, not inserting a new one).
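  //
  // Illustrative shape of the transform (not from the source):
  //
  //        br i1 %c, label %A, label %B
  //   A:   store i32 %x, i32* %p            ; value available on this path
  //        br label %M
  //   B:   br label %M                      ; value unavailable on this path
  //   M:   %v = load i32, i32* %p
  //
  // Load PRE inserts "%v.pre = load i32, i32* %p" at the end of %B and
  // replaces %v with "phi i32 [ %x, %A ], [ %v.pre, %B ]".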
1044 
1045   SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
1046                                         UnavailableBlocks.end());
1047 
1048   // Let's find the first basic block with more than one predecessor.  Walk
1049   // backwards through predecessors if needed.
1050   BasicBlock *LoadBB = LI->getParent();
1051   BasicBlock *TmpBB = LoadBB;
1052   bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
1053 
1054   // Check that there are no implicit control flow instructions above our load
1055   // in its block. If there is an instruction that doesn't always pass
1056   // execution to the following instruction, then moving through it may become
1057   // invalid. For example:
1058   //
1059   // int arr[LEN];
1060   // int index = ???;
1061   // ...
1062   // guard(0 <= index && index < LEN);
1063   // use(arr[index]);
1064   //
1065   // It is illegal to move the array access to any point above the guard,
1066   // because if the index is out of bounds we should deoptimize rather than
1067   // access the array.
1068   // Check that there is no guard in this block above our instruction.
1069   if (!IsSafeToSpeculativelyExecute) {
1070     auto It = FirstImplicitControlFlowInsts.find(TmpBB);
1071     if (It != FirstImplicitControlFlowInsts.end()) {
1072       assert(It->second->getParent() == TmpBB &&
1073              "Implicit control flow map broken?");
1074       if (OI->dominates(It->second, LI))
1075         return false;
1076     }
1077   }
1078   while (TmpBB->getSinglePredecessor()) {
1079     TmpBB = TmpBB->getSinglePredecessor();
1080     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1081       return false;
1082     if (Blockers.count(TmpBB))
1083       return false;
1084 
1085     // If any of these blocks has more than one successor (i.e. if the edge we
1086     // just traversed was critical), then there are other paths through this
1087     // block along which the load may not be anticipated.  Hoisting the load
1088     // above this block would be adding the load to execution paths along
1089     // which it was not previously executed.
1090     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1091       return false;
1092 
1093     // Check that there is no implicit control flow in a block above.
1094     if (!IsSafeToSpeculativelyExecute &&
1095         FirstImplicitControlFlowInsts.count(TmpBB))
1096       return false;
1097   }
1098 
1099   assert(TmpBB);
1100   LoadBB = TmpBB;
1101 
1102   // Check to see how many predecessors have the loaded value fully
1103   // available.
1104   MapVector<BasicBlock *, Value *> PredLoads;
1105   DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1106   for (const AvailableValueInBlock &AV : ValuesPerBlock)
1107     FullyAvailableBlocks[AV.BB] = true;
1108   for (BasicBlock *UnavailableBB : UnavailableBlocks)
1109     FullyAvailableBlocks[UnavailableBB] = false;
1110 
1111   SmallVector<BasicBlock *, 4> CriticalEdgePred;
1112   for (BasicBlock *Pred : predecessors(LoadBB)) {
1113     // If any predecessor block is an EH pad that does not allow non-PHI
1114     // instructions before the terminator, we can't PRE the load.
1115     if (Pred->getTerminator()->isEHPad()) {
1116       DEBUG(dbgs()
1117             << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1118             << Pred->getName() << "': " << *LI << '\n');
1119       return false;
1120     }
1121 
1122     if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
1123       continue;
1124     }
1125 
1126     if (Pred->getTerminator()->getNumSuccessors() != 1) {
1127       if (isa<IndirectBrInst>(Pred->getTerminator())) {
1128         DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1129               << Pred->getName() << "': " << *LI << '\n');
1130         return false;
1131       }
1132 
1133       if (LoadBB->isEHPad()) {
1134         DEBUG(dbgs()
1135               << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1136               << Pred->getName() << "': " << *LI << '\n');
1137         return false;
1138       }
1139 
1140       CriticalEdgePred.push_back(Pred);
1141     } else {
1142       // Only add the predecessors that will not be split for now.
1143       PredLoads[Pred] = nullptr;
1144     }
1145   }
1146 
1147   // Decide whether PRE is profitable for this load.
1148   unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
1149   assert(NumUnavailablePreds != 0 &&
1150          "Fully available value should already be eliminated!");
1151 
1152   // If this load is unavailable in multiple predecessors, reject it.
1153   // FIXME: If we could restructure the CFG, we could make a common pred with
1154   // all the preds that don't have an available LI and insert a new load into
1155   // that one block.
1156   if (NumUnavailablePreds != 1)
1157       return false;
1158 
1159   // Split critical edges, and update the unavailable predecessors accordingly.
1160   for (BasicBlock *OrigPred : CriticalEdgePred) {
1161     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1162     assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1163     PredLoads[NewPred] = nullptr;
1164     DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1165                  << LoadBB->getName() << '\n');
1166   }
1167 
1168   // Check if the load can safely be moved to all the unavailable predecessors.
1169   bool CanDoPRE = true;
1170   const DataLayout &DL = LI->getModule()->getDataLayout();
1171   SmallVector<Instruction*, 8> NewInsts;
1172   for (auto &PredLoad : PredLoads) {
1173     BasicBlock *UnavailablePred = PredLoad.first;
1174 
1175     // Do PHI translation to get its value in the predecessor if necessary.  The
1176     // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1177 
1178     // If all preds have a single successor, then we know it is safe to insert
1179     // the load on the pred (?!?), so we can insert code to materialize the
1180     // pointer if it is not available.
1181     PHITransAddr Address(LI->getPointerOperand(), DL, AC);
1182     Value *LoadPtr = nullptr;
1183     LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
1184                                                 *DT, NewInsts);
1185 
1186     // If we couldn't find or insert a computation of this phi translated value,
1187     // we fail PRE.
1188     if (!LoadPtr) {
1189       DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1190             << *LI->getPointerOperand() << "\n");
1191       CanDoPRE = false;
1192       break;
1193     }
1194 
1195     PredLoad.second = LoadPtr;
1196   }
1197 
1198   if (!CanDoPRE) {
1199     while (!NewInsts.empty()) {
1200       Instruction *I = NewInsts.pop_back_val();
1201       markInstructionForDeletion(I);
1202     }
1203     // HINT: Don't revert the edge-splitting, as a following transformation may
1204     // also need to split these critical edges.
1205     return !CriticalEdgePred.empty();
1206   }
1207 
1208   // Okay, we can eliminate this load by inserting a reload in the predecessor
1209   // and using PHI construction to get the value in the other predecessors, do
1210   // it.
1211   DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1212   DEBUG(if (!NewInsts.empty())
1213           dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
1214                  << *NewInsts.back() << '\n');
1215 
1216   // Assign value numbers to the new instructions.
1217   for (Instruction *I : NewInsts) {
1218     // Instructions that have been inserted in predecessor(s) to materialize
1219     // the load address do not retain their original debug locations. Doing
1220     // so could lead to confusing (but correct) source attributions.
1221     // FIXME: How do we retain source locations without causing poor debugging
1222     // behavior?
1223     I->setDebugLoc(DebugLoc());
1224 
1225     // FIXME: We really _ought_ to insert these value numbers into their
1226     // parent's availability map.  However, in doing so, we risk getting into
1227     // ordering issues.  If a block hasn't been processed yet, we would be
1228     // marking a value as AVAIL-IN, which isn't what we intend.
1229     VN.lookupOrAdd(I);
1230   }
1231 
1232   for (const auto &PredLoad : PredLoads) {
1233     BasicBlock *UnavailablePred = PredLoad.first;
1234     Value *LoadPtr = PredLoad.second;
1235 
1236     auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
1237                                  LI->isVolatile(), LI->getAlignment(),
1238                                  LI->getOrdering(), LI->getSyncScopeID(),
1239                                  UnavailablePred->getTerminator());
1240     NewLoad->setDebugLoc(LI->getDebugLoc());
1241 
1242     // Transfer the old load's AA tags to the new load.
1243     AAMDNodes Tags;
1244     LI->getAAMetadata(Tags);
1245     if (Tags)
1246       NewLoad->setAAMetadata(Tags);
1247 
1248     if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load))
1249       NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1250     if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group))
1251       NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1252     if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
1253       NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1254 
1255     // We do not propagate the old load's debug location, because the new
1256     // load now lives in a different BB, and we want to avoid a jumpy line
1257     // table.
1258     // FIXME: How do we retain source locations without causing poor debugging
1259     // behavior?
1260 
1261     // Add the newly created load.
1262     ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
1263                                                         NewLoad));
1264     MD->invalidateCachedPointerInfo(LoadPtr);
1265     DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1266   }
1267 
1268   // Perform PHI construction.
1269   Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
1270   LI->replaceAllUsesWith(V);
1271   if (isa<PHINode>(V))
1272     V->takeName(LI);
1273   if (Instruction *I = dyn_cast<Instruction>(V))
1274     I->setDebugLoc(LI->getDebugLoc());
1275   if (V->getType()->isPtrOrPtrVectorTy())
1276     MD->invalidateCachedPointerInfo(V);
1277   markInstructionForDeletion(LI);
1278   ORE->emit([&]() {
1279     return OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI)
1280            << "load eliminated by PRE";
1281   });
1282   ++NumPRELoad;
1283   return true;
1284 }
1285 
1286 static void reportLoadElim(LoadInst *LI, Value *AvailableValue,
1287                            OptimizationRemarkEmitter *ORE) {
1288   using namespace ore;
1289 
1290   ORE->emit([&]() {
1291     return OptimizationRemark(DEBUG_TYPE, "LoadElim", LI)
1292            << "load of type " << NV("Type", LI->getType()) << " eliminated"
1293            << setExtraArgs() << " in favor of "
1294            << NV("InfavorOfValue", AvailableValue);
1295   });
1296 }
1297 
1298 /// Attempt to eliminate a load whose dependencies are
1299 /// non-local by performing PHI construction.
1300 bool GVN::processNonLocalLoad(LoadInst *LI) {
1301   // Non-local speculations are not allowed under ASan or HWASan.
1302   if (LI->getParent()->getParent()->hasFnAttribute(
1303           Attribute::SanitizeAddress) ||
1304       LI->getParent()->getParent()->hasFnAttribute(
1305           Attribute::SanitizeHWAddress))
1306     return false;
1307 
1308   // Step 1: Find the non-local dependencies of the load.
1309   LoadDepVect Deps;
1310   MD->getNonLocalPointerDependency(LI, Deps);
1311 
1312   // If we had to process more than one hundred blocks to find the
1313   // dependencies, this load isn't worth worrying about.  Optimizing
1314   // it will be too expensive.
1315   unsigned NumDeps = Deps.size();
1316   if (NumDeps > 100)
1317     return false;
1318 
1319   // If we had a phi translation failure, we'll have a single entry which is a
1320   // clobber in the current block.  Reject this early.
1321   if (NumDeps == 1 &&
1322       !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
1323     DEBUG(
1324       dbgs() << "GVN: non-local load ";
1325       LI->printAsOperand(dbgs());
1326       dbgs() << " has unknown dependencies\n";
1327     );
1328     return false;
1329   }
1330 
1331   // If this load follows a GEP, see if we can PRE the indices before analyzing.
1332   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
1333     for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
1334                                         OE = GEP->idx_end();
1335          OI != OE; ++OI)
1336       if (Instruction *I = dyn_cast<Instruction>(OI->get()))
1337         performScalarPRE(I);
1338   }
1339 
1340   // Step 2: Analyze the availability of the load.
1341   AvailValInBlkVect ValuesPerBlock;
1342   UnavailBlkVect UnavailableBlocks;
1343   AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
1344 
1345   // If we have no predecessors that produce a known value for this load, exit
1346   // early.
1347   if (ValuesPerBlock.empty())
1348     return false;
1349 
1350   // Step 3: Eliminate full redundancy.
1351   //
1352   // If all of the instructions we depend on produce a known value for this
1353   // load, then it is fully redundant and we can use PHI insertion to compute
1354   // its value.  Insert PHIs and remove the fully redundant value now.
1355   if (UnavailableBlocks.empty()) {
1356     DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
1357 
1358     // Perform PHI construction.
1359     Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
1360     LI->replaceAllUsesWith(V);
1361 
1362     if (isa<PHINode>(V))
1363       V->takeName(LI);
1364     if (Instruction *I = dyn_cast<Instruction>(V))
1365       // If instruction I has debug info, then we should not update it.
1366       // Also, if I has a null DebugLoc, then it is still potentially incorrect
1367       // to propagate LI's DebugLoc because LI may not post-dominate I.
1368       if (LI->getDebugLoc() && LI->getParent() == I->getParent())
1369         I->setDebugLoc(LI->getDebugLoc());
1370     if (V->getType()->isPtrOrPtrVectorTy())
1371       MD->invalidateCachedPointerInfo(V);
1372     markInstructionForDeletion(LI);
1373     ++NumGVNLoad;
1374     reportLoadElim(LI, V, ORE);
1375     return true;
1376   }
1377 
1378   // Step 4: Eliminate partial redundancy.
1379   if (!EnablePRE || !EnableLoadPRE)
1380     return false;
1381 
1382   return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
1383 }
1384 
1385 bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
1386   assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
1387          "This function can only be called with llvm.assume intrinsic");
1388   Value *V = IntrinsicI->getArgOperand(0);
1389 
1390   if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
1391     if (Cond->isZero()) {
1392       Type *Int8Ty = Type::getInt8Ty(V->getContext());
1393       // Insert a new store-to-null instruction before the assume to indicate
1394       // that this code is not reachable.  FIXME: We could insert an unreachable
1395       // instruction directly because we can modify the CFG.
1396       new StoreInst(UndefValue::get(Int8Ty),
1397                     Constant::getNullValue(Int8Ty->getPointerTo()),
1398                     IntrinsicI);
1399     }
1400     markInstructionForDeletion(IntrinsicI);
1401     return false;
1402   } else if (isa<Constant>(V)) {
    // If it's not false and is a constant, it must evaluate to true. This
    // means our assume is assume(true), and thus pointless, so we don't want
    // to do anything more here.
1406     return false;
1407   }
1408 
1409   Constant *True = ConstantInt::getTrue(V->getContext());
1410   bool Changed = false;
1411 
1412   for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
1413     BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
1414 
    // This property only holds in dominated successors; propagateEquality
    // will check dominance for us.
1417     Changed |= propagateEquality(V, True, Edge, false);
1418   }
1419 
  // We can replace the assume's value with true, which covers cases like this:
1421   // call void @llvm.assume(i1 %cmp)
1422   // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
1423   ReplaceWithConstMap[V] = True;
1424 
  // If one operand of an equality compare is a constant, adding it to the
  // map will cover cases like this:
1426   // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
1427   // call void @llvm.assume(i1 %cmp)
1428   // ret float %0 ; will change it to ret float 3.000000e+00
1429   if (auto *CmpI = dyn_cast<CmpInst>(V)) {
1430     if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
1431         CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
1432         (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
1433          CmpI->getFastMathFlags().noNaNs())) {
1434       Value *CmpLHS = CmpI->getOperand(0);
1435       Value *CmpRHS = CmpI->getOperand(1);
1436       if (isa<Constant>(CmpLHS))
1437         std::swap(CmpLHS, CmpRHS);
1438       auto *RHSConst = dyn_cast<Constant>(CmpRHS);
1439 
1440       // If only one operand is constant.
1441       if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
1442         ReplaceWithConstMap[CmpLHS] = RHSConst;
1443     }
1444   }
1445   return Changed;
1446 }
1447 
1448 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
1449   auto *ReplInst = dyn_cast<Instruction>(Repl);
1450   if (!ReplInst)
1451     return;
1452 
1453   // Patch the replacement so that it is not more restrictive than the value
1454   // being replaced.
1455   // Note that if 'I' is a load being replaced by some operation,
1456   // for example, by an arithmetic operation, then andIRFlags()
1457   // would just erase all math flags from the original arithmetic
1458   // operation, which is clearly not wanted and not needed.
1459   if (!isa<LoadInst>(I))
1460     ReplInst->andIRFlags(I);
1461 
1462   // FIXME: If both the original and replacement value are part of the
1463   // same control-flow region (meaning that the execution of one
1464   // guarantees the execution of the other), then we can combine the
1465   // noalias scopes here and do better than the general conservative
1466   // answer used in combineMetadata().
1467 
1468   // In general, GVN unifies expressions over different control-flow
1469   // regions, and so we need a conservative combination of the noalias
1470   // scopes.
1471   static const unsigned KnownIDs[] = {
1472       LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
1473       LLVMContext::MD_noalias,        LLVMContext::MD_range,
1474       LLVMContext::MD_fpmath,         LLVMContext::MD_invariant_load,
1475       LLVMContext::MD_invariant_group};
1476   combineMetadata(ReplInst, I, KnownIDs);
1477 }
1478 
1479 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
1480   patchReplacementInstruction(I, Repl);
1481   I->replaceAllUsesWith(Repl);
1482 }
1483 
1484 /// Attempt to eliminate a load, first by eliminating it
1485 /// locally, and then attempting non-local elimination if that fails.
1486 bool GVN::processLoad(LoadInst *L) {
1487   if (!MD)
1488     return false;
1489 
1490   // This code hasn't been audited for ordered or volatile memory access
1491   if (!L->isUnordered())
1492     return false;
1493 
1494   if (L->use_empty()) {
1495     markInstructionForDeletion(L);
1496     return true;
1497   }
1498 
  // Find the instruction this load locally depends on, if any.
1500   MemDepResult Dep = MD->getDependency(L);
1501 
1502   // If it is defined in another block, try harder.
1503   if (Dep.isNonLocal())
1504     return processNonLocalLoad(L);
1505 
1506   // Only handle the local case below
1507   if (!Dep.isDef() && !Dep.isClobber()) {
1508     // This might be a NonFuncLocal or an Unknown
1509     DEBUG(
1510       // fast print dep, using operator<< on instruction is too slow.
1511       dbgs() << "GVN: load ";
1512       L->printAsOperand(dbgs());
1513       dbgs() << " has unknown dependence\n";
1514     );
1515     return false;
1516   }
1517 
1518   AvailableValue AV;
1519   if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
1520     Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
1521 
1522     // Replace the load!
1523     patchAndReplaceAllUsesWith(L, AvailableValue);
1524     markInstructionForDeletion(L);
1525     ++NumGVNLoad;
1526     reportLoadElim(L, AvailableValue, ORE);
    // Tell MDA to reexamine the reused pointer since we might have more
    // information after forwarding it.
1529     if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
1530       MD->invalidateCachedPointerInfo(AvailableValue);
1531     return true;
1532   }
1533 
1534   return false;
1535 }
1536 
/// Return a pair whose first field is the value number of \p Exp and whose
/// second field indicates whether that value number was newly created.
1539 std::pair<uint32_t, bool>
1540 GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
1541   uint32_t &e = expressionNumbering[Exp];
1542   bool CreateNewValNum = !e;
1543   if (CreateNewValNum) {
1544     Expressions.push_back(Exp);
1545     if (ExprIdx.size() < nextValueNumber + 1)
1546       ExprIdx.resize(nextValueNumber * 2);
1547     e = nextValueNumber;
1548     ExprIdx[nextValueNumber++] = nextExprNumber++;
1549   }
1550   return {e, CreateNewValNum};
1551 }
1552 
/// Return whether all the values associated with value number \p Num are
/// defined in \p BB.
1555 bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
1556                                      GVN &Gvn) {
1557   LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
1558   while (Vals && Vals->BB == BB)
1559     Vals = Vals->Next;
1560   return !Vals;
1561 }
1562 
1563 /// Wrap phiTranslateImpl to provide caching functionality.
1564 uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
1565                                        const BasicBlock *PhiBlock, uint32_t Num,
1566                                        GVN &Gvn) {
1567   auto FindRes = PhiTranslateTable.find({Num, Pred});
1568   if (FindRes != PhiTranslateTable.end())
1569     return FindRes->second;
1570   uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
1571   PhiTranslateTable.insert({{Num, Pred}, NewNum});
1572   return NewNum;
1573 }
1574 
/// Translate value number \p Num through the phis in \p PhiBlock, so that it
/// refers to the corresponding value in predecessor \p Pred.
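///
/// For example (illustrative; names are made up): if \p Num is the value
/// number of "%x = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]" in \p PhiBlock,
/// translating it towards %pred1 yields the value number of %a.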
1577 uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
1578                                            const BasicBlock *PhiBlock,
1579                                            uint32_t Num, GVN &Gvn) {
1580   if (PHINode *PN = NumberingPhi[Num]) {
1581     for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
1582       if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
1583         if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
1584           return TransVal;
1585     }
1586     return Num;
1587   }
1588 
  // If any value associated with Num is defined in a BB other than PhiBlock,
  // it cannot depend on a phi in PhiBlock without going through a backedge.
  // We can do an early exit in that case to save compile time.
1592   if (!areAllValsInBB(Num, PhiBlock, Gvn))
1593     return Num;
1594 
1595   if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
1596     return Num;
1597   Expression Exp = Expressions[ExprIdx[Num]];
1598 
1599   for (unsigned i = 0; i < Exp.varargs.size(); i++) {
1600     // For InsertValue and ExtractValue, some varargs are index numbers
1601     // instead of value numbers. Those index numbers should not be
1602     // translated.
1603     if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
1604         (i > 0 && Exp.opcode == Instruction::ExtractValue))
1605       continue;
1606     Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
1607   }
1608 
1609   if (Exp.commutative) {
1610     assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
1611     if (Exp.varargs[0] > Exp.varargs[1]) {
1612       std::swap(Exp.varargs[0], Exp.varargs[1]);
1613       uint32_t Opcode = Exp.opcode >> 8;
1614       if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
1615         Exp.opcode = (Opcode << 8) |
1616                      CmpInst::getSwappedPredicate(
1617                          static_cast<CmpInst::Predicate>(Exp.opcode & 255));
1618     }
1619   }
1620 
1621   if (uint32_t NewNum = expressionNumbering[Exp])
1622     return NewNum;
1623   return Num;
1624 }
1625 
/// Erase stale entries from the phiTranslate cache so phiTranslate can be
/// computed again.
1628 void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
1629                                                const BasicBlock &CurrBlock) {
1630   for (const BasicBlock *Pred : predecessors(&CurrBlock)) {
1631     auto FindRes = PhiTranslateTable.find({Num, Pred});
1632     if (FindRes != PhiTranslateTable.end())
1633       PhiTranslateTable.erase(FindRes);
1634   }
1635 }
1636 
1637 // In order to find a leader for a given value number at a
1638 // specific basic block, we first obtain the list of all Values for that number,
1639 // and then scan the list to find one whose block dominates the block in
1640 // question.  This is fast because dominator tree queries consist of only
1641 // a few comparisons of DFS numbers.
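//
// For example (an illustrative sketch; numbers and names are made up): if
// value number 7 has leader entries in blocks %a and %b, a query for block %c
// returns the entry whose block dominates %c, preferring a Constant leader
// when one is available.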
1642 Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
1643   LeaderTableEntry Vals = LeaderTable[num];
1644   if (!Vals.Val) return nullptr;
1645 
1646   Value *Val = nullptr;
1647   if (DT->dominates(Vals.BB, BB)) {
1648     Val = Vals.Val;
1649     if (isa<Constant>(Val)) return Val;
1650   }
1651 
1652   LeaderTableEntry* Next = Vals.Next;
1653   while (Next) {
1654     if (DT->dominates(Next->BB, BB)) {
1655       if (isa<Constant>(Next->Val)) return Next->Val;
1656       if (!Val) Val = Next->Val;
1657     }
1658 
1659     Next = Next->Next;
1660   }
1661 
1662   return Val;
1663 }
1664 
1665 /// There is an edge from 'Src' to 'Dst'.  Return
1666 /// true if every path from the entry block to 'Dst' passes via this edge.  In
1667 /// particular 'Dst' must not be reachable via another edge from 'Src'.
1668 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
1669                                        DominatorTree *DT) {
1670   // While in theory it is interesting to consider the case in which Dst has
1671   // more than one predecessor, because Dst might be part of a loop which is
1672   // only reachable from Src, in practice it is pointless since at the time
1673   // GVN runs all such loops have preheaders, which means that Dst will have
1674   // been changed to have only one predecessor, namely Src.
1675   const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
1676   assert((!Pred || Pred == E.getStart()) &&
1677          "No edge between these basic blocks!");
1678   return Pred != nullptr;
1679 }
1680 
1681 void GVN::assignBlockRPONumber(Function &F) {
1682   uint32_t NextBlockNumber = 1;
1683   ReversePostOrderTraversal<Function *> RPOT(&F);
1684   for (BasicBlock *BB : RPOT)
1685     BlockRPONumber[BB] = NextBlockNumber++;
1686 }
1687 
// Tries to replace an instruction's operands with constants, using
// information from ReplaceWithConstMap.
1690 bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
1691   bool Changed = false;
1692   for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
1693     Value *Operand = Instr->getOperand(OpNum);
1694     auto it = ReplaceWithConstMap.find(Operand);
1695     if (it != ReplaceWithConstMap.end()) {
1696       assert(!isa<Constant>(Operand) &&
1697              "Replacing constants with constants is invalid");
1698       DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second
1699                    << " in instruction " << *Instr << '\n');
1700       Instr->setOperand(OpNum, it->second);
1701       Changed = true;
1702     }
1703   }
1704   return Changed;
1705 }
1706 
1707 /// The given values are known to be equal in every block
1708 /// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
1709 /// 'RHS' everywhere in the scope.  Returns whether a change was made.
1710 /// If DominatesByEdge is false, then it means that we will propagate the RHS
1711 /// value starting from the end of Root.Start.
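///
/// For example (illustrative; names are made up): for the edge from a block
/// ending in "br i1 %cmp, label %then, label %else" into %then, with LHS
/// being %cmp and RHS being 'true', uses of %cmp dominated by that edge are
/// replaced with 'true'.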
1712 bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
1713                             bool DominatesByEdge) {
1714   SmallVector<std::pair<Value*, Value*>, 4> Worklist;
1715   Worklist.push_back(std::make_pair(LHS, RHS));
1716   bool Changed = false;
1717   // For speed, compute a conservative fast approximation to
1718   // DT->dominates(Root, Root.getEnd());
1719   const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
1720 
1721   while (!Worklist.empty()) {
1722     std::pair<Value*, Value*> Item = Worklist.pop_back_val();
1723     LHS = Item.first; RHS = Item.second;
1724 
1725     if (LHS == RHS)
1726       continue;
1727     assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
1728 
1729     // Don't try to propagate equalities between constants.
1730     if (isa<Constant>(LHS) && isa<Constant>(RHS))
1731       continue;
1732 
1733     // Prefer a constant on the right-hand side, or an Argument if no constants.
1734     if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
1735       std::swap(LHS, RHS);
1736     assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
1737 
1738     // If there is no obvious reason to prefer the left-hand side over the
1739     // right-hand side, ensure the longest lived term is on the right-hand side,
1740     // so the shortest lived term will be replaced by the longest lived.
1741     // This tends to expose more simplifications.
1742     uint32_t LVN = VN.lookupOrAdd(LHS);
1743     if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
1744         (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
1745       // Move the 'oldest' value to the right-hand side, using the value number
1746       // as a proxy for age.
1747       uint32_t RVN = VN.lookupOrAdd(RHS);
1748       if (LVN < RVN) {
1749         std::swap(LHS, RHS);
1750         LVN = RVN;
1751       }
1752     }
1753 
1754     // If value numbering later sees that an instruction in the scope is equal
1755     // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
1756     // the invariant that instructions only occur in the leader table for their
1757     // own value number (this is used by removeFromLeaderTable), do not do this
1758     // if RHS is an instruction (if an instruction in the scope is morphed into
1759     // LHS then it will be turned into RHS by the next GVN iteration anyway, so
1760     // using the leader table is about compiling faster, not optimizing better).
    // The leader table only tracks basic blocks, not edges. Only add to it if
    // we have the simple case where the edge dominates the end.
1763     if (RootDominatesEnd && !isa<Instruction>(RHS))
1764       addToLeaderTable(LVN, RHS, Root.getEnd());
1765 
1766     // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
1767     // LHS always has at least one use that is not dominated by Root, this will
1768     // never do anything if LHS has only one use.
1769     if (!LHS->hasOneUse()) {
1770       unsigned NumReplacements =
1771           DominatesByEdge
1772               ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
1773               : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart());
1774 
1775       Changed |= NumReplacements > 0;
1776       NumGVNEqProp += NumReplacements;
1777     }
1778 
1779     // Now try to deduce additional equalities from this one. For example, if
1780     // the known equality was "(A != B)" == "false" then it follows that A and B
1781     // are equal in the scope. Only boolean equalities with an explicit true or
1782     // false RHS are currently supported.
1783     if (!RHS->getType()->isIntegerTy(1))
1784       // Not a boolean equality - bail out.
1785       continue;
1786     ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
1787     if (!CI)
1788       // RHS neither 'true' nor 'false' - bail out.
1789       continue;
    // Whether RHS equals 'true'.  Otherwise it equals 'false'.  For an i1,
    // 'true' is the all-ones value, hence the isMinusOne() check.
    bool isKnownTrue = CI->isMinusOne();
1792     bool isKnownFalse = !isKnownTrue;
1793 
1794     // If "A && B" is known true then both A and B are known true.  If "A || B"
1795     // is known false then both A and B are known false.
1796     Value *A, *B;
1797     if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
1798         (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
1799       Worklist.push_back(std::make_pair(A, RHS));
1800       Worklist.push_back(std::make_pair(B, RHS));
1801       continue;
1802     }
1803 
1804     // If we are propagating an equality like "(A == B)" == "true" then also
1805     // propagate the equality A == B.  When propagating a comparison such as
1806     // "(A >= B)" == "true", replace all instances of "A < B" with "false".
1807     if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
1808       Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1809 
1810       // If "A == B" is known true, or "A != B" is known false, then replace
1811       // A with B everywhere in the scope.
1812       if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
1813           (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
1814         Worklist.push_back(std::make_pair(Op0, Op1));
1815 
1816       // Handle the floating point versions of equality comparisons too.
1817       if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
1818           (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
1819 
1820         // Floating point -0.0 and 0.0 compare equal, so we can only
1821         // propagate values if we know that we have a constant and that
1822         // its value is non-zero.
1823 
1824         // FIXME: We should do this optimization if 'no signed zeros' is
1825         // applicable via an instruction-level fast-math-flag or some other
1826         // indicator that relaxed FP semantics are being used.
1827 
1828         if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
1829           Worklist.push_back(std::make_pair(Op0, Op1));
1830       }
1831 
1832       // If "A >= B" is known true, replace "A < B" with false everywhere.
1833       CmpInst::Predicate NotPred = Cmp->getInversePredicate();
1834       Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
1835       // Since we don't have the instruction "A < B" immediately to hand, work
1836       // out the value number that it would have and use that to find an
1837       // appropriate instruction (if any).
1838       uint32_t NextNum = VN.getNextUnusedValueNumber();
1839       uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
1840       // If the number we were assigned was brand new then there is no point in
1841       // looking for an instruction realizing it: there cannot be one!
1842       if (Num < NextNum) {
1843         Value *NotCmp = findLeader(Root.getEnd(), Num);
1844         if (NotCmp && isa<Instruction>(NotCmp)) {
1845           unsigned NumReplacements =
1846               DominatesByEdge
1847                   ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
1848                   : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
1849                                              Root.getStart());
1850           Changed |= NumReplacements > 0;
1851           NumGVNEqProp += NumReplacements;
1852         }
1853       }
1854       // Ensure that any instruction in scope that gets the "A < B" value number
1855       // is replaced with false.
      // The leader table only tracks basic blocks, not edges. Only add to it
      // if we have the simple case where the edge dominates the end.
1858       if (RootDominatesEnd)
1859         addToLeaderTable(Num, NotVal, Root.getEnd());
1860 
1861       continue;
1862     }
1863   }
1864 
1865   return Changed;
1866 }
1867 
/// Process an instruction: try to simplify it, value number it, and replace
/// it with an existing value if it turns out to be redundant.
1870 bool GVN::processInstruction(Instruction *I) {
1871   // Ignore dbg info intrinsics.
1872   if (isa<DbgInfoIntrinsic>(I))
1873     return false;
1874 
1875   // If the instruction can be easily simplified then do so now in preference
1876   // to value numbering it.  Value numbering often exposes redundancies, for
1877   // example if it determines that %y is equal to %x then the instruction
1878   // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
1879   const DataLayout &DL = I->getModule()->getDataLayout();
1880   if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
1881     bool Changed = false;
1882     if (!I->use_empty()) {
1883       I->replaceAllUsesWith(V);
1884       Changed = true;
1885     }
1886     if (isInstructionTriviallyDead(I, TLI)) {
1887       markInstructionForDeletion(I);
1888       Changed = true;
1889     }
1890     if (Changed) {
1891       if (MD && V->getType()->isPtrOrPtrVectorTy())
1892         MD->invalidateCachedPointerInfo(V);
1893       ++NumGVNSimpl;
1894       return true;
1895     }
1896   }
1897 
1898   if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
1899     if (IntrinsicI->getIntrinsicID() == Intrinsic::assume)
1900       return processAssumeIntrinsic(IntrinsicI);
1901 
1902   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1903     if (processLoad(LI))
1904       return true;
1905 
1906     unsigned Num = VN.lookupOrAdd(LI);
1907     addToLeaderTable(Num, LI, LI->getParent());
1908     return false;
1909   }
1910 
1911   // For conditional branches, we can perform simple conditional propagation on
1912   // the condition value itself.
1913   if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
1914     if (!BI->isConditional())
1915       return false;
1916 
1917     if (isa<Constant>(BI->getCondition()))
1918       return processFoldableCondBr(BI);
1919 
1920     Value *BranchCond = BI->getCondition();
1921     BasicBlock *TrueSucc = BI->getSuccessor(0);
1922     BasicBlock *FalseSucc = BI->getSuccessor(1);
1923     // Avoid multiple edges early.
1924     if (TrueSucc == FalseSucc)
1925       return false;
1926 
1927     BasicBlock *Parent = BI->getParent();
1928     bool Changed = false;
1929 
1930     Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
1931     BasicBlockEdge TrueE(Parent, TrueSucc);
1932     Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
1933 
1934     Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
1935     BasicBlockEdge FalseE(Parent, FalseSucc);
1936     Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
1937 
1938     return Changed;
1939   }
1940 
1941   // For switches, propagate the case values into the case destinations.
1942   if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
1943     Value *SwitchCond = SI->getCondition();
1944     BasicBlock *Parent = SI->getParent();
1945     bool Changed = false;
1946 
1947     // Remember how many outgoing edges there are to every successor.
1948     SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
1949     for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
1950       ++SwitchEdges[SI->getSuccessor(i)];
1951 
1952     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
1953          i != e; ++i) {
1954       BasicBlock *Dst = i->getCaseSuccessor();
1955       // If there is only a single edge, propagate the case value into it.
1956       if (SwitchEdges.lookup(Dst) == 1) {
1957         BasicBlockEdge E(Parent, Dst);
1958         Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
1959       }
1960     }
1961     return Changed;
1962   }
1963 
1964   // Instructions with void type don't return a value, so there's
1965   // no point in trying to find redundancies in them.
1966   if (I->getType()->isVoidTy())
1967     return false;
1968 
1969   uint32_t NextNum = VN.getNextUnusedValueNumber();
1970   unsigned Num = VN.lookupOrAdd(I);
1971 
  // Allocas, terminators and PHIs are always uniquely numbered, so we can
  // save time and memory by fast-failing them.
1974   if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
1975     addToLeaderTable(Num, I, I->getParent());
1976     return false;
1977   }
1978 
1979   // If the number we were assigned was a brand new VN, then we don't
1980   // need to do a lookup to see if the number already exists
1981   // somewhere in the domtree: it can't!
1982   if (Num >= NextNum) {
1983     addToLeaderTable(Num, I, I->getParent());
1984     return false;
1985   }
1986 
1987   // Perform fast-path value-number based elimination of values inherited from
1988   // dominators.
1989   Value *Repl = findLeader(I->getParent(), Num);
1990   if (!Repl) {
1991     // Failure, just remember this instance for future use.
1992     addToLeaderTable(Num, I, I->getParent());
1993     return false;
1994   } else if (Repl == I) {
1995     // If I was the result of a shortcut PRE, it might already be in the table
1996     // and the best replacement for itself. Nothing to do.
1997     return false;
1998   }
1999 
2000   // Remove it!
2001   patchAndReplaceAllUsesWith(I, Repl);
2002   if (MD && Repl->getType()->isPtrOrPtrVectorTy())
2003     MD->invalidateCachedPointerInfo(Repl);
2004   markInstructionForDeletion(I);
2005   return true;
2006 }
2007 
/// runImpl - This is the main transformation entry point for a function.
2009 bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2010                   const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2011                   MemoryDependenceResults *RunMD, LoopInfo *LI,
2012                   OptimizationRemarkEmitter *RunORE) {
2013   AC = &RunAC;
2014   DT = &RunDT;
2015   VN.setDomTree(DT);
2016   TLI = &RunTLI;
2017   VN.setAliasAnalysis(&RunAA);
2018   MD = RunMD;
2019   OrderedInstructions OrderedInstrs(DT);
2020   OI = &OrderedInstrs;
2021   VN.setMemDep(MD);
2022   ORE = RunORE;
2023 
2024   bool Changed = false;
2025   bool ShouldContinue = true;
2026 
2027   // Merge unconditional branches, allowing PRE to catch more
2028   // optimization opportunities.
2029   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
2030     BasicBlock *BB = &*FI++;
2031 
2032     bool removedBlock = MergeBlockIntoPredecessor(BB, DT, LI, MD);
2033     if (removedBlock)
2034       ++NumGVNBlocks;
2035 
2036     Changed |= removedBlock;
2037   }
2038 
2039   unsigned Iteration = 0;
2040   while (ShouldContinue) {
2041     DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2042     ShouldContinue = iterateOnFunction(F);
2043     Changed |= ShouldContinue;
2044     ++Iteration;
2045   }
2046 
2047   if (EnablePRE) {
2048     // Fabricate val-num for dead-code in order to suppress assertion in
2049     // performPRE().
2050     assignValNumForDeadCode();
2051     assignBlockRPONumber(F);
2052     bool PREChanged = true;
2053     while (PREChanged) {
2054       PREChanged = performPRE(F);
2055       Changed |= PREChanged;
2056     }
2057   }
2058 
2059   // FIXME: Should perform GVN again after PRE does something.  PRE can move
2060   // computations into blocks where they become fully redundant.  Note that
2061   // we can't do this until PRE's critical edge splitting updates memdep.
2062   // Actually, when this happens, we should just fully integrate PRE into GVN.
2063 
2064   cleanupGlobalSets();
  // Do not clean up DeadBlocks in cleanupGlobalSets() as it's called for each
  // iteration.
2067   DeadBlocks.clear();
2068 
2069   return Changed;
2070 }
2071 
2072 bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off InstrsToErase by erasing eagerly in a helper function
  // (and incrementing BI before processing an instruction).
2075   assert(InstrsToErase.empty() &&
2076          "We expect InstrsToErase to be empty across iterations");
2077   if (DeadBlocks.count(BB))
2078     return false;
2079 
  // Clear the map before every BB because it is only valid for a single BB.
2081   ReplaceWithConstMap.clear();
2082   bool ChangedFunction = false;
2083 
2084   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2085        BI != BE;) {
2086     if (!ReplaceWithConstMap.empty())
2087       ChangedFunction |= replaceOperandsWithConsts(&*BI);
2088     ChangedFunction |= processInstruction(&*BI);
2089 
2090     if (InstrsToErase.empty()) {
2091       ++BI;
2092       continue;
2093     }
2094 
2095     // If we need some instructions deleted, do it now.
2096     NumGVNInstr += InstrsToErase.size();
2097 
2098     // Avoid iterator invalidation.
2099     bool AtStart = BI == BB->begin();
2100     if (!AtStart)
2101       --BI;
2102 
2103     bool InvalidateImplicitCF = false;
2104     const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
2105     for (auto *I : InstrsToErase) {
2106       assert(I->getParent() == BB && "Removing instruction from wrong block?");
2107       DEBUG(dbgs() << "GVN removed: " << *I << '\n');
2108       salvageDebugInfo(*I);
2109       if (MD) MD->removeInstruction(I);
2110       DEBUG(verifyRemoved(I));
2111       if (MaybeFirstICF == I) {
        // We have erased the first ICF in the block. The map needs to be
        // updated.
2113         InvalidateImplicitCF = true;
2114         // Do not keep dangling pointer on the erased instruction.
2115         MaybeFirstICF = nullptr;
2116       }
2117       I->eraseFromParent();
2118     }
2119 
2120     OI->invalidateBlock(BB);
2121     InstrsToErase.clear();
2122     if (InvalidateImplicitCF)
2123       fillImplicitControlFlowInfo(BB);
2124 
2125     if (AtStart)
2126       BI = BB->begin();
2127     else
2128       ++BI;
2129   }
2130 
2131   return ChangedFunction;
2132 }
2133 
2134 // Instantiate an expression in a predecessor that lacked it.
2135 bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2136                                     BasicBlock *Curr, unsigned int ValNo) {
2137   // Because we are going top-down through the block, all value numbers
2138   // will be available in the predecessor by the time we need them.  Any
2139   // that weren't originally present will have been instantiated earlier
2140   // in this loop.
2141   bool success = true;
2142   for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
2143     Value *Op = Instr->getOperand(i);
2144     if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2145       continue;
    // This could be a newly inserted instruction, in which case we won't
    // find a value number, and should give up before we hurt ourselves.
    // FIXME: Rewrite the infrastructure to make it easier to value number
    // and process newly inserted instructions.
2150     if (!VN.exists(Op)) {
2151       success = false;
2152       break;
2153     }
2154     uint32_t TValNo =
2155         VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
2156     if (Value *V = findLeader(Pred, TValNo)) {
2157       Instr->setOperand(i, V);
2158     } else {
2159       success = false;
2160       break;
2161     }
2162   }
2163 
2164   // Fail out if we encounter an operand that is not available in
2165   // the PRE predecessor.  This is typically because of loads which
2166   // are not value numbered precisely.
2167   if (!success)
2168     return false;
2169 
2170   Instr->insertBefore(Pred->getTerminator());
2171   Instr->setName(Instr->getName() + ".pre");
2172   Instr->setDebugLoc(Instr->getDebugLoc());
2173 
2174   unsigned Num = VN.lookupOrAdd(Instr);
2175   VN.add(Instr, Num);
2176 
2177   // Update the availability map to include the new instruction.
2178   addToLeaderTable(Num, Instr, Pred);
2179   return true;
2180 }
2181 
2182 bool GVN::performScalarPRE(Instruction *CurInst) {
2183   if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
2184       isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
2185       CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2186       isa<DbgInfoIntrinsic>(CurInst))
2187     return false;
2188 
2189   // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2190   // sinking the compare again, and it would force the code generator to
2191   // move the i1 from processor flags or predicate registers into a general
2192   // purpose register.
2193   if (isa<CmpInst>(CurInst))
2194     return false;
2195 
2196   // We don't currently value number ANY inline asm calls.
2197   if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
2198     if (CallI->isInlineAsm())
2199       return false;
2200 
2201   uint32_t ValNo = VN.lookup(CurInst);
2202 
  // Look at the predecessors for PRE opportunities.  We're
2204   // only trying to solve the basic diamond case, where
2205   // a value is computed in the successor and one predecessor,
2206   // but not the other.  We also explicitly disallow cases
2207   // where the successor is its own predecessor, because they're
2208   // more complicated to get right.
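  //
  // A minimal sketch of the diamond (illustrative; names are made up):
  //
  //          entry
  //          /   \
  //     pred1     pred2      ; %v = add i32 %a, %b is computed only in pred1
  //          \   /
  //          merge           ; add i32 %a, %b here is partially redundant
  //
  // PRE clones the add into pred2 and replaces the redundant computation in
  // merge with a phi of the two available values.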
2209   unsigned NumWith = 0;
2210   unsigned NumWithout = 0;
2211   BasicBlock *PREPred = nullptr;
2212   BasicBlock *CurrentBlock = CurInst->getParent();
2213 
2214   SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
2215   for (BasicBlock *P : predecessors(CurrentBlock)) {
    // We're not interested in PRE when one of the block's predecessors is
    // not reachable.
2218     if (!DT->isReachableFromEntry(P)) {
2219       NumWithout = 2;
2220       break;
2221     }
    // It is not safe to do PRE when P->CurrentBlock is a loop backedge and
    // CurInst has an operand defined in CurrentBlock (so it may be defined
    // by a phi in the loop header).
2225     if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
2226         llvm::any_of(CurInst->operands(), [&](const Use &U) {
2227           if (auto *Inst = dyn_cast<Instruction>(U.get()))
2228             return Inst->getParent() == CurrentBlock;
2229           return false;
2230         })) {
2231       NumWithout = 2;
2232       break;
2233     }
2234 
2235     uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
2236     Value *predV = findLeader(P, TValNo);
2237     if (!predV) {
2238       predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
2239       PREPred = P;
2240       ++NumWithout;
2241     } else if (predV == CurInst) {
2242       /* CurInst dominates this predecessor. */
2243       NumWithout = 2;
2244       break;
2245     } else {
2246       predMap.push_back(std::make_pair(predV, P));
2247       ++NumWith;
2248     }
2249   }
2250 
2251   // Don't do PRE when it might increase code size, i.e. when
2252   // we would need to insert instructions in more than one pred.
2253   if (NumWithout > 1 || NumWith == 0)
2254     return false;
2255 
2256   // We may have a case where all predecessors have the instruction,
2257   // and we just need to insert a phi node. Otherwise, perform
2258   // insertion.
2259   Instruction *PREInstr = nullptr;
2260 
2261   if (NumWithout != 0) {
2262     if (!isSafeToSpeculativelyExecute(CurInst)) {
2263       // It is only valid to insert a new instruction if the current instruction
2264       // is always executed. An instruction with implicit control flow could
2265       // prevent us from doing it. If we cannot speculate the execution, then
2266       // PRE should be prohibited.
2267       auto It = FirstImplicitControlFlowInsts.find(CurrentBlock);
2268       if (It != FirstImplicitControlFlowInsts.end()) {
2269         assert(It->second->getParent() == CurrentBlock &&
2270                "Implicit control flow map broken?");
2271         if (OI->dominates(It->second, CurInst))
2272           return false;
2273       }
2274     }
2275 
    // Don't do PRE across an indirect branch.
2277     if (isa<IndirectBrInst>(PREPred->getTerminator()))
2278       return false;
2279 
2280     // We can't do PRE safely on a critical edge, so instead we schedule
2281     // the edge to be split and perform the PRE the next time we iterate
2282     // on the function.
2283     unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
2284     if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
2285       toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
2286       return false;
2287     }
2288     // We need to insert somewhere, so let's give it a shot
2289     PREInstr = CurInst->clone();
2290     if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
2291       // If we failed insertion, make sure we remove the instruction.
2292       DEBUG(verifyRemoved(PREInstr));
2293       PREInstr->deleteValue();
2294       return false;
2295     }
2296   }
2297 
2298   // Either we should have filled in the PRE instruction, or we should
2299   // not have needed insertions.
2300   assert(PREInstr != nullptr || NumWithout == 0);
2301 
2302   ++NumGVNPRE;
2303 
2304   // Create a PHI to make the value available in this block.
2305   PHINode *Phi =
2306       PHINode::Create(CurInst->getType(), predMap.size(),
2307                       CurInst->getName() + ".pre-phi", &CurrentBlock->front());
2308   for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
2309     if (Value *V = predMap[i].first) {
2310       // If we use an existing value in this phi, we have to patch the original
2311       // value because the phi will be used to replace a later value.
2312       patchReplacementInstruction(CurInst, V);
2313       Phi->addIncoming(V, predMap[i].second);
2314     } else
2315       Phi->addIncoming(PREInstr, PREPred);
2316   }
2317 
2318   VN.add(Phi, ValNo);
2319   // After creating a new PHI for ValNo, the phi translate result for ValNo will
2320   // be changed, so erase the related stale entries in phi translate cache.
2321   VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
2322   addToLeaderTable(ValNo, Phi, CurrentBlock);
2323   Phi->setDebugLoc(CurInst->getDebugLoc());
2324   CurInst->replaceAllUsesWith(Phi);
2325   if (MD && Phi->getType()->isPtrOrPtrVectorTy())
2326     MD->invalidateCachedPointerInfo(Phi);
2327   VN.erase(CurInst);
2328   removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
2329 
2330   DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
2331   if (MD)
2332     MD->removeInstruction(CurInst);
2333   DEBUG(verifyRemoved(CurInst));
2334   bool InvalidateImplicitCF =
2335       FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
2336   // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
2337   // some assertion failures.
2338   OI->invalidateBlock(CurrentBlock);
2339   CurInst->eraseFromParent();
2340   if (InvalidateImplicitCF)
2341     fillImplicitControlFlowInfo(CurrentBlock);
2342   ++NumGVNInstr;
2343 
2344   return true;
2345 }
2346 
2347 /// Perform a purely local form of PRE that looks for diamond
2348 /// control flow patterns and attempts to perform simple PRE at the join point.
2349 bool GVN::performPRE(Function &F) {
2350   bool Changed = false;
2351   for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
2352     // Nothing to PRE in the entry block.
2353     if (CurrentBlock == &F.getEntryBlock())
2354       continue;
2355 
2356     // Don't perform PRE on an EH pad.
2357     if (CurrentBlock->isEHPad())
2358       continue;
2359 
2360     for (BasicBlock::iterator BI = CurrentBlock->begin(),
2361                               BE = CurrentBlock->end();
2362          BI != BE;) {
2363       Instruction *CurInst = &*BI++;
2364       Changed |= performScalarPRE(CurInst);
2365     }
2366   }
2367 
2368   if (splitCriticalEdges())
2369     Changed = true;
2370 
2371   return Changed;
2372 }
2373 
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
2376 BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
2377   BasicBlock *BB =
2378       SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
2379   if (MD)
2380     MD->invalidateCachedPredecessors();
2381   return BB;
2382 }
2383 
2384 /// Split critical edges found during the previous
2385 /// iteration that may enable further optimization.
2386 bool GVN::splitCriticalEdges() {
2387   if (toSplit.empty())
2388     return false;
2389   do {
2390     std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
2391     SplitCriticalEdge(Edge.first, Edge.second,
2392                       CriticalEdgeSplittingOptions(DT));
2393   } while (!toSplit.empty());
2394   if (MD) MD->invalidateCachedPredecessors();
2395   return true;
2396 }
2397 
2398 /// Executes one iteration of GVN
2399 bool GVN::iterateOnFunction(Function &F) {
2400   cleanupGlobalSets();
2401 
2402   // Top-down walk of the dominator tree
2403   bool Changed = false;
2404   // Needed for value numbering with phi construction to work.
2405   // RPOT walks the graph in its constructor and will not be invalidated during
2406   // processBlock.
2407   ReversePostOrderTraversal<Function *> RPOT(&F);
2408 
2409   for (BasicBlock *BB : RPOT)
2410     fillImplicitControlFlowInfo(BB);
2411   for (BasicBlock *BB : RPOT)
2412     Changed |= processBlock(BB);
2413 
2414   return Changed;
2415 }
2416 
2417 void GVN::cleanupGlobalSets() {
2418   VN.clear();
2419   LeaderTable.clear();
2420   BlockRPONumber.clear();
2421   TableAllocator.Reset();
2422   FirstImplicitControlFlowInsts.clear();
2423 }
2424 
2425 void
2426 GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
2427   // Make sure that all marked instructions are actually deleted by this point,
2428   // so that we don't need to care about omitting them.
2429   assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
2430   auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
    // If a block's instruction doesn't always pass control to its successor
    // instruction, mark the block as having implicit control flow. We use this
    // to avoid wrong assumptions of the sort "if A is executed and B
    // post-dominates A, then B is also executed". This is not true if there is
    // an implicit control flow instruction (e.g. a guard) between them.
    //
    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
    // for volatile stores and loads because they can trap. The discussion on
    // whether or not it is correct is still ongoing. We might want to get rid
    // of this logic in the future. Anyway, trapping instructions shouldn't
    // introduce implicit control flow, so we explicitly allow them here. This
    // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
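    //
    // For example (illustrative): a call such as
    //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
    // may deoptimize instead of falling through, so instructions after it in
    // the same block are not guaranteed to execute.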
2443     if (isGuaranteedToTransferExecutionToSuccessor(I))
2444       return false;
2445     if (isa<LoadInst>(I)) {
2446       assert(cast<LoadInst>(I)->isVolatile() &&
2447              "Non-volatile load should transfer execution to successor!");
2448       return false;
2449     }
2450     if (isa<StoreInst>(I)) {
2451       assert(cast<StoreInst>(I)->isVolatile() &&
2452              "Non-volatile store should transfer execution to successor!");
2453       return false;
2454     }
2455     return true;
2456   };
2457   FirstImplicitControlFlowInsts.erase(BB);
2458 
2459   for (auto &I : *BB)
2460     if (MayNotTransferExecutionToSuccessor(&I)) {
2461       FirstImplicitControlFlowInsts[BB] = &I;
2462       break;
2463     }
2464 }
2465 
2466 /// Verify that the specified instruction does not occur in our
2467 /// internal data structures.
2468 void GVN::verifyRemoved(const Instruction *Inst) const {
2469   VN.verifyRemoved(Inst);
2470 
2471   // Walk through the value number scope to make sure the instruction isn't
2472   // ferreted away in it.
2473   for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
2474        I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
2475     const LeaderTableEntry *Node = &I->second;
2476     assert(Node->Val != Inst && "Inst still in value numbering scope!");
2477 
2478     while (Node->Next) {
2479       Node = Node->Next;
2480       assert(Node->Val != Inst && "Inst still in value numbering scope!");
2481     }
2482   }
2483 }
2484 
/// BB is declared dead, which implies other blocks become dead as well. This
/// function adds all such blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with UndefVal.
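///
/// For example (illustrative; names are made up): if a dead block %d is a
/// predecessor of a live block %s, a phi in %s such as
/// "phi i32 [ %v, %d ], [ %w, %live ]" becomes
/// "phi i32 [ undef, %d ], [ %w, %live ]".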
2489 void GVN::addDeadBlock(BasicBlock *BB) {
2490   SmallVector<BasicBlock *, 4> NewDead;
2491   SmallSetVector<BasicBlock *, 4> DF;
2492 
2493   NewDead.push_back(BB);
2494   while (!NewDead.empty()) {
2495     BasicBlock *D = NewDead.pop_back_val();
2496     if (DeadBlocks.count(D))
2497       continue;
2498 
2499     // All blocks dominated by D are dead.
2500     SmallVector<BasicBlock *, 8> Dom;
2501     DT->getDescendants(D, Dom);
2502     DeadBlocks.insert(Dom.begin(), Dom.end());
2503 
2504     // Figure out the dominance-frontier(D).
2505     for (BasicBlock *B : Dom) {
2506       for (BasicBlock *S : successors(B)) {
2507         if (DeadBlocks.count(S))
2508           continue;
2509 
2510         bool AllPredDead = true;
2511         for (BasicBlock *P : predecessors(S))
2512           if (!DeadBlocks.count(P)) {
2513             AllPredDead = false;
2514             break;
2515           }
2516 
2517         if (!AllPredDead) {
2518           // S could be proved dead later on. That is why we don't update phi
2519           // operands at this moment.
2520           DF.insert(S);
2521         } else {
          // Although S is not dominated by D, it is dead by now. This could
          // happen if S already had a dead predecessor before D was declared
          // dead.
2525           NewDead.push_back(S);
2526         }
2527       }
2528     }
2529   }
2530 
2531   // For the dead blocks' live successors, update their phi nodes by replacing
2532   // the operands corresponding to dead blocks with UndefVal.
2533   for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
2534         I != E; I++) {
2535     BasicBlock *B = *I;
2536     if (DeadBlocks.count(B))
2537       continue;
2538 
2539     SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
2540     for (BasicBlock *P : Preds) {
2541       if (!DeadBlocks.count(P))
2542         continue;
2543 
2544       if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
2545         if (BasicBlock *S = splitCriticalEdges(P, B))
2546           DeadBlocks.insert(P = S);
2547       }
2548 
2549       for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
2550         PHINode &Phi = cast<PHINode>(*II);
2551         Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
2552                              UndefValue::get(Phi.getType()));
2553       }
2554     }
2555   }
2556 }
2557 
// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), it will perform the following analyses
// and transformation.
//  1) If the dead outgoing edge is a critical edge, split it. Let
//     R be the target of the dead outgoing edge.
//  2) Identify the set of dead blocks implied by the branch's dead outgoing
//     edge. The result of this step will be {X | X is dominated by R}.
//  3) Identify those blocks which have at least one dead predecessor. The
//     result of this step will be dominance-frontier(R).
//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
//     dead blocks with "UndefVal" in the hope that these PHIs will be
//     optimized away.
//
// Return true iff *NEW* dead code is found.
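//
// For example (illustrative): given "br i1 true, label %taken, label %dead",
// the %dead successor and every block it dominates are added to DeadBlocks,
// and PHIs in blocks on its dominance frontier get undef for incoming values
// from dead predecessors.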
2571 bool GVN::processFoldableCondBr(BranchInst *BI) {
2572   if (!BI || BI->isUnconditional())
2573     return false;
2574 
2575   // If a branch has two identical successors, we cannot declare either dead.
2576   if (BI->getSuccessor(0) == BI->getSuccessor(1))
2577     return false;
2578 
2579   ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
2580   if (!Cond)
2581     return false;
2582 
2583   BasicBlock *DeadRoot =
2584       Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
2585   if (DeadBlocks.count(DeadRoot))
2586     return false;
2587 
2588   if (!DeadRoot->getSinglePredecessor())
2589     DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
2590 
2591   addDeadBlock(DeadRoot);
2592   return true;
2593 }
2594 
// performPRE() will trigger an assert if it comes across an instruction
// without an associated val-num. As a function normally has far more live
// instructions than dead instructions, it makes more sense just to "fabricate"
// a val-number for the dead code than to check whether each instruction
// involved is dead or not.
2599 void GVN::assignValNumForDeadCode() {
2600   for (BasicBlock *BB : DeadBlocks) {
2601     for (Instruction &Inst : *BB) {
2602       unsigned ValNum = VN.lookupOrAdd(&Inst);
2603       addToLeaderTable(ValNum, &Inst, BB);
2604     }
2605   }
2606 }
2607 
2608 class llvm::gvn::GVNLegacyPass : public FunctionPass {
2609 public:
2610   static char ID; // Pass identification, replacement for typeid
2611 
2612   explicit GVNLegacyPass(bool NoLoads = false)
2613       : FunctionPass(ID), NoLoads(NoLoads) {
2614     initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
2615   }
2616 
2617   bool runOnFunction(Function &F) override {
2618     if (skipFunction(F))
2619       return false;
2620 
2621     auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
2622 
2623     return Impl.runImpl(
2624         F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
2625         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
2626         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
2627         getAnalysis<AAResultsWrapperPass>().getAAResults(),
2628         NoLoads ? nullptr
2629                 : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
2630         LIWP ? &LIWP->getLoopInfo() : nullptr,
2631         &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
2632   }
2633 
2634   void getAnalysisUsage(AnalysisUsage &AU) const override {
2635     AU.addRequired<AssumptionCacheTracker>();
2636     AU.addRequired<DominatorTreeWrapperPass>();
2637     AU.addRequired<TargetLibraryInfoWrapperPass>();
2638     if (!NoLoads)
2639       AU.addRequired<MemoryDependenceWrapperPass>();
2640     AU.addRequired<AAResultsWrapperPass>();
2641 
2642     AU.addPreserved<DominatorTreeWrapperPass>();
2643     AU.addPreserved<GlobalsAAWrapperPass>();
2644     AU.addPreserved<TargetLibraryInfoWrapperPass>();
2645     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2646   }
2647 
2648 private:
2649   bool NoLoads;
2650   GVN Impl;
2651 };
2652 
2653 char GVNLegacyPass::ID = 0;
2654 
2655 INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
2656 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2657 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2658 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2659 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2660 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2661 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2662 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
2663 INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
2664 
2665 // The public interface to this file...
2666 FunctionPass *llvm::createGVNPass(bool NoLoads) {
2667   return new GVNLegacyPass(NoLoads);
2668 }
2669