1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs global value numbering to eliminate fully redundant
11 // instructions.  It also performs simple dead load elimination.
12 //
13 // Note that this pass does the value numbering itself; it does not use the
14 // ValueNumbering analysis passes.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Transforms/Scalar/GVN.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/DepthFirstIterator.h"
21 #include "llvm/ADT/Hashing.h"
22 #include "llvm/ADT/MapVector.h"
23 #include "llvm/ADT/PostOrderIterator.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/Analysis/AssumptionCache.h"
29 #include "llvm/Analysis/CFG.h"
30 #include "llvm/Analysis/ConstantFolding.h"
31 #include "llvm/Analysis/GlobalsModRef.h"
32 #include "llvm/Analysis/InstructionSimplify.h"
33 #include "llvm/Analysis/Loads.h"
34 #include "llvm/Analysis/MemoryBuiltins.h"
35 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
36 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
37 #include "llvm/Analysis/PHITransAddr.h"
38 #include "llvm/Analysis/TargetLibraryInfo.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/Dominators.h"
41 #include "llvm/IR/GlobalVariable.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/IR/IntrinsicInst.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/Metadata.h"
46 #include "llvm/IR/PatternMatch.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/raw_ostream.h"
50 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
51 #include "llvm/Transforms/Utils/Local.h"
52 #include "llvm/Transforms/Utils/SSAUpdater.h"
53 #include "llvm/Transforms/Utils/VNCoercion.h"
54 
55 #include <vector>
56 using namespace llvm;
57 using namespace llvm::gvn;
58 using namespace llvm::VNCoercion;
59 using namespace PatternMatch;
60 
61 #define DEBUG_TYPE "gvn"
62 
63 STATISTIC(NumGVNInstr,  "Number of instructions deleted");
64 STATISTIC(NumGVNLoad,   "Number of loads deleted");
65 STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
66 STATISTIC(NumGVNBlocks, "Number of blocks merged");
67 STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
68 STATISTIC(NumGVNEqProp, "Number of equalities propagated");
69 STATISTIC(NumPRELoad,   "Number of loads PRE'd");
70 
71 static cl::opt<bool> EnablePRE("enable-pre",
72                                cl::init(true), cl::Hidden);
73 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
74 
75 // Maximum allowed recursion depth.
76 static cl::opt<uint32_t>
77 MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
78                 cl::desc("Max recurse depth (default = 1000)"));
79 
80 struct llvm::GVN::Expression {
81   uint32_t opcode;
82   Type *type;
83   bool commutative;
84   SmallVector<uint32_t, 4> varargs;
85 
86   Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
87 
88   bool operator==(const Expression &other) const {
89     if (opcode != other.opcode)
90       return false;
91     if (opcode == ~0U || opcode == ~1U)
92       return true;
93     if (type != other.type)
94       return false;
95     if (varargs != other.varargs)
96       return false;
97     return true;
98   }
99 
100   friend hash_code hash_value(const Expression &Value) {
101     return hash_combine(
102         Value.opcode, Value.type,
103         hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
104   }
105 };
106 
107 namespace llvm {
108 template <> struct DenseMapInfo<GVN::Expression> {
109   static inline GVN::Expression getEmptyKey() { return ~0U; }
110 
111   static inline GVN::Expression getTombstoneKey() { return ~1U; }
112 
113   static unsigned getHashValue(const GVN::Expression &e) {
114     using llvm::hash_value;
115     return static_cast<unsigned>(hash_value(e));
116   }
117   static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
118     return LHS == RHS;
119   }
120 };
121 } // End llvm namespace.
122 
123 /// Represents a particular available value that we know how to materialize.
124 /// Materialization of an AvailableValue never fails.  An AvailableValue is
125 /// implicitly associated with a rematerialization point which is the
126 /// location of the instruction from which it was formed.
127 struct llvm::gvn::AvailableValue {
128   enum ValType {
129     SimpleVal, // A simple offsetted value that is accessed.
130     LoadVal,   // A value produced by a load.
131     MemIntrin, // A memory intrinsic which is loaded from.
132     UndefVal   // A UndefValue representing a value from dead block (which
133                // is not yet physically removed from the CFG).
134   };
135 
136   /// V - The value that is live out of the block.
137   PointerIntPair<Value *, 2, ValType> Val;
138 
139   /// Offset - The byte offset in Val that is interesting for the load query.
140   unsigned Offset;
141 
142   static AvailableValue get(Value *V, unsigned Offset = 0) {
143     AvailableValue Res;
144     Res.Val.setPointer(V);
145     Res.Val.setInt(SimpleVal);
146     Res.Offset = Offset;
147     return Res;
148   }
149 
150   static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
151     AvailableValue Res;
152     Res.Val.setPointer(MI);
153     Res.Val.setInt(MemIntrin);
154     Res.Offset = Offset;
155     return Res;
156   }
157 
158   static AvailableValue getLoad(LoadInst *LI, unsigned Offset = 0) {
159     AvailableValue Res;
160     Res.Val.setPointer(LI);
161     Res.Val.setInt(LoadVal);
162     Res.Offset = Offset;
163     return Res;
164   }
165 
166   static AvailableValue getUndef() {
167     AvailableValue Res;
168     Res.Val.setPointer(nullptr);
169     Res.Val.setInt(UndefVal);
170     Res.Offset = 0;
171     return Res;
172   }
173 
174   bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
175   bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
176   bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
177   bool isUndefValue() const { return Val.getInt() == UndefVal; }
178 
179   Value *getSimpleValue() const {
180     assert(isSimpleValue() && "Wrong accessor");
181     return Val.getPointer();
182   }
183 
184   LoadInst *getCoercedLoadValue() const {
185     assert(isCoercedLoadValue() && "Wrong accessor");
186     return cast<LoadInst>(Val.getPointer());
187   }
188 
189   MemIntrinsic *getMemIntrinValue() const {
190     assert(isMemIntrinValue() && "Wrong accessor");
191     return cast<MemIntrinsic>(Val.getPointer());
192   }
193 
194   /// Emit code at the specified insertion point to adjust the value defined
195   /// here to the specified type. This handles various coercion cases.
196   Value *MaterializeAdjustedValue(LoadInst *LI, Instruction *InsertPt,
197                                   GVN &gvn) const;
198 };
199 
200 /// Represents an AvailableValue which can be rematerialized at the end of
201 /// the associated BasicBlock.
202 struct llvm::gvn::AvailableValueInBlock {
203   /// BB - The basic block in question.
204   BasicBlock *BB;
205 
206   /// AV - The actual available value
207   AvailableValue AV;
208 
209   static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
210     AvailableValueInBlock Res;
211     Res.BB = BB;
212     Res.AV = std::move(AV);
213     return Res;
214   }
215 
216   static AvailableValueInBlock get(BasicBlock *BB, Value *V,
217                                    unsigned Offset = 0) {
218     return get(BB, AvailableValue::get(V, Offset));
219   }
220   static AvailableValueInBlock getUndef(BasicBlock *BB) {
221     return get(BB, AvailableValue::getUndef());
222   }
223 
224   /// Emit code at the end of this block to adjust the value defined here to
225   /// the specified type. This handles various coercion cases.
226   Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const {
227     return AV.MaterializeAdjustedValue(LI, BB->getTerminator(), gvn);
228   }
229 };
230 
231 //===----------------------------------------------------------------------===//
232 //                     ValueTable Internal Functions
233 //===----------------------------------------------------------------------===//
234 
235 GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
236   Expression e;
237   e.type = I->getType();
238   e.opcode = I->getOpcode();
239   for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
240        OI != OE; ++OI)
241     e.varargs.push_back(lookupOrAdd(*OI));
242   if (I->isCommutative()) {
243     // Ensure that commutative instructions that only differ by a permutation
244     // of their operands get the same value number by sorting the operand value
245     // numbers.  Since all commutative instructions have two operands it is more
246     // efficient to sort by hand rather than using, say, std::sort.
247     assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
248     if (e.varargs[0] > e.varargs[1])
249       std::swap(e.varargs[0], e.varargs[1]);
250     e.commutative = true;
251   }
252 
253   if (CmpInst *C = dyn_cast<CmpInst>(I)) {
254     // Sort the operand value numbers so x<y and y>x get the same value number.
255     CmpInst::Predicate Predicate = C->getPredicate();
256     if (e.varargs[0] > e.varargs[1]) {
257       std::swap(e.varargs[0], e.varargs[1]);
258       Predicate = CmpInst::getSwappedPredicate(Predicate);
259     }
260     e.opcode = (C->getOpcode() << 8) | Predicate;
261     e.commutative = true;
262   } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
263     for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
264          II != IE; ++II)
265       e.varargs.push_back(*II);
266   }
267 
268   return e;
269 }
270 
271 GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
272                                                CmpInst::Predicate Predicate,
273                                                Value *LHS, Value *RHS) {
274   assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
275          "Not a comparison!");
276   Expression e;
277   e.type = CmpInst::makeCmpResultType(LHS->getType());
278   e.varargs.push_back(lookupOrAdd(LHS));
279   e.varargs.push_back(lookupOrAdd(RHS));
280 
281   // Sort the operand value numbers so x<y and y>x get the same value number.
282   if (e.varargs[0] > e.varargs[1]) {
283     std::swap(e.varargs[0], e.varargs[1]);
284     Predicate = CmpInst::getSwappedPredicate(Predicate);
285   }
286   e.opcode = (Opcode << 8) | Predicate;
287   e.commutative = true;
288   return e;
289 }
290 
291 GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
292   assert(EI && "Not an ExtractValueInst?");
293   Expression e;
294   e.type = EI->getType();
295   e.opcode = 0;
296 
297   IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
298   if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
299     // EI might be an extract from one of our recognised intrinsics. If it
300     // is we'll synthesize a semantically equivalent expression instead on
301     // an extract value expression.
302     switch (I->getIntrinsicID()) {
303       case Intrinsic::sadd_with_overflow:
304       case Intrinsic::uadd_with_overflow:
305         e.opcode = Instruction::Add;
306         break;
307       case Intrinsic::ssub_with_overflow:
308       case Intrinsic::usub_with_overflow:
309         e.opcode = Instruction::Sub;
310         break;
311       case Intrinsic::smul_with_overflow:
312       case Intrinsic::umul_with_overflow:
313         e.opcode = Instruction::Mul;
314         break;
315       default:
316         break;
317     }
318 
319     if (e.opcode != 0) {
320       // Intrinsic recognized. Grab its args to finish building the expression.
321       assert(I->getNumArgOperands() == 2 &&
322              "Expect two args for recognised intrinsics.");
323       e.varargs.push_back(lookupOrAdd(I->getArgOperand(0)));
324       e.varargs.push_back(lookupOrAdd(I->getArgOperand(1)));
325       return e;
326     }
327   }
328 
329   // Not a recognised intrinsic. Fall back to producing an extract value
330   // expression.
331   e.opcode = EI->getOpcode();
332   for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
333        OI != OE; ++OI)
334     e.varargs.push_back(lookupOrAdd(*OI));
335 
336   for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
337          II != IE; ++II)
338     e.varargs.push_back(*II);
339 
340   return e;
341 }
342 
343 //===----------------------------------------------------------------------===//
344 //                     ValueTable External Functions
345 //===----------------------------------------------------------------------===//
346 
347 GVN::ValueTable::ValueTable() : nextValueNumber(1) {}
348 GVN::ValueTable::ValueTable(const ValueTable &) = default;
349 GVN::ValueTable::ValueTable(ValueTable &&) = default;
350 GVN::ValueTable::~ValueTable() = default;
351 
352 /// add - Insert a value into the table with a specified value number.
353 void GVN::ValueTable::add(Value *V, uint32_t num) {
354   valueNumbering.insert(std::make_pair(V, num));
355   if (PHINode *PN = dyn_cast<PHINode>(V))
356     NumberingPhi[num] = PN;
357 }
358 
359 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
360   if (AA->doesNotAccessMemory(C)) {
361     Expression exp = createExpr(C);
362     uint32_t e = assignExpNewValueNum(exp).first;
363     valueNumbering[C] = e;
364     return e;
365   } else if (AA->onlyReadsMemory(C)) {
366     Expression exp = createExpr(C);
367     auto ValNum = assignExpNewValueNum(exp);
368     if (ValNum.second) {
369       valueNumbering[C] = ValNum.first;
370       return ValNum.first;
371     }
372     if (!MD) {
373       uint32_t e = assignExpNewValueNum(exp).first;
374       valueNumbering[C] = e;
375       return e;
376     }
377 
378     MemDepResult local_dep = MD->getDependency(C);
379 
380     if (!local_dep.isDef() && !local_dep.isNonLocal()) {
381       valueNumbering[C] =  nextValueNumber;
382       return nextValueNumber++;
383     }
384 
385     if (local_dep.isDef()) {
386       CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
387 
388       if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
389         valueNumbering[C] = nextValueNumber;
390         return nextValueNumber++;
391       }
392 
393       for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
394         uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
395         uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
396         if (c_vn != cd_vn) {
397           valueNumbering[C] = nextValueNumber;
398           return nextValueNumber++;
399         }
400       }
401 
402       uint32_t v = lookupOrAdd(local_cdep);
403       valueNumbering[C] = v;
404       return v;
405     }
406 
407     // Non-local case.
408     const MemoryDependenceResults::NonLocalDepInfo &deps =
409       MD->getNonLocalCallDependency(CallSite(C));
410     // FIXME: Move the checking logic to MemDep!
411     CallInst* cdep = nullptr;
412 
413     // Check to see if we have a single dominating call instruction that is
414     // identical to C.
415     for (unsigned i = 0, e = deps.size(); i != e; ++i) {
416       const NonLocalDepEntry *I = &deps[i];
417       if (I->getResult().isNonLocal())
418         continue;
419 
420       // We don't handle non-definitions.  If we already have a call, reject
421       // instruction dependencies.
422       if (!I->getResult().isDef() || cdep != nullptr) {
423         cdep = nullptr;
424         break;
425       }
426 
427       CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
428       // FIXME: All duplicated with non-local case.
429       if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
430         cdep = NonLocalDepCall;
431         continue;
432       }
433 
434       cdep = nullptr;
435       break;
436     }
437 
438     if (!cdep) {
439       valueNumbering[C] = nextValueNumber;
440       return nextValueNumber++;
441     }
442 
443     if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
444       valueNumbering[C] = nextValueNumber;
445       return nextValueNumber++;
446     }
447     for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
448       uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
449       uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
450       if (c_vn != cd_vn) {
451         valueNumbering[C] = nextValueNumber;
452         return nextValueNumber++;
453       }
454     }
455 
456     uint32_t v = lookupOrAdd(cdep);
457     valueNumbering[C] = v;
458     return v;
459 
460   } else {
461     valueNumbering[C] = nextValueNumber;
462     return nextValueNumber++;
463   }
464 }
465 
466 /// Returns true if a value number exists for the specified value.
467 bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
468 
469 /// lookup_or_add - Returns the value number for the specified value, assigning
470 /// it a new number if it did not have one before.
471 uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
472   DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
473   if (VI != valueNumbering.end())
474     return VI->second;
475 
476   if (!isa<Instruction>(V)) {
477     valueNumbering[V] = nextValueNumber;
478     return nextValueNumber++;
479   }
480 
481   Instruction* I = cast<Instruction>(V);
482   Expression exp;
483   switch (I->getOpcode()) {
484     case Instruction::Call:
485       return lookupOrAddCall(cast<CallInst>(I));
486     case Instruction::Add:
487     case Instruction::FAdd:
488     case Instruction::Sub:
489     case Instruction::FSub:
490     case Instruction::Mul:
491     case Instruction::FMul:
492     case Instruction::UDiv:
493     case Instruction::SDiv:
494     case Instruction::FDiv:
495     case Instruction::URem:
496     case Instruction::SRem:
497     case Instruction::FRem:
498     case Instruction::Shl:
499     case Instruction::LShr:
500     case Instruction::AShr:
501     case Instruction::And:
502     case Instruction::Or:
503     case Instruction::Xor:
504     case Instruction::ICmp:
505     case Instruction::FCmp:
506     case Instruction::Trunc:
507     case Instruction::ZExt:
508     case Instruction::SExt:
509     case Instruction::FPToUI:
510     case Instruction::FPToSI:
511     case Instruction::UIToFP:
512     case Instruction::SIToFP:
513     case Instruction::FPTrunc:
514     case Instruction::FPExt:
515     case Instruction::PtrToInt:
516     case Instruction::IntToPtr:
517     case Instruction::BitCast:
518     case Instruction::Select:
519     case Instruction::ExtractElement:
520     case Instruction::InsertElement:
521     case Instruction::ShuffleVector:
522     case Instruction::InsertValue:
523     case Instruction::GetElementPtr:
524       exp = createExpr(I);
525       break;
526     case Instruction::ExtractValue:
527       exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
528       break;
529     case Instruction::PHI:
530       valueNumbering[V] = nextValueNumber;
531       NumberingPhi[nextValueNumber] = cast<PHINode>(V);
532       return nextValueNumber++;
533     default:
534       valueNumbering[V] = nextValueNumber;
535       return nextValueNumber++;
536   }
537 
538   uint32_t e = assignExpNewValueNum(exp).first;
539   valueNumbering[V] = e;
540   return e;
541 }
542 
543 /// Returns the value number of the specified value. Fails if
544 /// the value has not yet been numbered.
545 uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
546   DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
547   if (Verify) {
548     assert(VI != valueNumbering.end() && "Value not numbered?");
549     return VI->second;
550   }
551   return (VI != valueNumbering.end()) ? VI->second : 0;
552 }
553 
554 /// Returns the value number of the given comparison,
555 /// assigning it a new number if it did not have one before.  Useful when
556 /// we deduced the result of a comparison, but don't immediately have an
557 /// instruction realizing that comparison to hand.
558 uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
559                                          CmpInst::Predicate Predicate,
560                                          Value *LHS, Value *RHS) {
561   Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
562   return assignExpNewValueNum(exp).first;
563 }
564 
565 /// Remove all entries from the ValueTable.
566 void GVN::ValueTable::clear() {
567   valueNumbering.clear();
568   expressionNumbering.clear();
569   NumberingPhi.clear();
570   PhiTranslateTable.clear();
571   nextValueNumber = 1;
572   Expressions.clear();
573   ExprIdx.clear();
574   nextExprNumber = 0;
575 }
576 
577 /// Remove a value from the value numbering.
578 void GVN::ValueTable::erase(Value *V) {
579   uint32_t Num = valueNumbering.lookup(V);
580   valueNumbering.erase(V);
581   // If V is PHINode, V <--> value number is an one-to-one mapping.
582   if (isa<PHINode>(V))
583     NumberingPhi.erase(Num);
584 }
585 
586 /// verifyRemoved - Verify that the value is removed from all internal data
587 /// structures.
588 void GVN::ValueTable::verifyRemoved(const Value *V) const {
589   for (DenseMap<Value*, uint32_t>::const_iterator
590          I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
591     assert(I->first != V && "Inst still occurs in value numbering map!");
592   }
593 }
594 
595 //===----------------------------------------------------------------------===//
596 //                                GVN Pass
597 //===----------------------------------------------------------------------===//
598 
599 PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
600   // FIXME: The order of evaluation of these 'getResult' calls is very
601   // significant! Re-ordering these variables will cause GVN when run alone to
602   // be less effective! We should fix memdep and basic-aa to not exhibit this
603   // behavior, but until then don't change the order here.
604   auto &AC = AM.getResult<AssumptionAnalysis>(F);
605   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
606   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
607   auto &AA = AM.getResult<AAManager>(F);
608   auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F);
609   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
610   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
611   bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep, LI, &ORE);
612   if (!Changed)
613     return PreservedAnalyses::all();
614   PreservedAnalyses PA;
615   PA.preserve<DominatorTreeAnalysis>();
616   PA.preserve<GlobalsAA>();
617   PA.preserve<TargetLibraryAnalysis>();
618   return PA;
619 }
620 
621 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
622 LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
623   errs() << "{\n";
624   for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
625        E = d.end(); I != E; ++I) {
626       errs() << I->first << "\n";
627       I->second->dump();
628   }
629   errs() << "}\n";
630 }
631 #endif
632 
633 /// Return true if we can prove that the value
634 /// we're analyzing is fully available in the specified block.  As we go, keep
635 /// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
636 /// map is actually a tri-state map with the following values:
637 ///   0) we know the block *is not* fully available.
638 ///   1) we know the block *is* fully available.
639 ///   2) we do not know whether the block is fully available or not, but we are
640 ///      currently speculating that it will be.
641 ///   3) we are speculating for this block and have used that to speculate for
642 ///      other blocks.
643 static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
644                             DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
645                             uint32_t RecurseDepth) {
646   if (RecurseDepth > MaxRecurseDepth)
647     return false;
648 
649   // Optimistically assume that the block is fully available and check to see
650   // if we already know about this block in one lookup.
651   std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
652     FullyAvailableBlocks.insert(std::make_pair(BB, 2));
653 
654   // If the entry already existed for this block, return the precomputed value.
655   if (!IV.second) {
656     // If this is a speculative "available" value, mark it as being used for
657     // speculation of other blocks.
658     if (IV.first->second == 2)
659       IV.first->second = 3;
660     return IV.first->second != 0;
661   }
662 
663   // Otherwise, see if it is fully available in all predecessors.
664   pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
665 
666   // If this block has no predecessors, it isn't live-in here.
667   if (PI == PE)
668     goto SpeculationFailure;
669 
670   for (; PI != PE; ++PI)
671     // If the value isn't fully available in one of our predecessors, then it
672     // isn't fully available in this block either.  Undo our previous
673     // optimistic assumption and bail out.
674     if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
675       goto SpeculationFailure;
676 
677   return true;
678 
679 // If we get here, we found out that this is not, after
680 // all, a fully-available block.  We have a problem if we speculated on this and
681 // used the speculation to mark other blocks as available.
682 SpeculationFailure:
683   char &BBVal = FullyAvailableBlocks[BB];
684 
685   // If we didn't speculate on this, just return with it set to false.
686   if (BBVal == 2) {
687     BBVal = 0;
688     return false;
689   }
690 
691   // If we did speculate on this value, we could have blocks set to 1 that are
692   // incorrect.  Walk the (transitive) successors of this block and mark them as
693   // 0 if set to one.
694   SmallVector<BasicBlock*, 32> BBWorklist;
695   BBWorklist.push_back(BB);
696 
697   do {
698     BasicBlock *Entry = BBWorklist.pop_back_val();
699     // Note that this sets blocks to 0 (unavailable) if they happen to not
700     // already be in FullyAvailableBlocks.  This is safe.
701     char &EntryVal = FullyAvailableBlocks[Entry];
702     if (EntryVal == 0) continue;  // Already unavailable.
703 
704     // Mark as unavailable.
705     EntryVal = 0;
706 
707     BBWorklist.append(succ_begin(Entry), succ_end(Entry));
708   } while (!BBWorklist.empty());
709 
710   return false;
711 }
712 
713 
714 
715 
716 /// Given a set of loads specified by ValuesPerBlock,
717 /// construct SSA form, allowing us to eliminate LI.  This returns the value
718 /// that should be used at LI's definition site.
719 static Value *ConstructSSAForLoadSet(LoadInst *LI,
720                          SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
721                                      GVN &gvn) {
722   // Check for the fully redundant, dominating load case.  In this case, we can
723   // just use the dominating value directly.
724   if (ValuesPerBlock.size() == 1 &&
725       gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
726                                                LI->getParent())) {
727     assert(!ValuesPerBlock[0].AV.isUndefValue() &&
728            "Dead BB dominate this block");
729     return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
730   }
731 
732   // Otherwise, we have to construct SSA form.
733   SmallVector<PHINode*, 8> NewPHIs;
734   SSAUpdater SSAUpdate(&NewPHIs);
735   SSAUpdate.Initialize(LI->getType(), LI->getName());
736 
737   for (const AvailableValueInBlock &AV : ValuesPerBlock) {
738     BasicBlock *BB = AV.BB;
739 
740     if (SSAUpdate.HasValueForBlock(BB))
741       continue;
742 
743     SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn));
744   }
745 
746   // Perform PHI construction.
747   return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
748 }
749 
750 Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
751                                                 Instruction *InsertPt,
752                                                 GVN &gvn) const {
753   Value *Res;
754   Type *LoadTy = LI->getType();
755   const DataLayout &DL = LI->getModule()->getDataLayout();
756   if (isSimpleValue()) {
757     Res = getSimpleValue();
758     if (Res->getType() != LoadTy) {
759       Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
760 
761       DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
762                    << *getSimpleValue() << '\n'
763                    << *Res << '\n' << "\n\n\n");
764     }
765   } else if (isCoercedLoadValue()) {
766     LoadInst *Load = getCoercedLoadValue();
767     if (Load->getType() == LoadTy && Offset == 0) {
768       Res = Load;
769     } else {
770       Res = getLoadValueForLoad(Load, Offset, LoadTy, InsertPt, DL);
771       // We would like to use gvn.markInstructionForDeletion here, but we can't
772       // because the load is already memoized into the leader map table that GVN
773       // tracks.  It is potentially possible to remove the load from the table,
774       // but then there all of the operations based on it would need to be
775       // rehashed.  Just leave the dead load around.
776       gvn.getMemDep().removeInstruction(Load);
777       DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
778                    << *getCoercedLoadValue() << '\n'
779                    << *Res << '\n'
780                    << "\n\n\n");
781     }
782   } else if (isMemIntrinValue()) {
783     Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
784                                  InsertPt, DL);
785     DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
786                  << "  " << *getMemIntrinValue() << '\n'
787                  << *Res << '\n' << "\n\n\n");
788   } else {
789     assert(isUndefValue() && "Should be UndefVal");
790     DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
791     return UndefValue::get(LoadTy);
792   }
793   assert(Res && "failed to materialize?");
794   return Res;
795 }
796 
797 static bool isLifetimeStart(const Instruction *Inst) {
798   if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
799     return II->getIntrinsicID() == Intrinsic::lifetime_start;
800   return false;
801 }
802 
803 /// \brief Try to locate the three instruction involved in a missed
804 /// load-elimination case that is due to an intervening store.
805 static void reportMayClobberedLoad(LoadInst *LI, MemDepResult DepInfo,
806                                    DominatorTree *DT,
807                                    OptimizationRemarkEmitter *ORE) {
808   using namespace ore;
809   User *OtherAccess = nullptr;
810 
811   OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", LI);
812   R << "load of type " << NV("Type", LI->getType()) << " not eliminated"
813     << setExtraArgs();
814 
815   for (auto *U : LI->getPointerOperand()->users())
816     if (U != LI && (isa<LoadInst>(U) || isa<StoreInst>(U)) &&
817         DT->dominates(cast<Instruction>(U), LI)) {
818       // FIXME: for now give up if there are multiple memory accesses that
819       // dominate the load.  We need further analysis to decide which one is
820       // that we're forwarding from.
821       if (OtherAccess)
822         OtherAccess = nullptr;
823       else
824         OtherAccess = U;
825     }
826 
827   if (OtherAccess)
828     R << " in favor of " << NV("OtherAccess", OtherAccess);
829 
830   R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
831 
832   ORE->emit(R);
833 }
834 
835 bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
836                                   Value *Address, AvailableValue &Res) {
837 
838   assert((DepInfo.isDef() || DepInfo.isClobber()) &&
839          "expected a local dependence");
840   assert(LI->isUnordered() && "rules below are incorrect for ordered access");
841 
842   const DataLayout &DL = LI->getModule()->getDataLayout();
843 
844   if (DepInfo.isClobber()) {
845     // If the dependence is to a store that writes to a superset of the bits
846     // read by the load, we can extract the bits we need for the load from the
847     // stored value.
848     if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
849       // Can't forward from non-atomic to atomic without violating memory model.
850       if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
851         int Offset =
852           analyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, DL);
853         if (Offset != -1) {
854           Res = AvailableValue::get(DepSI->getValueOperand(), Offset);
855           return true;
856         }
857       }
858     }
859 
860     // Check to see if we have something like this:
861     //    load i32* P
862     //    load i8* (P+1)
863     // if we have this, replace the later with an extraction from the former.
864     if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
865       // If this is a clobber and L is the first instruction in its block, then
866       // we have the first instruction in the entry block.
867       // Can't forward from non-atomic to atomic without violating memory model.
868       if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
869         int Offset =
870           analyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
871 
872         if (Offset != -1) {
873           Res = AvailableValue::getLoad(DepLI, Offset);
874           return true;
875         }
876       }
877     }
878 
879     // If the clobbering value is a memset/memcpy/memmove, see if we can
880     // forward a value on from it.
881     if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
882       if (Address && !LI->isAtomic()) {
883         int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
884                                                       DepMI, DL);
885         if (Offset != -1) {
886           Res = AvailableValue::getMI(DepMI, Offset);
887           return true;
888         }
889       }
890     }
891     // Nothing known about this clobber, have to be conservative
892     DEBUG(
893       // fast print dep, using operator<< on instruction is too slow.
894       dbgs() << "GVN: load ";
895       LI->printAsOperand(dbgs());
896       Instruction *I = DepInfo.getInst();
897       dbgs() << " is clobbered by " << *I << '\n';
898     );
899 
900     if (ORE->allowExtraAnalysis())
901       reportMayClobberedLoad(LI, DepInfo, DT, ORE);
902 
903     return false;
904   }
905   assert(DepInfo.isDef() && "follows from above");
906 
907   Instruction *DepInst = DepInfo.getInst();
908 
909   // Loading the allocation -> undef.
910   if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
911       // Loading immediately after lifetime begin -> undef.
912       isLifetimeStart(DepInst)) {
913     Res = AvailableValue::get(UndefValue::get(LI->getType()));
914     return true;
915   }
916 
917   // Loading from calloc (which zero initializes memory) -> zero
918   if (isCallocLikeFn(DepInst, TLI)) {
919     Res = AvailableValue::get(Constant::getNullValue(LI->getType()));
920     return true;
921   }
922 
923   if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
924     // Reject loads and stores that are to the same address but are of
925     // different types if we have to. If the stored value is larger or equal to
926     // the loaded value, we can reuse it.
927     if (S->getValueOperand()->getType() != LI->getType() &&
928         !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
929                                          LI->getType(), DL))
930       return false;
931 
932     // Can't forward from non-atomic to atomic without violating memory model.
933     if (S->isAtomic() < LI->isAtomic())
934       return false;
935 
936     Res = AvailableValue::get(S->getValueOperand());
937     return true;
938   }
939 
940   if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
941     // If the types mismatch and we can't handle it, reject reuse of the load.
942     // If the stored value is larger or equal to the loaded value, we can reuse
943     // it.
944     if (LD->getType() != LI->getType() &&
945         !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
946       return false;
947 
948     // Can't forward from non-atomic to atomic without violating memory model.
949     if (LD->isAtomic() < LI->isAtomic())
950       return false;
951 
952     Res = AvailableValue::getLoad(LD);
953     return true;
954   }
955 
956   // Unknown def - must be conservative
957   DEBUG(
958     // fast print dep, using operator<< on instruction is too slow.
959     dbgs() << "GVN: load ";
960     LI->printAsOperand(dbgs());
961     dbgs() << " has unknown def " << *DepInst << '\n';
962   );
963   return false;
964 }
965 
966 void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
967                                   AvailValInBlkVect &ValuesPerBlock,
968                                   UnavailBlkVect &UnavailableBlocks) {
969 
970   // Filter out useless results (non-locals, etc).  Keep track of the blocks
971   // where we have a value available in repl, also keep track of whether we see
972   // dependencies that produce an unknown value for the load (such as a call
973   // that could potentially clobber the load).
974   unsigned NumDeps = Deps.size();
975   for (unsigned i = 0, e = NumDeps; i != e; ++i) {
976     BasicBlock *DepBB = Deps[i].getBB();
977     MemDepResult DepInfo = Deps[i].getResult();
978 
979     if (DeadBlocks.count(DepBB)) {
980       // Dead dependent mem-op disguise as a load evaluating the same value
981       // as the load in question.
982       ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
983       continue;
984     }
985 
986     if (!DepInfo.isDef() && !DepInfo.isClobber()) {
987       UnavailableBlocks.push_back(DepBB);
988       continue;
989     }
990 
991     // The address being loaded in this non-local block may not be the same as
992     // the pointer operand of the load if PHI translation occurs.  Make sure
993     // to consider the right address.
994     Value *Address = Deps[i].getAddress();
995 
996     AvailableValue AV;
997     if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) {
998       // subtlety: because we know this was a non-local dependency, we know
999       // it's safe to materialize anywhere between the instruction within
1000       // DepInfo and the end of it's block.
1001       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1002                                                           std::move(AV)));
1003     } else {
1004       UnavailableBlocks.push_back(DepBB);
1005     }
1006   }
1007 
1008   assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1009          "post condition violation");
1010 }
1011 
1012 bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
1013                          UnavailBlkVect &UnavailableBlocks) {
1014   // Okay, we have *some* definitions of the value.  This means that the value
1015   // is available in some of our (transitive) predecessors.  Lets think about
1016   // doing PRE of this load.  This will involve inserting a new load into the
1017   // predecessor when it's not available.  We could do this in general, but
1018   // prefer to not increase code size.  As such, we only do this when we know
1019   // that we only have to insert *one* load (which means we're basically moving
1020   // the load, not inserting a new one).
1021 
1022   SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
1023                                         UnavailableBlocks.end());
1024 
1025   // Let's find the first basic block with more than one predecessor.  Walk
1026   // backwards through predecessors if needed.
1027   BasicBlock *LoadBB = LI->getParent();
1028   BasicBlock *TmpBB = LoadBB;
1029 
1030   while (TmpBB->getSinglePredecessor()) {
1031     TmpBB = TmpBB->getSinglePredecessor();
1032     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1033       return false;
1034     if (Blockers.count(TmpBB))
1035       return false;
1036 
1037     // If any of these blocks has more than one successor (i.e. if the edge we
1038     // just traversed was critical), then there are other paths through this
1039     // block along which the load may not be anticipated.  Hoisting the load
1040     // above this block would be adding the load to execution paths along
1041     // which it was not previously executed.
1042     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1043       return false;
1044   }
1045 
1046   assert(TmpBB);
1047   LoadBB = TmpBB;
1048 
1049   // Check to see how many predecessors have the loaded value fully
1050   // available.
1051   MapVector<BasicBlock *, Value *> PredLoads;
1052   DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1053   for (const AvailableValueInBlock &AV : ValuesPerBlock)
1054     FullyAvailableBlocks[AV.BB] = true;
1055   for (BasicBlock *UnavailableBB : UnavailableBlocks)
1056     FullyAvailableBlocks[UnavailableBB] = false;
1057 
1058   SmallVector<BasicBlock *, 4> CriticalEdgePred;
1059   for (BasicBlock *Pred : predecessors(LoadBB)) {
1060     // If any predecessor block is an EH pad that does not allow non-PHI
1061     // instructions before the terminator, we can't PRE the load.
1062     if (Pred->getTerminator()->isEHPad()) {
1063       DEBUG(dbgs()
1064             << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1065             << Pred->getName() << "': " << *LI << '\n');
1066       return false;
1067     }
1068 
1069     if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
1070       continue;
1071     }
1072 
1073     if (Pred->getTerminator()->getNumSuccessors() != 1) {
1074       if (isa<IndirectBrInst>(Pred->getTerminator())) {
1075         DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1076               << Pred->getName() << "': " << *LI << '\n');
1077         return false;
1078       }
1079 
1080       if (LoadBB->isEHPad()) {
1081         DEBUG(dbgs()
1082               << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1083               << Pred->getName() << "': " << *LI << '\n');
1084         return false;
1085       }
1086 
1087       CriticalEdgePred.push_back(Pred);
1088     } else {
1089       // Only add the predecessors that will not be split for now.
1090       PredLoads[Pred] = nullptr;
1091     }
1092   }
1093 
1094   // Decide whether PRE is profitable for this load.
1095   unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
1096   assert(NumUnavailablePreds != 0 &&
1097          "Fully available value should already be eliminated!");
1098 
1099   // If this load is unavailable in multiple predecessors, reject it.
1100   // FIXME: If we could restructure the CFG, we could make a common pred with
1101   // all the preds that don't have an available LI and insert a new load into
1102   // that one block.
1103   if (NumUnavailablePreds != 1)
1104       return false;
1105 
1106   // Split critical edges, and update the unavailable predecessors accordingly.
1107   for (BasicBlock *OrigPred : CriticalEdgePred) {
1108     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1109     assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1110     PredLoads[NewPred] = nullptr;
1111     DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1112                  << LoadBB->getName() << '\n');
1113   }
1114 
1115   // Check if the load can safely be moved to all the unavailable predecessors.
1116   bool CanDoPRE = true;
1117   const DataLayout &DL = LI->getModule()->getDataLayout();
1118   SmallVector<Instruction*, 8> NewInsts;
1119   for (auto &PredLoad : PredLoads) {
1120     BasicBlock *UnavailablePred = PredLoad.first;
1121 
1122     // Do PHI translation to get its value in the predecessor if necessary.  The
1123     // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1124 
1125     // If all preds have a single successor, then we know it is safe to insert
1126     // the load on the pred (?!?), so we can insert code to materialize the
1127     // pointer if it is not available.
1128     PHITransAddr Address(LI->getPointerOperand(), DL, AC);
1129     Value *LoadPtr = nullptr;
1130     LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
1131                                                 *DT, NewInsts);
1132 
1133     // If we couldn't find or insert a computation of this phi translated value,
1134     // we fail PRE.
1135     if (!LoadPtr) {
1136       DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1137             << *LI->getPointerOperand() << "\n");
1138       CanDoPRE = false;
1139       break;
1140     }
1141 
1142     PredLoad.second = LoadPtr;
1143   }
1144 
1145   if (!CanDoPRE) {
1146     while (!NewInsts.empty()) {
1147       Instruction *I = NewInsts.pop_back_val();
1148       if (MD) MD->removeInstruction(I);
1149       I->eraseFromParent();
1150     }
1151     // HINT: Don't revert the edge-splitting as following transformation may
1152     // also need to split these critical edges.
1153     return !CriticalEdgePred.empty();
1154   }
1155 
1156   // Okay, we can eliminate this load by inserting a reload in the predecessor
1157   // and using PHI construction to get the value in the other predecessors, do
1158   // it.
1159   DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1160   DEBUG(if (!NewInsts.empty())
1161           dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
1162                  << *NewInsts.back() << '\n');
1163 
1164   // Assign value numbers to the new instructions.
1165   for (Instruction *I : NewInsts) {
1166     // Instructions that have been inserted in predecessor(s) to materialize
1167     // the load address do not retain their original debug locations. Doing
1168     // so could lead to confusing (but correct) source attributions.
1169     // FIXME: How do we retain source locations without causing poor debugging
1170     // behavior?
1171     I->setDebugLoc(DebugLoc());
1172 
1173     // FIXME: We really _ought_ to insert these value numbers into their
1174     // parent's availability map.  However, in doing so, we risk getting into
1175     // ordering issues.  If a block hasn't been processed yet, we would be
1176     // marking a value as AVAIL-IN, which isn't what we intend.
1177     VN.lookupOrAdd(I);
1178   }
1179 
1180   for (const auto &PredLoad : PredLoads) {
1181     BasicBlock *UnavailablePred = PredLoad.first;
1182     Value *LoadPtr = PredLoad.second;
1183 
1184     auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
1185                                  LI->isVolatile(), LI->getAlignment(),
1186                                  LI->getOrdering(), LI->getSynchScope(),
1187                                  UnavailablePred->getTerminator());
1188 
1189     // Transfer the old load's AA tags to the new load.
1190     AAMDNodes Tags;
1191     LI->getAAMetadata(Tags);
1192     if (Tags)
1193       NewLoad->setAAMetadata(Tags);
1194 
1195     if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load))
1196       NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1197     if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group))
1198       NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1199     if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
1200       NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1201 
1202     // We do not propagate the old load's debug location, because the new
1203     // load now lives in a different BB, and we want to avoid a jumpy line
1204     // table.
1205     // FIXME: How do we retain source locations without causing poor debugging
1206     // behavior?
1207 
1208     // Add the newly created load.
1209     ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
1210                                                         NewLoad));
1211     MD->invalidateCachedPointerInfo(LoadPtr);
1212     DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1213   }
1214 
1215   // Perform PHI construction.
1216   Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
1217   LI->replaceAllUsesWith(V);
1218   if (isa<PHINode>(V))
1219     V->takeName(LI);
1220   if (Instruction *I = dyn_cast<Instruction>(V))
1221     I->setDebugLoc(LI->getDebugLoc());
1222   if (V->getType()->getScalarType()->isPointerTy())
1223     MD->invalidateCachedPointerInfo(V);
1224   markInstructionForDeletion(LI);
1225   ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI)
1226             << "load eliminated by PRE");
1227   ++NumPRELoad;
1228   return true;
1229 }
1230 
1231 static void reportLoadElim(LoadInst *LI, Value *AvailableValue,
1232                            OptimizationRemarkEmitter *ORE) {
1233   using namespace ore;
1234   ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadElim", LI)
1235             << "load of type " << NV("Type", LI->getType()) << " eliminated"
1236             << setExtraArgs() << " in favor of "
1237             << NV("InfavorOfValue", AvailableValue));
1238 }
1239 
1240 /// Attempt to eliminate a load whose dependencies are
1241 /// non-local by performing PHI construction.
1242 bool GVN::processNonLocalLoad(LoadInst *LI) {
1243   // non-local speculations are not allowed under asan.
1244   if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
1245     return false;
1246 
1247   // Step 1: Find the non-local dependencies of the load.
1248   LoadDepVect Deps;
1249   MD->getNonLocalPointerDependency(LI, Deps);
1250 
1251   // If we had to process more than one hundred blocks to find the
1252   // dependencies, this load isn't worth worrying about.  Optimizing
1253   // it will be too expensive.
1254   unsigned NumDeps = Deps.size();
1255   if (NumDeps > 100)
1256     return false;
1257 
1258   // If we had a phi translation failure, we'll have a single entry which is a
1259   // clobber in the current block.  Reject this early.
1260   if (NumDeps == 1 &&
1261       !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
1262     DEBUG(
1263       dbgs() << "GVN: non-local load ";
1264       LI->printAsOperand(dbgs());
1265       dbgs() << " has unknown dependencies\n";
1266     );
1267     return false;
1268   }
1269 
1270   // If this load follows a GEP, see if we can PRE the indices before analyzing.
1271   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
1272     for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
1273                                         OE = GEP->idx_end();
1274          OI != OE; ++OI)
1275       if (Instruction *I = dyn_cast<Instruction>(OI->get()))
1276         performScalarPRE(I);
1277   }
1278 
1279   // Step 2: Analyze the availability of the load
1280   AvailValInBlkVect ValuesPerBlock;
1281   UnavailBlkVect UnavailableBlocks;
1282   AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
1283 
1284   // If we have no predecessors that produce a known value for this load, exit
1285   // early.
1286   if (ValuesPerBlock.empty())
1287     return false;
1288 
1289   // Step 3: Eliminate fully redundancy.
1290   //
1291   // If all of the instructions we depend on produce a known value for this
1292   // load, then it is fully redundant and we can use PHI insertion to compute
1293   // its value.  Insert PHIs and remove the fully redundant value now.
1294   if (UnavailableBlocks.empty()) {
1295     DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
1296 
1297     // Perform PHI construction.
1298     Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
1299     LI->replaceAllUsesWith(V);
1300 
1301     if (isa<PHINode>(V))
1302       V->takeName(LI);
1303     if (Instruction *I = dyn_cast<Instruction>(V))
1304       // If instruction I has debug info, then we should not update it.
1305       // Also, if I has a null DebugLoc, then it is still potentially incorrect
1306       // to propagate LI's DebugLoc because LI may not post-dominate I.
1307       if (LI->getDebugLoc() && LI->getParent() == I->getParent())
1308         I->setDebugLoc(LI->getDebugLoc());
1309     if (V->getType()->getScalarType()->isPointerTy())
1310       MD->invalidateCachedPointerInfo(V);
1311     markInstructionForDeletion(LI);
1312     ++NumGVNLoad;
1313     reportLoadElim(LI, V, ORE);
1314     return true;
1315   }
1316 
1317   // Step 4: Eliminate partial redundancy.
1318   if (!EnablePRE || !EnableLoadPRE)
1319     return false;
1320 
1321   return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
1322 }
1323 
1324 bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
1325   assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
1326          "This function can only be called with llvm.assume intrinsic");
1327   Value *V = IntrinsicI->getArgOperand(0);
1328 
1329   if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
1330     if (Cond->isZero()) {
1331       Type *Int8Ty = Type::getInt8Ty(V->getContext());
1332       // Insert a new store to null instruction before the load to indicate that
1333       // this code is not reachable.  FIXME: We could insert unreachable
1334       // instruction directly because we can modify the CFG.
1335       new StoreInst(UndefValue::get(Int8Ty),
1336                     Constant::getNullValue(Int8Ty->getPointerTo()),
1337                     IntrinsicI);
1338     }
1339     markInstructionForDeletion(IntrinsicI);
1340     return false;
1341   }
1342 
1343   Constant *True = ConstantInt::getTrue(V->getContext());
1344   bool Changed = false;
1345 
1346   for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
1347     BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
1348 
1349     // This property is only true in dominated successors, propagateEquality
1350     // will check dominance for us.
1351     Changed |= propagateEquality(V, True, Edge, false);
1352   }
1353 
1354   // We can replace assume value with true, which covers cases like this:
1355   // call void @llvm.assume(i1 %cmp)
1356   // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
1357   ReplaceWithConstMap[V] = True;
1358 
1359   // If one of *cmp *eq operand is const, adding it to map will cover this:
1360   // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
1361   // call void @llvm.assume(i1 %cmp)
1362   // ret float %0 ; will change it to ret float 3.000000e+00
1363   if (auto *CmpI = dyn_cast<CmpInst>(V)) {
1364     if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
1365         CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
1366         (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
1367          CmpI->getFastMathFlags().noNaNs())) {
1368       Value *CmpLHS = CmpI->getOperand(0);
1369       Value *CmpRHS = CmpI->getOperand(1);
1370       if (isa<Constant>(CmpLHS))
1371         std::swap(CmpLHS, CmpRHS);
1372       auto *RHSConst = dyn_cast<Constant>(CmpRHS);
1373 
1374       // If only one operand is constant.
1375       if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
1376         ReplaceWithConstMap[CmpLHS] = RHSConst;
1377     }
1378   }
1379   return Changed;
1380 }
1381 
1382 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
1383   auto *ReplInst = dyn_cast<Instruction>(Repl);
1384   if (!ReplInst)
1385     return;
1386 
1387   // Patch the replacement so that it is not more restrictive than the value
1388   // being replaced.
1389   // Note that if 'I' is a load being replaced by some operation,
1390   // for example, by an arithmetic operation, then andIRFlags()
1391   // would just erase all math flags from the original arithmetic
1392   // operation, which is clearly not wanted and not needed.
1393   if (!isa<LoadInst>(I))
1394     ReplInst->andIRFlags(I);
1395 
1396   // FIXME: If both the original and replacement value are part of the
1397   // same control-flow region (meaning that the execution of one
1398   // guarantees the execution of the other), then we can combine the
1399   // noalias scopes here and do better than the general conservative
1400   // answer used in combineMetadata().
1401 
1402   // In general, GVN unifies expressions over different control-flow
1403   // regions, and so we need a conservative combination of the noalias
1404   // scopes.
1405   static const unsigned KnownIDs[] = {
1406       LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
1407       LLVMContext::MD_noalias,        LLVMContext::MD_range,
1408       LLVMContext::MD_fpmath,         LLVMContext::MD_invariant_load,
1409       LLVMContext::MD_invariant_group};
1410   combineMetadata(ReplInst, I, KnownIDs);
1411 }
1412 
1413 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
1414   patchReplacementInstruction(I, Repl);
1415   I->replaceAllUsesWith(Repl);
1416 }
1417 
1418 /// Attempt to eliminate a load, first by eliminating it
1419 /// locally, and then attempting non-local elimination if that fails.
1420 bool GVN::processLoad(LoadInst *L) {
1421   if (!MD)
1422     return false;
1423 
1424   // This code hasn't been audited for ordered or volatile memory access
1425   if (!L->isUnordered())
1426     return false;
1427 
1428   if (L->use_empty()) {
1429     markInstructionForDeletion(L);
1430     return true;
1431   }
1432 
1433   // ... to a pointer that has been loaded from before...
1434   MemDepResult Dep = MD->getDependency(L);
1435 
1436   // If it is defined in another block, try harder.
1437   if (Dep.isNonLocal())
1438     return processNonLocalLoad(L);
1439 
1440   // Only handle the local case below
1441   if (!Dep.isDef() && !Dep.isClobber()) {
1442     // This might be a NonFuncLocal or an Unknown
1443     DEBUG(
1444       // fast print dep, using operator<< on instruction is too slow.
1445       dbgs() << "GVN: load ";
1446       L->printAsOperand(dbgs());
1447       dbgs() << " has unknown dependence\n";
1448     );
1449     return false;
1450   }
1451 
1452   AvailableValue AV;
1453   if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
1454     Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
1455 
1456     // Replace the load!
1457     patchAndReplaceAllUsesWith(L, AvailableValue);
1458     markInstructionForDeletion(L);
1459     ++NumGVNLoad;
1460     reportLoadElim(L, AvailableValue, ORE);
1461     // Tell MDA to rexamine the reused pointer since we might have more
1462     // information after forwarding it.
1463     if (MD && AvailableValue->getType()->getScalarType()->isPointerTy())
1464       MD->invalidateCachedPointerInfo(AvailableValue);
1465     return true;
1466   }
1467 
1468   return false;
1469 }
1470 
1471 /// Return a pair the first field showing the value number of \p Exp and the
1472 /// second field showing whether it is a value number newly created.
1473 std::pair<uint32_t, bool>
1474 GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
1475   uint32_t &e = expressionNumbering[Exp];
1476   bool CreateNewValNum = !e;
1477   if (CreateNewValNum) {
1478     Expressions.push_back(Exp);
1479     if (ExprIdx.size() < nextValueNumber + 1)
1480       ExprIdx.resize(nextValueNumber * 2);
1481     e = nextValueNumber;
1482     ExprIdx[nextValueNumber++] = nextExprNumber++;
1483   }
1484   return {e, CreateNewValNum};
1485 }
1486 
1487 /// Return whether all the values related with the same \p num are
1488 /// defined in \p BB.
1489 bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
1490                                      GVN &Gvn) {
1491   LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
1492   while (Vals && Vals->BB == BB)
1493     Vals = Vals->Next;
1494   return !Vals;
1495 }
1496 
1497 /// Wrap phiTranslateImpl to provide caching functionality.
1498 uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
1499                                        const BasicBlock *PhiBlock, uint32_t Num,
1500                                        GVN &Gvn) {
1501   auto FindRes = PhiTranslateTable.find({Num, Pred});
1502   if (FindRes != PhiTranslateTable.end())
1503     return FindRes->second;
1504   uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
1505   PhiTranslateTable.insert({{Num, Pred}, NewNum});
1506   return NewNum;
1507 }
1508 
1509 /// Translate value number \p Num using phis, so that it has the values of
1510 /// the phis in BB.
1511 uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
1512                                            const BasicBlock *PhiBlock,
1513                                            uint32_t Num, GVN &Gvn) {
1514   if (PHINode *PN = NumberingPhi[Num]) {
1515     for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
1516       if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
1517         if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
1518           return TransVal;
1519     }
1520     return Num;
1521   }
1522 
1523   // If there is any value related with Num is defined in a BB other than
1524   // PhiBlock, it cannot depend on a phi in PhiBlock without going through
1525   // a backedge. We can do an early exit in that case to save compile time.
1526   if (!areAllValsInBB(Num, PhiBlock, Gvn))
1527     return Num;
1528 
1529   if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
1530     return Num;
1531   Expression Exp = Expressions[ExprIdx[Num]];
1532 
1533   for (unsigned i = 0; i < Exp.varargs.size(); i++) {
1534     // For InsertValue and ExtractValue, some varargs are index numbers
1535     // instead of value numbers. Those index numbers should not be
1536     // translated.
1537     if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
1538         (i > 0 && Exp.opcode == Instruction::ExtractValue))
1539       continue;
1540     Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
1541   }
1542 
1543   if (Exp.commutative) {
1544     assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
1545     if (Exp.varargs[0] > Exp.varargs[1]) {
1546       std::swap(Exp.varargs[0], Exp.varargs[1]);
1547       uint32_t Opcode = Exp.opcode >> 8;
1548       if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
1549         Exp.opcode = (Opcode << 8) |
1550                      CmpInst::getSwappedPredicate(
1551                          static_cast<CmpInst::Predicate>(Exp.opcode & 255));
1552     }
1553   }
1554 
1555   if (uint32_t NewNum = expressionNumbering[Exp])
1556     return NewNum;
1557   return Num;
1558 }
1559 
1560 // In order to find a leader for a given value number at a
1561 // specific basic block, we first obtain the list of all Values for that number,
1562 // and then scan the list to find one whose block dominates the block in
1563 // question.  This is fast because dominator tree queries consist of only
1564 // a few comparisons of DFS numbers.
1565 Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
1566   LeaderTableEntry Vals = LeaderTable[num];
1567   if (!Vals.Val) return nullptr;
1568 
1569   Value *Val = nullptr;
1570   if (DT->dominates(Vals.BB, BB)) {
1571     Val = Vals.Val;
1572     if (isa<Constant>(Val)) return Val;
1573   }
1574 
1575   LeaderTableEntry* Next = Vals.Next;
1576   while (Next) {
1577     if (DT->dominates(Next->BB, BB)) {
1578       if (isa<Constant>(Next->Val)) return Next->Val;
1579       if (!Val) Val = Next->Val;
1580     }
1581 
1582     Next = Next->Next;
1583   }
1584 
1585   return Val;
1586 }
1587 
1588 /// There is an edge from 'Src' to 'Dst'.  Return
1589 /// true if every path from the entry block to 'Dst' passes via this edge.  In
1590 /// particular 'Dst' must not be reachable via another edge from 'Src'.
1591 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
1592                                        DominatorTree *DT) {
1593   // While in theory it is interesting to consider the case in which Dst has
1594   // more than one predecessor, because Dst might be part of a loop which is
1595   // only reachable from Src, in practice it is pointless since at the time
1596   // GVN runs all such loops have preheaders, which means that Dst will have
1597   // been changed to have only one predecessor, namely Src.
1598   const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
1599   assert((!Pred || Pred == E.getStart()) &&
1600          "No edge between these basic blocks!");
1601   return Pred != nullptr;
1602 }
1603 
1604 
1605 void GVN::assignBlockRPONumber(Function &F) {
1606   uint32_t NextBlockNumber = 1;
1607   ReversePostOrderTraversal<Function *> RPOT(&F);
1608   for (BasicBlock *BB : RPOT)
1609     BlockRPONumber[BB] = NextBlockNumber++;
1610 }
1611 
1612 
1613 // Tries to replace instruction with const, using information from
1614 // ReplaceWithConstMap.
1615 bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
1616   bool Changed = false;
1617   for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
1618     Value *Operand = Instr->getOperand(OpNum);
1619     auto it = ReplaceWithConstMap.find(Operand);
1620     if (it != ReplaceWithConstMap.end()) {
1621       assert(!isa<Constant>(Operand) &&
1622              "Replacing constants with constants is invalid");
1623       DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second
1624                    << " in instruction " << *Instr << '\n');
1625       Instr->setOperand(OpNum, it->second);
1626       Changed = true;
1627     }
1628   }
1629   return Changed;
1630 }
1631 
1632 /// The given values are known to be equal in every block
1633 /// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
1634 /// 'RHS' everywhere in the scope.  Returns whether a change was made.
1635 /// If DominatesByEdge is false, then it means that we will propagate the RHS
1636 /// value starting from the end of Root.Start.
1637 bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
1638                             bool DominatesByEdge) {
1639   SmallVector<std::pair<Value*, Value*>, 4> Worklist;
1640   Worklist.push_back(std::make_pair(LHS, RHS));
1641   bool Changed = false;
1642   // For speed, compute a conservative fast approximation to
1643   // DT->dominates(Root, Root.getEnd());
1644   const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
1645 
1646   while (!Worklist.empty()) {
1647     std::pair<Value*, Value*> Item = Worklist.pop_back_val();
1648     LHS = Item.first; RHS = Item.second;
1649 
1650     if (LHS == RHS)
1651       continue;
1652     assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
1653 
1654     // Don't try to propagate equalities between constants.
1655     if (isa<Constant>(LHS) && isa<Constant>(RHS))
1656       continue;
1657 
1658     // Prefer a constant on the right-hand side, or an Argument if no constants.
1659     if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
1660       std::swap(LHS, RHS);
1661     assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
1662 
1663     // If there is no obvious reason to prefer the left-hand side over the
1664     // right-hand side, ensure the longest lived term is on the right-hand side,
1665     // so the shortest lived term will be replaced by the longest lived.
1666     // This tends to expose more simplifications.
1667     uint32_t LVN = VN.lookupOrAdd(LHS);
1668     if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
1669         (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
1670       // Move the 'oldest' value to the right-hand side, using the value number
1671       // as a proxy for age.
1672       uint32_t RVN = VN.lookupOrAdd(RHS);
1673       if (LVN < RVN) {
1674         std::swap(LHS, RHS);
1675         LVN = RVN;
1676       }
1677     }
1678 
1679     // If value numbering later sees that an instruction in the scope is equal
1680     // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
1681     // the invariant that instructions only occur in the leader table for their
1682     // own value number (this is used by removeFromLeaderTable), do not do this
1683     // if RHS is an instruction (if an instruction in the scope is morphed into
1684     // LHS then it will be turned into RHS by the next GVN iteration anyway, so
1685     // using the leader table is about compiling faster, not optimizing better).
1686     // The leader table only tracks basic blocks, not edges. Only add to if we
1687     // have the simple case where the edge dominates the end.
1688     if (RootDominatesEnd && !isa<Instruction>(RHS))
1689       addToLeaderTable(LVN, RHS, Root.getEnd());
1690 
1691     // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
1692     // LHS always has at least one use that is not dominated by Root, this will
1693     // never do anything if LHS has only one use.
1694     if (!LHS->hasOneUse()) {
1695       unsigned NumReplacements =
1696           DominatesByEdge
1697               ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
1698               : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart());
1699 
1700       Changed |= NumReplacements > 0;
1701       NumGVNEqProp += NumReplacements;
1702     }
1703 
1704     // Now try to deduce additional equalities from this one. For example, if
1705     // the known equality was "(A != B)" == "false" then it follows that A and B
1706     // are equal in the scope. Only boolean equalities with an explicit true or
1707     // false RHS are currently supported.
1708     if (!RHS->getType()->isIntegerTy(1))
1709       // Not a boolean equality - bail out.
1710       continue;
1711     ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
1712     if (!CI)
1713       // RHS neither 'true' nor 'false' - bail out.
1714       continue;
1715     // Whether RHS equals 'true'.  Otherwise it equals 'false'.
1716     bool isKnownTrue = CI->isAllOnesValue();
1717     bool isKnownFalse = !isKnownTrue;
1718 
1719     // If "A && B" is known true then both A and B are known true.  If "A || B"
1720     // is known false then both A and B are known false.
1721     Value *A, *B;
1722     if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
1723         (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
1724       Worklist.push_back(std::make_pair(A, RHS));
1725       Worklist.push_back(std::make_pair(B, RHS));
1726       continue;
1727     }
1728 
1729     // If we are propagating an equality like "(A == B)" == "true" then also
1730     // propagate the equality A == B.  When propagating a comparison such as
1731     // "(A >= B)" == "true", replace all instances of "A < B" with "false".
1732     if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
1733       Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1734 
1735       // If "A == B" is known true, or "A != B" is known false, then replace
1736       // A with B everywhere in the scope.
1737       if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
1738           (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
1739         Worklist.push_back(std::make_pair(Op0, Op1));
1740 
1741       // Handle the floating point versions of equality comparisons too.
1742       if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
1743           (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
1744 
1745         // Floating point -0.0 and 0.0 compare equal, so we can only
1746         // propagate values if we know that we have a constant and that
1747         // its value is non-zero.
1748 
1749         // FIXME: We should do this optimization if 'no signed zeros' is
1750         // applicable via an instruction-level fast-math-flag or some other
1751         // indicator that relaxed FP semantics are being used.
1752 
1753         if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
1754           Worklist.push_back(std::make_pair(Op0, Op1));
1755       }
1756 
1757       // If "A >= B" is known true, replace "A < B" with false everywhere.
1758       CmpInst::Predicate NotPred = Cmp->getInversePredicate();
1759       Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
1760       // Since we don't have the instruction "A < B" immediately to hand, work
1761       // out the value number that it would have and use that to find an
1762       // appropriate instruction (if any).
1763       uint32_t NextNum = VN.getNextUnusedValueNumber();
1764       uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
1765       // If the number we were assigned was brand new then there is no point in
1766       // looking for an instruction realizing it: there cannot be one!
1767       if (Num < NextNum) {
1768         Value *NotCmp = findLeader(Root.getEnd(), Num);
1769         if (NotCmp && isa<Instruction>(NotCmp)) {
1770           unsigned NumReplacements =
1771               DominatesByEdge
1772                   ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
1773                   : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
1774                                              Root.getStart());
1775           Changed |= NumReplacements > 0;
1776           NumGVNEqProp += NumReplacements;
1777         }
1778       }
1779       // Ensure that any instruction in scope that gets the "A < B" value number
1780       // is replaced with false.
1781       // The leader table only tracks basic blocks, not edges. Only add to if we
1782       // have the simple case where the edge dominates the end.
1783       if (RootDominatesEnd)
1784         addToLeaderTable(Num, NotVal, Root.getEnd());
1785 
1786       continue;
1787     }
1788   }
1789 
1790   return Changed;
1791 }
1792 
1793 /// When calculating availability, handle an instruction
1794 /// by inserting it into the appropriate sets
1795 bool GVN::processInstruction(Instruction *I) {
1796   // Ignore dbg info intrinsics.
1797   if (isa<DbgInfoIntrinsic>(I))
1798     return false;
1799 
1800   // If the instruction can be easily simplified then do so now in preference
1801   // to value numbering it.  Value numbering often exposes redundancies, for
1802   // example if it determines that %y is equal to %x then the instruction
1803   // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
1804   const DataLayout &DL = I->getModule()->getDataLayout();
1805   if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
1806     bool Changed = false;
1807     if (!I->use_empty()) {
1808       I->replaceAllUsesWith(V);
1809       Changed = true;
1810     }
1811     if (isInstructionTriviallyDead(I, TLI)) {
1812       markInstructionForDeletion(I);
1813       Changed = true;
1814     }
1815     if (Changed) {
1816       if (MD && V->getType()->getScalarType()->isPointerTy())
1817         MD->invalidateCachedPointerInfo(V);
1818       ++NumGVNSimpl;
1819       return true;
1820     }
1821   }
1822 
1823   if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
1824     if (IntrinsicI->getIntrinsicID() == Intrinsic::assume)
1825       return processAssumeIntrinsic(IntrinsicI);
1826 
1827   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1828     if (processLoad(LI))
1829       return true;
1830 
1831     unsigned Num = VN.lookupOrAdd(LI);
1832     addToLeaderTable(Num, LI, LI->getParent());
1833     return false;
1834   }
1835 
1836   // For conditional branches, we can perform simple conditional propagation on
1837   // the condition value itself.
1838   if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
1839     if (!BI->isConditional())
1840       return false;
1841 
1842     if (isa<Constant>(BI->getCondition()))
1843       return processFoldableCondBr(BI);
1844 
1845     Value *BranchCond = BI->getCondition();
1846     BasicBlock *TrueSucc = BI->getSuccessor(0);
1847     BasicBlock *FalseSucc = BI->getSuccessor(1);
1848     // Avoid multiple edges early.
1849     if (TrueSucc == FalseSucc)
1850       return false;
1851 
1852     BasicBlock *Parent = BI->getParent();
1853     bool Changed = false;
1854 
1855     Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
1856     BasicBlockEdge TrueE(Parent, TrueSucc);
1857     Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
1858 
1859     Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
1860     BasicBlockEdge FalseE(Parent, FalseSucc);
1861     Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
1862 
1863     return Changed;
1864   }
1865 
1866   // For switches, propagate the case values into the case destinations.
1867   if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
1868     Value *SwitchCond = SI->getCondition();
1869     BasicBlock *Parent = SI->getParent();
1870     bool Changed = false;
1871 
1872     // Remember how many outgoing edges there are to every successor.
1873     SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
1874     for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
1875       ++SwitchEdges[SI->getSuccessor(i)];
1876 
1877     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
1878          i != e; ++i) {
1879       BasicBlock *Dst = i->getCaseSuccessor();
1880       // If there is only a single edge, propagate the case value into it.
1881       if (SwitchEdges.lookup(Dst) == 1) {
1882         BasicBlockEdge E(Parent, Dst);
1883         Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
1884       }
1885     }
1886     return Changed;
1887   }
1888 
1889   // Instructions with void type don't return a value, so there's
1890   // no point in trying to find redundancies in them.
1891   if (I->getType()->isVoidTy())
1892     return false;
1893 
1894   uint32_t NextNum = VN.getNextUnusedValueNumber();
1895   unsigned Num = VN.lookupOrAdd(I);
1896 
1897   // Allocations are always uniquely numbered, so we can save time and memory
1898   // by fast failing them.
1899   if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
1900     addToLeaderTable(Num, I, I->getParent());
1901     return false;
1902   }
1903 
1904   // If the number we were assigned was a brand new VN, then we don't
1905   // need to do a lookup to see if the number already exists
1906   // somewhere in the domtree: it can't!
1907   if (Num >= NextNum) {
1908     addToLeaderTable(Num, I, I->getParent());
1909     return false;
1910   }
1911 
1912   // Perform fast-path value-number based elimination of values inherited from
1913   // dominators.
1914   Value *Repl = findLeader(I->getParent(), Num);
1915   if (!Repl) {
1916     // Failure, just remember this instance for future use.
1917     addToLeaderTable(Num, I, I->getParent());
1918     return false;
1919   } else if (Repl == I) {
1920     // If I was the result of a shortcut PRE, it might already be in the table
1921     // and the best replacement for itself. Nothing to do.
1922     return false;
1923   }
1924 
1925   // Remove it!
1926   patchAndReplaceAllUsesWith(I, Repl);
1927   if (MD && Repl->getType()->getScalarType()->isPointerTy())
1928     MD->invalidateCachedPointerInfo(Repl);
1929   markInstructionForDeletion(I);
1930   return true;
1931 }
1932 
1933 /// runOnFunction - This is the main transformation entry point for a function.
1934 bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
1935                   const TargetLibraryInfo &RunTLI, AAResults &RunAA,
1936                   MemoryDependenceResults *RunMD, LoopInfo *LI,
1937                   OptimizationRemarkEmitter *RunORE) {
1938   AC = &RunAC;
1939   DT = &RunDT;
1940   VN.setDomTree(DT);
1941   TLI = &RunTLI;
1942   VN.setAliasAnalysis(&RunAA);
1943   MD = RunMD;
1944   VN.setMemDep(MD);
1945   ORE = RunORE;
1946 
1947   bool Changed = false;
1948   bool ShouldContinue = true;
1949 
1950   // Merge unconditional branches, allowing PRE to catch more
1951   // optimization opportunities.
1952   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
1953     BasicBlock *BB = &*FI++;
1954 
1955     bool removedBlock = MergeBlockIntoPredecessor(BB, DT, LI, MD);
1956     if (removedBlock)
1957       ++NumGVNBlocks;
1958 
1959     Changed |= removedBlock;
1960   }
1961 
1962   unsigned Iteration = 0;
1963   while (ShouldContinue) {
1964     DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
1965     ShouldContinue = iterateOnFunction(F);
1966     Changed |= ShouldContinue;
1967     ++Iteration;
1968   }
1969 
1970   if (EnablePRE) {
1971     // Fabricate val-num for dead-code in order to suppress assertion in
1972     // performPRE().
1973     assignValNumForDeadCode();
1974     assignBlockRPONumber(F);
1975     bool PREChanged = true;
1976     while (PREChanged) {
1977       PREChanged = performPRE(F);
1978       Changed |= PREChanged;
1979     }
1980   }
1981 
1982   // FIXME: Should perform GVN again after PRE does something.  PRE can move
1983   // computations into blocks where they become fully redundant.  Note that
1984   // we can't do this until PRE's critical edge splitting updates memdep.
1985   // Actually, when this happens, we should just fully integrate PRE into GVN.
1986 
1987   cleanupGlobalSets();
1988   // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
1989   // iteration.
1990   DeadBlocks.clear();
1991 
1992   return Changed;
1993 }
1994 
1995 bool GVN::processBlock(BasicBlock *BB) {
1996   // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
1997   // (and incrementing BI before processing an instruction).
1998   assert(InstrsToErase.empty() &&
1999          "We expect InstrsToErase to be empty across iterations");
2000   if (DeadBlocks.count(BB))
2001     return false;
2002 
2003   // Clearing map before every BB because it can be used only for single BB.
2004   ReplaceWithConstMap.clear();
2005   bool ChangedFunction = false;
2006 
2007   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2008        BI != BE;) {
2009     if (!ReplaceWithConstMap.empty())
2010       ChangedFunction |= replaceOperandsWithConsts(&*BI);
2011     ChangedFunction |= processInstruction(&*BI);
2012 
2013     if (InstrsToErase.empty()) {
2014       ++BI;
2015       continue;
2016     }
2017 
2018     // If we need some instructions deleted, do it now.
2019     NumGVNInstr += InstrsToErase.size();
2020 
2021     // Avoid iterator invalidation.
2022     bool AtStart = BI == BB->begin();
2023     if (!AtStart)
2024       --BI;
2025 
2026     for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(),
2027          E = InstrsToErase.end(); I != E; ++I) {
2028       DEBUG(dbgs() << "GVN removed: " << **I << '\n');
2029       if (MD) MD->removeInstruction(*I);
2030       DEBUG(verifyRemoved(*I));
2031       (*I)->eraseFromParent();
2032     }
2033     InstrsToErase.clear();
2034 
2035     if (AtStart)
2036       BI = BB->begin();
2037     else
2038       ++BI;
2039   }
2040 
2041   return ChangedFunction;
2042 }
2043 
2044 // Instantiate an expression in a predecessor that lacked it.
2045 bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2046                                     BasicBlock *Curr, unsigned int ValNo) {
2047   // Because we are going top-down through the block, all value numbers
2048   // will be available in the predecessor by the time we need them.  Any
2049   // that weren't originally present will have been instantiated earlier
2050   // in this loop.
2051   bool success = true;
2052   for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
2053     Value *Op = Instr->getOperand(i);
2054     if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2055       continue;
2056     // This could be a newly inserted instruction, in which case, we won't
2057     // find a value number, and should give up before we hurt ourselves.
2058     // FIXME: Rewrite the infrastructure to let it easier to value number
2059     // and process newly inserted instructions.
2060     if (!VN.exists(Op)) {
2061       success = false;
2062       break;
2063     }
2064     uint32_t TValNo =
2065         VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
2066     if (Value *V = findLeader(Pred, TValNo)) {
2067       Instr->setOperand(i, V);
2068     } else {
2069       success = false;
2070       break;
2071     }
2072   }
2073 
2074   // Fail out if we encounter an operand that is not available in
2075   // the PRE predecessor.  This is typically because of loads which
2076   // are not value numbered precisely.
2077   if (!success)
2078     return false;
2079 
2080   Instr->insertBefore(Pred->getTerminator());
2081   Instr->setName(Instr->getName() + ".pre");
2082   Instr->setDebugLoc(Instr->getDebugLoc());
2083 
2084   unsigned Num = VN.lookupOrAdd(Instr);
2085   VN.add(Instr, Num);
2086 
2087   // Update the availability map to include the new instruction.
2088   addToLeaderTable(Num, Instr, Pred);
2089   return true;
2090 }
2091 
2092 bool GVN::performScalarPRE(Instruction *CurInst) {
2093   if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
2094       isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
2095       CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2096       isa<DbgInfoIntrinsic>(CurInst))
2097     return false;
2098 
2099   // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2100   // sinking the compare again, and it would force the code generator to
2101   // move the i1 from processor flags or predicate registers into a general
2102   // purpose register.
2103   if (isa<CmpInst>(CurInst))
2104     return false;
2105 
2106   // We don't currently value number ANY inline asm calls.
2107   if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
2108     if (CallI->isInlineAsm())
2109       return false;
2110 
2111   uint32_t ValNo = VN.lookup(CurInst);
2112 
2113   // Look for the predecessors for PRE opportunities.  We're
2114   // only trying to solve the basic diamond case, where
2115   // a value is computed in the successor and one predecessor,
2116   // but not the other.  We also explicitly disallow cases
2117   // where the successor is its own predecessor, because they're
2118   // more complicated to get right.
2119   unsigned NumWith = 0;
2120   unsigned NumWithout = 0;
2121   BasicBlock *PREPred = nullptr;
2122   BasicBlock *CurrentBlock = CurInst->getParent();
2123 
2124   SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
2125   for (BasicBlock *P : predecessors(CurrentBlock)) {
2126     // We're not interested in PRE where blocks with predecessors that are
2127     // not reachable.
2128     if (!DT->isReachableFromEntry(P)) {
2129       NumWithout = 2;
2130       break;
2131     }
2132     // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and
2133     // when CurInst has operand defined in CurrentBlock (so it may be defined
2134     // by phi in the loop header).
2135     if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
2136         any_of(CurInst->operands(), [&](const Use &U) {
2137           if (auto *Inst = dyn_cast<Instruction>(U.get()))
2138             return Inst->getParent() == CurrentBlock;
2139           return false;
2140         })) {
2141       NumWithout = 2;
2142       break;
2143     }
2144 
2145     uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
2146     Value *predV = findLeader(P, TValNo);
2147     if (!predV) {
2148       predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
2149       PREPred = P;
2150       ++NumWithout;
2151     } else if (predV == CurInst) {
2152       /* CurInst dominates this predecessor. */
2153       NumWithout = 2;
2154       break;
2155     } else {
2156       predMap.push_back(std::make_pair(predV, P));
2157       ++NumWith;
2158     }
2159   }
2160 
2161   // Don't do PRE when it might increase code size, i.e. when
2162   // we would need to insert instructions in more than one pred.
2163   if (NumWithout > 1 || NumWith == 0)
2164     return false;
2165 
2166   // We may have a case where all predecessors have the instruction,
2167   // and we just need to insert a phi node. Otherwise, perform
2168   // insertion.
2169   Instruction *PREInstr = nullptr;
2170 
2171   if (NumWithout != 0) {
2172     // Don't do PRE across indirect branch.
2173     if (isa<IndirectBrInst>(PREPred->getTerminator()))
2174       return false;
2175 
2176     // We can't do PRE safely on a critical edge, so instead we schedule
2177     // the edge to be split and perform the PRE the next time we iterate
2178     // on the function.
2179     unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
2180     if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
2181       toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
2182       return false;
2183     }
2184     // We need to insert somewhere, so let's give it a shot
2185     PREInstr = CurInst->clone();
2186     if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
2187       // If we failed insertion, make sure we remove the instruction.
2188       DEBUG(verifyRemoved(PREInstr));
2189       PREInstr->deleteValue();
2190       return false;
2191     }
2192   }
2193 
2194   // Either we should have filled in the PRE instruction, or we should
2195   // not have needed insertions.
2196   assert (PREInstr != nullptr || NumWithout == 0);
2197 
2198   ++NumGVNPRE;
2199 
2200   // Create a PHI to make the value available in this block.
2201   PHINode *Phi =
2202       PHINode::Create(CurInst->getType(), predMap.size(),
2203                       CurInst->getName() + ".pre-phi", &CurrentBlock->front());
2204   for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
2205     if (Value *V = predMap[i].first)
2206       Phi->addIncoming(V, predMap[i].second);
2207     else
2208       Phi->addIncoming(PREInstr, PREPred);
2209   }
2210 
2211   VN.add(Phi, ValNo);
2212   addToLeaderTable(ValNo, Phi, CurrentBlock);
2213   Phi->setDebugLoc(CurInst->getDebugLoc());
2214   CurInst->replaceAllUsesWith(Phi);
2215   if (MD && Phi->getType()->getScalarType()->isPointerTy())
2216     MD->invalidateCachedPointerInfo(Phi);
2217   VN.erase(CurInst);
2218   removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
2219 
2220   DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
2221   if (MD)
2222     MD->removeInstruction(CurInst);
2223   DEBUG(verifyRemoved(CurInst));
2224   CurInst->eraseFromParent();
2225   ++NumGVNInstr;
2226 
2227   return true;
2228 }
2229 
2230 /// Perform a purely local form of PRE that looks for diamond
2231 /// control flow patterns and attempts to perform simple PRE at the join point.
2232 bool GVN::performPRE(Function &F) {
2233   bool Changed = false;
2234   for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
2235     // Nothing to PRE in the entry block.
2236     if (CurrentBlock == &F.getEntryBlock())
2237       continue;
2238 
2239     // Don't perform PRE on an EH pad.
2240     if (CurrentBlock->isEHPad())
2241       continue;
2242 
2243     for (BasicBlock::iterator BI = CurrentBlock->begin(),
2244                               BE = CurrentBlock->end();
2245          BI != BE;) {
2246       Instruction *CurInst = &*BI++;
2247       Changed |= performScalarPRE(CurInst);
2248     }
2249   }
2250 
2251   if (splitCriticalEdges())
2252     Changed = true;
2253 
2254   return Changed;
2255 }
2256 
2257 /// Split the critical edge connecting the given two blocks, and return
2258 /// the block inserted to the critical edge.
2259 BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
2260   BasicBlock *BB =
2261       SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
2262   if (MD)
2263     MD->invalidateCachedPredecessors();
2264   return BB;
2265 }
2266 
2267 /// Split critical edges found during the previous
2268 /// iteration that may enable further optimization.
2269 bool GVN::splitCriticalEdges() {
2270   if (toSplit.empty())
2271     return false;
2272   do {
2273     std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
2274     SplitCriticalEdge(Edge.first, Edge.second,
2275                       CriticalEdgeSplittingOptions(DT));
2276   } while (!toSplit.empty());
2277   if (MD) MD->invalidateCachedPredecessors();
2278   return true;
2279 }
2280 
2281 /// Executes one iteration of GVN
2282 bool GVN::iterateOnFunction(Function &F) {
2283   cleanupGlobalSets();
2284 
2285   // Top-down walk of the dominator tree
2286   bool Changed = false;
2287   // Needed for value numbering with phi construction to work.
2288   // RPOT walks the graph in its constructor and will not be invalidated during
2289   // processBlock.
2290   ReversePostOrderTraversal<Function *> RPOT(&F);
2291   for (BasicBlock *BB : RPOT)
2292     Changed |= processBlock(BB);
2293 
2294   return Changed;
2295 }
2296 
2297 void GVN::cleanupGlobalSets() {
2298   VN.clear();
2299   LeaderTable.clear();
2300   BlockRPONumber.clear();
2301   TableAllocator.Reset();
2302 }
2303 
2304 /// Verify that the specified instruction does not occur in our
2305 /// internal data structures.
2306 void GVN::verifyRemoved(const Instruction *Inst) const {
2307   VN.verifyRemoved(Inst);
2308 
2309   // Walk through the value number scope to make sure the instruction isn't
2310   // ferreted away in it.
2311   for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
2312        I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
2313     const LeaderTableEntry *Node = &I->second;
2314     assert(Node->Val != Inst && "Inst still in value numbering scope!");
2315 
2316     while (Node->Next) {
2317       Node = Node->Next;
2318       assert(Node->Val != Inst && "Inst still in value numbering scope!");
2319     }
2320   }
2321 }
2322 
2323 /// BB is declared dead, which implied other blocks become dead as well. This
2324 /// function is to add all these blocks to "DeadBlocks". For the dead blocks'
2325 /// live successors, update their phi nodes by replacing the operands
2326 /// corresponding to dead blocks with UndefVal.
2327 void GVN::addDeadBlock(BasicBlock *BB) {
2328   SmallVector<BasicBlock *, 4> NewDead;
2329   SmallSetVector<BasicBlock *, 4> DF;
2330 
2331   NewDead.push_back(BB);
2332   while (!NewDead.empty()) {
2333     BasicBlock *D = NewDead.pop_back_val();
2334     if (DeadBlocks.count(D))
2335       continue;
2336 
2337     // All blocks dominated by D are dead.
2338     SmallVector<BasicBlock *, 8> Dom;
2339     DT->getDescendants(D, Dom);
2340     DeadBlocks.insert(Dom.begin(), Dom.end());
2341 
2342     // Figure out the dominance-frontier(D).
2343     for (BasicBlock *B : Dom) {
2344       for (BasicBlock *S : successors(B)) {
2345         if (DeadBlocks.count(S))
2346           continue;
2347 
2348         bool AllPredDead = true;
2349         for (BasicBlock *P : predecessors(S))
2350           if (!DeadBlocks.count(P)) {
2351             AllPredDead = false;
2352             break;
2353           }
2354 
2355         if (!AllPredDead) {
2356           // S could be proved dead later on. That is why we don't update phi
2357           // operands at this moment.
2358           DF.insert(S);
2359         } else {
2360           // While S is not dominated by D, it is dead by now. This could take
2361           // place if S already have a dead predecessor before D is declared
2362           // dead.
2363           NewDead.push_back(S);
2364         }
2365       }
2366     }
2367   }
2368 
2369   // For the dead blocks' live successors, update their phi nodes by replacing
2370   // the operands corresponding to dead blocks with UndefVal.
2371   for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
2372         I != E; I++) {
2373     BasicBlock *B = *I;
2374     if (DeadBlocks.count(B))
2375       continue;
2376 
2377     SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
2378     for (BasicBlock *P : Preds) {
2379       if (!DeadBlocks.count(P))
2380         continue;
2381 
2382       if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
2383         if (BasicBlock *S = splitCriticalEdges(P, B))
2384           DeadBlocks.insert(P = S);
2385       }
2386 
2387       for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
2388         PHINode &Phi = cast<PHINode>(*II);
2389         Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
2390                              UndefValue::get(Phi.getType()));
2391       }
2392     }
2393   }
2394 }
2395 
2396 // If the given branch is recognized as a foldable branch (i.e. conditional
2397 // branch with constant condition), it will perform following analyses and
2398 // transformation.
2399 //  1) If the dead out-coming edge is a critical-edge, split it. Let
2400 //     R be the target of the dead out-coming edge.
2401 //  1) Identify the set of dead blocks implied by the branch's dead outcoming
2402 //     edge. The result of this step will be {X| X is dominated by R}
2403 //  2) Identify those blocks which haves at least one dead predecessor. The
2404 //     result of this step will be dominance-frontier(R).
2405 //  3) Update the PHIs in DF(R) by replacing the operands corresponding to
2406 //     dead blocks with "UndefVal" in an hope these PHIs will optimized away.
2407 //
2408 // Return true iff *NEW* dead code are found.
2409 bool GVN::processFoldableCondBr(BranchInst *BI) {
2410   if (!BI || BI->isUnconditional())
2411     return false;
2412 
2413   // If a branch has two identical successors, we cannot declare either dead.
2414   if (BI->getSuccessor(0) == BI->getSuccessor(1))
2415     return false;
2416 
2417   ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
2418   if (!Cond)
2419     return false;
2420 
2421   BasicBlock *DeadRoot =
2422       Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
2423   if (DeadBlocks.count(DeadRoot))
2424     return false;
2425 
2426   if (!DeadRoot->getSinglePredecessor())
2427     DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
2428 
2429   addDeadBlock(DeadRoot);
2430   return true;
2431 }
2432 
2433 // performPRE() will trigger assert if it comes across an instruction without
2434 // associated val-num. As it normally has far more live instructions than dead
2435 // instructions, it makes more sense just to "fabricate" a val-number for the
2436 // dead code than checking if instruction involved is dead or not.
2437 void GVN::assignValNumForDeadCode() {
2438   for (BasicBlock *BB : DeadBlocks) {
2439     for (Instruction &Inst : *BB) {
2440       unsigned ValNum = VN.lookupOrAdd(&Inst);
2441       addToLeaderTable(ValNum, &Inst, BB);
2442     }
2443   }
2444 }
2445 
2446 class llvm::gvn::GVNLegacyPass : public FunctionPass {
2447 public:
2448   static char ID; // Pass identification, replacement for typeid
2449   explicit GVNLegacyPass(bool NoLoads = false)
2450       : FunctionPass(ID), NoLoads(NoLoads) {
2451     initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
2452   }
2453 
2454   bool runOnFunction(Function &F) override {
2455     if (skipFunction(F))
2456       return false;
2457 
2458     auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
2459 
2460     return Impl.runImpl(
2461         F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
2462         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
2463         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
2464         getAnalysis<AAResultsWrapperPass>().getAAResults(),
2465         NoLoads ? nullptr
2466                 : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
2467         LIWP ? &LIWP->getLoopInfo() : nullptr,
2468         &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
2469   }
2470 
2471   void getAnalysisUsage(AnalysisUsage &AU) const override {
2472     AU.addRequired<AssumptionCacheTracker>();
2473     AU.addRequired<DominatorTreeWrapperPass>();
2474     AU.addRequired<TargetLibraryInfoWrapperPass>();
2475     if (!NoLoads)
2476       AU.addRequired<MemoryDependenceWrapperPass>();
2477     AU.addRequired<AAResultsWrapperPass>();
2478 
2479     AU.addPreserved<DominatorTreeWrapperPass>();
2480     AU.addPreserved<GlobalsAAWrapperPass>();
2481     AU.addPreserved<TargetLibraryInfoWrapperPass>();
2482     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2483   }
2484 
2485 private:
2486   bool NoLoads;
2487   GVN Impl;
2488 };
2489 
2490 char GVNLegacyPass::ID = 0;
2491 
2492 // The public interface to this file...
2493 FunctionPass *llvm::createGVNPass(bool NoLoads) {
2494   return new GVNLegacyPass(NoLoads);
2495 }
2496 
2497 INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
2498 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2499 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2500 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2501 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2502 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2503 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2504 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
2505 INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
2506