1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs global value numbering to eliminate fully redundant 11 // instructions. It also performs simple dead load elimination. 12 // 13 // Note that this pass does the value numbering itself; it does not use the 14 // ValueNumbering analysis passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Scalar/GVN.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/DepthFirstIterator.h" 21 #include "llvm/ADT/Hashing.h" 22 #include "llvm/ADT/MapVector.h" 23 #include "llvm/ADT/PostOrderIterator.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/AssumptionCache.h" 29 #include "llvm/Analysis/CFG.h" 30 #include "llvm/Analysis/ConstantFolding.h" 31 #include "llvm/Analysis/GlobalsModRef.h" 32 #include "llvm/Analysis/InstructionSimplify.h" 33 #include "llvm/Analysis/Loads.h" 34 #include "llvm/Analysis/MemoryBuiltins.h" 35 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 36 #include "llvm/Analysis/OptimizationDiagnosticInfo.h" 37 #include "llvm/Analysis/PHITransAddr.h" 38 #include "llvm/Analysis/TargetLibraryInfo.h" 39 #include "llvm/IR/DataLayout.h" 40 #include "llvm/IR/Dominators.h" 41 #include "llvm/IR/GlobalVariable.h" 42 #include "llvm/IR/IRBuilder.h" 43 #include "llvm/IR/IntrinsicInst.h" 44 #include "llvm/IR/LLVMContext.h" 45 #include "llvm/IR/Metadata.h" 46 #include "llvm/IR/PatternMatch.h" 47 #include "llvm/Support/CommandLine.h" 48 #include "llvm/Support/Debug.h" 49 #include "llvm/Support/raw_ostream.h" 50 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 51 #include "llvm/Transforms/Utils/Local.h" 52 #include "llvm/Transforms/Utils/SSAUpdater.h" 53 #include "llvm/Transforms/Utils/VNCoercion.h" 54 55 #include <vector> 56 using namespace llvm; 57 using namespace llvm::gvn; 58 using namespace llvm::VNCoercion; 59 using namespace PatternMatch; 60 61 #define DEBUG_TYPE "gvn" 62 63 STATISTIC(NumGVNInstr, "Number of instructions deleted"); 64 STATISTIC(NumGVNLoad, "Number of loads deleted"); 65 STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); 66 STATISTIC(NumGVNBlocks, "Number of blocks merged"); 67 STATISTIC(NumGVNSimpl, "Number of instructions simplified"); 68 STATISTIC(NumGVNEqProp, "Number of equalities propagated"); 69 STATISTIC(NumPRELoad, "Number of loads PRE'd"); 70 71 static cl::opt<bool> EnablePRE("enable-pre", 72 cl::init(true), cl::Hidden); 73 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); 74 75 // Maximum allowed recursion depth. 76 static cl::opt<uint32_t> 77 MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, 78 cl::desc("Max recurse depth (default = 1000)")); 79 80 struct llvm::GVN::Expression { 81 uint32_t opcode; 82 Type *type; 83 SmallVector<uint32_t, 4> varargs; 84 85 Expression(uint32_t o = ~2U) : opcode(o) {} 86 87 bool operator==(const Expression &other) const { 88 if (opcode != other.opcode) 89 return false; 90 if (opcode == ~0U || opcode == ~1U) 91 return true; 92 if (type != other.type) 93 return false; 94 if (varargs != other.varargs) 95 return false; 96 return true; 97 } 98 99 friend hash_code hash_value(const Expression &Value) { 100 return hash_combine( 101 Value.opcode, Value.type, 102 hash_combine_range(Value.varargs.begin(), Value.varargs.end())); 103 } 104 }; 105 106 namespace llvm { 107 template <> struct DenseMapInfo<GVN::Expression> { 108 static inline GVN::Expression getEmptyKey() { return ~0U; } 109 110 static inline GVN::Expression getTombstoneKey() { return ~1U; } 111 112 static unsigned getHashValue(const GVN::Expression &e) { 113 using llvm::hash_value; 114 return static_cast<unsigned>(hash_value(e)); 115 } 116 static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) { 117 return LHS == RHS; 118 } 119 }; 120 } // End llvm namespace. 121 122 /// Represents a particular available value that we know how to materialize. 123 /// Materialization of an AvailableValue never fails. An AvailableValue is 124 /// implicitly associated with a rematerialization point which is the 125 /// location of the instruction from which it was formed. 126 struct llvm::gvn::AvailableValue { 127 enum ValType { 128 SimpleVal, // A simple offsetted value that is accessed. 129 LoadVal, // A value produced by a load. 130 MemIntrin, // A memory intrinsic which is loaded from. 131 UndefVal // A UndefValue representing a value from dead block (which 132 // is not yet physically removed from the CFG). 133 }; 134 135 /// V - The value that is live out of the block. 136 PointerIntPair<Value *, 2, ValType> Val; 137 138 /// Offset - The byte offset in Val that is interesting for the load query. 139 unsigned Offset; 140 141 static AvailableValue get(Value *V, unsigned Offset = 0) { 142 AvailableValue Res; 143 Res.Val.setPointer(V); 144 Res.Val.setInt(SimpleVal); 145 Res.Offset = Offset; 146 return Res; 147 } 148 149 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) { 150 AvailableValue Res; 151 Res.Val.setPointer(MI); 152 Res.Val.setInt(MemIntrin); 153 Res.Offset = Offset; 154 return Res; 155 } 156 157 static AvailableValue getLoad(LoadInst *LI, unsigned Offset = 0) { 158 AvailableValue Res; 159 Res.Val.setPointer(LI); 160 Res.Val.setInt(LoadVal); 161 Res.Offset = Offset; 162 return Res; 163 } 164 165 static AvailableValue getUndef() { 166 AvailableValue Res; 167 Res.Val.setPointer(nullptr); 168 Res.Val.setInt(UndefVal); 169 Res.Offset = 0; 170 return Res; 171 } 172 173 bool isSimpleValue() const { return Val.getInt() == SimpleVal; } 174 bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } 175 bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } 176 bool isUndefValue() const { return Val.getInt() == UndefVal; } 177 178 Value *getSimpleValue() const { 179 assert(isSimpleValue() && "Wrong accessor"); 180 return Val.getPointer(); 181 } 182 183 LoadInst *getCoercedLoadValue() const { 184 assert(isCoercedLoadValue() && "Wrong accessor"); 185 return cast<LoadInst>(Val.getPointer()); 186 } 187 188 MemIntrinsic *getMemIntrinValue() const { 189 assert(isMemIntrinValue() && "Wrong accessor"); 190 return cast<MemIntrinsic>(Val.getPointer()); 191 } 192 193 /// Emit code at the specified insertion point to adjust the value defined 194 /// here to the specified type. This handles various coercion cases. 195 Value *MaterializeAdjustedValue(LoadInst *LI, Instruction *InsertPt, 196 GVN &gvn) const; 197 }; 198 199 /// Represents an AvailableValue which can be rematerialized at the end of 200 /// the associated BasicBlock. 201 struct llvm::gvn::AvailableValueInBlock { 202 /// BB - The basic block in question. 203 BasicBlock *BB; 204 205 /// AV - The actual available value 206 AvailableValue AV; 207 208 static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) { 209 AvailableValueInBlock Res; 210 Res.BB = BB; 211 Res.AV = std::move(AV); 212 return Res; 213 } 214 215 static AvailableValueInBlock get(BasicBlock *BB, Value *V, 216 unsigned Offset = 0) { 217 return get(BB, AvailableValue::get(V, Offset)); 218 } 219 static AvailableValueInBlock getUndef(BasicBlock *BB) { 220 return get(BB, AvailableValue::getUndef()); 221 } 222 223 /// Emit code at the end of this block to adjust the value defined here to 224 /// the specified type. This handles various coercion cases. 225 Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const { 226 return AV.MaterializeAdjustedValue(LI, BB->getTerminator(), gvn); 227 } 228 }; 229 230 //===----------------------------------------------------------------------===// 231 // ValueTable Internal Functions 232 //===----------------------------------------------------------------------===// 233 234 GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { 235 Expression e; 236 e.type = I->getType(); 237 e.opcode = I->getOpcode(); 238 for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); 239 OI != OE; ++OI) 240 e.varargs.push_back(lookupOrAdd(*OI)); 241 if (I->isCommutative()) { 242 // Ensure that commutative instructions that only differ by a permutation 243 // of their operands get the same value number by sorting the operand value 244 // numbers. Since all commutative instructions have two operands it is more 245 // efficient to sort by hand rather than using, say, std::sort. 246 assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); 247 if (e.varargs[0] > e.varargs[1]) 248 std::swap(e.varargs[0], e.varargs[1]); 249 } 250 251 if (CmpInst *C = dyn_cast<CmpInst>(I)) { 252 // Sort the operand value numbers so x<y and y>x get the same value number. 253 CmpInst::Predicate Predicate = C->getPredicate(); 254 if (e.varargs[0] > e.varargs[1]) { 255 std::swap(e.varargs[0], e.varargs[1]); 256 Predicate = CmpInst::getSwappedPredicate(Predicate); 257 } 258 e.opcode = (C->getOpcode() << 8) | Predicate; 259 } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { 260 for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); 261 II != IE; ++II) 262 e.varargs.push_back(*II); 263 } 264 265 return e; 266 } 267 268 GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, 269 CmpInst::Predicate Predicate, 270 Value *LHS, Value *RHS) { 271 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && 272 "Not a comparison!"); 273 Expression e; 274 e.type = CmpInst::makeCmpResultType(LHS->getType()); 275 e.varargs.push_back(lookupOrAdd(LHS)); 276 e.varargs.push_back(lookupOrAdd(RHS)); 277 278 // Sort the operand value numbers so x<y and y>x get the same value number. 279 if (e.varargs[0] > e.varargs[1]) { 280 std::swap(e.varargs[0], e.varargs[1]); 281 Predicate = CmpInst::getSwappedPredicate(Predicate); 282 } 283 e.opcode = (Opcode << 8) | Predicate; 284 return e; 285 } 286 287 GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { 288 assert(EI && "Not an ExtractValueInst?"); 289 Expression e; 290 e.type = EI->getType(); 291 e.opcode = 0; 292 293 IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); 294 if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { 295 // EI might be an extract from one of our recognised intrinsics. If it 296 // is we'll synthesize a semantically equivalent expression instead on 297 // an extract value expression. 298 switch (I->getIntrinsicID()) { 299 case Intrinsic::sadd_with_overflow: 300 case Intrinsic::uadd_with_overflow: 301 e.opcode = Instruction::Add; 302 break; 303 case Intrinsic::ssub_with_overflow: 304 case Intrinsic::usub_with_overflow: 305 e.opcode = Instruction::Sub; 306 break; 307 case Intrinsic::smul_with_overflow: 308 case Intrinsic::umul_with_overflow: 309 e.opcode = Instruction::Mul; 310 break; 311 default: 312 break; 313 } 314 315 if (e.opcode != 0) { 316 // Intrinsic recognized. Grab its args to finish building the expression. 317 assert(I->getNumArgOperands() == 2 && 318 "Expect two args for recognised intrinsics."); 319 e.varargs.push_back(lookupOrAdd(I->getArgOperand(0))); 320 e.varargs.push_back(lookupOrAdd(I->getArgOperand(1))); 321 return e; 322 } 323 } 324 325 // Not a recognised intrinsic. Fall back to producing an extract value 326 // expression. 327 e.opcode = EI->getOpcode(); 328 for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); 329 OI != OE; ++OI) 330 e.varargs.push_back(lookupOrAdd(*OI)); 331 332 for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); 333 II != IE; ++II) 334 e.varargs.push_back(*II); 335 336 return e; 337 } 338 339 //===----------------------------------------------------------------------===// 340 // ValueTable External Functions 341 //===----------------------------------------------------------------------===// 342 343 GVN::ValueTable::ValueTable() : nextValueNumber(1) {} 344 GVN::ValueTable::ValueTable(const ValueTable &) = default; 345 GVN::ValueTable::ValueTable(ValueTable &&) = default; 346 GVN::ValueTable::~ValueTable() = default; 347 348 /// add - Insert a value into the table with a specified value number. 349 void GVN::ValueTable::add(Value *V, uint32_t num) { 350 valueNumbering.insert(std::make_pair(V, num)); 351 } 352 353 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { 354 if (AA->doesNotAccessMemory(C)) { 355 Expression exp = createExpr(C); 356 uint32_t &e = expressionNumbering[exp]; 357 if (!e) e = nextValueNumber++; 358 valueNumbering[C] = e; 359 return e; 360 } else if (AA->onlyReadsMemory(C)) { 361 Expression exp = createExpr(C); 362 uint32_t &e = expressionNumbering[exp]; 363 if (!e) { 364 e = nextValueNumber++; 365 valueNumbering[C] = e; 366 return e; 367 } 368 if (!MD) { 369 e = nextValueNumber++; 370 valueNumbering[C] = e; 371 return e; 372 } 373 374 MemDepResult local_dep = MD->getDependency(C); 375 376 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 377 valueNumbering[C] = nextValueNumber; 378 return nextValueNumber++; 379 } 380 381 if (local_dep.isDef()) { 382 CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); 383 384 if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) { 385 valueNumbering[C] = nextValueNumber; 386 return nextValueNumber++; 387 } 388 389 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 390 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); 391 uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i)); 392 if (c_vn != cd_vn) { 393 valueNumbering[C] = nextValueNumber; 394 return nextValueNumber++; 395 } 396 } 397 398 uint32_t v = lookupOrAdd(local_cdep); 399 valueNumbering[C] = v; 400 return v; 401 } 402 403 // Non-local case. 404 const MemoryDependenceResults::NonLocalDepInfo &deps = 405 MD->getNonLocalCallDependency(CallSite(C)); 406 // FIXME: Move the checking logic to MemDep! 407 CallInst* cdep = nullptr; 408 409 // Check to see if we have a single dominating call instruction that is 410 // identical to C. 411 for (unsigned i = 0, e = deps.size(); i != e; ++i) { 412 const NonLocalDepEntry *I = &deps[i]; 413 if (I->getResult().isNonLocal()) 414 continue; 415 416 // We don't handle non-definitions. If we already have a call, reject 417 // instruction dependencies. 418 if (!I->getResult().isDef() || cdep != nullptr) { 419 cdep = nullptr; 420 break; 421 } 422 423 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst()); 424 // FIXME: All duplicated with non-local case. 425 if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ 426 cdep = NonLocalDepCall; 427 continue; 428 } 429 430 cdep = nullptr; 431 break; 432 } 433 434 if (!cdep) { 435 valueNumbering[C] = nextValueNumber; 436 return nextValueNumber++; 437 } 438 439 if (cdep->getNumArgOperands() != C->getNumArgOperands()) { 440 valueNumbering[C] = nextValueNumber; 441 return nextValueNumber++; 442 } 443 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 444 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); 445 uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i)); 446 if (c_vn != cd_vn) { 447 valueNumbering[C] = nextValueNumber; 448 return nextValueNumber++; 449 } 450 } 451 452 uint32_t v = lookupOrAdd(cdep); 453 valueNumbering[C] = v; 454 return v; 455 456 } else { 457 valueNumbering[C] = nextValueNumber; 458 return nextValueNumber++; 459 } 460 } 461 462 /// Returns true if a value number exists for the specified value. 463 bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; } 464 465 /// lookup_or_add - Returns the value number for the specified value, assigning 466 /// it a new number if it did not have one before. 467 uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { 468 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 469 if (VI != valueNumbering.end()) 470 return VI->second; 471 472 if (!isa<Instruction>(V)) { 473 valueNumbering[V] = nextValueNumber; 474 return nextValueNumber++; 475 } 476 477 Instruction* I = cast<Instruction>(V); 478 Expression exp; 479 switch (I->getOpcode()) { 480 case Instruction::Call: 481 return lookupOrAddCall(cast<CallInst>(I)); 482 case Instruction::Add: 483 case Instruction::FAdd: 484 case Instruction::Sub: 485 case Instruction::FSub: 486 case Instruction::Mul: 487 case Instruction::FMul: 488 case Instruction::UDiv: 489 case Instruction::SDiv: 490 case Instruction::FDiv: 491 case Instruction::URem: 492 case Instruction::SRem: 493 case Instruction::FRem: 494 case Instruction::Shl: 495 case Instruction::LShr: 496 case Instruction::AShr: 497 case Instruction::And: 498 case Instruction::Or: 499 case Instruction::Xor: 500 case Instruction::ICmp: 501 case Instruction::FCmp: 502 case Instruction::Trunc: 503 case Instruction::ZExt: 504 case Instruction::SExt: 505 case Instruction::FPToUI: 506 case Instruction::FPToSI: 507 case Instruction::UIToFP: 508 case Instruction::SIToFP: 509 case Instruction::FPTrunc: 510 case Instruction::FPExt: 511 case Instruction::PtrToInt: 512 case Instruction::IntToPtr: 513 case Instruction::BitCast: 514 case Instruction::Select: 515 case Instruction::ExtractElement: 516 case Instruction::InsertElement: 517 case Instruction::ShuffleVector: 518 case Instruction::InsertValue: 519 case Instruction::GetElementPtr: 520 exp = createExpr(I); 521 break; 522 case Instruction::ExtractValue: 523 exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); 524 break; 525 default: 526 valueNumbering[V] = nextValueNumber; 527 return nextValueNumber++; 528 } 529 530 uint32_t& e = expressionNumbering[exp]; 531 if (!e) e = nextValueNumber++; 532 valueNumbering[V] = e; 533 return e; 534 } 535 536 /// Returns the value number of the specified value. Fails if 537 /// the value has not yet been numbered. 538 uint32_t GVN::ValueTable::lookup(Value *V) const { 539 DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); 540 assert(VI != valueNumbering.end() && "Value not numbered?"); 541 return VI->second; 542 } 543 544 /// Returns the value number of the given comparison, 545 /// assigning it a new number if it did not have one before. Useful when 546 /// we deduced the result of a comparison, but don't immediately have an 547 /// instruction realizing that comparison to hand. 548 uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, 549 CmpInst::Predicate Predicate, 550 Value *LHS, Value *RHS) { 551 Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); 552 uint32_t& e = expressionNumbering[exp]; 553 if (!e) e = nextValueNumber++; 554 return e; 555 } 556 557 /// Remove all entries from the ValueTable. 558 void GVN::ValueTable::clear() { 559 valueNumbering.clear(); 560 expressionNumbering.clear(); 561 nextValueNumber = 1; 562 } 563 564 /// Remove a value from the value numbering. 565 void GVN::ValueTable::erase(Value *V) { 566 valueNumbering.erase(V); 567 } 568 569 /// verifyRemoved - Verify that the value is removed from all internal data 570 /// structures. 571 void GVN::ValueTable::verifyRemoved(const Value *V) const { 572 for (DenseMap<Value*, uint32_t>::const_iterator 573 I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { 574 assert(I->first != V && "Inst still occurs in value numbering map!"); 575 } 576 } 577 578 //===----------------------------------------------------------------------===// 579 // GVN Pass 580 //===----------------------------------------------------------------------===// 581 582 PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { 583 // FIXME: The order of evaluation of these 'getResult' calls is very 584 // significant! Re-ordering these variables will cause GVN when run alone to 585 // be less effective! We should fix memdep and basic-aa to not exhibit this 586 // behavior, but until then don't change the order here. 587 auto &AC = AM.getResult<AssumptionAnalysis>(F); 588 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 589 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 590 auto &AA = AM.getResult<AAManager>(F); 591 auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F); 592 auto *LI = AM.getCachedResult<LoopAnalysis>(F); 593 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 594 bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep, LI, &ORE); 595 if (!Changed) 596 return PreservedAnalyses::all(); 597 PreservedAnalyses PA; 598 PA.preserve<DominatorTreeAnalysis>(); 599 PA.preserve<GlobalsAA>(); 600 PA.preserve<TargetLibraryAnalysis>(); 601 return PA; 602 } 603 604 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 605 LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const { 606 errs() << "{\n"; 607 for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), 608 E = d.end(); I != E; ++I) { 609 errs() << I->first << "\n"; 610 I->second->dump(); 611 } 612 errs() << "}\n"; 613 } 614 #endif 615 616 /// Return true if we can prove that the value 617 /// we're analyzing is fully available in the specified block. As we go, keep 618 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This 619 /// map is actually a tri-state map with the following values: 620 /// 0) we know the block *is not* fully available. 621 /// 1) we know the block *is* fully available. 622 /// 2) we do not know whether the block is fully available or not, but we are 623 /// currently speculating that it will be. 624 /// 3) we are speculating for this block and have used that to speculate for 625 /// other blocks. 626 static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 627 DenseMap<BasicBlock*, char> &FullyAvailableBlocks, 628 uint32_t RecurseDepth) { 629 if (RecurseDepth > MaxRecurseDepth) 630 return false; 631 632 // Optimistically assume that the block is fully available and check to see 633 // if we already know about this block in one lookup. 634 std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 635 FullyAvailableBlocks.insert(std::make_pair(BB, 2)); 636 637 // If the entry already existed for this block, return the precomputed value. 638 if (!IV.second) { 639 // If this is a speculative "available" value, mark it as being used for 640 // speculation of other blocks. 641 if (IV.first->second == 2) 642 IV.first->second = 3; 643 return IV.first->second != 0; 644 } 645 646 // Otherwise, see if it is fully available in all predecessors. 647 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 648 649 // If this block has no predecessors, it isn't live-in here. 650 if (PI == PE) 651 goto SpeculationFailure; 652 653 for (; PI != PE; ++PI) 654 // If the value isn't fully available in one of our predecessors, then it 655 // isn't fully available in this block either. Undo our previous 656 // optimistic assumption and bail out. 657 if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1)) 658 goto SpeculationFailure; 659 660 return true; 661 662 // If we get here, we found out that this is not, after 663 // all, a fully-available block. We have a problem if we speculated on this and 664 // used the speculation to mark other blocks as available. 665 SpeculationFailure: 666 char &BBVal = FullyAvailableBlocks[BB]; 667 668 // If we didn't speculate on this, just return with it set to false. 669 if (BBVal == 2) { 670 BBVal = 0; 671 return false; 672 } 673 674 // If we did speculate on this value, we could have blocks set to 1 that are 675 // incorrect. Walk the (transitive) successors of this block and mark them as 676 // 0 if set to one. 677 SmallVector<BasicBlock*, 32> BBWorklist; 678 BBWorklist.push_back(BB); 679 680 do { 681 BasicBlock *Entry = BBWorklist.pop_back_val(); 682 // Note that this sets blocks to 0 (unavailable) if they happen to not 683 // already be in FullyAvailableBlocks. This is safe. 684 char &EntryVal = FullyAvailableBlocks[Entry]; 685 if (EntryVal == 0) continue; // Already unavailable. 686 687 // Mark as unavailable. 688 EntryVal = 0; 689 690 BBWorklist.append(succ_begin(Entry), succ_end(Entry)); 691 } while (!BBWorklist.empty()); 692 693 return false; 694 } 695 696 697 698 699 /// Given a set of loads specified by ValuesPerBlock, 700 /// construct SSA form, allowing us to eliminate LI. This returns the value 701 /// that should be used at LI's definition site. 702 static Value *ConstructSSAForLoadSet(LoadInst *LI, 703 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, 704 GVN &gvn) { 705 // Check for the fully redundant, dominating load case. In this case, we can 706 // just use the dominating value directly. 707 if (ValuesPerBlock.size() == 1 && 708 gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, 709 LI->getParent())) { 710 assert(!ValuesPerBlock[0].AV.isUndefValue() && 711 "Dead BB dominate this block"); 712 return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn); 713 } 714 715 // Otherwise, we have to construct SSA form. 716 SmallVector<PHINode*, 8> NewPHIs; 717 SSAUpdater SSAUpdate(&NewPHIs); 718 SSAUpdate.Initialize(LI->getType(), LI->getName()); 719 720 for (const AvailableValueInBlock &AV : ValuesPerBlock) { 721 BasicBlock *BB = AV.BB; 722 723 if (SSAUpdate.HasValueForBlock(BB)) 724 continue; 725 726 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn)); 727 } 728 729 // Perform PHI construction. 730 return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); 731 } 732 733 Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI, 734 Instruction *InsertPt, 735 GVN &gvn) const { 736 Value *Res; 737 Type *LoadTy = LI->getType(); 738 const DataLayout &DL = LI->getModule()->getDataLayout(); 739 if (isSimpleValue()) { 740 Res = getSimpleValue(); 741 if (Res->getType() != LoadTy) { 742 Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL); 743 744 DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " 745 << *getSimpleValue() << '\n' 746 << *Res << '\n' << "\n\n\n"); 747 } 748 } else if (isCoercedLoadValue()) { 749 LoadInst *Load = getCoercedLoadValue(); 750 if (Load->getType() == LoadTy && Offset == 0) { 751 Res = Load; 752 } else { 753 Res = getLoadValueForLoad(Load, Offset, LoadTy, InsertPt, DL); 754 // We would like to use gvn.markInstructionForDeletion here, but we can't 755 // because the load is already memoized into the leader map table that GVN 756 // tracks. It is potentially possible to remove the load from the table, 757 // but then there all of the operations based on it would need to be 758 // rehashed. Just leave the dead load around. 759 gvn.getMemDep().removeInstruction(Load); 760 DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " 761 << *getCoercedLoadValue() << '\n' 762 << *Res << '\n' 763 << "\n\n\n"); 764 } 765 } else if (isMemIntrinValue()) { 766 Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, 767 InsertPt, DL); 768 DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset 769 << " " << *getMemIntrinValue() << '\n' 770 << *Res << '\n' << "\n\n\n"); 771 } else { 772 assert(isUndefValue() && "Should be UndefVal"); 773 DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); 774 return UndefValue::get(LoadTy); 775 } 776 assert(Res && "failed to materialize?"); 777 return Res; 778 } 779 780 static bool isLifetimeStart(const Instruction *Inst) { 781 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) 782 return II->getIntrinsicID() == Intrinsic::lifetime_start; 783 return false; 784 } 785 786 /// \brief Try to locate the three instruction involved in a missed 787 /// load-elimination case that is due to an intervening store. 788 static void reportMayClobberedLoad(LoadInst *LI, MemDepResult DepInfo, 789 DominatorTree *DT, 790 OptimizationRemarkEmitter *ORE) { 791 using namespace ore; 792 User *OtherAccess = nullptr; 793 794 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", LI); 795 R << "load of type " << NV("Type", LI->getType()) << " not eliminated" 796 << setExtraArgs(); 797 798 for (auto *U : LI->getPointerOperand()->users()) 799 if (U != LI && (isa<LoadInst>(U) || isa<StoreInst>(U)) && 800 DT->dominates(cast<Instruction>(U), LI)) { 801 // FIXME: for now give up if there are multiple memory accesses that 802 // dominate the load. We need further analysis to decide which one is 803 // that we're forwarding from. 804 if (OtherAccess) 805 OtherAccess = nullptr; 806 else 807 OtherAccess = U; 808 } 809 810 if (OtherAccess) 811 R << " in favor of " << NV("OtherAccess", OtherAccess); 812 813 R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst()); 814 815 ORE->emit(R); 816 } 817 818 bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo, 819 Value *Address, AvailableValue &Res) { 820 821 assert((DepInfo.isDef() || DepInfo.isClobber()) && 822 "expected a local dependence"); 823 assert(LI->isUnordered() && "rules below are incorrect for ordered access"); 824 825 const DataLayout &DL = LI->getModule()->getDataLayout(); 826 827 if (DepInfo.isClobber()) { 828 // If the dependence is to a store that writes to a superset of the bits 829 // read by the load, we can extract the bits we need for the load from the 830 // stored value. 831 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { 832 // Can't forward from non-atomic to atomic without violating memory model. 833 if (Address && LI->isAtomic() <= DepSI->isAtomic()) { 834 int Offset = 835 analyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, DL); 836 if (Offset != -1) { 837 Res = AvailableValue::get(DepSI->getValueOperand(), Offset); 838 return true; 839 } 840 } 841 } 842 843 // Check to see if we have something like this: 844 // load i32* P 845 // load i8* (P+1) 846 // if we have this, replace the later with an extraction from the former. 847 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { 848 // If this is a clobber and L is the first instruction in its block, then 849 // we have the first instruction in the entry block. 850 // Can't forward from non-atomic to atomic without violating memory model. 851 if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) { 852 int Offset = 853 analyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL); 854 855 if (Offset != -1) { 856 Res = AvailableValue::getLoad(DepLI, Offset); 857 return true; 858 } 859 } 860 } 861 862 // If the clobbering value is a memset/memcpy/memmove, see if we can 863 // forward a value on from it. 864 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { 865 if (Address && !LI->isAtomic()) { 866 int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address, 867 DepMI, DL); 868 if (Offset != -1) { 869 Res = AvailableValue::getMI(DepMI, Offset); 870 return true; 871 } 872 } 873 } 874 // Nothing known about this clobber, have to be conservative 875 DEBUG( 876 // fast print dep, using operator<< on instruction is too slow. 877 dbgs() << "GVN: load "; 878 LI->printAsOperand(dbgs()); 879 Instruction *I = DepInfo.getInst(); 880 dbgs() << " is clobbered by " << *I << '\n'; 881 ); 882 883 if (ORE->allowExtraAnalysis()) 884 reportMayClobberedLoad(LI, DepInfo, DT, ORE); 885 886 return false; 887 } 888 assert(DepInfo.isDef() && "follows from above"); 889 890 Instruction *DepInst = DepInfo.getInst(); 891 892 // Loading the allocation -> undef. 893 if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || 894 // Loading immediately after lifetime begin -> undef. 895 isLifetimeStart(DepInst)) { 896 Res = AvailableValue::get(UndefValue::get(LI->getType())); 897 return true; 898 } 899 900 // Loading from calloc (which zero initializes memory) -> zero 901 if (isCallocLikeFn(DepInst, TLI)) { 902 Res = AvailableValue::get(Constant::getNullValue(LI->getType())); 903 return true; 904 } 905 906 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { 907 // Reject loads and stores that are to the same address but are of 908 // different types if we have to. If the stored value is larger or equal to 909 // the loaded value, we can reuse it. 910 if (S->getValueOperand()->getType() != LI->getType() && 911 !canCoerceMustAliasedValueToLoad(S->getValueOperand(), 912 LI->getType(), DL)) 913 return false; 914 915 // Can't forward from non-atomic to atomic without violating memory model. 916 if (S->isAtomic() < LI->isAtomic()) 917 return false; 918 919 Res = AvailableValue::get(S->getValueOperand()); 920 return true; 921 } 922 923 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { 924 // If the types mismatch and we can't handle it, reject reuse of the load. 925 // If the stored value is larger or equal to the loaded value, we can reuse 926 // it. 927 if (LD->getType() != LI->getType() && 928 !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) 929 return false; 930 931 // Can't forward from non-atomic to atomic without violating memory model. 932 if (LD->isAtomic() < LI->isAtomic()) 933 return false; 934 935 Res = AvailableValue::getLoad(LD); 936 return true; 937 } 938 939 // Unknown def - must be conservative 940 DEBUG( 941 // fast print dep, using operator<< on instruction is too slow. 942 dbgs() << "GVN: load "; 943 LI->printAsOperand(dbgs()); 944 dbgs() << " has unknown def " << *DepInst << '\n'; 945 ); 946 return false; 947 } 948 949 void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 950 AvailValInBlkVect &ValuesPerBlock, 951 UnavailBlkVect &UnavailableBlocks) { 952 953 // Filter out useless results (non-locals, etc). Keep track of the blocks 954 // where we have a value available in repl, also keep track of whether we see 955 // dependencies that produce an unknown value for the load (such as a call 956 // that could potentially clobber the load). 957 unsigned NumDeps = Deps.size(); 958 for (unsigned i = 0, e = NumDeps; i != e; ++i) { 959 BasicBlock *DepBB = Deps[i].getBB(); 960 MemDepResult DepInfo = Deps[i].getResult(); 961 962 if (DeadBlocks.count(DepBB)) { 963 // Dead dependent mem-op disguise as a load evaluating the same value 964 // as the load in question. 965 ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); 966 continue; 967 } 968 969 if (!DepInfo.isDef() && !DepInfo.isClobber()) { 970 UnavailableBlocks.push_back(DepBB); 971 continue; 972 } 973 974 // The address being loaded in this non-local block may not be the same as 975 // the pointer operand of the load if PHI translation occurs. Make sure 976 // to consider the right address. 977 Value *Address = Deps[i].getAddress(); 978 979 AvailableValue AV; 980 if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) { 981 // subtlety: because we know this was a non-local dependency, we know 982 // it's safe to materialize anywhere between the instruction within 983 // DepInfo and the end of it's block. 984 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 985 std::move(AV))); 986 } else { 987 UnavailableBlocks.push_back(DepBB); 988 } 989 } 990 991 assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() && 992 "post condition violation"); 993 } 994 995 bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 996 UnavailBlkVect &UnavailableBlocks) { 997 // Okay, we have *some* definitions of the value. This means that the value 998 // is available in some of our (transitive) predecessors. Lets think about 999 // doing PRE of this load. This will involve inserting a new load into the 1000 // predecessor when it's not available. We could do this in general, but 1001 // prefer to not increase code size. As such, we only do this when we know 1002 // that we only have to insert *one* load (which means we're basically moving 1003 // the load, not inserting a new one). 1004 1005 SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(), 1006 UnavailableBlocks.end()); 1007 1008 // Let's find the first basic block with more than one predecessor. Walk 1009 // backwards through predecessors if needed. 1010 BasicBlock *LoadBB = LI->getParent(); 1011 BasicBlock *TmpBB = LoadBB; 1012 1013 while (TmpBB->getSinglePredecessor()) { 1014 TmpBB = TmpBB->getSinglePredecessor(); 1015 if (TmpBB == LoadBB) // Infinite (unreachable) loop. 1016 return false; 1017 if (Blockers.count(TmpBB)) 1018 return false; 1019 1020 // If any of these blocks has more than one successor (i.e. if the edge we 1021 // just traversed was critical), then there are other paths through this 1022 // block along which the load may not be anticipated. Hoisting the load 1023 // above this block would be adding the load to execution paths along 1024 // which it was not previously executed. 1025 if (TmpBB->getTerminator()->getNumSuccessors() != 1) 1026 return false; 1027 } 1028 1029 assert(TmpBB); 1030 LoadBB = TmpBB; 1031 1032 // Check to see how many predecessors have the loaded value fully 1033 // available. 1034 MapVector<BasicBlock *, Value *> PredLoads; 1035 DenseMap<BasicBlock*, char> FullyAvailableBlocks; 1036 for (const AvailableValueInBlock &AV : ValuesPerBlock) 1037 FullyAvailableBlocks[AV.BB] = true; 1038 for (BasicBlock *UnavailableBB : UnavailableBlocks) 1039 FullyAvailableBlocks[UnavailableBB] = false; 1040 1041 SmallVector<BasicBlock *, 4> CriticalEdgePred; 1042 for (BasicBlock *Pred : predecessors(LoadBB)) { 1043 // If any predecessor block is an EH pad that does not allow non-PHI 1044 // instructions before the terminator, we can't PRE the load. 1045 if (Pred->getTerminator()->isEHPad()) { 1046 DEBUG(dbgs() 1047 << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '" 1048 << Pred->getName() << "': " << *LI << '\n'); 1049 return false; 1050 } 1051 1052 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { 1053 continue; 1054 } 1055 1056 if (Pred->getTerminator()->getNumSuccessors() != 1) { 1057 if (isa<IndirectBrInst>(Pred->getTerminator())) { 1058 DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" 1059 << Pred->getName() << "': " << *LI << '\n'); 1060 return false; 1061 } 1062 1063 if (LoadBB->isEHPad()) { 1064 DEBUG(dbgs() 1065 << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '" 1066 << Pred->getName() << "': " << *LI << '\n'); 1067 return false; 1068 } 1069 1070 CriticalEdgePred.push_back(Pred); 1071 } else { 1072 // Only add the predecessors that will not be split for now. 1073 PredLoads[Pred] = nullptr; 1074 } 1075 } 1076 1077 // Decide whether PRE is profitable for this load. 1078 unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size(); 1079 assert(NumUnavailablePreds != 0 && 1080 "Fully available value should already be eliminated!"); 1081 1082 // If this load is unavailable in multiple predecessors, reject it. 1083 // FIXME: If we could restructure the CFG, we could make a common pred with 1084 // all the preds that don't have an available LI and insert a new load into 1085 // that one block. 1086 if (NumUnavailablePreds != 1) 1087 return false; 1088 1089 // Split critical edges, and update the unavailable predecessors accordingly. 1090 for (BasicBlock *OrigPred : CriticalEdgePred) { 1091 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); 1092 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); 1093 PredLoads[NewPred] = nullptr; 1094 DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" 1095 << LoadBB->getName() << '\n'); 1096 } 1097 1098 // Check if the load can safely be moved to all the unavailable predecessors. 1099 bool CanDoPRE = true; 1100 const DataLayout &DL = LI->getModule()->getDataLayout(); 1101 SmallVector<Instruction*, 8> NewInsts; 1102 for (auto &PredLoad : PredLoads) { 1103 BasicBlock *UnavailablePred = PredLoad.first; 1104 1105 // Do PHI translation to get its value in the predecessor if necessary. The 1106 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. 1107 1108 // If all preds have a single successor, then we know it is safe to insert 1109 // the load on the pred (?!?), so we can insert code to materialize the 1110 // pointer if it is not available. 1111 PHITransAddr Address(LI->getPointerOperand(), DL, AC); 1112 Value *LoadPtr = nullptr; 1113 LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, 1114 *DT, NewInsts); 1115 1116 // If we couldn't find or insert a computation of this phi translated value, 1117 // we fail PRE. 1118 if (!LoadPtr) { 1119 DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " 1120 << *LI->getPointerOperand() << "\n"); 1121 CanDoPRE = false; 1122 break; 1123 } 1124 1125 PredLoad.second = LoadPtr; 1126 } 1127 1128 if (!CanDoPRE) { 1129 while (!NewInsts.empty()) { 1130 Instruction *I = NewInsts.pop_back_val(); 1131 if (MD) MD->removeInstruction(I); 1132 I->eraseFromParent(); 1133 } 1134 // HINT: Don't revert the edge-splitting as following transformation may 1135 // also need to split these critical edges. 1136 return !CriticalEdgePred.empty(); 1137 } 1138 1139 // Okay, we can eliminate this load by inserting a reload in the predecessor 1140 // and using PHI construction to get the value in the other predecessors, do 1141 // it. 1142 DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); 1143 DEBUG(if (!NewInsts.empty()) 1144 dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " 1145 << *NewInsts.back() << '\n'); 1146 1147 // Assign value numbers to the new instructions. 1148 for (Instruction *I : NewInsts) { 1149 // Instructions that have been inserted in predecessor(s) to materialize 1150 // the load address do not retain their original debug locations. Doing 1151 // so could lead to confusing (but correct) source attributions. 1152 // FIXME: How do we retain source locations without causing poor debugging 1153 // behavior? 1154 I->setDebugLoc(DebugLoc()); 1155 1156 // FIXME: We really _ought_ to insert these value numbers into their 1157 // parent's availability map. However, in doing so, we risk getting into 1158 // ordering issues. If a block hasn't been processed yet, we would be 1159 // marking a value as AVAIL-IN, which isn't what we intend. 1160 VN.lookupOrAdd(I); 1161 } 1162 1163 for (const auto &PredLoad : PredLoads) { 1164 BasicBlock *UnavailablePred = PredLoad.first; 1165 Value *LoadPtr = PredLoad.second; 1166 1167 auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", 1168 LI->isVolatile(), LI->getAlignment(), 1169 LI->getOrdering(), LI->getSynchScope(), 1170 UnavailablePred->getTerminator()); 1171 1172 // Transfer the old load's AA tags to the new load. 1173 AAMDNodes Tags; 1174 LI->getAAMetadata(Tags); 1175 if (Tags) 1176 NewLoad->setAAMetadata(Tags); 1177 1178 if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) 1179 NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); 1180 if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group)) 1181 NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); 1182 if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) 1183 NewLoad->setMetadata(LLVMContext::MD_range, RangeMD); 1184 1185 // We do not propagate the old load's debug location, because the new 1186 // load now lives in a different BB, and we want to avoid a jumpy line 1187 // table. 1188 // FIXME: How do we retain source locations without causing poor debugging 1189 // behavior? 1190 1191 // Add the newly created load. 1192 ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, 1193 NewLoad)); 1194 MD->invalidateCachedPointerInfo(LoadPtr); 1195 DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); 1196 } 1197 1198 // Perform PHI construction. 1199 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); 1200 LI->replaceAllUsesWith(V); 1201 if (isa<PHINode>(V)) 1202 V->takeName(LI); 1203 if (Instruction *I = dyn_cast<Instruction>(V)) 1204 I->setDebugLoc(LI->getDebugLoc()); 1205 if (V->getType()->isPtrOrPtrVectorTy()) 1206 MD->invalidateCachedPointerInfo(V); 1207 markInstructionForDeletion(LI); 1208 ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) 1209 << "load eliminated by PRE"); 1210 ++NumPRELoad; 1211 return true; 1212 } 1213 1214 static void reportLoadElim(LoadInst *LI, Value *AvailableValue, 1215 OptimizationRemarkEmitter *ORE) { 1216 using namespace ore; 1217 ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadElim", LI) 1218 << "load of type " << NV("Type", LI->getType()) << " eliminated" 1219 << setExtraArgs() << " in favor of " 1220 << NV("InfavorOfValue", AvailableValue)); 1221 } 1222 1223 /// Attempt to eliminate a load whose dependencies are 1224 /// non-local by performing PHI construction. 1225 bool GVN::processNonLocalLoad(LoadInst *LI) { 1226 // non-local speculations are not allowed under asan. 1227 if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress)) 1228 return false; 1229 1230 // Step 1: Find the non-local dependencies of the load. 1231 LoadDepVect Deps; 1232 MD->getNonLocalPointerDependency(LI, Deps); 1233 1234 // If we had to process more than one hundred blocks to find the 1235 // dependencies, this load isn't worth worrying about. Optimizing 1236 // it will be too expensive. 1237 unsigned NumDeps = Deps.size(); 1238 if (NumDeps > 100) 1239 return false; 1240 1241 // If we had a phi translation failure, we'll have a single entry which is a 1242 // clobber in the current block. Reject this early. 1243 if (NumDeps == 1 && 1244 !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { 1245 DEBUG( 1246 dbgs() << "GVN: non-local load "; 1247 LI->printAsOperand(dbgs()); 1248 dbgs() << " has unknown dependencies\n"; 1249 ); 1250 return false; 1251 } 1252 1253 // If this load follows a GEP, see if we can PRE the indices before analyzing. 1254 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) { 1255 for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(), 1256 OE = GEP->idx_end(); 1257 OI != OE; ++OI) 1258 if (Instruction *I = dyn_cast<Instruction>(OI->get())) 1259 performScalarPRE(I); 1260 } 1261 1262 // Step 2: Analyze the availability of the load 1263 AvailValInBlkVect ValuesPerBlock; 1264 UnavailBlkVect UnavailableBlocks; 1265 AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks); 1266 1267 // If we have no predecessors that produce a known value for this load, exit 1268 // early. 1269 if (ValuesPerBlock.empty()) 1270 return false; 1271 1272 // Step 3: Eliminate fully redundancy. 1273 // 1274 // If all of the instructions we depend on produce a known value for this 1275 // load, then it is fully redundant and we can use PHI insertion to compute 1276 // its value. Insert PHIs and remove the fully redundant value now. 1277 if (UnavailableBlocks.empty()) { 1278 DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); 1279 1280 // Perform PHI construction. 1281 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); 1282 LI->replaceAllUsesWith(V); 1283 1284 if (isa<PHINode>(V)) 1285 V->takeName(LI); 1286 if (Instruction *I = dyn_cast<Instruction>(V)) 1287 // If instruction I has debug info, then we should not update it. 1288 // Also, if I has a null DebugLoc, then it is still potentially incorrect 1289 // to propagate LI's DebugLoc because LI may not post-dominate I. 1290 if (LI->getDebugLoc() && LI->getParent() == I->getParent()) 1291 I->setDebugLoc(LI->getDebugLoc()); 1292 if (V->getType()->isPtrOrPtrVectorTy()) 1293 MD->invalidateCachedPointerInfo(V); 1294 markInstructionForDeletion(LI); 1295 ++NumGVNLoad; 1296 reportLoadElim(LI, V, ORE); 1297 return true; 1298 } 1299 1300 // Step 4: Eliminate partial redundancy. 1301 if (!EnablePRE || !EnableLoadPRE) 1302 return false; 1303 1304 return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); 1305 } 1306 1307 bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { 1308 assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume && 1309 "This function can only be called with llvm.assume intrinsic"); 1310 Value *V = IntrinsicI->getArgOperand(0); 1311 1312 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) { 1313 if (Cond->isZero()) { 1314 Type *Int8Ty = Type::getInt8Ty(V->getContext()); 1315 // Insert a new store to null instruction before the load to indicate that 1316 // this code is not reachable. FIXME: We could insert unreachable 1317 // instruction directly because we can modify the CFG. 1318 new StoreInst(UndefValue::get(Int8Ty), 1319 Constant::getNullValue(Int8Ty->getPointerTo()), 1320 IntrinsicI); 1321 } 1322 markInstructionForDeletion(IntrinsicI); 1323 return false; 1324 } 1325 1326 Constant *True = ConstantInt::getTrue(V->getContext()); 1327 bool Changed = false; 1328 1329 for (BasicBlock *Successor : successors(IntrinsicI->getParent())) { 1330 BasicBlockEdge Edge(IntrinsicI->getParent(), Successor); 1331 1332 // This property is only true in dominated successors, propagateEquality 1333 // will check dominance for us. 1334 Changed |= propagateEquality(V, True, Edge, false); 1335 } 1336 1337 // We can replace assume value with true, which covers cases like this: 1338 // call void @llvm.assume(i1 %cmp) 1339 // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true 1340 ReplaceWithConstMap[V] = True; 1341 1342 // If one of *cmp *eq operand is const, adding it to map will cover this: 1343 // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen 1344 // call void @llvm.assume(i1 %cmp) 1345 // ret float %0 ; will change it to ret float 3.000000e+00 1346 if (auto *CmpI = dyn_cast<CmpInst>(V)) { 1347 if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ || 1348 CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ || 1349 (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ && 1350 CmpI->getFastMathFlags().noNaNs())) { 1351 Value *CmpLHS = CmpI->getOperand(0); 1352 Value *CmpRHS = CmpI->getOperand(1); 1353 if (isa<Constant>(CmpLHS)) 1354 std::swap(CmpLHS, CmpRHS); 1355 auto *RHSConst = dyn_cast<Constant>(CmpRHS); 1356 1357 // If only one operand is constant. 1358 if (RHSConst != nullptr && !isa<Constant>(CmpLHS)) 1359 ReplaceWithConstMap[CmpLHS] = RHSConst; 1360 } 1361 } 1362 return Changed; 1363 } 1364 1365 static void patchReplacementInstruction(Instruction *I, Value *Repl) { 1366 auto *ReplInst = dyn_cast<Instruction>(Repl); 1367 if (!ReplInst) 1368 return; 1369 1370 // Patch the replacement so that it is not more restrictive than the value 1371 // being replaced. 1372 // Note that if 'I' is a load being replaced by some operation, 1373 // for example, by an arithmetic operation, then andIRFlags() 1374 // would just erase all math flags from the original arithmetic 1375 // operation, which is clearly not wanted and not needed. 1376 if (!isa<LoadInst>(I)) 1377 ReplInst->andIRFlags(I); 1378 1379 // FIXME: If both the original and replacement value are part of the 1380 // same control-flow region (meaning that the execution of one 1381 // guarantees the execution of the other), then we can combine the 1382 // noalias scopes here and do better than the general conservative 1383 // answer used in combineMetadata(). 1384 1385 // In general, GVN unifies expressions over different control-flow 1386 // regions, and so we need a conservative combination of the noalias 1387 // scopes. 1388 static const unsigned KnownIDs[] = { 1389 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, 1390 LLVMContext::MD_noalias, LLVMContext::MD_range, 1391 LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, 1392 LLVMContext::MD_invariant_group}; 1393 combineMetadata(ReplInst, I, KnownIDs); 1394 } 1395 1396 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { 1397 patchReplacementInstruction(I, Repl); 1398 I->replaceAllUsesWith(Repl); 1399 } 1400 1401 /// Attempt to eliminate a load, first by eliminating it 1402 /// locally, and then attempting non-local elimination if that fails. 1403 bool GVN::processLoad(LoadInst *L) { 1404 if (!MD) 1405 return false; 1406 1407 // This code hasn't been audited for ordered or volatile memory access 1408 if (!L->isUnordered()) 1409 return false; 1410 1411 if (L->use_empty()) { 1412 markInstructionForDeletion(L); 1413 return true; 1414 } 1415 1416 // ... to a pointer that has been loaded from before... 1417 MemDepResult Dep = MD->getDependency(L); 1418 1419 // If it is defined in another block, try harder. 1420 if (Dep.isNonLocal()) 1421 return processNonLocalLoad(L); 1422 1423 // Only handle the local case below 1424 if (!Dep.isDef() && !Dep.isClobber()) { 1425 // This might be a NonFuncLocal or an Unknown 1426 DEBUG( 1427 // fast print dep, using operator<< on instruction is too slow. 1428 dbgs() << "GVN: load "; 1429 L->printAsOperand(dbgs()); 1430 dbgs() << " has unknown dependence\n"; 1431 ); 1432 return false; 1433 } 1434 1435 AvailableValue AV; 1436 if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) { 1437 Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this); 1438 1439 // Replace the load! 1440 patchAndReplaceAllUsesWith(L, AvailableValue); 1441 markInstructionForDeletion(L); 1442 ++NumGVNLoad; 1443 reportLoadElim(L, AvailableValue, ORE); 1444 // Tell MDA to rexamine the reused pointer since we might have more 1445 // information after forwarding it. 1446 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) 1447 MD->invalidateCachedPointerInfo(AvailableValue); 1448 return true; 1449 } 1450 1451 return false; 1452 } 1453 1454 // In order to find a leader for a given value number at a 1455 // specific basic block, we first obtain the list of all Values for that number, 1456 // and then scan the list to find one whose block dominates the block in 1457 // question. This is fast because dominator tree queries consist of only 1458 // a few comparisons of DFS numbers. 1459 Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { 1460 LeaderTableEntry Vals = LeaderTable[num]; 1461 if (!Vals.Val) return nullptr; 1462 1463 Value *Val = nullptr; 1464 if (DT->dominates(Vals.BB, BB)) { 1465 Val = Vals.Val; 1466 if (isa<Constant>(Val)) return Val; 1467 } 1468 1469 LeaderTableEntry* Next = Vals.Next; 1470 while (Next) { 1471 if (DT->dominates(Next->BB, BB)) { 1472 if (isa<Constant>(Next->Val)) return Next->Val; 1473 if (!Val) Val = Next->Val; 1474 } 1475 1476 Next = Next->Next; 1477 } 1478 1479 return Val; 1480 } 1481 1482 /// There is an edge from 'Src' to 'Dst'. Return 1483 /// true if every path from the entry block to 'Dst' passes via this edge. In 1484 /// particular 'Dst' must not be reachable via another edge from 'Src'. 1485 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, 1486 DominatorTree *DT) { 1487 // While in theory it is interesting to consider the case in which Dst has 1488 // more than one predecessor, because Dst might be part of a loop which is 1489 // only reachable from Src, in practice it is pointless since at the time 1490 // GVN runs all such loops have preheaders, which means that Dst will have 1491 // been changed to have only one predecessor, namely Src. 1492 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); 1493 assert((!Pred || Pred == E.getStart()) && 1494 "No edge between these basic blocks!"); 1495 return Pred != nullptr; 1496 } 1497 1498 // Tries to replace instruction with const, using information from 1499 // ReplaceWithConstMap. 1500 bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { 1501 bool Changed = false; 1502 for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { 1503 Value *Operand = Instr->getOperand(OpNum); 1504 auto it = ReplaceWithConstMap.find(Operand); 1505 if (it != ReplaceWithConstMap.end()) { 1506 assert(!isa<Constant>(Operand) && 1507 "Replacing constants with constants is invalid"); 1508 DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second 1509 << " in instruction " << *Instr << '\n'); 1510 Instr->setOperand(OpNum, it->second); 1511 Changed = true; 1512 } 1513 } 1514 return Changed; 1515 } 1516 1517 /// The given values are known to be equal in every block 1518 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with 1519 /// 'RHS' everywhere in the scope. Returns whether a change was made. 1520 /// If DominatesByEdge is false, then it means that we will propagate the RHS 1521 /// value starting from the end of Root.Start. 1522 bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, 1523 bool DominatesByEdge) { 1524 SmallVector<std::pair<Value*, Value*>, 4> Worklist; 1525 Worklist.push_back(std::make_pair(LHS, RHS)); 1526 bool Changed = false; 1527 // For speed, compute a conservative fast approximation to 1528 // DT->dominates(Root, Root.getEnd()); 1529 const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); 1530 1531 while (!Worklist.empty()) { 1532 std::pair<Value*, Value*> Item = Worklist.pop_back_val(); 1533 LHS = Item.first; RHS = Item.second; 1534 1535 if (LHS == RHS) 1536 continue; 1537 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); 1538 1539 // Don't try to propagate equalities between constants. 1540 if (isa<Constant>(LHS) && isa<Constant>(RHS)) 1541 continue; 1542 1543 // Prefer a constant on the right-hand side, or an Argument if no constants. 1544 if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS))) 1545 std::swap(LHS, RHS); 1546 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); 1547 1548 // If there is no obvious reason to prefer the left-hand side over the 1549 // right-hand side, ensure the longest lived term is on the right-hand side, 1550 // so the shortest lived term will be replaced by the longest lived. 1551 // This tends to expose more simplifications. 1552 uint32_t LVN = VN.lookupOrAdd(LHS); 1553 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || 1554 (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { 1555 // Move the 'oldest' value to the right-hand side, using the value number 1556 // as a proxy for age. 1557 uint32_t RVN = VN.lookupOrAdd(RHS); 1558 if (LVN < RVN) { 1559 std::swap(LHS, RHS); 1560 LVN = RVN; 1561 } 1562 } 1563 1564 // If value numbering later sees that an instruction in the scope is equal 1565 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve 1566 // the invariant that instructions only occur in the leader table for their 1567 // own value number (this is used by removeFromLeaderTable), do not do this 1568 // if RHS is an instruction (if an instruction in the scope is morphed into 1569 // LHS then it will be turned into RHS by the next GVN iteration anyway, so 1570 // using the leader table is about compiling faster, not optimizing better). 1571 // The leader table only tracks basic blocks, not edges. Only add to if we 1572 // have the simple case where the edge dominates the end. 1573 if (RootDominatesEnd && !isa<Instruction>(RHS)) 1574 addToLeaderTable(LVN, RHS, Root.getEnd()); 1575 1576 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As 1577 // LHS always has at least one use that is not dominated by Root, this will 1578 // never do anything if LHS has only one use. 1579 if (!LHS->hasOneUse()) { 1580 unsigned NumReplacements = 1581 DominatesByEdge 1582 ? replaceDominatedUsesWith(LHS, RHS, *DT, Root) 1583 : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart()); 1584 1585 Changed |= NumReplacements > 0; 1586 NumGVNEqProp += NumReplacements; 1587 } 1588 1589 // Now try to deduce additional equalities from this one. For example, if 1590 // the known equality was "(A != B)" == "false" then it follows that A and B 1591 // are equal in the scope. Only boolean equalities with an explicit true or 1592 // false RHS are currently supported. 1593 if (!RHS->getType()->isIntegerTy(1)) 1594 // Not a boolean equality - bail out. 1595 continue; 1596 ConstantInt *CI = dyn_cast<ConstantInt>(RHS); 1597 if (!CI) 1598 // RHS neither 'true' nor 'false' - bail out. 1599 continue; 1600 // Whether RHS equals 'true'. Otherwise it equals 'false'. 1601 bool isKnownTrue = CI->isMinusOne(); 1602 bool isKnownFalse = !isKnownTrue; 1603 1604 // If "A && B" is known true then both A and B are known true. If "A || B" 1605 // is known false then both A and B are known false. 1606 Value *A, *B; 1607 if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || 1608 (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { 1609 Worklist.push_back(std::make_pair(A, RHS)); 1610 Worklist.push_back(std::make_pair(B, RHS)); 1611 continue; 1612 } 1613 1614 // If we are propagating an equality like "(A == B)" == "true" then also 1615 // propagate the equality A == B. When propagating a comparison such as 1616 // "(A >= B)" == "true", replace all instances of "A < B" with "false". 1617 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) { 1618 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); 1619 1620 // If "A == B" is known true, or "A != B" is known false, then replace 1621 // A with B everywhere in the scope. 1622 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || 1623 (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) 1624 Worklist.push_back(std::make_pair(Op0, Op1)); 1625 1626 // Handle the floating point versions of equality comparisons too. 1627 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) || 1628 (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) { 1629 1630 // Floating point -0.0 and 0.0 compare equal, so we can only 1631 // propagate values if we know that we have a constant and that 1632 // its value is non-zero. 1633 1634 // FIXME: We should do this optimization if 'no signed zeros' is 1635 // applicable via an instruction-level fast-math-flag or some other 1636 // indicator that relaxed FP semantics are being used. 1637 1638 if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero()) 1639 Worklist.push_back(std::make_pair(Op0, Op1)); 1640 } 1641 1642 // If "A >= B" is known true, replace "A < B" with false everywhere. 1643 CmpInst::Predicate NotPred = Cmp->getInversePredicate(); 1644 Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); 1645 // Since we don't have the instruction "A < B" immediately to hand, work 1646 // out the value number that it would have and use that to find an 1647 // appropriate instruction (if any). 1648 uint32_t NextNum = VN.getNextUnusedValueNumber(); 1649 uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1); 1650 // If the number we were assigned was brand new then there is no point in 1651 // looking for an instruction realizing it: there cannot be one! 1652 if (Num < NextNum) { 1653 Value *NotCmp = findLeader(Root.getEnd(), Num); 1654 if (NotCmp && isa<Instruction>(NotCmp)) { 1655 unsigned NumReplacements = 1656 DominatesByEdge 1657 ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root) 1658 : replaceDominatedUsesWith(NotCmp, NotVal, *DT, 1659 Root.getStart()); 1660 Changed |= NumReplacements > 0; 1661 NumGVNEqProp += NumReplacements; 1662 } 1663 } 1664 // Ensure that any instruction in scope that gets the "A < B" value number 1665 // is replaced with false. 1666 // The leader table only tracks basic blocks, not edges. Only add to if we 1667 // have the simple case where the edge dominates the end. 1668 if (RootDominatesEnd) 1669 addToLeaderTable(Num, NotVal, Root.getEnd()); 1670 1671 continue; 1672 } 1673 } 1674 1675 return Changed; 1676 } 1677 1678 /// When calculating availability, handle an instruction 1679 /// by inserting it into the appropriate sets 1680 bool GVN::processInstruction(Instruction *I) { 1681 // Ignore dbg info intrinsics. 1682 if (isa<DbgInfoIntrinsic>(I)) 1683 return false; 1684 1685 // If the instruction can be easily simplified then do so now in preference 1686 // to value numbering it. Value numbering often exposes redundancies, for 1687 // example if it determines that %y is equal to %x then the instruction 1688 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 1689 const DataLayout &DL = I->getModule()->getDataLayout(); 1690 if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { 1691 bool Changed = false; 1692 if (!I->use_empty()) { 1693 I->replaceAllUsesWith(V); 1694 Changed = true; 1695 } 1696 if (isInstructionTriviallyDead(I, TLI)) { 1697 markInstructionForDeletion(I); 1698 Changed = true; 1699 } 1700 if (Changed) { 1701 if (MD && V->getType()->isPtrOrPtrVectorTy()) 1702 MD->invalidateCachedPointerInfo(V); 1703 ++NumGVNSimpl; 1704 return true; 1705 } 1706 } 1707 1708 if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I)) 1709 if (IntrinsicI->getIntrinsicID() == Intrinsic::assume) 1710 return processAssumeIntrinsic(IntrinsicI); 1711 1712 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 1713 if (processLoad(LI)) 1714 return true; 1715 1716 unsigned Num = VN.lookupOrAdd(LI); 1717 addToLeaderTable(Num, LI, LI->getParent()); 1718 return false; 1719 } 1720 1721 // For conditional branches, we can perform simple conditional propagation on 1722 // the condition value itself. 1723 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 1724 if (!BI->isConditional()) 1725 return false; 1726 1727 if (isa<Constant>(BI->getCondition())) 1728 return processFoldableCondBr(BI); 1729 1730 Value *BranchCond = BI->getCondition(); 1731 BasicBlock *TrueSucc = BI->getSuccessor(0); 1732 BasicBlock *FalseSucc = BI->getSuccessor(1); 1733 // Avoid multiple edges early. 1734 if (TrueSucc == FalseSucc) 1735 return false; 1736 1737 BasicBlock *Parent = BI->getParent(); 1738 bool Changed = false; 1739 1740 Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); 1741 BasicBlockEdge TrueE(Parent, TrueSucc); 1742 Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true); 1743 1744 Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); 1745 BasicBlockEdge FalseE(Parent, FalseSucc); 1746 Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true); 1747 1748 return Changed; 1749 } 1750 1751 // For switches, propagate the case values into the case destinations. 1752 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { 1753 Value *SwitchCond = SI->getCondition(); 1754 BasicBlock *Parent = SI->getParent(); 1755 bool Changed = false; 1756 1757 // Remember how many outgoing edges there are to every successor. 1758 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; 1759 for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) 1760 ++SwitchEdges[SI->getSuccessor(i)]; 1761 1762 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); 1763 i != e; ++i) { 1764 BasicBlock *Dst = i->getCaseSuccessor(); 1765 // If there is only a single edge, propagate the case value into it. 1766 if (SwitchEdges.lookup(Dst) == 1) { 1767 BasicBlockEdge E(Parent, Dst); 1768 Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true); 1769 } 1770 } 1771 return Changed; 1772 } 1773 1774 // Instructions with void type don't return a value, so there's 1775 // no point in trying to find redundancies in them. 1776 if (I->getType()->isVoidTy()) 1777 return false; 1778 1779 uint32_t NextNum = VN.getNextUnusedValueNumber(); 1780 unsigned Num = VN.lookupOrAdd(I); 1781 1782 // Allocations are always uniquely numbered, so we can save time and memory 1783 // by fast failing them. 1784 if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { 1785 addToLeaderTable(Num, I, I->getParent()); 1786 return false; 1787 } 1788 1789 // If the number we were assigned was a brand new VN, then we don't 1790 // need to do a lookup to see if the number already exists 1791 // somewhere in the domtree: it can't! 1792 if (Num >= NextNum) { 1793 addToLeaderTable(Num, I, I->getParent()); 1794 return false; 1795 } 1796 1797 // Perform fast-path value-number based elimination of values inherited from 1798 // dominators. 1799 Value *Repl = findLeader(I->getParent(), Num); 1800 if (!Repl) { 1801 // Failure, just remember this instance for future use. 1802 addToLeaderTable(Num, I, I->getParent()); 1803 return false; 1804 } else if (Repl == I) { 1805 // If I was the result of a shortcut PRE, it might already be in the table 1806 // and the best replacement for itself. Nothing to do. 1807 return false; 1808 } 1809 1810 // Remove it! 1811 patchAndReplaceAllUsesWith(I, Repl); 1812 if (MD && Repl->getType()->isPtrOrPtrVectorTy()) 1813 MD->invalidateCachedPointerInfo(Repl); 1814 markInstructionForDeletion(I); 1815 return true; 1816 } 1817 1818 /// runOnFunction - This is the main transformation entry point for a function. 1819 bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, 1820 const TargetLibraryInfo &RunTLI, AAResults &RunAA, 1821 MemoryDependenceResults *RunMD, LoopInfo *LI, 1822 OptimizationRemarkEmitter *RunORE) { 1823 AC = &RunAC; 1824 DT = &RunDT; 1825 VN.setDomTree(DT); 1826 TLI = &RunTLI; 1827 VN.setAliasAnalysis(&RunAA); 1828 MD = RunMD; 1829 VN.setMemDep(MD); 1830 ORE = RunORE; 1831 1832 bool Changed = false; 1833 bool ShouldContinue = true; 1834 1835 // Merge unconditional branches, allowing PRE to catch more 1836 // optimization opportunities. 1837 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 1838 BasicBlock *BB = &*FI++; 1839 1840 bool removedBlock = MergeBlockIntoPredecessor(BB, DT, LI, MD); 1841 if (removedBlock) 1842 ++NumGVNBlocks; 1843 1844 Changed |= removedBlock; 1845 } 1846 1847 unsigned Iteration = 0; 1848 while (ShouldContinue) { 1849 DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); 1850 ShouldContinue = iterateOnFunction(F); 1851 Changed |= ShouldContinue; 1852 ++Iteration; 1853 } 1854 1855 if (EnablePRE) { 1856 // Fabricate val-num for dead-code in order to suppress assertion in 1857 // performPRE(). 1858 assignValNumForDeadCode(); 1859 bool PREChanged = true; 1860 while (PREChanged) { 1861 PREChanged = performPRE(F); 1862 Changed |= PREChanged; 1863 } 1864 } 1865 1866 // FIXME: Should perform GVN again after PRE does something. PRE can move 1867 // computations into blocks where they become fully redundant. Note that 1868 // we can't do this until PRE's critical edge splitting updates memdep. 1869 // Actually, when this happens, we should just fully integrate PRE into GVN. 1870 1871 cleanupGlobalSets(); 1872 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each 1873 // iteration. 1874 DeadBlocks.clear(); 1875 1876 return Changed; 1877 } 1878 1879 bool GVN::processBlock(BasicBlock *BB) { 1880 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function 1881 // (and incrementing BI before processing an instruction). 1882 assert(InstrsToErase.empty() && 1883 "We expect InstrsToErase to be empty across iterations"); 1884 if (DeadBlocks.count(BB)) 1885 return false; 1886 1887 // Clearing map before every BB because it can be used only for single BB. 1888 ReplaceWithConstMap.clear(); 1889 bool ChangedFunction = false; 1890 1891 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 1892 BI != BE;) { 1893 if (!ReplaceWithConstMap.empty()) 1894 ChangedFunction |= replaceOperandsWithConsts(&*BI); 1895 ChangedFunction |= processInstruction(&*BI); 1896 1897 if (InstrsToErase.empty()) { 1898 ++BI; 1899 continue; 1900 } 1901 1902 // If we need some instructions deleted, do it now. 1903 NumGVNInstr += InstrsToErase.size(); 1904 1905 // Avoid iterator invalidation. 1906 bool AtStart = BI == BB->begin(); 1907 if (!AtStart) 1908 --BI; 1909 1910 for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(), 1911 E = InstrsToErase.end(); I != E; ++I) { 1912 DEBUG(dbgs() << "GVN removed: " << **I << '\n'); 1913 if (MD) MD->removeInstruction(*I); 1914 DEBUG(verifyRemoved(*I)); 1915 (*I)->eraseFromParent(); 1916 } 1917 InstrsToErase.clear(); 1918 1919 if (AtStart) 1920 BI = BB->begin(); 1921 else 1922 ++BI; 1923 } 1924 1925 return ChangedFunction; 1926 } 1927 1928 // Instantiate an expression in a predecessor that lacked it. 1929 bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, 1930 unsigned int ValNo) { 1931 // Because we are going top-down through the block, all value numbers 1932 // will be available in the predecessor by the time we need them. Any 1933 // that weren't originally present will have been instantiated earlier 1934 // in this loop. 1935 bool success = true; 1936 for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) { 1937 Value *Op = Instr->getOperand(i); 1938 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 1939 continue; 1940 // This could be a newly inserted instruction, in which case, we won't 1941 // find a value number, and should give up before we hurt ourselves. 1942 // FIXME: Rewrite the infrastructure to let it easier to value number 1943 // and process newly inserted instructions. 1944 if (!VN.exists(Op)) { 1945 success = false; 1946 break; 1947 } 1948 if (Value *V = findLeader(Pred, VN.lookup(Op))) { 1949 Instr->setOperand(i, V); 1950 } else { 1951 success = false; 1952 break; 1953 } 1954 } 1955 1956 // Fail out if we encounter an operand that is not available in 1957 // the PRE predecessor. This is typically because of loads which 1958 // are not value numbered precisely. 1959 if (!success) 1960 return false; 1961 1962 Instr->insertBefore(Pred->getTerminator()); 1963 Instr->setName(Instr->getName() + ".pre"); 1964 Instr->setDebugLoc(Instr->getDebugLoc()); 1965 VN.add(Instr, ValNo); 1966 1967 // Update the availability map to include the new instruction. 1968 addToLeaderTable(ValNo, Instr, Pred); 1969 return true; 1970 } 1971 1972 bool GVN::performScalarPRE(Instruction *CurInst) { 1973 if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) || 1974 isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || 1975 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 1976 isa<DbgInfoIntrinsic>(CurInst)) 1977 return false; 1978 1979 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from 1980 // sinking the compare again, and it would force the code generator to 1981 // move the i1 from processor flags or predicate registers into a general 1982 // purpose register. 1983 if (isa<CmpInst>(CurInst)) 1984 return false; 1985 1986 // We don't currently value number ANY inline asm calls. 1987 if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) 1988 if (CallI->isInlineAsm()) 1989 return false; 1990 1991 uint32_t ValNo = VN.lookup(CurInst); 1992 1993 // Look for the predecessors for PRE opportunities. We're 1994 // only trying to solve the basic diamond case, where 1995 // a value is computed in the successor and one predecessor, 1996 // but not the other. We also explicitly disallow cases 1997 // where the successor is its own predecessor, because they're 1998 // more complicated to get right. 1999 unsigned NumWith = 0; 2000 unsigned NumWithout = 0; 2001 BasicBlock *PREPred = nullptr; 2002 BasicBlock *CurrentBlock = CurInst->getParent(); 2003 2004 SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap; 2005 for (BasicBlock *P : predecessors(CurrentBlock)) { 2006 // We're not interested in PRE where the block is its 2007 // own predecessor, or in blocks with predecessors 2008 // that are not reachable. 2009 if (P == CurrentBlock) { 2010 NumWithout = 2; 2011 break; 2012 } else if (!DT->isReachableFromEntry(P)) { 2013 NumWithout = 2; 2014 break; 2015 } 2016 2017 Value *predV = findLeader(P, ValNo); 2018 if (!predV) { 2019 predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); 2020 PREPred = P; 2021 ++NumWithout; 2022 } else if (predV == CurInst) { 2023 /* CurInst dominates this predecessor. */ 2024 NumWithout = 2; 2025 break; 2026 } else { 2027 predMap.push_back(std::make_pair(predV, P)); 2028 ++NumWith; 2029 } 2030 } 2031 2032 // Don't do PRE when it might increase code size, i.e. when 2033 // we would need to insert instructions in more than one pred. 2034 if (NumWithout > 1 || NumWith == 0) 2035 return false; 2036 2037 // We may have a case where all predecessors have the instruction, 2038 // and we just need to insert a phi node. Otherwise, perform 2039 // insertion. 2040 Instruction *PREInstr = nullptr; 2041 2042 if (NumWithout != 0) { 2043 // Don't do PRE across indirect branch. 2044 if (isa<IndirectBrInst>(PREPred->getTerminator())) 2045 return false; 2046 2047 // We can't do PRE safely on a critical edge, so instead we schedule 2048 // the edge to be split and perform the PRE the next time we iterate 2049 // on the function. 2050 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); 2051 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { 2052 toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); 2053 return false; 2054 } 2055 // We need to insert somewhere, so let's give it a shot 2056 PREInstr = CurInst->clone(); 2057 if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { 2058 // If we failed insertion, make sure we remove the instruction. 2059 DEBUG(verifyRemoved(PREInstr)); 2060 PREInstr->deleteValue(); 2061 return false; 2062 } 2063 } 2064 2065 // Either we should have filled in the PRE instruction, or we should 2066 // not have needed insertions. 2067 assert (PREInstr != nullptr || NumWithout == 0); 2068 2069 ++NumGVNPRE; 2070 2071 // Create a PHI to make the value available in this block. 2072 PHINode *Phi = 2073 PHINode::Create(CurInst->getType(), predMap.size(), 2074 CurInst->getName() + ".pre-phi", &CurrentBlock->front()); 2075 for (unsigned i = 0, e = predMap.size(); i != e; ++i) { 2076 if (Value *V = predMap[i].first) 2077 Phi->addIncoming(V, predMap[i].second); 2078 else 2079 Phi->addIncoming(PREInstr, PREPred); 2080 } 2081 2082 VN.add(Phi, ValNo); 2083 addToLeaderTable(ValNo, Phi, CurrentBlock); 2084 Phi->setDebugLoc(CurInst->getDebugLoc()); 2085 CurInst->replaceAllUsesWith(Phi); 2086 if (MD && Phi->getType()->isPtrOrPtrVectorTy()) 2087 MD->invalidateCachedPointerInfo(Phi); 2088 VN.erase(CurInst); 2089 removeFromLeaderTable(ValNo, CurInst, CurrentBlock); 2090 2091 DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); 2092 if (MD) 2093 MD->removeInstruction(CurInst); 2094 DEBUG(verifyRemoved(CurInst)); 2095 CurInst->eraseFromParent(); 2096 ++NumGVNInstr; 2097 2098 return true; 2099 } 2100 2101 /// Perform a purely local form of PRE that looks for diamond 2102 /// control flow patterns and attempts to perform simple PRE at the join point. 2103 bool GVN::performPRE(Function &F) { 2104 bool Changed = false; 2105 for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { 2106 // Nothing to PRE in the entry block. 2107 if (CurrentBlock == &F.getEntryBlock()) 2108 continue; 2109 2110 // Don't perform PRE on an EH pad. 2111 if (CurrentBlock->isEHPad()) 2112 continue; 2113 2114 for (BasicBlock::iterator BI = CurrentBlock->begin(), 2115 BE = CurrentBlock->end(); 2116 BI != BE;) { 2117 Instruction *CurInst = &*BI++; 2118 Changed |= performScalarPRE(CurInst); 2119 } 2120 } 2121 2122 if (splitCriticalEdges()) 2123 Changed = true; 2124 2125 return Changed; 2126 } 2127 2128 /// Split the critical edge connecting the given two blocks, and return 2129 /// the block inserted to the critical edge. 2130 BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { 2131 BasicBlock *BB = 2132 SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT)); 2133 if (MD) 2134 MD->invalidateCachedPredecessors(); 2135 return BB; 2136 } 2137 2138 /// Split critical edges found during the previous 2139 /// iteration that may enable further optimization. 2140 bool GVN::splitCriticalEdges() { 2141 if (toSplit.empty()) 2142 return false; 2143 do { 2144 std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); 2145 SplitCriticalEdge(Edge.first, Edge.second, 2146 CriticalEdgeSplittingOptions(DT)); 2147 } while (!toSplit.empty()); 2148 if (MD) MD->invalidateCachedPredecessors(); 2149 return true; 2150 } 2151 2152 /// Executes one iteration of GVN 2153 bool GVN::iterateOnFunction(Function &F) { 2154 cleanupGlobalSets(); 2155 2156 // Top-down walk of the dominator tree 2157 bool Changed = false; 2158 // Needed for value numbering with phi construction to work. 2159 // RPOT walks the graph in its constructor and will not be invalidated during 2160 // processBlock. 2161 ReversePostOrderTraversal<Function *> RPOT(&F); 2162 for (BasicBlock *BB : RPOT) 2163 Changed |= processBlock(BB); 2164 2165 return Changed; 2166 } 2167 2168 void GVN::cleanupGlobalSets() { 2169 VN.clear(); 2170 LeaderTable.clear(); 2171 TableAllocator.Reset(); 2172 } 2173 2174 /// Verify that the specified instruction does not occur in our 2175 /// internal data structures. 2176 void GVN::verifyRemoved(const Instruction *Inst) const { 2177 VN.verifyRemoved(Inst); 2178 2179 // Walk through the value number scope to make sure the instruction isn't 2180 // ferreted away in it. 2181 for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator 2182 I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { 2183 const LeaderTableEntry *Node = &I->second; 2184 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2185 2186 while (Node->Next) { 2187 Node = Node->Next; 2188 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2189 } 2190 } 2191 } 2192 2193 /// BB is declared dead, which implied other blocks become dead as well. This 2194 /// function is to add all these blocks to "DeadBlocks". For the dead blocks' 2195 /// live successors, update their phi nodes by replacing the operands 2196 /// corresponding to dead blocks with UndefVal. 2197 void GVN::addDeadBlock(BasicBlock *BB) { 2198 SmallVector<BasicBlock *, 4> NewDead; 2199 SmallSetVector<BasicBlock *, 4> DF; 2200 2201 NewDead.push_back(BB); 2202 while (!NewDead.empty()) { 2203 BasicBlock *D = NewDead.pop_back_val(); 2204 if (DeadBlocks.count(D)) 2205 continue; 2206 2207 // All blocks dominated by D are dead. 2208 SmallVector<BasicBlock *, 8> Dom; 2209 DT->getDescendants(D, Dom); 2210 DeadBlocks.insert(Dom.begin(), Dom.end()); 2211 2212 // Figure out the dominance-frontier(D). 2213 for (BasicBlock *B : Dom) { 2214 for (BasicBlock *S : successors(B)) { 2215 if (DeadBlocks.count(S)) 2216 continue; 2217 2218 bool AllPredDead = true; 2219 for (BasicBlock *P : predecessors(S)) 2220 if (!DeadBlocks.count(P)) { 2221 AllPredDead = false; 2222 break; 2223 } 2224 2225 if (!AllPredDead) { 2226 // S could be proved dead later on. That is why we don't update phi 2227 // operands at this moment. 2228 DF.insert(S); 2229 } else { 2230 // While S is not dominated by D, it is dead by now. This could take 2231 // place if S already have a dead predecessor before D is declared 2232 // dead. 2233 NewDead.push_back(S); 2234 } 2235 } 2236 } 2237 } 2238 2239 // For the dead blocks' live successors, update their phi nodes by replacing 2240 // the operands corresponding to dead blocks with UndefVal. 2241 for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end(); 2242 I != E; I++) { 2243 BasicBlock *B = *I; 2244 if (DeadBlocks.count(B)) 2245 continue; 2246 2247 SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B)); 2248 for (BasicBlock *P : Preds) { 2249 if (!DeadBlocks.count(P)) 2250 continue; 2251 2252 if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) { 2253 if (BasicBlock *S = splitCriticalEdges(P, B)) 2254 DeadBlocks.insert(P = S); 2255 } 2256 2257 for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) { 2258 PHINode &Phi = cast<PHINode>(*II); 2259 Phi.setIncomingValue(Phi.getBasicBlockIndex(P), 2260 UndefValue::get(Phi.getType())); 2261 } 2262 } 2263 } 2264 } 2265 2266 // If the given branch is recognized as a foldable branch (i.e. conditional 2267 // branch with constant condition), it will perform following analyses and 2268 // transformation. 2269 // 1) If the dead out-coming edge is a critical-edge, split it. Let 2270 // R be the target of the dead out-coming edge. 2271 // 1) Identify the set of dead blocks implied by the branch's dead outcoming 2272 // edge. The result of this step will be {X| X is dominated by R} 2273 // 2) Identify those blocks which haves at least one dead predecessor. The 2274 // result of this step will be dominance-frontier(R). 2275 // 3) Update the PHIs in DF(R) by replacing the operands corresponding to 2276 // dead blocks with "UndefVal" in an hope these PHIs will optimized away. 2277 // 2278 // Return true iff *NEW* dead code are found. 2279 bool GVN::processFoldableCondBr(BranchInst *BI) { 2280 if (!BI || BI->isUnconditional()) 2281 return false; 2282 2283 // If a branch has two identical successors, we cannot declare either dead. 2284 if (BI->getSuccessor(0) == BI->getSuccessor(1)) 2285 return false; 2286 2287 ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); 2288 if (!Cond) 2289 return false; 2290 2291 BasicBlock *DeadRoot = 2292 Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0); 2293 if (DeadBlocks.count(DeadRoot)) 2294 return false; 2295 2296 if (!DeadRoot->getSinglePredecessor()) 2297 DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); 2298 2299 addDeadBlock(DeadRoot); 2300 return true; 2301 } 2302 2303 // performPRE() will trigger assert if it comes across an instruction without 2304 // associated val-num. As it normally has far more live instructions than dead 2305 // instructions, it makes more sense just to "fabricate" a val-number for the 2306 // dead code than checking if instruction involved is dead or not. 2307 void GVN::assignValNumForDeadCode() { 2308 for (BasicBlock *BB : DeadBlocks) { 2309 for (Instruction &Inst : *BB) { 2310 unsigned ValNum = VN.lookupOrAdd(&Inst); 2311 addToLeaderTable(ValNum, &Inst, BB); 2312 } 2313 } 2314 } 2315 2316 class llvm::gvn::GVNLegacyPass : public FunctionPass { 2317 public: 2318 static char ID; // Pass identification, replacement for typeid 2319 explicit GVNLegacyPass(bool NoLoads = false) 2320 : FunctionPass(ID), NoLoads(NoLoads) { 2321 initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry()); 2322 } 2323 2324 bool runOnFunction(Function &F) override { 2325 if (skipFunction(F)) 2326 return false; 2327 2328 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); 2329 2330 return Impl.runImpl( 2331 F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), 2332 getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 2333 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), 2334 getAnalysis<AAResultsWrapperPass>().getAAResults(), 2335 NoLoads ? nullptr 2336 : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(), 2337 LIWP ? &LIWP->getLoopInfo() : nullptr, 2338 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE()); 2339 } 2340 2341 void getAnalysisUsage(AnalysisUsage &AU) const override { 2342 AU.addRequired<AssumptionCacheTracker>(); 2343 AU.addRequired<DominatorTreeWrapperPass>(); 2344 AU.addRequired<TargetLibraryInfoWrapperPass>(); 2345 if (!NoLoads) 2346 AU.addRequired<MemoryDependenceWrapperPass>(); 2347 AU.addRequired<AAResultsWrapperPass>(); 2348 2349 AU.addPreserved<DominatorTreeWrapperPass>(); 2350 AU.addPreserved<GlobalsAAWrapperPass>(); 2351 AU.addPreserved<TargetLibraryInfoWrapperPass>(); 2352 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 2353 } 2354 2355 private: 2356 bool NoLoads; 2357 GVN Impl; 2358 }; 2359 2360 char GVNLegacyPass::ID = 0; 2361 2362 // The public interface to this file... 2363 FunctionPass *llvm::createGVNPass(bool NoLoads) { 2364 return new GVNLegacyPass(NoLoads); 2365 } 2366 2367 INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) 2368 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 2369 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 2370 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2371 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 2372 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 2373 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 2374 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 2375 INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) 2376