1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs global value numbering to eliminate fully redundant 11 // instructions. It also performs simple dead load elimination. 12 // 13 // Note that this pass does the value numbering itself; it does not use the 14 // ValueNumbering analysis passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Scalar/GVN.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/DepthFirstIterator.h" 21 #include "llvm/ADT/Hashing.h" 22 #include "llvm/ADT/MapVector.h" 23 #include "llvm/ADT/PostOrderIterator.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/AssumptionCache.h" 29 #include "llvm/Analysis/CFG.h" 30 #include "llvm/Analysis/ConstantFolding.h" 31 #include "llvm/Analysis/GlobalsModRef.h" 32 #include "llvm/Analysis/InstructionSimplify.h" 33 #include "llvm/Analysis/Loads.h" 34 #include "llvm/Analysis/MemoryBuiltins.h" 35 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 36 #include "llvm/Analysis/OptimizationDiagnosticInfo.h" 37 #include "llvm/Analysis/PHITransAddr.h" 38 #include "llvm/Analysis/TargetLibraryInfo.h" 39 #include "llvm/IR/DataLayout.h" 40 #include "llvm/IR/Dominators.h" 41 #include "llvm/IR/GlobalVariable.h" 42 #include "llvm/IR/IRBuilder.h" 43 #include "llvm/IR/IntrinsicInst.h" 44 #include "llvm/IR/LLVMContext.h" 45 #include "llvm/IR/Metadata.h" 46 #include "llvm/IR/PatternMatch.h" 47 #include "llvm/Support/CommandLine.h" 48 #include "llvm/Support/Debug.h" 49 #include "llvm/Support/raw_ostream.h" 50 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 51 #include "llvm/Transforms/Utils/Local.h" 52 #include "llvm/Transforms/Utils/SSAUpdater.h" 53 #include "llvm/Transforms/Utils/VNCoercion.h" 54 55 #include <vector> 56 using namespace llvm; 57 using namespace llvm::gvn; 58 using namespace llvm::VNCoercion; 59 using namespace PatternMatch; 60 61 #define DEBUG_TYPE "gvn" 62 63 STATISTIC(NumGVNInstr, "Number of instructions deleted"); 64 STATISTIC(NumGVNLoad, "Number of loads deleted"); 65 STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); 66 STATISTIC(NumGVNBlocks, "Number of blocks merged"); 67 STATISTIC(NumGVNSimpl, "Number of instructions simplified"); 68 STATISTIC(NumGVNEqProp, "Number of equalities propagated"); 69 STATISTIC(NumPRELoad, "Number of loads PRE'd"); 70 71 static cl::opt<bool> EnablePRE("enable-pre", 72 cl::init(true), cl::Hidden); 73 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); 74 75 // Maximum allowed recursion depth. 76 static cl::opt<uint32_t> 77 MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, 78 cl::desc("Max recurse depth (default = 1000)")); 79 80 struct llvm::GVN::Expression { 81 uint32_t opcode; 82 Type *type; 83 SmallVector<uint32_t, 4> varargs; 84 85 Expression(uint32_t o = ~2U) : opcode(o) {} 86 87 bool operator==(const Expression &other) const { 88 if (opcode != other.opcode) 89 return false; 90 if (opcode == ~0U || opcode == ~1U) 91 return true; 92 if (type != other.type) 93 return false; 94 if (varargs != other.varargs) 95 return false; 96 return true; 97 } 98 99 friend hash_code hash_value(const Expression &Value) { 100 return hash_combine( 101 Value.opcode, Value.type, 102 hash_combine_range(Value.varargs.begin(), Value.varargs.end())); 103 } 104 }; 105 106 namespace llvm { 107 template <> struct DenseMapInfo<GVN::Expression> { 108 static inline GVN::Expression getEmptyKey() { return ~0U; } 109 110 static inline GVN::Expression getTombstoneKey() { return ~1U; } 111 112 static unsigned getHashValue(const GVN::Expression &e) { 113 using llvm::hash_value; 114 return static_cast<unsigned>(hash_value(e)); 115 } 116 static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) { 117 return LHS == RHS; 118 } 119 }; 120 } // End llvm namespace. 121 122 /// Represents a particular available value that we know how to materialize. 123 /// Materialization of an AvailableValue never fails. An AvailableValue is 124 /// implicitly associated with a rematerialization point which is the 125 /// location of the instruction from which it was formed. 126 struct llvm::gvn::AvailableValue { 127 enum ValType { 128 SimpleVal, // A simple offsetted value that is accessed. 129 LoadVal, // A value produced by a load. 130 MemIntrin, // A memory intrinsic which is loaded from. 131 UndefVal // A UndefValue representing a value from dead block (which 132 // is not yet physically removed from the CFG). 133 }; 134 135 /// V - The value that is live out of the block. 136 PointerIntPair<Value *, 2, ValType> Val; 137 138 /// Offset - The byte offset in Val that is interesting for the load query. 139 unsigned Offset; 140 141 static AvailableValue get(Value *V, unsigned Offset = 0) { 142 AvailableValue Res; 143 Res.Val.setPointer(V); 144 Res.Val.setInt(SimpleVal); 145 Res.Offset = Offset; 146 return Res; 147 } 148 149 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) { 150 AvailableValue Res; 151 Res.Val.setPointer(MI); 152 Res.Val.setInt(MemIntrin); 153 Res.Offset = Offset; 154 return Res; 155 } 156 157 static AvailableValue getLoad(LoadInst *LI, unsigned Offset = 0) { 158 AvailableValue Res; 159 Res.Val.setPointer(LI); 160 Res.Val.setInt(LoadVal); 161 Res.Offset = Offset; 162 return Res; 163 } 164 165 static AvailableValue getUndef() { 166 AvailableValue Res; 167 Res.Val.setPointer(nullptr); 168 Res.Val.setInt(UndefVal); 169 Res.Offset = 0; 170 return Res; 171 } 172 173 bool isSimpleValue() const { return Val.getInt() == SimpleVal; } 174 bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } 175 bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } 176 bool isUndefValue() const { return Val.getInt() == UndefVal; } 177 178 Value *getSimpleValue() const { 179 assert(isSimpleValue() && "Wrong accessor"); 180 return Val.getPointer(); 181 } 182 183 LoadInst *getCoercedLoadValue() const { 184 assert(isCoercedLoadValue() && "Wrong accessor"); 185 return cast<LoadInst>(Val.getPointer()); 186 } 187 188 MemIntrinsic *getMemIntrinValue() const { 189 assert(isMemIntrinValue() && "Wrong accessor"); 190 return cast<MemIntrinsic>(Val.getPointer()); 191 } 192 193 /// Emit code at the specified insertion point to adjust the value defined 194 /// here to the specified type. This handles various coercion cases. 195 Value *MaterializeAdjustedValue(LoadInst *LI, Instruction *InsertPt, 196 GVN &gvn) const; 197 }; 198 199 /// Represents an AvailableValue which can be rematerialized at the end of 200 /// the associated BasicBlock. 201 struct llvm::gvn::AvailableValueInBlock { 202 /// BB - The basic block in question. 203 BasicBlock *BB; 204 205 /// AV - The actual available value 206 AvailableValue AV; 207 208 static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) { 209 AvailableValueInBlock Res; 210 Res.BB = BB; 211 Res.AV = std::move(AV); 212 return Res; 213 } 214 215 static AvailableValueInBlock get(BasicBlock *BB, Value *V, 216 unsigned Offset = 0) { 217 return get(BB, AvailableValue::get(V, Offset)); 218 } 219 static AvailableValueInBlock getUndef(BasicBlock *BB) { 220 return get(BB, AvailableValue::getUndef()); 221 } 222 223 /// Emit code at the end of this block to adjust the value defined here to 224 /// the specified type. This handles various coercion cases. 225 Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const { 226 return AV.MaterializeAdjustedValue(LI, BB->getTerminator(), gvn); 227 } 228 }; 229 230 //===----------------------------------------------------------------------===// 231 // ValueTable Internal Functions 232 //===----------------------------------------------------------------------===// 233 234 GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { 235 Expression e; 236 e.type = I->getType(); 237 e.opcode = I->getOpcode(); 238 for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); 239 OI != OE; ++OI) 240 e.varargs.push_back(lookupOrAdd(*OI)); 241 if (I->isCommutative()) { 242 // Ensure that commutative instructions that only differ by a permutation 243 // of their operands get the same value number by sorting the operand value 244 // numbers. Since all commutative instructions have two operands it is more 245 // efficient to sort by hand rather than using, say, std::sort. 246 assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); 247 if (e.varargs[0] > e.varargs[1]) 248 std::swap(e.varargs[0], e.varargs[1]); 249 } 250 251 if (CmpInst *C = dyn_cast<CmpInst>(I)) { 252 // Sort the operand value numbers so x<y and y>x get the same value number. 253 CmpInst::Predicate Predicate = C->getPredicate(); 254 if (e.varargs[0] > e.varargs[1]) { 255 std::swap(e.varargs[0], e.varargs[1]); 256 Predicate = CmpInst::getSwappedPredicate(Predicate); 257 } 258 e.opcode = (C->getOpcode() << 8) | Predicate; 259 } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { 260 for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); 261 II != IE; ++II) 262 e.varargs.push_back(*II); 263 } 264 265 return e; 266 } 267 268 GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, 269 CmpInst::Predicate Predicate, 270 Value *LHS, Value *RHS) { 271 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && 272 "Not a comparison!"); 273 Expression e; 274 e.type = CmpInst::makeCmpResultType(LHS->getType()); 275 e.varargs.push_back(lookupOrAdd(LHS)); 276 e.varargs.push_back(lookupOrAdd(RHS)); 277 278 // Sort the operand value numbers so x<y and y>x get the same value number. 279 if (e.varargs[0] > e.varargs[1]) { 280 std::swap(e.varargs[0], e.varargs[1]); 281 Predicate = CmpInst::getSwappedPredicate(Predicate); 282 } 283 e.opcode = (Opcode << 8) | Predicate; 284 return e; 285 } 286 287 GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { 288 assert(EI && "Not an ExtractValueInst?"); 289 Expression e; 290 e.type = EI->getType(); 291 e.opcode = 0; 292 293 IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); 294 if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { 295 // EI might be an extract from one of our recognised intrinsics. If it 296 // is we'll synthesize a semantically equivalent expression instead on 297 // an extract value expression. 298 switch (I->getIntrinsicID()) { 299 case Intrinsic::sadd_with_overflow: 300 case Intrinsic::uadd_with_overflow: 301 e.opcode = Instruction::Add; 302 break; 303 case Intrinsic::ssub_with_overflow: 304 case Intrinsic::usub_with_overflow: 305 e.opcode = Instruction::Sub; 306 break; 307 case Intrinsic::smul_with_overflow: 308 case Intrinsic::umul_with_overflow: 309 e.opcode = Instruction::Mul; 310 break; 311 default: 312 break; 313 } 314 315 if (e.opcode != 0) { 316 // Intrinsic recognized. Grab its args to finish building the expression. 317 assert(I->getNumArgOperands() == 2 && 318 "Expect two args for recognised intrinsics."); 319 e.varargs.push_back(lookupOrAdd(I->getArgOperand(0))); 320 e.varargs.push_back(lookupOrAdd(I->getArgOperand(1))); 321 return e; 322 } 323 } 324 325 // Not a recognised intrinsic. Fall back to producing an extract value 326 // expression. 327 e.opcode = EI->getOpcode(); 328 for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); 329 OI != OE; ++OI) 330 e.varargs.push_back(lookupOrAdd(*OI)); 331 332 for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); 333 II != IE; ++II) 334 e.varargs.push_back(*II); 335 336 return e; 337 } 338 339 //===----------------------------------------------------------------------===// 340 // ValueTable External Functions 341 //===----------------------------------------------------------------------===// 342 343 GVN::ValueTable::ValueTable() : nextValueNumber(1) {} 344 GVN::ValueTable::ValueTable(const ValueTable &) = default; 345 GVN::ValueTable::ValueTable(ValueTable &&) = default; 346 GVN::ValueTable::~ValueTable() = default; 347 348 /// add - Insert a value into the table with a specified value number. 349 void GVN::ValueTable::add(Value *V, uint32_t num) { 350 valueNumbering.insert(std::make_pair(V, num)); 351 } 352 353 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { 354 if (AA->doesNotAccessMemory(C)) { 355 Expression exp = createExpr(C); 356 uint32_t &e = expressionNumbering[exp]; 357 if (!e) e = nextValueNumber++; 358 valueNumbering[C] = e; 359 return e; 360 } else if (AA->onlyReadsMemory(C)) { 361 Expression exp = createExpr(C); 362 uint32_t &e = expressionNumbering[exp]; 363 if (!e) { 364 e = nextValueNumber++; 365 valueNumbering[C] = e; 366 return e; 367 } 368 if (!MD) { 369 e = nextValueNumber++; 370 valueNumbering[C] = e; 371 return e; 372 } 373 374 MemDepResult local_dep = MD->getDependency(C); 375 376 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 377 valueNumbering[C] = nextValueNumber; 378 return nextValueNumber++; 379 } 380 381 if (local_dep.isDef()) { 382 CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); 383 384 if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) { 385 valueNumbering[C] = nextValueNumber; 386 return nextValueNumber++; 387 } 388 389 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 390 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); 391 uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i)); 392 if (c_vn != cd_vn) { 393 valueNumbering[C] = nextValueNumber; 394 return nextValueNumber++; 395 } 396 } 397 398 uint32_t v = lookupOrAdd(local_cdep); 399 valueNumbering[C] = v; 400 return v; 401 } 402 403 // Non-local case. 404 const MemoryDependenceResults::NonLocalDepInfo &deps = 405 MD->getNonLocalCallDependency(CallSite(C)); 406 // FIXME: Move the checking logic to MemDep! 407 CallInst* cdep = nullptr; 408 409 // Check to see if we have a single dominating call instruction that is 410 // identical to C. 411 for (unsigned i = 0, e = deps.size(); i != e; ++i) { 412 const NonLocalDepEntry *I = &deps[i]; 413 if (I->getResult().isNonLocal()) 414 continue; 415 416 // We don't handle non-definitions. If we already have a call, reject 417 // instruction dependencies. 418 if (!I->getResult().isDef() || cdep != nullptr) { 419 cdep = nullptr; 420 break; 421 } 422 423 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst()); 424 // FIXME: All duplicated with non-local case. 425 if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ 426 cdep = NonLocalDepCall; 427 continue; 428 } 429 430 cdep = nullptr; 431 break; 432 } 433 434 if (!cdep) { 435 valueNumbering[C] = nextValueNumber; 436 return nextValueNumber++; 437 } 438 439 if (cdep->getNumArgOperands() != C->getNumArgOperands()) { 440 valueNumbering[C] = nextValueNumber; 441 return nextValueNumber++; 442 } 443 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 444 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); 445 uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i)); 446 if (c_vn != cd_vn) { 447 valueNumbering[C] = nextValueNumber; 448 return nextValueNumber++; 449 } 450 } 451 452 uint32_t v = lookupOrAdd(cdep); 453 valueNumbering[C] = v; 454 return v; 455 456 } else { 457 valueNumbering[C] = nextValueNumber; 458 return nextValueNumber++; 459 } 460 } 461 462 /// Returns true if a value number exists for the specified value. 463 bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; } 464 465 /// lookup_or_add - Returns the value number for the specified value, assigning 466 /// it a new number if it did not have one before. 467 uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { 468 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 469 if (VI != valueNumbering.end()) 470 return VI->second; 471 472 if (!isa<Instruction>(V)) { 473 valueNumbering[V] = nextValueNumber; 474 return nextValueNumber++; 475 } 476 477 Instruction* I = cast<Instruction>(V); 478 Expression exp; 479 switch (I->getOpcode()) { 480 case Instruction::Call: 481 return lookupOrAddCall(cast<CallInst>(I)); 482 case Instruction::Add: 483 case Instruction::FAdd: 484 case Instruction::Sub: 485 case Instruction::FSub: 486 case Instruction::Mul: 487 case Instruction::FMul: 488 case Instruction::UDiv: 489 case Instruction::SDiv: 490 case Instruction::FDiv: 491 case Instruction::URem: 492 case Instruction::SRem: 493 case Instruction::FRem: 494 case Instruction::Shl: 495 case Instruction::LShr: 496 case Instruction::AShr: 497 case Instruction::And: 498 case Instruction::Or: 499 case Instruction::Xor: 500 case Instruction::ICmp: 501 case Instruction::FCmp: 502 case Instruction::Trunc: 503 case Instruction::ZExt: 504 case Instruction::SExt: 505 case Instruction::FPToUI: 506 case Instruction::FPToSI: 507 case Instruction::UIToFP: 508 case Instruction::SIToFP: 509 case Instruction::FPTrunc: 510 case Instruction::FPExt: 511 case Instruction::PtrToInt: 512 case Instruction::IntToPtr: 513 case Instruction::BitCast: 514 case Instruction::Select: 515 case Instruction::ExtractElement: 516 case Instruction::InsertElement: 517 case Instruction::ShuffleVector: 518 case Instruction::InsertValue: 519 case Instruction::GetElementPtr: 520 exp = createExpr(I); 521 break; 522 case Instruction::ExtractValue: 523 exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); 524 break; 525 default: 526 valueNumbering[V] = nextValueNumber; 527 return nextValueNumber++; 528 } 529 530 uint32_t& e = expressionNumbering[exp]; 531 if (!e) e = nextValueNumber++; 532 valueNumbering[V] = e; 533 return e; 534 } 535 536 /// Returns the value number of the specified value. Fails if 537 /// the value has not yet been numbered. 538 uint32_t GVN::ValueTable::lookup(Value *V) const { 539 DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); 540 assert(VI != valueNumbering.end() && "Value not numbered?"); 541 return VI->second; 542 } 543 544 /// Returns the value number of the given comparison, 545 /// assigning it a new number if it did not have one before. Useful when 546 /// we deduced the result of a comparison, but don't immediately have an 547 /// instruction realizing that comparison to hand. 548 uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, 549 CmpInst::Predicate Predicate, 550 Value *LHS, Value *RHS) { 551 Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); 552 uint32_t& e = expressionNumbering[exp]; 553 if (!e) e = nextValueNumber++; 554 return e; 555 } 556 557 /// Remove all entries from the ValueTable. 558 void GVN::ValueTable::clear() { 559 valueNumbering.clear(); 560 expressionNumbering.clear(); 561 nextValueNumber = 1; 562 } 563 564 /// Remove a value from the value numbering. 565 void GVN::ValueTable::erase(Value *V) { 566 valueNumbering.erase(V); 567 } 568 569 /// verifyRemoved - Verify that the value is removed from all internal data 570 /// structures. 571 void GVN::ValueTable::verifyRemoved(const Value *V) const { 572 for (DenseMap<Value*, uint32_t>::const_iterator 573 I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { 574 assert(I->first != V && "Inst still occurs in value numbering map!"); 575 } 576 } 577 578 //===----------------------------------------------------------------------===// 579 // GVN Pass 580 //===----------------------------------------------------------------------===// 581 582 PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { 583 // FIXME: The order of evaluation of these 'getResult' calls is very 584 // significant! Re-ordering these variables will cause GVN when run alone to 585 // be less effective! We should fix memdep and basic-aa to not exhibit this 586 // behavior, but until then don't change the order here. 587 auto &AC = AM.getResult<AssumptionAnalysis>(F); 588 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 589 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 590 auto &AA = AM.getResult<AAManager>(F); 591 auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F); 592 auto *LI = AM.getCachedResult<LoopAnalysis>(F); 593 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 594 bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep, LI, &ORE); 595 if (!Changed) 596 return PreservedAnalyses::all(); 597 PreservedAnalyses PA; 598 PA.preserve<DominatorTreeAnalysis>(); 599 PA.preserve<GlobalsAA>(); 600 PA.preserve<TargetLibraryAnalysis>(); 601 return PA; 602 } 603 604 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 605 LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const { 606 errs() << "{\n"; 607 for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), 608 E = d.end(); I != E; ++I) { 609 errs() << I->first << "\n"; 610 I->second->dump(); 611 } 612 errs() << "}\n"; 613 } 614 #endif 615 616 /// Return true if we can prove that the value 617 /// we're analyzing is fully available in the specified block. As we go, keep 618 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This 619 /// map is actually a tri-state map with the following values: 620 /// 0) we know the block *is not* fully available. 621 /// 1) we know the block *is* fully available. 622 /// 2) we do not know whether the block is fully available or not, but we are 623 /// currently speculating that it will be. 624 /// 3) we are speculating for this block and have used that to speculate for 625 /// other blocks. 626 static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 627 DenseMap<BasicBlock*, char> &FullyAvailableBlocks, 628 uint32_t RecurseDepth) { 629 if (RecurseDepth > MaxRecurseDepth) 630 return false; 631 632 // Optimistically assume that the block is fully available and check to see 633 // if we already know about this block in one lookup. 634 std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 635 FullyAvailableBlocks.insert(std::make_pair(BB, 2)); 636 637 // If the entry already existed for this block, return the precomputed value. 638 if (!IV.second) { 639 // If this is a speculative "available" value, mark it as being used for 640 // speculation of other blocks. 641 if (IV.first->second == 2) 642 IV.first->second = 3; 643 return IV.first->second != 0; 644 } 645 646 // Otherwise, see if it is fully available in all predecessors. 647 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 648 649 // If this block has no predecessors, it isn't live-in here. 650 if (PI == PE) 651 goto SpeculationFailure; 652 653 for (; PI != PE; ++PI) 654 // If the value isn't fully available in one of our predecessors, then it 655 // isn't fully available in this block either. Undo our previous 656 // optimistic assumption and bail out. 657 if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1)) 658 goto SpeculationFailure; 659 660 return true; 661 662 // If we get here, we found out that this is not, after 663 // all, a fully-available block. We have a problem if we speculated on this and 664 // used the speculation to mark other blocks as available. 665 SpeculationFailure: 666 char &BBVal = FullyAvailableBlocks[BB]; 667 668 // If we didn't speculate on this, just return with it set to false. 669 if (BBVal == 2) { 670 BBVal = 0; 671 return false; 672 } 673 674 // If we did speculate on this value, we could have blocks set to 1 that are 675 // incorrect. Walk the (transitive) successors of this block and mark them as 676 // 0 if set to one. 677 SmallVector<BasicBlock*, 32> BBWorklist; 678 BBWorklist.push_back(BB); 679 680 do { 681 BasicBlock *Entry = BBWorklist.pop_back_val(); 682 // Note that this sets blocks to 0 (unavailable) if they happen to not 683 // already be in FullyAvailableBlocks. This is safe. 684 char &EntryVal = FullyAvailableBlocks[Entry]; 685 if (EntryVal == 0) continue; // Already unavailable. 686 687 // Mark as unavailable. 688 EntryVal = 0; 689 690 BBWorklist.append(succ_begin(Entry), succ_end(Entry)); 691 } while (!BBWorklist.empty()); 692 693 return false; 694 } 695 696 697 698 699 /// Given a set of loads specified by ValuesPerBlock, 700 /// construct SSA form, allowing us to eliminate LI. This returns the value 701 /// that should be used at LI's definition site. 702 static Value *ConstructSSAForLoadSet(LoadInst *LI, 703 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, 704 GVN &gvn) { 705 // Check for the fully redundant, dominating load case. In this case, we can 706 // just use the dominating value directly. 707 if (ValuesPerBlock.size() == 1 && 708 gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, 709 LI->getParent())) { 710 assert(!ValuesPerBlock[0].AV.isUndefValue() && 711 "Dead BB dominate this block"); 712 return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn); 713 } 714 715 // Otherwise, we have to construct SSA form. 716 SmallVector<PHINode*, 8> NewPHIs; 717 SSAUpdater SSAUpdate(&NewPHIs); 718 SSAUpdate.Initialize(LI->getType(), LI->getName()); 719 720 for (const AvailableValueInBlock &AV : ValuesPerBlock) { 721 BasicBlock *BB = AV.BB; 722 723 if (SSAUpdate.HasValueForBlock(BB)) 724 continue; 725 726 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn)); 727 } 728 729 // Perform PHI construction. 730 return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); 731 } 732 733 Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI, 734 Instruction *InsertPt, 735 GVN &gvn) const { 736 Value *Res; 737 Type *LoadTy = LI->getType(); 738 const DataLayout &DL = LI->getModule()->getDataLayout(); 739 if (isSimpleValue()) { 740 Res = getSimpleValue(); 741 if (Res->getType() != LoadTy) { 742 Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL); 743 744 DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " 745 << *getSimpleValue() << '\n' 746 << *Res << '\n' << "\n\n\n"); 747 } 748 } else if (isCoercedLoadValue()) { 749 LoadInst *Load = getCoercedLoadValue(); 750 if (Load->getType() == LoadTy && Offset == 0) { 751 Res = Load; 752 } else { 753 Res = getLoadValueForLoad(Load, Offset, LoadTy, InsertPt, DL); 754 // We would like to use gvn.markInstructionForDeletion here, but we can't 755 // because the load is already memoized into the leader map table that GVN 756 // tracks. It is potentially possible to remove the load from the table, 757 // but then there all of the operations based on it would need to be 758 // rehashed. Just leave the dead load around. 759 gvn.getMemDep().removeInstruction(Load); 760 DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " 761 << *getCoercedLoadValue() << '\n' 762 << *Res << '\n' 763 << "\n\n\n"); 764 } 765 } else if (isMemIntrinValue()) { 766 Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, 767 InsertPt, DL); 768 DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset 769 << " " << *getMemIntrinValue() << '\n' 770 << *Res << '\n' << "\n\n\n"); 771 } else { 772 assert(isUndefValue() && "Should be UndefVal"); 773 DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); 774 return UndefValue::get(LoadTy); 775 } 776 assert(Res && "failed to materialize?"); 777 return Res; 778 } 779 780 static bool isLifetimeStart(const Instruction *Inst) { 781 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) 782 return II->getIntrinsicID() == Intrinsic::lifetime_start; 783 return false; 784 } 785 786 /// \brief Try to locate the three instruction involved in a missed 787 /// load-elimination case that is due to an intervening store. 788 static void reportMayClobberedLoad(LoadInst *LI, MemDepResult DepInfo, 789 DominatorTree *DT, 790 OptimizationRemarkEmitter *ORE) { 791 using namespace ore; 792 User *OtherAccess = nullptr; 793 794 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", LI); 795 R << "load of type " << NV("Type", LI->getType()) << " not eliminated" 796 << setExtraArgs(); 797 798 for (auto *U : LI->getPointerOperand()->users()) 799 if (U != LI && (isa<LoadInst>(U) || isa<StoreInst>(U)) && 800 DT->dominates(cast<Instruction>(U), LI)) { 801 // FIXME: for now give up if there are multiple memory accesses that 802 // dominate the load. We need further analysis to decide which one is 803 // that we're forwarding from. 804 if (OtherAccess) 805 OtherAccess = nullptr; 806 else 807 OtherAccess = U; 808 } 809 810 if (OtherAccess) 811 R << " in favor of " << NV("OtherAccess", OtherAccess); 812 813 R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst()); 814 815 ORE->emit(R); 816 } 817 818 bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo, 819 Value *Address, AvailableValue &Res) { 820 821 assert((DepInfo.isDef() || DepInfo.isClobber()) && 822 "expected a local dependence"); 823 assert(LI->isUnordered() && "rules below are incorrect for ordered access"); 824 825 const DataLayout &DL = LI->getModule()->getDataLayout(); 826 827 if (DepInfo.isClobber()) { 828 // If the dependence is to a store that writes to a superset of the bits 829 // read by the load, we can extract the bits we need for the load from the 830 // stored value. 831 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { 832 // Can't forward from non-atomic to atomic without violating memory model. 833 if (Address && LI->isAtomic() <= DepSI->isAtomic()) { 834 int Offset = 835 analyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, DL); 836 if (Offset != -1) { 837 Res = AvailableValue::get(DepSI->getValueOperand(), Offset); 838 return true; 839 } 840 } 841 } 842 843 // Check to see if we have something like this: 844 // load i32* P 845 // load i8* (P+1) 846 // if we have this, replace the later with an extraction from the former. 847 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { 848 // If this is a clobber and L is the first instruction in its block, then 849 // we have the first instruction in the entry block. 850 // Can't forward from non-atomic to atomic without violating memory model. 851 if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) { 852 int Offset = 853 analyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL); 854 855 if (Offset != -1) { 856 Res = AvailableValue::getLoad(DepLI, Offset); 857 return true; 858 } 859 } 860 } 861 862 // If the clobbering value is a memset/memcpy/memmove, see if we can 863 // forward a value on from it. 864 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { 865 if (Address && !LI->isAtomic()) { 866 int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address, 867 DepMI, DL); 868 if (Offset != -1) { 869 Res = AvailableValue::getMI(DepMI, Offset); 870 return true; 871 } 872 } 873 } 874 // Nothing known about this clobber, have to be conservative 875 DEBUG( 876 // fast print dep, using operator<< on instruction is too slow. 877 dbgs() << "GVN: load "; 878 LI->printAsOperand(dbgs()); 879 Instruction *I = DepInfo.getInst(); 880 dbgs() << " is clobbered by " << *I << '\n'; 881 ); 882 883 if (ORE->allowExtraAnalysis()) 884 reportMayClobberedLoad(LI, DepInfo, DT, ORE); 885 886 return false; 887 } 888 assert(DepInfo.isDef() && "follows from above"); 889 890 Instruction *DepInst = DepInfo.getInst(); 891 892 // Loading the allocation -> undef. 893 if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || 894 // Loading immediately after lifetime begin -> undef. 895 isLifetimeStart(DepInst)) { 896 Res = AvailableValue::get(UndefValue::get(LI->getType())); 897 return true; 898 } 899 900 // Loading from calloc (which zero initializes memory) -> zero 901 if (isCallocLikeFn(DepInst, TLI)) { 902 Res = AvailableValue::get(Constant::getNullValue(LI->getType())); 903 return true; 904 } 905 906 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { 907 // Reject loads and stores that are to the same address but are of 908 // different types if we have to. If the stored value is larger or equal to 909 // the loaded value, we can reuse it. 910 if (S->getValueOperand()->getType() != LI->getType() && 911 !canCoerceMustAliasedValueToLoad(S->getValueOperand(), 912 LI->getType(), DL)) 913 return false; 914 915 // Can't forward from non-atomic to atomic without violating memory model. 916 if (S->isAtomic() < LI->isAtomic()) 917 return false; 918 919 Res = AvailableValue::get(S->getValueOperand()); 920 return true; 921 } 922 923 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { 924 // If the types mismatch and we can't handle it, reject reuse of the load. 925 // If the stored value is larger or equal to the loaded value, we can reuse 926 // it. 927 if (LD->getType() != LI->getType() && 928 !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) 929 return false; 930 931 // Can't forward from non-atomic to atomic without violating memory model. 932 if (LD->isAtomic() < LI->isAtomic()) 933 return false; 934 935 Res = AvailableValue::getLoad(LD); 936 return true; 937 } 938 939 // Unknown def - must be conservative 940 DEBUG( 941 // fast print dep, using operator<< on instruction is too slow. 942 dbgs() << "GVN: load "; 943 LI->printAsOperand(dbgs()); 944 dbgs() << " has unknown def " << *DepInst << '\n'; 945 ); 946 return false; 947 } 948 949 void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 950 AvailValInBlkVect &ValuesPerBlock, 951 UnavailBlkVect &UnavailableBlocks) { 952 953 // Filter out useless results (non-locals, etc). Keep track of the blocks 954 // where we have a value available in repl, also keep track of whether we see 955 // dependencies that produce an unknown value for the load (such as a call 956 // that could potentially clobber the load). 957 unsigned NumDeps = Deps.size(); 958 for (unsigned i = 0, e = NumDeps; i != e; ++i) { 959 BasicBlock *DepBB = Deps[i].getBB(); 960 MemDepResult DepInfo = Deps[i].getResult(); 961 962 if (DeadBlocks.count(DepBB)) { 963 // Dead dependent mem-op disguise as a load evaluating the same value 964 // as the load in question. 965 ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); 966 continue; 967 } 968 969 if (!DepInfo.isDef() && !DepInfo.isClobber()) { 970 UnavailableBlocks.push_back(DepBB); 971 continue; 972 } 973 974 // The address being loaded in this non-local block may not be the same as 975 // the pointer operand of the load if PHI translation occurs. Make sure 976 // to consider the right address. 977 Value *Address = Deps[i].getAddress(); 978 979 AvailableValue AV; 980 if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) { 981 // subtlety: because we know this was a non-local dependency, we know 982 // it's safe to materialize anywhere between the instruction within 983 // DepInfo and the end of it's block. 984 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 985 std::move(AV))); 986 } else { 987 UnavailableBlocks.push_back(DepBB); 988 } 989 } 990 991 assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() && 992 "post condition violation"); 993 } 994 995 bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 996 UnavailBlkVect &UnavailableBlocks) { 997 // Okay, we have *some* definitions of the value. This means that the value 998 // is available in some of our (transitive) predecessors. Lets think about 999 // doing PRE of this load. This will involve inserting a new load into the 1000 // predecessor when it's not available. We could do this in general, but 1001 // prefer to not increase code size. As such, we only do this when we know 1002 // that we only have to insert *one* load (which means we're basically moving 1003 // the load, not inserting a new one). 1004 1005 SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(), 1006 UnavailableBlocks.end()); 1007 1008 // Let's find the first basic block with more than one predecessor. Walk 1009 // backwards through predecessors if needed. 1010 BasicBlock *LoadBB = LI->getParent(); 1011 BasicBlock *TmpBB = LoadBB; 1012 1013 while (TmpBB->getSinglePredecessor()) { 1014 TmpBB = TmpBB->getSinglePredecessor(); 1015 if (TmpBB == LoadBB) // Infinite (unreachable) loop. 1016 return false; 1017 if (Blockers.count(TmpBB)) 1018 return false; 1019 1020 // If any of these blocks has more than one successor (i.e. if the edge we 1021 // just traversed was critical), then there are other paths through this 1022 // block along which the load may not be anticipated. Hoisting the load 1023 // above this block would be adding the load to execution paths along 1024 // which it was not previously executed. 1025 if (TmpBB->getTerminator()->getNumSuccessors() != 1) 1026 return false; 1027 } 1028 1029 assert(TmpBB); 1030 LoadBB = TmpBB; 1031 1032 // Check to see how many predecessors have the loaded value fully 1033 // available. 1034 MapVector<BasicBlock *, Value *> PredLoads; 1035 DenseMap<BasicBlock*, char> FullyAvailableBlocks; 1036 for (const AvailableValueInBlock &AV : ValuesPerBlock) 1037 FullyAvailableBlocks[AV.BB] = true; 1038 for (BasicBlock *UnavailableBB : UnavailableBlocks) 1039 FullyAvailableBlocks[UnavailableBB] = false; 1040 1041 SmallVector<BasicBlock *, 4> CriticalEdgePred; 1042 for (BasicBlock *Pred : predecessors(LoadBB)) { 1043 // If any predecessor block is an EH pad that does not allow non-PHI 1044 // instructions before the terminator, we can't PRE the load. 1045 if (Pred->getTerminator()->isEHPad()) { 1046 DEBUG(dbgs() 1047 << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '" 1048 << Pred->getName() << "': " << *LI << '\n'); 1049 return false; 1050 } 1051 1052 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { 1053 continue; 1054 } 1055 1056 if (Pred->getTerminator()->getNumSuccessors() != 1) { 1057 if (isa<IndirectBrInst>(Pred->getTerminator())) { 1058 DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" 1059 << Pred->getName() << "': " << *LI << '\n'); 1060 return false; 1061 } 1062 1063 if (LoadBB->isEHPad()) { 1064 DEBUG(dbgs() 1065 << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '" 1066 << Pred->getName() << "': " << *LI << '\n'); 1067 return false; 1068 } 1069 1070 CriticalEdgePred.push_back(Pred); 1071 } else { 1072 // Only add the predecessors that will not be split for now. 1073 PredLoads[Pred] = nullptr; 1074 } 1075 } 1076 1077 // Decide whether PRE is profitable for this load. 1078 unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size(); 1079 assert(NumUnavailablePreds != 0 && 1080 "Fully available value should already be eliminated!"); 1081 1082 // If this load is unavailable in multiple predecessors, reject it. 1083 // FIXME: If we could restructure the CFG, we could make a common pred with 1084 // all the preds that don't have an available LI and insert a new load into 1085 // that one block. 1086 if (NumUnavailablePreds != 1) 1087 return false; 1088 1089 // Split critical edges, and update the unavailable predecessors accordingly. 1090 for (BasicBlock *OrigPred : CriticalEdgePred) { 1091 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); 1092 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); 1093 PredLoads[NewPred] = nullptr; 1094 DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" 1095 << LoadBB->getName() << '\n'); 1096 } 1097 1098 // Check if the load can safely be moved to all the unavailable predecessors. 1099 bool CanDoPRE = true; 1100 const DataLayout &DL = LI->getModule()->getDataLayout(); 1101 SmallVector<Instruction*, 8> NewInsts; 1102 for (auto &PredLoad : PredLoads) { 1103 BasicBlock *UnavailablePred = PredLoad.first; 1104 1105 // Do PHI translation to get its value in the predecessor if necessary. The 1106 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. 1107 1108 // If all preds have a single successor, then we know it is safe to insert 1109 // the load on the pred (?!?), so we can insert code to materialize the 1110 // pointer if it is not available. 1111 PHITransAddr Address(LI->getPointerOperand(), DL, AC); 1112 Value *LoadPtr = nullptr; 1113 LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, 1114 *DT, NewInsts); 1115 1116 // If we couldn't find or insert a computation of this phi translated value, 1117 // we fail PRE. 1118 if (!LoadPtr) { 1119 DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " 1120 << *LI->getPointerOperand() << "\n"); 1121 CanDoPRE = false; 1122 break; 1123 } 1124 1125 PredLoad.second = LoadPtr; 1126 } 1127 1128 if (!CanDoPRE) { 1129 while (!NewInsts.empty()) { 1130 Instruction *I = NewInsts.pop_back_val(); 1131 if (MD) MD->removeInstruction(I); 1132 I->eraseFromParent(); 1133 } 1134 // HINT: Don't revert the edge-splitting as following transformation may 1135 // also need to split these critical edges. 1136 return !CriticalEdgePred.empty(); 1137 } 1138 1139 // Okay, we can eliminate this load by inserting a reload in the predecessor 1140 // and using PHI construction to get the value in the other predecessors, do 1141 // it. 1142 DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); 1143 DEBUG(if (!NewInsts.empty()) 1144 dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " 1145 << *NewInsts.back() << '\n'); 1146 1147 // Assign value numbers to the new instructions. 1148 for (Instruction *I : NewInsts) { 1149 // Instructions that have been inserted in predecessor(s) to materialize 1150 // the load address do not retain their original debug locations. Doing 1151 // so could lead to confusing (but correct) source attributions. 1152 // FIXME: How do we retain source locations without causing poor debugging 1153 // behavior? 1154 I->setDebugLoc(DebugLoc()); 1155 1156 // FIXME: We really _ought_ to insert these value numbers into their 1157 // parent's availability map. However, in doing so, we risk getting into 1158 // ordering issues. If a block hasn't been processed yet, we would be 1159 // marking a value as AVAIL-IN, which isn't what we intend. 1160 VN.lookupOrAdd(I); 1161 } 1162 1163 for (const auto &PredLoad : PredLoads) { 1164 BasicBlock *UnavailablePred = PredLoad.first; 1165 Value *LoadPtr = PredLoad.second; 1166 1167 auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", 1168 LI->isVolatile(), LI->getAlignment(), 1169 LI->getOrdering(), LI->getSyncScopeID(), 1170 UnavailablePred->getTerminator()); 1171 NewLoad->setDebugLoc(LI->getDebugLoc()); 1172 1173 // Transfer the old load's AA tags to the new load. 1174 AAMDNodes Tags; 1175 LI->getAAMetadata(Tags); 1176 if (Tags) 1177 NewLoad->setAAMetadata(Tags); 1178 1179 if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) 1180 NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); 1181 if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group)) 1182 NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); 1183 if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) 1184 NewLoad->setMetadata(LLVMContext::MD_range, RangeMD); 1185 1186 // We do not propagate the old load's debug location, because the new 1187 // load now lives in a different BB, and we want to avoid a jumpy line 1188 // table. 1189 // FIXME: How do we retain source locations without causing poor debugging 1190 // behavior? 1191 1192 // Add the newly created load. 1193 ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, 1194 NewLoad)); 1195 MD->invalidateCachedPointerInfo(LoadPtr); 1196 DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); 1197 } 1198 1199 // Perform PHI construction. 1200 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); 1201 LI->replaceAllUsesWith(V); 1202 if (isa<PHINode>(V)) 1203 V->takeName(LI); 1204 if (Instruction *I = dyn_cast<Instruction>(V)) 1205 I->setDebugLoc(LI->getDebugLoc()); 1206 if (V->getType()->isPtrOrPtrVectorTy()) 1207 MD->invalidateCachedPointerInfo(V); 1208 markInstructionForDeletion(LI); 1209 ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) 1210 << "load eliminated by PRE"); 1211 ++NumPRELoad; 1212 return true; 1213 } 1214 1215 static void reportLoadElim(LoadInst *LI, Value *AvailableValue, 1216 OptimizationRemarkEmitter *ORE) { 1217 using namespace ore; 1218 ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadElim", LI) 1219 << "load of type " << NV("Type", LI->getType()) << " eliminated" 1220 << setExtraArgs() << " in favor of " 1221 << NV("InfavorOfValue", AvailableValue)); 1222 } 1223 1224 /// Attempt to eliminate a load whose dependencies are 1225 /// non-local by performing PHI construction. 1226 bool GVN::processNonLocalLoad(LoadInst *LI) { 1227 // non-local speculations are not allowed under asan. 1228 if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress)) 1229 return false; 1230 1231 // Step 1: Find the non-local dependencies of the load. 1232 LoadDepVect Deps; 1233 MD->getNonLocalPointerDependency(LI, Deps); 1234 1235 // If we had to process more than one hundred blocks to find the 1236 // dependencies, this load isn't worth worrying about. Optimizing 1237 // it will be too expensive. 1238 unsigned NumDeps = Deps.size(); 1239 if (NumDeps > 100) 1240 return false; 1241 1242 // If we had a phi translation failure, we'll have a single entry which is a 1243 // clobber in the current block. Reject this early. 1244 if (NumDeps == 1 && 1245 !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { 1246 DEBUG( 1247 dbgs() << "GVN: non-local load "; 1248 LI->printAsOperand(dbgs()); 1249 dbgs() << " has unknown dependencies\n"; 1250 ); 1251 return false; 1252 } 1253 1254 // If this load follows a GEP, see if we can PRE the indices before analyzing. 1255 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) { 1256 for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(), 1257 OE = GEP->idx_end(); 1258 OI != OE; ++OI) 1259 if (Instruction *I = dyn_cast<Instruction>(OI->get())) 1260 performScalarPRE(I); 1261 } 1262 1263 // Step 2: Analyze the availability of the load 1264 AvailValInBlkVect ValuesPerBlock; 1265 UnavailBlkVect UnavailableBlocks; 1266 AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks); 1267 1268 // If we have no predecessors that produce a known value for this load, exit 1269 // early. 1270 if (ValuesPerBlock.empty()) 1271 return false; 1272 1273 // Step 3: Eliminate fully redundancy. 1274 // 1275 // If all of the instructions we depend on produce a known value for this 1276 // load, then it is fully redundant and we can use PHI insertion to compute 1277 // its value. Insert PHIs and remove the fully redundant value now. 1278 if (UnavailableBlocks.empty()) { 1279 DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); 1280 1281 // Perform PHI construction. 1282 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); 1283 LI->replaceAllUsesWith(V); 1284 1285 if (isa<PHINode>(V)) 1286 V->takeName(LI); 1287 if (Instruction *I = dyn_cast<Instruction>(V)) 1288 // If instruction I has debug info, then we should not update it. 1289 // Also, if I has a null DebugLoc, then it is still potentially incorrect 1290 // to propagate LI's DebugLoc because LI may not post-dominate I. 1291 if (LI->getDebugLoc() && LI->getParent() == I->getParent()) 1292 I->setDebugLoc(LI->getDebugLoc()); 1293 if (V->getType()->isPtrOrPtrVectorTy()) 1294 MD->invalidateCachedPointerInfo(V); 1295 markInstructionForDeletion(LI); 1296 ++NumGVNLoad; 1297 reportLoadElim(LI, V, ORE); 1298 return true; 1299 } 1300 1301 // Step 4: Eliminate partial redundancy. 1302 if (!EnablePRE || !EnableLoadPRE) 1303 return false; 1304 1305 return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); 1306 } 1307 1308 bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { 1309 assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume && 1310 "This function can only be called with llvm.assume intrinsic"); 1311 Value *V = IntrinsicI->getArgOperand(0); 1312 1313 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) { 1314 if (Cond->isZero()) { 1315 Type *Int8Ty = Type::getInt8Ty(V->getContext()); 1316 // Insert a new store to null instruction before the load to indicate that 1317 // this code is not reachable. FIXME: We could insert unreachable 1318 // instruction directly because we can modify the CFG. 1319 new StoreInst(UndefValue::get(Int8Ty), 1320 Constant::getNullValue(Int8Ty->getPointerTo()), 1321 IntrinsicI); 1322 } 1323 markInstructionForDeletion(IntrinsicI); 1324 return false; 1325 } 1326 1327 Constant *True = ConstantInt::getTrue(V->getContext()); 1328 bool Changed = false; 1329 1330 for (BasicBlock *Successor : successors(IntrinsicI->getParent())) { 1331 BasicBlockEdge Edge(IntrinsicI->getParent(), Successor); 1332 1333 // This property is only true in dominated successors, propagateEquality 1334 // will check dominance for us. 1335 Changed |= propagateEquality(V, True, Edge, false); 1336 } 1337 1338 // We can replace assume value with true, which covers cases like this: 1339 // call void @llvm.assume(i1 %cmp) 1340 // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true 1341 ReplaceWithConstMap[V] = True; 1342 1343 // If one of *cmp *eq operand is const, adding it to map will cover this: 1344 // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen 1345 // call void @llvm.assume(i1 %cmp) 1346 // ret float %0 ; will change it to ret float 3.000000e+00 1347 if (auto *CmpI = dyn_cast<CmpInst>(V)) { 1348 if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ || 1349 CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ || 1350 (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ && 1351 CmpI->getFastMathFlags().noNaNs())) { 1352 Value *CmpLHS = CmpI->getOperand(0); 1353 Value *CmpRHS = CmpI->getOperand(1); 1354 if (isa<Constant>(CmpLHS)) 1355 std::swap(CmpLHS, CmpRHS); 1356 auto *RHSConst = dyn_cast<Constant>(CmpRHS); 1357 1358 // If only one operand is constant. 1359 if (RHSConst != nullptr && !isa<Constant>(CmpLHS)) 1360 ReplaceWithConstMap[CmpLHS] = RHSConst; 1361 } 1362 } 1363 return Changed; 1364 } 1365 1366 static void patchReplacementInstruction(Instruction *I, Value *Repl) { 1367 auto *ReplInst = dyn_cast<Instruction>(Repl); 1368 if (!ReplInst) 1369 return; 1370 1371 // Patch the replacement so that it is not more restrictive than the value 1372 // being replaced. 1373 // Note that if 'I' is a load being replaced by some operation, 1374 // for example, by an arithmetic operation, then andIRFlags() 1375 // would just erase all math flags from the original arithmetic 1376 // operation, which is clearly not wanted and not needed. 1377 if (!isa<LoadInst>(I)) 1378 ReplInst->andIRFlags(I); 1379 1380 // FIXME: If both the original and replacement value are part of the 1381 // same control-flow region (meaning that the execution of one 1382 // guarantees the execution of the other), then we can combine the 1383 // noalias scopes here and do better than the general conservative 1384 // answer used in combineMetadata(). 1385 1386 // In general, GVN unifies expressions over different control-flow 1387 // regions, and so we need a conservative combination of the noalias 1388 // scopes. 1389 static const unsigned KnownIDs[] = { 1390 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, 1391 LLVMContext::MD_noalias, LLVMContext::MD_range, 1392 LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, 1393 LLVMContext::MD_invariant_group}; 1394 combineMetadata(ReplInst, I, KnownIDs); 1395 } 1396 1397 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { 1398 patchReplacementInstruction(I, Repl); 1399 I->replaceAllUsesWith(Repl); 1400 } 1401 1402 /// Attempt to eliminate a load, first by eliminating it 1403 /// locally, and then attempting non-local elimination if that fails. 1404 bool GVN::processLoad(LoadInst *L) { 1405 if (!MD) 1406 return false; 1407 1408 // This code hasn't been audited for ordered or volatile memory access 1409 if (!L->isUnordered()) 1410 return false; 1411 1412 if (L->use_empty()) { 1413 markInstructionForDeletion(L); 1414 return true; 1415 } 1416 1417 // ... to a pointer that has been loaded from before... 1418 MemDepResult Dep = MD->getDependency(L); 1419 1420 // If it is defined in another block, try harder. 1421 if (Dep.isNonLocal()) 1422 return processNonLocalLoad(L); 1423 1424 // Only handle the local case below 1425 if (!Dep.isDef() && !Dep.isClobber()) { 1426 // This might be a NonFuncLocal or an Unknown 1427 DEBUG( 1428 // fast print dep, using operator<< on instruction is too slow. 1429 dbgs() << "GVN: load "; 1430 L->printAsOperand(dbgs()); 1431 dbgs() << " has unknown dependence\n"; 1432 ); 1433 return false; 1434 } 1435 1436 AvailableValue AV; 1437 if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) { 1438 Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this); 1439 1440 // Replace the load! 1441 patchAndReplaceAllUsesWith(L, AvailableValue); 1442 markInstructionForDeletion(L); 1443 ++NumGVNLoad; 1444 reportLoadElim(L, AvailableValue, ORE); 1445 // Tell MDA to rexamine the reused pointer since we might have more 1446 // information after forwarding it. 1447 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) 1448 MD->invalidateCachedPointerInfo(AvailableValue); 1449 return true; 1450 } 1451 1452 return false; 1453 } 1454 1455 // In order to find a leader for a given value number at a 1456 // specific basic block, we first obtain the list of all Values for that number, 1457 // and then scan the list to find one whose block dominates the block in 1458 // question. This is fast because dominator tree queries consist of only 1459 // a few comparisons of DFS numbers. 1460 Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { 1461 LeaderTableEntry Vals = LeaderTable[num]; 1462 if (!Vals.Val) return nullptr; 1463 1464 Value *Val = nullptr; 1465 if (DT->dominates(Vals.BB, BB)) { 1466 Val = Vals.Val; 1467 if (isa<Constant>(Val)) return Val; 1468 } 1469 1470 LeaderTableEntry* Next = Vals.Next; 1471 while (Next) { 1472 if (DT->dominates(Next->BB, BB)) { 1473 if (isa<Constant>(Next->Val)) return Next->Val; 1474 if (!Val) Val = Next->Val; 1475 } 1476 1477 Next = Next->Next; 1478 } 1479 1480 return Val; 1481 } 1482 1483 /// There is an edge from 'Src' to 'Dst'. Return 1484 /// true if every path from the entry block to 'Dst' passes via this edge. In 1485 /// particular 'Dst' must not be reachable via another edge from 'Src'. 1486 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, 1487 DominatorTree *DT) { 1488 // While in theory it is interesting to consider the case in which Dst has 1489 // more than one predecessor, because Dst might be part of a loop which is 1490 // only reachable from Src, in practice it is pointless since at the time 1491 // GVN runs all such loops have preheaders, which means that Dst will have 1492 // been changed to have only one predecessor, namely Src. 1493 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); 1494 assert((!Pred || Pred == E.getStart()) && 1495 "No edge between these basic blocks!"); 1496 return Pred != nullptr; 1497 } 1498 1499 // Tries to replace instruction with const, using information from 1500 // ReplaceWithConstMap. 1501 bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { 1502 bool Changed = false; 1503 for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { 1504 Value *Operand = Instr->getOperand(OpNum); 1505 auto it = ReplaceWithConstMap.find(Operand); 1506 if (it != ReplaceWithConstMap.end()) { 1507 assert(!isa<Constant>(Operand) && 1508 "Replacing constants with constants is invalid"); 1509 DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second 1510 << " in instruction " << *Instr << '\n'); 1511 Instr->setOperand(OpNum, it->second); 1512 Changed = true; 1513 } 1514 } 1515 return Changed; 1516 } 1517 1518 /// The given values are known to be equal in every block 1519 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with 1520 /// 'RHS' everywhere in the scope. Returns whether a change was made. 1521 /// If DominatesByEdge is false, then it means that we will propagate the RHS 1522 /// value starting from the end of Root.Start. 1523 bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, 1524 bool DominatesByEdge) { 1525 SmallVector<std::pair<Value*, Value*>, 4> Worklist; 1526 Worklist.push_back(std::make_pair(LHS, RHS)); 1527 bool Changed = false; 1528 // For speed, compute a conservative fast approximation to 1529 // DT->dominates(Root, Root.getEnd()); 1530 const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); 1531 1532 while (!Worklist.empty()) { 1533 std::pair<Value*, Value*> Item = Worklist.pop_back_val(); 1534 LHS = Item.first; RHS = Item.second; 1535 1536 if (LHS == RHS) 1537 continue; 1538 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); 1539 1540 // Don't try to propagate equalities between constants. 1541 if (isa<Constant>(LHS) && isa<Constant>(RHS)) 1542 continue; 1543 1544 // Prefer a constant on the right-hand side, or an Argument if no constants. 1545 if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS))) 1546 std::swap(LHS, RHS); 1547 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); 1548 1549 // If there is no obvious reason to prefer the left-hand side over the 1550 // right-hand side, ensure the longest lived term is on the right-hand side, 1551 // so the shortest lived term will be replaced by the longest lived. 1552 // This tends to expose more simplifications. 1553 uint32_t LVN = VN.lookupOrAdd(LHS); 1554 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || 1555 (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { 1556 // Move the 'oldest' value to the right-hand side, using the value number 1557 // as a proxy for age. 1558 uint32_t RVN = VN.lookupOrAdd(RHS); 1559 if (LVN < RVN) { 1560 std::swap(LHS, RHS); 1561 LVN = RVN; 1562 } 1563 } 1564 1565 // If value numbering later sees that an instruction in the scope is equal 1566 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve 1567 // the invariant that instructions only occur in the leader table for their 1568 // own value number (this is used by removeFromLeaderTable), do not do this 1569 // if RHS is an instruction (if an instruction in the scope is morphed into 1570 // LHS then it will be turned into RHS by the next GVN iteration anyway, so 1571 // using the leader table is about compiling faster, not optimizing better). 1572 // The leader table only tracks basic blocks, not edges. Only add to if we 1573 // have the simple case where the edge dominates the end. 1574 if (RootDominatesEnd && !isa<Instruction>(RHS)) 1575 addToLeaderTable(LVN, RHS, Root.getEnd()); 1576 1577 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As 1578 // LHS always has at least one use that is not dominated by Root, this will 1579 // never do anything if LHS has only one use. 1580 if (!LHS->hasOneUse()) { 1581 unsigned NumReplacements = 1582 DominatesByEdge 1583 ? replaceDominatedUsesWith(LHS, RHS, *DT, Root) 1584 : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart()); 1585 1586 Changed |= NumReplacements > 0; 1587 NumGVNEqProp += NumReplacements; 1588 } 1589 1590 // Now try to deduce additional equalities from this one. For example, if 1591 // the known equality was "(A != B)" == "false" then it follows that A and B 1592 // are equal in the scope. Only boolean equalities with an explicit true or 1593 // false RHS are currently supported. 1594 if (!RHS->getType()->isIntegerTy(1)) 1595 // Not a boolean equality - bail out. 1596 continue; 1597 ConstantInt *CI = dyn_cast<ConstantInt>(RHS); 1598 if (!CI) 1599 // RHS neither 'true' nor 'false' - bail out. 1600 continue; 1601 // Whether RHS equals 'true'. Otherwise it equals 'false'. 1602 bool isKnownTrue = CI->isMinusOne(); 1603 bool isKnownFalse = !isKnownTrue; 1604 1605 // If "A && B" is known true then both A and B are known true. If "A || B" 1606 // is known false then both A and B are known false. 1607 Value *A, *B; 1608 if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || 1609 (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { 1610 Worklist.push_back(std::make_pair(A, RHS)); 1611 Worklist.push_back(std::make_pair(B, RHS)); 1612 continue; 1613 } 1614 1615 // If we are propagating an equality like "(A == B)" == "true" then also 1616 // propagate the equality A == B. When propagating a comparison such as 1617 // "(A >= B)" == "true", replace all instances of "A < B" with "false". 1618 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) { 1619 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); 1620 1621 // If "A == B" is known true, or "A != B" is known false, then replace 1622 // A with B everywhere in the scope. 1623 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || 1624 (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) 1625 Worklist.push_back(std::make_pair(Op0, Op1)); 1626 1627 // Handle the floating point versions of equality comparisons too. 1628 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) || 1629 (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) { 1630 1631 // Floating point -0.0 and 0.0 compare equal, so we can only 1632 // propagate values if we know that we have a constant and that 1633 // its value is non-zero. 1634 1635 // FIXME: We should do this optimization if 'no signed zeros' is 1636 // applicable via an instruction-level fast-math-flag or some other 1637 // indicator that relaxed FP semantics are being used. 1638 1639 if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero()) 1640 Worklist.push_back(std::make_pair(Op0, Op1)); 1641 } 1642 1643 // If "A >= B" is known true, replace "A < B" with false everywhere. 1644 CmpInst::Predicate NotPred = Cmp->getInversePredicate(); 1645 Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); 1646 // Since we don't have the instruction "A < B" immediately to hand, work 1647 // out the value number that it would have and use that to find an 1648 // appropriate instruction (if any). 1649 uint32_t NextNum = VN.getNextUnusedValueNumber(); 1650 uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1); 1651 // If the number we were assigned was brand new then there is no point in 1652 // looking for an instruction realizing it: there cannot be one! 1653 if (Num < NextNum) { 1654 Value *NotCmp = findLeader(Root.getEnd(), Num); 1655 if (NotCmp && isa<Instruction>(NotCmp)) { 1656 unsigned NumReplacements = 1657 DominatesByEdge 1658 ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root) 1659 : replaceDominatedUsesWith(NotCmp, NotVal, *DT, 1660 Root.getStart()); 1661 Changed |= NumReplacements > 0; 1662 NumGVNEqProp += NumReplacements; 1663 } 1664 } 1665 // Ensure that any instruction in scope that gets the "A < B" value number 1666 // is replaced with false. 1667 // The leader table only tracks basic blocks, not edges. Only add to if we 1668 // have the simple case where the edge dominates the end. 1669 if (RootDominatesEnd) 1670 addToLeaderTable(Num, NotVal, Root.getEnd()); 1671 1672 continue; 1673 } 1674 } 1675 1676 return Changed; 1677 } 1678 1679 /// When calculating availability, handle an instruction 1680 /// by inserting it into the appropriate sets 1681 bool GVN::processInstruction(Instruction *I) { 1682 // Ignore dbg info intrinsics. 1683 if (isa<DbgInfoIntrinsic>(I)) 1684 return false; 1685 1686 // If the instruction can be easily simplified then do so now in preference 1687 // to value numbering it. Value numbering often exposes redundancies, for 1688 // example if it determines that %y is equal to %x then the instruction 1689 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 1690 const DataLayout &DL = I->getModule()->getDataLayout(); 1691 if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { 1692 bool Changed = false; 1693 if (!I->use_empty()) { 1694 I->replaceAllUsesWith(V); 1695 Changed = true; 1696 } 1697 if (isInstructionTriviallyDead(I, TLI)) { 1698 markInstructionForDeletion(I); 1699 Changed = true; 1700 } 1701 if (Changed) { 1702 if (MD && V->getType()->isPtrOrPtrVectorTy()) 1703 MD->invalidateCachedPointerInfo(V); 1704 ++NumGVNSimpl; 1705 return true; 1706 } 1707 } 1708 1709 if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I)) 1710 if (IntrinsicI->getIntrinsicID() == Intrinsic::assume) 1711 return processAssumeIntrinsic(IntrinsicI); 1712 1713 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 1714 if (processLoad(LI)) 1715 return true; 1716 1717 unsigned Num = VN.lookupOrAdd(LI); 1718 addToLeaderTable(Num, LI, LI->getParent()); 1719 return false; 1720 } 1721 1722 // For conditional branches, we can perform simple conditional propagation on 1723 // the condition value itself. 1724 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 1725 if (!BI->isConditional()) 1726 return false; 1727 1728 if (isa<Constant>(BI->getCondition())) 1729 return processFoldableCondBr(BI); 1730 1731 Value *BranchCond = BI->getCondition(); 1732 BasicBlock *TrueSucc = BI->getSuccessor(0); 1733 BasicBlock *FalseSucc = BI->getSuccessor(1); 1734 // Avoid multiple edges early. 1735 if (TrueSucc == FalseSucc) 1736 return false; 1737 1738 BasicBlock *Parent = BI->getParent(); 1739 bool Changed = false; 1740 1741 Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); 1742 BasicBlockEdge TrueE(Parent, TrueSucc); 1743 Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true); 1744 1745 Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); 1746 BasicBlockEdge FalseE(Parent, FalseSucc); 1747 Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true); 1748 1749 return Changed; 1750 } 1751 1752 // For switches, propagate the case values into the case destinations. 1753 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { 1754 Value *SwitchCond = SI->getCondition(); 1755 BasicBlock *Parent = SI->getParent(); 1756 bool Changed = false; 1757 1758 // Remember how many outgoing edges there are to every successor. 1759 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; 1760 for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) 1761 ++SwitchEdges[SI->getSuccessor(i)]; 1762 1763 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); 1764 i != e; ++i) { 1765 BasicBlock *Dst = i->getCaseSuccessor(); 1766 // If there is only a single edge, propagate the case value into it. 1767 if (SwitchEdges.lookup(Dst) == 1) { 1768 BasicBlockEdge E(Parent, Dst); 1769 Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true); 1770 } 1771 } 1772 return Changed; 1773 } 1774 1775 // Instructions with void type don't return a value, so there's 1776 // no point in trying to find redundancies in them. 1777 if (I->getType()->isVoidTy()) 1778 return false; 1779 1780 uint32_t NextNum = VN.getNextUnusedValueNumber(); 1781 unsigned Num = VN.lookupOrAdd(I); 1782 1783 // Allocations are always uniquely numbered, so we can save time and memory 1784 // by fast failing them. 1785 if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { 1786 addToLeaderTable(Num, I, I->getParent()); 1787 return false; 1788 } 1789 1790 // If the number we were assigned was a brand new VN, then we don't 1791 // need to do a lookup to see if the number already exists 1792 // somewhere in the domtree: it can't! 1793 if (Num >= NextNum) { 1794 addToLeaderTable(Num, I, I->getParent()); 1795 return false; 1796 } 1797 1798 // Perform fast-path value-number based elimination of values inherited from 1799 // dominators. 1800 Value *Repl = findLeader(I->getParent(), Num); 1801 if (!Repl) { 1802 // Failure, just remember this instance for future use. 1803 addToLeaderTable(Num, I, I->getParent()); 1804 return false; 1805 } else if (Repl == I) { 1806 // If I was the result of a shortcut PRE, it might already be in the table 1807 // and the best replacement for itself. Nothing to do. 1808 return false; 1809 } 1810 1811 // Remove it! 1812 patchAndReplaceAllUsesWith(I, Repl); 1813 if (MD && Repl->getType()->isPtrOrPtrVectorTy()) 1814 MD->invalidateCachedPointerInfo(Repl); 1815 markInstructionForDeletion(I); 1816 return true; 1817 } 1818 1819 /// runOnFunction - This is the main transformation entry point for a function. 1820 bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, 1821 const TargetLibraryInfo &RunTLI, AAResults &RunAA, 1822 MemoryDependenceResults *RunMD, LoopInfo *LI, 1823 OptimizationRemarkEmitter *RunORE) { 1824 AC = &RunAC; 1825 DT = &RunDT; 1826 VN.setDomTree(DT); 1827 TLI = &RunTLI; 1828 VN.setAliasAnalysis(&RunAA); 1829 MD = RunMD; 1830 VN.setMemDep(MD); 1831 ORE = RunORE; 1832 1833 bool Changed = false; 1834 bool ShouldContinue = true; 1835 1836 // Merge unconditional branches, allowing PRE to catch more 1837 // optimization opportunities. 1838 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 1839 BasicBlock *BB = &*FI++; 1840 1841 bool removedBlock = MergeBlockIntoPredecessor(BB, DT, LI, MD); 1842 if (removedBlock) 1843 ++NumGVNBlocks; 1844 1845 Changed |= removedBlock; 1846 } 1847 1848 unsigned Iteration = 0; 1849 while (ShouldContinue) { 1850 DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); 1851 ShouldContinue = iterateOnFunction(F); 1852 Changed |= ShouldContinue; 1853 ++Iteration; 1854 } 1855 1856 if (EnablePRE) { 1857 // Fabricate val-num for dead-code in order to suppress assertion in 1858 // performPRE(). 1859 assignValNumForDeadCode(); 1860 bool PREChanged = true; 1861 while (PREChanged) { 1862 PREChanged = performPRE(F); 1863 Changed |= PREChanged; 1864 } 1865 } 1866 1867 // FIXME: Should perform GVN again after PRE does something. PRE can move 1868 // computations into blocks where they become fully redundant. Note that 1869 // we can't do this until PRE's critical edge splitting updates memdep. 1870 // Actually, when this happens, we should just fully integrate PRE into GVN. 1871 1872 cleanupGlobalSets(); 1873 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each 1874 // iteration. 1875 DeadBlocks.clear(); 1876 1877 return Changed; 1878 } 1879 1880 bool GVN::processBlock(BasicBlock *BB) { 1881 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function 1882 // (and incrementing BI before processing an instruction). 1883 assert(InstrsToErase.empty() && 1884 "We expect InstrsToErase to be empty across iterations"); 1885 if (DeadBlocks.count(BB)) 1886 return false; 1887 1888 // Clearing map before every BB because it can be used only for single BB. 1889 ReplaceWithConstMap.clear(); 1890 bool ChangedFunction = false; 1891 1892 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 1893 BI != BE;) { 1894 if (!ReplaceWithConstMap.empty()) 1895 ChangedFunction |= replaceOperandsWithConsts(&*BI); 1896 ChangedFunction |= processInstruction(&*BI); 1897 1898 if (InstrsToErase.empty()) { 1899 ++BI; 1900 continue; 1901 } 1902 1903 // If we need some instructions deleted, do it now. 1904 NumGVNInstr += InstrsToErase.size(); 1905 1906 // Avoid iterator invalidation. 1907 bool AtStart = BI == BB->begin(); 1908 if (!AtStart) 1909 --BI; 1910 1911 for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(), 1912 E = InstrsToErase.end(); I != E; ++I) { 1913 DEBUG(dbgs() << "GVN removed: " << **I << '\n'); 1914 if (MD) MD->removeInstruction(*I); 1915 DEBUG(verifyRemoved(*I)); 1916 (*I)->eraseFromParent(); 1917 } 1918 InstrsToErase.clear(); 1919 1920 if (AtStart) 1921 BI = BB->begin(); 1922 else 1923 ++BI; 1924 } 1925 1926 return ChangedFunction; 1927 } 1928 1929 // Instantiate an expression in a predecessor that lacked it. 1930 bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, 1931 unsigned int ValNo) { 1932 // Because we are going top-down through the block, all value numbers 1933 // will be available in the predecessor by the time we need them. Any 1934 // that weren't originally present will have been instantiated earlier 1935 // in this loop. 1936 bool success = true; 1937 for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) { 1938 Value *Op = Instr->getOperand(i); 1939 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 1940 continue; 1941 // This could be a newly inserted instruction, in which case, we won't 1942 // find a value number, and should give up before we hurt ourselves. 1943 // FIXME: Rewrite the infrastructure to let it easier to value number 1944 // and process newly inserted instructions. 1945 if (!VN.exists(Op)) { 1946 success = false; 1947 break; 1948 } 1949 if (Value *V = findLeader(Pred, VN.lookup(Op))) { 1950 Instr->setOperand(i, V); 1951 } else { 1952 success = false; 1953 break; 1954 } 1955 } 1956 1957 // Fail out if we encounter an operand that is not available in 1958 // the PRE predecessor. This is typically because of loads which 1959 // are not value numbered precisely. 1960 if (!success) 1961 return false; 1962 1963 Instr->insertBefore(Pred->getTerminator()); 1964 Instr->setName(Instr->getName() + ".pre"); 1965 Instr->setDebugLoc(Instr->getDebugLoc()); 1966 VN.add(Instr, ValNo); 1967 1968 // Update the availability map to include the new instruction. 1969 addToLeaderTable(ValNo, Instr, Pred); 1970 return true; 1971 } 1972 1973 bool GVN::performScalarPRE(Instruction *CurInst) { 1974 if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) || 1975 isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || 1976 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 1977 isa<DbgInfoIntrinsic>(CurInst)) 1978 return false; 1979 1980 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from 1981 // sinking the compare again, and it would force the code generator to 1982 // move the i1 from processor flags or predicate registers into a general 1983 // purpose register. 1984 if (isa<CmpInst>(CurInst)) 1985 return false; 1986 1987 // We don't currently value number ANY inline asm calls. 1988 if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) 1989 if (CallI->isInlineAsm()) 1990 return false; 1991 1992 uint32_t ValNo = VN.lookup(CurInst); 1993 1994 // Look for the predecessors for PRE opportunities. We're 1995 // only trying to solve the basic diamond case, where 1996 // a value is computed in the successor and one predecessor, 1997 // but not the other. We also explicitly disallow cases 1998 // where the successor is its own predecessor, because they're 1999 // more complicated to get right. 2000 unsigned NumWith = 0; 2001 unsigned NumWithout = 0; 2002 BasicBlock *PREPred = nullptr; 2003 BasicBlock *CurrentBlock = CurInst->getParent(); 2004 2005 SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap; 2006 for (BasicBlock *P : predecessors(CurrentBlock)) { 2007 // We're not interested in PRE where the block is its 2008 // own predecessor, or in blocks with predecessors 2009 // that are not reachable. 2010 if (P == CurrentBlock) { 2011 NumWithout = 2; 2012 break; 2013 } else if (!DT->isReachableFromEntry(P)) { 2014 NumWithout = 2; 2015 break; 2016 } 2017 2018 Value *predV = findLeader(P, ValNo); 2019 if (!predV) { 2020 predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); 2021 PREPred = P; 2022 ++NumWithout; 2023 } else if (predV == CurInst) { 2024 /* CurInst dominates this predecessor. */ 2025 NumWithout = 2; 2026 break; 2027 } else { 2028 predMap.push_back(std::make_pair(predV, P)); 2029 ++NumWith; 2030 } 2031 } 2032 2033 // Don't do PRE when it might increase code size, i.e. when 2034 // we would need to insert instructions in more than one pred. 2035 if (NumWithout > 1 || NumWith == 0) 2036 return false; 2037 2038 // We may have a case where all predecessors have the instruction, 2039 // and we just need to insert a phi node. Otherwise, perform 2040 // insertion. 2041 Instruction *PREInstr = nullptr; 2042 2043 if (NumWithout != 0) { 2044 // Don't do PRE across indirect branch. 2045 if (isa<IndirectBrInst>(PREPred->getTerminator())) 2046 return false; 2047 2048 // We can't do PRE safely on a critical edge, so instead we schedule 2049 // the edge to be split and perform the PRE the next time we iterate 2050 // on the function. 2051 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); 2052 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { 2053 toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); 2054 return false; 2055 } 2056 // We need to insert somewhere, so let's give it a shot 2057 PREInstr = CurInst->clone(); 2058 if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { 2059 // If we failed insertion, make sure we remove the instruction. 2060 DEBUG(verifyRemoved(PREInstr)); 2061 PREInstr->deleteValue(); 2062 return false; 2063 } 2064 } 2065 2066 // Either we should have filled in the PRE instruction, or we should 2067 // not have needed insertions. 2068 assert (PREInstr != nullptr || NumWithout == 0); 2069 2070 ++NumGVNPRE; 2071 2072 // Create a PHI to make the value available in this block. 2073 PHINode *Phi = 2074 PHINode::Create(CurInst->getType(), predMap.size(), 2075 CurInst->getName() + ".pre-phi", &CurrentBlock->front()); 2076 for (unsigned i = 0, e = predMap.size(); i != e; ++i) { 2077 if (Value *V = predMap[i].first) 2078 Phi->addIncoming(V, predMap[i].second); 2079 else 2080 Phi->addIncoming(PREInstr, PREPred); 2081 } 2082 2083 VN.add(Phi, ValNo); 2084 addToLeaderTable(ValNo, Phi, CurrentBlock); 2085 Phi->setDebugLoc(CurInst->getDebugLoc()); 2086 CurInst->replaceAllUsesWith(Phi); 2087 if (MD && Phi->getType()->isPtrOrPtrVectorTy()) 2088 MD->invalidateCachedPointerInfo(Phi); 2089 VN.erase(CurInst); 2090 removeFromLeaderTable(ValNo, CurInst, CurrentBlock); 2091 2092 DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); 2093 if (MD) 2094 MD->removeInstruction(CurInst); 2095 DEBUG(verifyRemoved(CurInst)); 2096 CurInst->eraseFromParent(); 2097 ++NumGVNInstr; 2098 2099 return true; 2100 } 2101 2102 /// Perform a purely local form of PRE that looks for diamond 2103 /// control flow patterns and attempts to perform simple PRE at the join point. 2104 bool GVN::performPRE(Function &F) { 2105 bool Changed = false; 2106 for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { 2107 // Nothing to PRE in the entry block. 2108 if (CurrentBlock == &F.getEntryBlock()) 2109 continue; 2110 2111 // Don't perform PRE on an EH pad. 2112 if (CurrentBlock->isEHPad()) 2113 continue; 2114 2115 for (BasicBlock::iterator BI = CurrentBlock->begin(), 2116 BE = CurrentBlock->end(); 2117 BI != BE;) { 2118 Instruction *CurInst = &*BI++; 2119 Changed |= performScalarPRE(CurInst); 2120 } 2121 } 2122 2123 if (splitCriticalEdges()) 2124 Changed = true; 2125 2126 return Changed; 2127 } 2128 2129 /// Split the critical edge connecting the given two blocks, and return 2130 /// the block inserted to the critical edge. 2131 BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { 2132 BasicBlock *BB = 2133 SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT)); 2134 if (MD) 2135 MD->invalidateCachedPredecessors(); 2136 return BB; 2137 } 2138 2139 /// Split critical edges found during the previous 2140 /// iteration that may enable further optimization. 2141 bool GVN::splitCriticalEdges() { 2142 if (toSplit.empty()) 2143 return false; 2144 do { 2145 std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); 2146 SplitCriticalEdge(Edge.first, Edge.second, 2147 CriticalEdgeSplittingOptions(DT)); 2148 } while (!toSplit.empty()); 2149 if (MD) MD->invalidateCachedPredecessors(); 2150 return true; 2151 } 2152 2153 /// Executes one iteration of GVN 2154 bool GVN::iterateOnFunction(Function &F) { 2155 cleanupGlobalSets(); 2156 2157 // Top-down walk of the dominator tree 2158 bool Changed = false; 2159 // Needed for value numbering with phi construction to work. 2160 // RPOT walks the graph in its constructor and will not be invalidated during 2161 // processBlock. 2162 ReversePostOrderTraversal<Function *> RPOT(&F); 2163 for (BasicBlock *BB : RPOT) 2164 Changed |= processBlock(BB); 2165 2166 return Changed; 2167 } 2168 2169 void GVN::cleanupGlobalSets() { 2170 VN.clear(); 2171 LeaderTable.clear(); 2172 TableAllocator.Reset(); 2173 } 2174 2175 /// Verify that the specified instruction does not occur in our 2176 /// internal data structures. 2177 void GVN::verifyRemoved(const Instruction *Inst) const { 2178 VN.verifyRemoved(Inst); 2179 2180 // Walk through the value number scope to make sure the instruction isn't 2181 // ferreted away in it. 2182 for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator 2183 I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { 2184 const LeaderTableEntry *Node = &I->second; 2185 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2186 2187 while (Node->Next) { 2188 Node = Node->Next; 2189 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 2190 } 2191 } 2192 } 2193 2194 /// BB is declared dead, which implied other blocks become dead as well. This 2195 /// function is to add all these blocks to "DeadBlocks". For the dead blocks' 2196 /// live successors, update their phi nodes by replacing the operands 2197 /// corresponding to dead blocks with UndefVal. 2198 void GVN::addDeadBlock(BasicBlock *BB) { 2199 SmallVector<BasicBlock *, 4> NewDead; 2200 SmallSetVector<BasicBlock *, 4> DF; 2201 2202 NewDead.push_back(BB); 2203 while (!NewDead.empty()) { 2204 BasicBlock *D = NewDead.pop_back_val(); 2205 if (DeadBlocks.count(D)) 2206 continue; 2207 2208 // All blocks dominated by D are dead. 2209 SmallVector<BasicBlock *, 8> Dom; 2210 DT->getDescendants(D, Dom); 2211 DeadBlocks.insert(Dom.begin(), Dom.end()); 2212 2213 // Figure out the dominance-frontier(D). 2214 for (BasicBlock *B : Dom) { 2215 for (BasicBlock *S : successors(B)) { 2216 if (DeadBlocks.count(S)) 2217 continue; 2218 2219 bool AllPredDead = true; 2220 for (BasicBlock *P : predecessors(S)) 2221 if (!DeadBlocks.count(P)) { 2222 AllPredDead = false; 2223 break; 2224 } 2225 2226 if (!AllPredDead) { 2227 // S could be proved dead later on. That is why we don't update phi 2228 // operands at this moment. 2229 DF.insert(S); 2230 } else { 2231 // While S is not dominated by D, it is dead by now. This could take 2232 // place if S already have a dead predecessor before D is declared 2233 // dead. 2234 NewDead.push_back(S); 2235 } 2236 } 2237 } 2238 } 2239 2240 // For the dead blocks' live successors, update their phi nodes by replacing 2241 // the operands corresponding to dead blocks with UndefVal. 2242 for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end(); 2243 I != E; I++) { 2244 BasicBlock *B = *I; 2245 if (DeadBlocks.count(B)) 2246 continue; 2247 2248 SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B)); 2249 for (BasicBlock *P : Preds) { 2250 if (!DeadBlocks.count(P)) 2251 continue; 2252 2253 if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) { 2254 if (BasicBlock *S = splitCriticalEdges(P, B)) 2255 DeadBlocks.insert(P = S); 2256 } 2257 2258 for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) { 2259 PHINode &Phi = cast<PHINode>(*II); 2260 Phi.setIncomingValue(Phi.getBasicBlockIndex(P), 2261 UndefValue::get(Phi.getType())); 2262 } 2263 } 2264 } 2265 } 2266 2267 // If the given branch is recognized as a foldable branch (i.e. conditional 2268 // branch with constant condition), it will perform following analyses and 2269 // transformation. 2270 // 1) If the dead out-coming edge is a critical-edge, split it. Let 2271 // R be the target of the dead out-coming edge. 2272 // 1) Identify the set of dead blocks implied by the branch's dead outcoming 2273 // edge. The result of this step will be {X| X is dominated by R} 2274 // 2) Identify those blocks which haves at least one dead predecessor. The 2275 // result of this step will be dominance-frontier(R). 2276 // 3) Update the PHIs in DF(R) by replacing the operands corresponding to 2277 // dead blocks with "UndefVal" in an hope these PHIs will optimized away. 2278 // 2279 // Return true iff *NEW* dead code are found. 2280 bool GVN::processFoldableCondBr(BranchInst *BI) { 2281 if (!BI || BI->isUnconditional()) 2282 return false; 2283 2284 // If a branch has two identical successors, we cannot declare either dead. 2285 if (BI->getSuccessor(0) == BI->getSuccessor(1)) 2286 return false; 2287 2288 ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); 2289 if (!Cond) 2290 return false; 2291 2292 BasicBlock *DeadRoot = 2293 Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0); 2294 if (DeadBlocks.count(DeadRoot)) 2295 return false; 2296 2297 if (!DeadRoot->getSinglePredecessor()) 2298 DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); 2299 2300 addDeadBlock(DeadRoot); 2301 return true; 2302 } 2303 2304 // performPRE() will trigger assert if it comes across an instruction without 2305 // associated val-num. As it normally has far more live instructions than dead 2306 // instructions, it makes more sense just to "fabricate" a val-number for the 2307 // dead code than checking if instruction involved is dead or not. 2308 void GVN::assignValNumForDeadCode() { 2309 for (BasicBlock *BB : DeadBlocks) { 2310 for (Instruction &Inst : *BB) { 2311 unsigned ValNum = VN.lookupOrAdd(&Inst); 2312 addToLeaderTable(ValNum, &Inst, BB); 2313 } 2314 } 2315 } 2316 2317 class llvm::gvn::GVNLegacyPass : public FunctionPass { 2318 public: 2319 static char ID; // Pass identification, replacement for typeid 2320 explicit GVNLegacyPass(bool NoLoads = false) 2321 : FunctionPass(ID), NoLoads(NoLoads) { 2322 initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry()); 2323 } 2324 2325 bool runOnFunction(Function &F) override { 2326 if (skipFunction(F)) 2327 return false; 2328 2329 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); 2330 2331 return Impl.runImpl( 2332 F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), 2333 getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 2334 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), 2335 getAnalysis<AAResultsWrapperPass>().getAAResults(), 2336 NoLoads ? nullptr 2337 : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(), 2338 LIWP ? &LIWP->getLoopInfo() : nullptr, 2339 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE()); 2340 } 2341 2342 void getAnalysisUsage(AnalysisUsage &AU) const override { 2343 AU.addRequired<AssumptionCacheTracker>(); 2344 AU.addRequired<DominatorTreeWrapperPass>(); 2345 AU.addRequired<TargetLibraryInfoWrapperPass>(); 2346 if (!NoLoads) 2347 AU.addRequired<MemoryDependenceWrapperPass>(); 2348 AU.addRequired<AAResultsWrapperPass>(); 2349 2350 AU.addPreserved<DominatorTreeWrapperPass>(); 2351 AU.addPreserved<GlobalsAAWrapperPass>(); 2352 AU.addPreserved<TargetLibraryInfoWrapperPass>(); 2353 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 2354 } 2355 2356 private: 2357 bool NoLoads; 2358 GVN Impl; 2359 }; 2360 2361 char GVNLegacyPass::ID = 0; 2362 2363 // The public interface to this file... 2364 FunctionPass *llvm::createGVNPass(bool NoLoads) { 2365 return new GVNLegacyPass(NoLoads); 2366 } 2367 2368 INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) 2369 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 2370 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 2371 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2372 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 2373 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 2374 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 2375 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 2376 INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) 2377