1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a trivial dead store elimination that only considers 10 // basic-block local redundant stores. 11 // 12 // FIXME: This should eventually be extended to be a post-dominator tree 13 // traversal. Doing so would be pretty trivial. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/MapVector.h" 21 #include "llvm/ADT/PostOrderIterator.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/CaptureTracking.h" 29 #include "llvm/Analysis/GlobalsModRef.h" 30 #include "llvm/Analysis/MemoryBuiltins.h" 31 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 32 #include "llvm/Analysis/MemoryLocation.h" 33 #include "llvm/Analysis/MemorySSA.h" 34 #include "llvm/Analysis/MemorySSAUpdater.h" 35 #include "llvm/Analysis/PostDominators.h" 36 #include "llvm/Analysis/TargetLibraryInfo.h" 37 #include "llvm/Analysis/ValueTracking.h" 38 #include "llvm/IR/Argument.h" 39 #include "llvm/IR/BasicBlock.h" 40 #include "llvm/IR/Constant.h" 41 #include "llvm/IR/Constants.h" 42 #include "llvm/IR/DataLayout.h" 43 #include "llvm/IR/Dominators.h" 44 #include "llvm/IR/Function.h" 45 #include "llvm/IR/InstIterator.h" 46 #include "llvm/IR/InstrTypes.h" 47 #include "llvm/IR/Instruction.h" 48 #include "llvm/IR/Instructions.h" 49 #include "llvm/IR/IntrinsicInst.h" 50 #include "llvm/IR/Intrinsics.h" 51 #include "llvm/IR/LLVMContext.h" 52 #include "llvm/IR/Module.h" 53 #include "llvm/IR/PassManager.h" 54 #include "llvm/IR/Value.h" 55 #include "llvm/InitializePasses.h" 56 #include "llvm/Pass.h" 57 #include "llvm/Support/Casting.h" 58 #include "llvm/Support/CommandLine.h" 59 #include "llvm/Support/Debug.h" 60 #include "llvm/Support/DebugCounter.h" 61 #include "llvm/Support/ErrorHandling.h" 62 #include "llvm/Support/MathExtras.h" 63 #include "llvm/Support/raw_ostream.h" 64 #include "llvm/Transforms/Scalar.h" 65 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 66 #include "llvm/Transforms/Utils/Local.h" 67 #include <algorithm> 68 #include <cassert> 69 #include <cstddef> 70 #include <cstdint> 71 #include <iterator> 72 #include <map> 73 #include <utility> 74 75 using namespace llvm; 76 77 #define DEBUG_TYPE "dse" 78 79 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE"); 80 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 81 STATISTIC(NumFastStores, "Number of stores deleted"); 82 STATISTIC(NumFastOther, "Number of other instrs removed"); 83 STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); 84 STATISTIC(NumModifiedStores, "Number of stores modified"); 85 STATISTIC(NumNoopStores, "Number of noop stores deleted"); 86 STATISTIC(NumCFGChecks, "Number of stores modified"); 87 STATISTIC(NumCFGTries, "Number of stores modified"); 88 STATISTIC(NumCFGSuccess, "Number of stores modified"); 89 90 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", 91 "Controls which MemoryDefs are eliminated."); 92 93 static cl::opt<bool> 94 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", 95 cl::init(true), cl::Hidden, 96 cl::desc("Enable partial-overwrite tracking in DSE")); 97 98 static cl::opt<bool> 99 EnablePartialStoreMerging("enable-dse-partial-store-merging", 100 cl::init(true), cl::Hidden, 101 cl::desc("Enable partial store merging in DSE")); 102 103 static cl::opt<bool> 104 EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, 105 cl::desc("Use the new MemorySSA-backed DSE.")); 106 107 static cl::opt<unsigned> 108 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, 109 cl::desc("The number of memory instructions to scan for " 110 "dead store elimination (default = 100)")); 111 112 static cl::opt<unsigned> MemorySSADefsPerBlockLimit( 113 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, 114 cl::desc("The number of MemoryDefs we consider as candidates to eliminated " 115 "other stores per basic block (default = 5000)")); 116 117 static cl::opt<unsigned> MemorySSAPathCheckLimit( 118 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, 119 cl::desc("The maximum number of blocks to check when trying to prove that " 120 "all paths to an exit go through a killing block (default = 50)")); 121 122 //===----------------------------------------------------------------------===// 123 // Helper functions 124 //===----------------------------------------------------------------------===// 125 using OverlapIntervalsTy = std::map<int64_t, int64_t>; 126 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; 127 128 /// Delete this instruction. Before we do, go through and zero out all the 129 /// operands of this instruction. If any of them become dead, delete them and 130 /// the computation tree that feeds them. 131 /// If ValueSet is non-null, remove any deleted instructions from it as well. 132 static void 133 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI, 134 MemoryDependenceResults &MD, const TargetLibraryInfo &TLI, 135 InstOverlapIntervalsTy &IOL, 136 MapVector<Instruction *, bool> &ThrowableInst, 137 SmallSetVector<const Value *, 16> *ValueSet = nullptr) { 138 SmallVector<Instruction*, 32> NowDeadInsts; 139 140 NowDeadInsts.push_back(I); 141 --NumFastOther; 142 143 // Keeping the iterator straight is a pain, so we let this routine tell the 144 // caller what the next instruction is after we're done mucking about. 145 BasicBlock::iterator NewIter = *BBI; 146 147 // Before we touch this instruction, remove it from memdep! 148 do { 149 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 150 // Mark the DeadInst as dead in the list of throwable instructions. 151 auto It = ThrowableInst.find(DeadInst); 152 if (It != ThrowableInst.end()) 153 ThrowableInst[It->first] = false; 154 ++NumFastOther; 155 156 // Try to preserve debug information attached to the dead instruction. 157 salvageDebugInfo(*DeadInst); 158 salvageKnowledge(DeadInst); 159 160 // This instruction is dead, zap it, in stages. Start by removing it from 161 // MemDep, which needs to know the operands and needs it to be in the 162 // function. 163 MD.removeInstruction(DeadInst); 164 165 for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 166 Value *Op = DeadInst->getOperand(op); 167 DeadInst->setOperand(op, nullptr); 168 169 // If this operand just became dead, add it to the NowDeadInsts list. 170 if (!Op->use_empty()) continue; 171 172 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 173 if (isInstructionTriviallyDead(OpI, &TLI)) 174 NowDeadInsts.push_back(OpI); 175 } 176 177 if (ValueSet) ValueSet->remove(DeadInst); 178 IOL.erase(DeadInst); 179 180 if (NewIter == DeadInst->getIterator()) 181 NewIter = DeadInst->eraseFromParent(); 182 else 183 DeadInst->eraseFromParent(); 184 } while (!NowDeadInsts.empty()); 185 *BBI = NewIter; 186 // Pop dead entries from back of ThrowableInst till we find an alive entry. 187 while (!ThrowableInst.empty() && !ThrowableInst.back().second) 188 ThrowableInst.pop_back(); 189 } 190 191 /// Does this instruction write some memory? This only returns true for things 192 /// that we can analyze with other helpers below. 193 static bool hasAnalyzableMemoryWrite(Instruction *I, 194 const TargetLibraryInfo &TLI) { 195 if (isa<StoreInst>(I)) 196 return true; 197 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 198 switch (II->getIntrinsicID()) { 199 default: 200 return false; 201 case Intrinsic::memset: 202 case Intrinsic::memmove: 203 case Intrinsic::memcpy: 204 case Intrinsic::memcpy_element_unordered_atomic: 205 case Intrinsic::memmove_element_unordered_atomic: 206 case Intrinsic::memset_element_unordered_atomic: 207 case Intrinsic::init_trampoline: 208 case Intrinsic::lifetime_end: 209 return true; 210 } 211 } 212 if (auto *CB = dyn_cast<CallBase>(I)) { 213 if (Function *F = CB->getCalledFunction()) { 214 LibFunc LF; 215 if (TLI.getLibFunc(*F, LF) && TLI.has(LF)) { 216 switch (LF) { 217 case LibFunc_strcpy: 218 case LibFunc_strncpy: 219 case LibFunc_strcat: 220 case LibFunc_strncat: 221 return true; 222 default: 223 return false; 224 } 225 } 226 } 227 } 228 return false; 229 } 230 231 /// Return a Location stored to by the specified instruction. If isRemovable 232 /// returns true, this function and getLocForRead completely describe the memory 233 /// operations for this instruction. 234 static MemoryLocation getLocForWrite(Instruction *Inst) { 235 236 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 237 return MemoryLocation::get(SI); 238 239 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) { 240 // memcpy/memmove/memset. 241 MemoryLocation Loc = MemoryLocation::getForDest(MI); 242 return Loc; 243 } 244 245 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 246 switch (II->getIntrinsicID()) { 247 default: 248 return MemoryLocation(); // Unhandled intrinsic. 249 case Intrinsic::init_trampoline: 250 return MemoryLocation(II->getArgOperand(0)); 251 case Intrinsic::lifetime_end: { 252 uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); 253 return MemoryLocation(II->getArgOperand(1), Len); 254 } 255 } 256 } 257 if (auto *CB = dyn_cast<CallBase>(Inst)) 258 // All the supported TLI functions so far happen to have dest as their 259 // first argument. 260 return MemoryLocation(CB->getArgOperand(0)); 261 return MemoryLocation(); 262 } 263 264 /// Return the location read by the specified "hasAnalyzableMemoryWrite" 265 /// instruction if any. 266 static MemoryLocation getLocForRead(Instruction *Inst, 267 const TargetLibraryInfo &TLI) { 268 assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); 269 270 // The only instructions that both read and write are the mem transfer 271 // instructions (memcpy/memmove). 272 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst)) 273 return MemoryLocation::getForSource(MTI); 274 return MemoryLocation(); 275 } 276 277 /// If the value of this instruction and the memory it writes to is unused, may 278 /// we delete this instruction? 279 static bool isRemovable(Instruction *I) { 280 // Don't remove volatile/atomic stores. 281 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 282 return SI->isUnordered(); 283 284 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 285 switch (II->getIntrinsicID()) { 286 default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); 287 case Intrinsic::lifetime_end: 288 // Never remove dead lifetime_end's, e.g. because it is followed by a 289 // free. 290 return false; 291 case Intrinsic::init_trampoline: 292 // Always safe to remove init_trampoline. 293 return true; 294 case Intrinsic::memset: 295 case Intrinsic::memmove: 296 case Intrinsic::memcpy: 297 // Don't remove volatile memory intrinsics. 298 return !cast<MemIntrinsic>(II)->isVolatile(); 299 case Intrinsic::memcpy_element_unordered_atomic: 300 case Intrinsic::memmove_element_unordered_atomic: 301 case Intrinsic::memset_element_unordered_atomic: 302 return true; 303 } 304 } 305 306 // note: only get here for calls with analyzable writes - i.e. libcalls 307 if (auto *CB = dyn_cast<CallBase>(I)) 308 return CB->use_empty(); 309 310 return false; 311 } 312 313 /// Returns true if the end of this instruction can be safely shortened in 314 /// length. 315 static bool isShortenableAtTheEnd(Instruction *I) { 316 // Don't shorten stores for now 317 if (isa<StoreInst>(I)) 318 return false; 319 320 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 321 switch (II->getIntrinsicID()) { 322 default: return false; 323 case Intrinsic::memset: 324 case Intrinsic::memcpy: 325 case Intrinsic::memcpy_element_unordered_atomic: 326 case Intrinsic::memset_element_unordered_atomic: 327 // Do shorten memory intrinsics. 328 // FIXME: Add memmove if it's also safe to transform. 329 return true; 330 } 331 } 332 333 // Don't shorten libcalls calls for now. 334 335 return false; 336 } 337 338 /// Returns true if the beginning of this instruction can be safely shortened 339 /// in length. 340 static bool isShortenableAtTheBeginning(Instruction *I) { 341 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be 342 // easily done by offsetting the source address. 343 return isa<AnyMemSetInst>(I); 344 } 345 346 /// Return the pointer that is being written to. 347 static Value *getStoredPointerOperand(Instruction *I) { 348 //TODO: factor this to reuse getLocForWrite 349 MemoryLocation Loc = getLocForWrite(I); 350 assert(Loc.Ptr && 351 "unable to find pointer written for analyzable instruction?"); 352 // TODO: most APIs don't expect const Value * 353 return const_cast<Value*>(Loc.Ptr); 354 } 355 356 static uint64_t getPointerSize(const Value *V, const DataLayout &DL, 357 const TargetLibraryInfo &TLI, 358 const Function *F) { 359 uint64_t Size; 360 ObjectSizeOpts Opts; 361 Opts.NullIsUnknownSize = NullPointerIsDefined(F); 362 363 if (getObjectSize(V, Size, DL, &TLI, Opts)) 364 return Size; 365 return MemoryLocation::UnknownSize; 366 } 367 368 namespace { 369 370 enum OverwriteResult { 371 OW_Begin, 372 OW_Complete, 373 OW_End, 374 OW_PartialEarlierWithFullLater, 375 OW_Unknown 376 }; 377 378 } // end anonymous namespace 379 380 /// Return 'OW_Complete' if a store to the 'Later' location completely 381 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the 382 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the 383 /// beginning of the 'Earlier' location is overwritten by 'Later'. 384 /// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was 385 /// overwritten by a latter (smaller) store which doesn't write outside the big 386 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. 387 static OverwriteResult isOverwrite(const MemoryLocation &Later, 388 const MemoryLocation &Earlier, 389 const DataLayout &DL, 390 const TargetLibraryInfo &TLI, 391 int64_t &EarlierOff, int64_t &LaterOff, 392 Instruction *DepWrite, 393 InstOverlapIntervalsTy &IOL, 394 AliasAnalysis &AA, 395 const Function *F) { 396 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll 397 // get imprecise values here, though (except for unknown sizes). 398 if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) 399 return OW_Unknown; 400 401 const uint64_t LaterSize = Later.Size.getValue(); 402 const uint64_t EarlierSize = Earlier.Size.getValue(); 403 404 const Value *P1 = Earlier.Ptr->stripPointerCasts(); 405 const Value *P2 = Later.Ptr->stripPointerCasts(); 406 407 // If the start pointers are the same, we just have to compare sizes to see if 408 // the later store was larger than the earlier store. 409 if (P1 == P2 || AA.isMustAlias(P1, P2)) { 410 // Make sure that the Later size is >= the Earlier size. 411 if (LaterSize >= EarlierSize) 412 return OW_Complete; 413 } 414 415 // Check to see if the later store is to the entire object (either a global, 416 // an alloca, or a byval/inalloca argument). If so, then it clearly 417 // overwrites any other store to the same object. 418 const Value *UO1 = GetUnderlyingObject(P1, DL), 419 *UO2 = GetUnderlyingObject(P2, DL); 420 421 // If we can't resolve the same pointers to the same object, then we can't 422 // analyze them at all. 423 if (UO1 != UO2) 424 return OW_Unknown; 425 426 // If the "Later" store is to a recognizable object, get its size. 427 uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F); 428 if (ObjectSize != MemoryLocation::UnknownSize) 429 if (ObjectSize == LaterSize && ObjectSize >= EarlierSize) 430 return OW_Complete; 431 432 // Okay, we have stores to two completely different pointers. Try to 433 // decompose the pointer into a "base + constant_offset" form. If the base 434 // pointers are equal, then we can reason about the two stores. 435 EarlierOff = 0; 436 LaterOff = 0; 437 const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); 438 const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); 439 440 // If the base pointers still differ, we have two completely different stores. 441 if (BP1 != BP2) 442 return OW_Unknown; 443 444 // The later store completely overlaps the earlier store if: 445 // 446 // 1. Both start at the same offset and the later one's size is greater than 447 // or equal to the earlier one's, or 448 // 449 // |--earlier--| 450 // |-- later --| 451 // 452 // 2. The earlier store has an offset greater than the later offset, but which 453 // still lies completely within the later store. 454 // 455 // |--earlier--| 456 // |----- later ------| 457 // 458 // We have to be careful here as *Off is signed while *.Size is unsigned. 459 if (EarlierOff >= LaterOff && 460 LaterSize >= EarlierSize && 461 uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) 462 return OW_Complete; 463 464 // We may now overlap, although the overlap is not complete. There might also 465 // be other incomplete overlaps, and together, they might cover the complete 466 // earlier write. 467 // Note: The correctness of this logic depends on the fact that this function 468 // is not even called providing DepWrite when there are any intervening reads. 469 if (EnablePartialOverwriteTracking && 470 LaterOff < int64_t(EarlierOff + EarlierSize) && 471 int64_t(LaterOff + LaterSize) >= EarlierOff) { 472 473 // Insert our part of the overlap into the map. 474 auto &IM = IOL[DepWrite]; 475 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff 476 << ", " << int64_t(EarlierOff + EarlierSize) 477 << ") Later [" << LaterOff << ", " 478 << int64_t(LaterOff + LaterSize) << ")\n"); 479 480 // Make sure that we only insert non-overlapping intervals and combine 481 // adjacent intervals. The intervals are stored in the map with the ending 482 // offset as the key (in the half-open sense) and the starting offset as 483 // the value. 484 int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize; 485 486 // Find any intervals ending at, or after, LaterIntStart which start 487 // before LaterIntEnd. 488 auto ILI = IM.lower_bound(LaterIntStart); 489 if (ILI != IM.end() && ILI->second <= LaterIntEnd) { 490 // This existing interval is overlapped with the current store somewhere 491 // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing 492 // intervals and adjusting our start and end. 493 LaterIntStart = std::min(LaterIntStart, ILI->second); 494 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 495 ILI = IM.erase(ILI); 496 497 // Continue erasing and adjusting our end in case other previous 498 // intervals are also overlapped with the current store. 499 // 500 // |--- ealier 1 ---| |--- ealier 2 ---| 501 // |------- later---------| 502 // 503 while (ILI != IM.end() && ILI->second <= LaterIntEnd) { 504 assert(ILI->second > LaterIntStart && "Unexpected interval"); 505 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 506 ILI = IM.erase(ILI); 507 } 508 } 509 510 IM[LaterIntEnd] = LaterIntStart; 511 512 ILI = IM.begin(); 513 if (ILI->second <= EarlierOff && 514 ILI->first >= int64_t(EarlierOff + EarlierSize)) { 515 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" 516 << EarlierOff << ", " 517 << int64_t(EarlierOff + EarlierSize) 518 << ") Composite Later [" << ILI->second << ", " 519 << ILI->first << ")\n"); 520 ++NumCompletePartials; 521 return OW_Complete; 522 } 523 } 524 525 // Check for an earlier store which writes to all the memory locations that 526 // the later store writes to. 527 if (EnablePartialStoreMerging && LaterOff >= EarlierOff && 528 int64_t(EarlierOff + EarlierSize) > LaterOff && 529 uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) { 530 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" 531 << EarlierOff << ", " 532 << int64_t(EarlierOff + EarlierSize) 533 << ") by a later store [" << LaterOff << ", " 534 << int64_t(LaterOff + LaterSize) << ")\n"); 535 // TODO: Maybe come up with a better name? 536 return OW_PartialEarlierWithFullLater; 537 } 538 539 // Another interesting case is if the later store overwrites the end of the 540 // earlier store. 541 // 542 // |--earlier--| 543 // |-- later --| 544 // 545 // In this case we may want to trim the size of earlier to avoid generating 546 // writes to addresses which will definitely be overwritten later 547 if (!EnablePartialOverwriteTracking && 548 (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) && 549 int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize))) 550 return OW_End; 551 552 // Finally, we also need to check if the later store overwrites the beginning 553 // of the earlier store. 554 // 555 // |--earlier--| 556 // |-- later --| 557 // 558 // In this case we may want to move the destination address and trim the size 559 // of earlier to avoid generating writes to addresses which will definitely 560 // be overwritten later. 561 if (!EnablePartialOverwriteTracking && 562 (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) { 563 assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) && 564 "Expect to be handled as OW_Complete"); 565 return OW_Begin; 566 } 567 // Otherwise, they don't completely overlap. 568 return OW_Unknown; 569 } 570 571 /// If 'Inst' might be a self read (i.e. a noop copy of a 572 /// memory region into an identical pointer) then it doesn't actually make its 573 /// input dead in the traditional sense. Consider this case: 574 /// 575 /// memmove(A <- B) 576 /// memmove(A <- A) 577 /// 578 /// In this case, the second store to A does not make the first store to A dead. 579 /// The usual situation isn't an explicit A<-A store like this (which can be 580 /// trivially removed) but a case where two pointers may alias. 581 /// 582 /// This function detects when it is unsafe to remove a dependent instruction 583 /// because the DSE inducing instruction may be a self-read. 584 static bool isPossibleSelfRead(Instruction *Inst, 585 const MemoryLocation &InstStoreLoc, 586 Instruction *DepWrite, 587 const TargetLibraryInfo &TLI, 588 AliasAnalysis &AA) { 589 // Self reads can only happen for instructions that read memory. Get the 590 // location read. 591 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 592 if (!InstReadLoc.Ptr) 593 return false; // Not a reading instruction. 594 595 // If the read and written loc obviously don't alias, it isn't a read. 596 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) 597 return false; 598 599 if (isa<AnyMemCpyInst>(Inst)) { 600 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) 601 // but in practice memcpy(A <- B) either means that A and B are disjoint or 602 // are equal (i.e. there are not partial overlaps). Given that, if we have: 603 // 604 // memcpy/memmove(A <- B) // DepWrite 605 // memcpy(A <- B) // Inst 606 // 607 // with Inst reading/writing a >= size than DepWrite, we can reason as 608 // follows: 609 // 610 // - If A == B then both the copies are no-ops, so the DepWrite can be 611 // removed. 612 // - If A != B then A and B are disjoint locations in Inst. Since 613 // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. 614 // Therefore DepWrite can be removed. 615 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 616 617 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 618 return false; 619 } 620 621 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 622 // then it can't be considered dead. 623 return true; 624 } 625 626 /// Returns true if the memory which is accessed by the second instruction is not 627 /// modified between the first and the second instruction. 628 /// Precondition: Second instruction must be dominated by the first 629 /// instruction. 630 static bool memoryIsNotModifiedBetween(Instruction *FirstI, 631 Instruction *SecondI, 632 AliasAnalysis *AA, 633 const DataLayout &DL, 634 DominatorTree *DT) { 635 // Do a backwards scan through the CFG from SecondI to FirstI. Look for 636 // instructions which can modify the memory location accessed by SecondI. 637 // 638 // While doing the walk keep track of the address to check. It might be 639 // different in different basic blocks due to PHI translation. 640 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>; 641 SmallVector<BlockAddressPair, 16> WorkList; 642 // Keep track of the address we visited each block with. Bail out if we 643 // visit a block with different addresses. 644 DenseMap<BasicBlock *, Value *> Visited; 645 646 BasicBlock::iterator FirstBBI(FirstI); 647 ++FirstBBI; 648 BasicBlock::iterator SecondBBI(SecondI); 649 BasicBlock *FirstBB = FirstI->getParent(); 650 BasicBlock *SecondBB = SecondI->getParent(); 651 MemoryLocation MemLoc = MemoryLocation::get(SecondI); 652 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); 653 654 // Start checking the SecondBB. 655 WorkList.push_back( 656 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr))); 657 bool isFirstBlock = true; 658 659 // Check all blocks going backward until we reach the FirstBB. 660 while (!WorkList.empty()) { 661 BlockAddressPair Current = WorkList.pop_back_val(); 662 BasicBlock *B = Current.first; 663 PHITransAddr &Addr = Current.second; 664 Value *Ptr = Addr.getAddr(); 665 666 // Ignore instructions before FirstI if this is the FirstBB. 667 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); 668 669 BasicBlock::iterator EI; 670 if (isFirstBlock) { 671 // Ignore instructions after SecondI if this is the first visit of SecondBB. 672 assert(B == SecondBB && "first block is not the store block"); 673 EI = SecondBBI; 674 isFirstBlock = false; 675 } else { 676 // It's not SecondBB or (in case of a loop) the second visit of SecondBB. 677 // In this case we also have to look at instructions after SecondI. 678 EI = B->end(); 679 } 680 for (; BI != EI; ++BI) { 681 Instruction *I = &*BI; 682 if (I->mayWriteToMemory() && I != SecondI) 683 if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr)))) 684 return false; 685 } 686 if (B != FirstBB) { 687 assert(B != &FirstBB->getParent()->getEntryBlock() && 688 "Should not hit the entry block because SI must be dominated by LI"); 689 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { 690 PHITransAddr PredAddr = Addr; 691 if (PredAddr.NeedsPHITranslationFromBlock(B)) { 692 if (!PredAddr.IsPotentiallyPHITranslatable()) 693 return false; 694 if (PredAddr.PHITranslateValue(B, *PredI, DT, false)) 695 return false; 696 } 697 Value *TranslatedPtr = PredAddr.getAddr(); 698 auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr)); 699 if (!Inserted.second) { 700 // We already visited this block before. If it was with a different 701 // address - bail out! 702 if (TranslatedPtr != Inserted.first->second) 703 return false; 704 // ... otherwise just skip it. 705 continue; 706 } 707 WorkList.push_back(std::make_pair(*PredI, PredAddr)); 708 } 709 } 710 } 711 return true; 712 } 713 714 /// Find all blocks that will unconditionally lead to the block BB and append 715 /// them to F. 716 static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, 717 BasicBlock *BB, DominatorTree *DT) { 718 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { 719 BasicBlock *Pred = *I; 720 if (Pred == BB) continue; 721 Instruction *PredTI = Pred->getTerminator(); 722 if (PredTI->getNumSuccessors() != 1) 723 continue; 724 725 if (DT->isReachableFromEntry(Pred)) 726 Blocks.push_back(Pred); 727 } 728 } 729 730 /// Handle frees of entire structures whose dependency is a store 731 /// to a field of that structure. 732 static bool handleFree(CallInst *F, AliasAnalysis *AA, 733 MemoryDependenceResults *MD, DominatorTree *DT, 734 const TargetLibraryInfo *TLI, 735 InstOverlapIntervalsTy &IOL, 736 MapVector<Instruction *, bool> &ThrowableInst) { 737 bool MadeChange = false; 738 739 MemoryLocation Loc = MemoryLocation(F->getOperand(0)); 740 SmallVector<BasicBlock *, 16> Blocks; 741 Blocks.push_back(F->getParent()); 742 const DataLayout &DL = F->getModule()->getDataLayout(); 743 744 while (!Blocks.empty()) { 745 BasicBlock *BB = Blocks.pop_back_val(); 746 Instruction *InstPt = BB->getTerminator(); 747 if (BB == F->getParent()) InstPt = F; 748 749 MemDepResult Dep = 750 MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); 751 while (Dep.isDef() || Dep.isClobber()) { 752 Instruction *Dependency = Dep.getInst(); 753 if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || 754 !isRemovable(Dependency)) 755 break; 756 757 Value *DepPointer = 758 GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); 759 760 // Check for aliasing. 761 if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) 762 break; 763 764 LLVM_DEBUG( 765 dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " 766 << *Dependency << '\n'); 767 768 // DCE instructions only used to calculate that store. 769 BasicBlock::iterator BBI(Dependency); 770 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, 771 ThrowableInst); 772 ++NumFastStores; 773 MadeChange = true; 774 775 // Inst's old Dependency is now deleted. Compute the next dependency, 776 // which may also be dead, as in 777 // s[0] = 0; 778 // s[1] = 0; // This has just been deleted. 779 // free(s); 780 Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB); 781 } 782 783 if (Dep.isNonLocal()) 784 findUnconditionalPreds(Blocks, BB, DT); 785 } 786 787 return MadeChange; 788 } 789 790 /// Check to see if the specified location may alias any of the stack objects in 791 /// the DeadStackObjects set. If so, they become live because the location is 792 /// being loaded. 793 static void removeAccessedObjects(const MemoryLocation &LoadedLoc, 794 SmallSetVector<const Value *, 16> &DeadStackObjects, 795 const DataLayout &DL, AliasAnalysis *AA, 796 const TargetLibraryInfo *TLI, 797 const Function *F) { 798 const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); 799 800 // A constant can't be in the dead pointer set. 801 if (isa<Constant>(UnderlyingPointer)) 802 return; 803 804 // If the kill pointer can be easily reduced to an alloca, don't bother doing 805 // extraneous AA queries. 806 if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { 807 DeadStackObjects.remove(UnderlyingPointer); 808 return; 809 } 810 811 // Remove objects that could alias LoadedLoc. 812 DeadStackObjects.remove_if([&](const Value *I) { 813 // See if the loaded location could alias the stack location. 814 MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F)); 815 return !AA->isNoAlias(StackLoc, LoadedLoc); 816 }); 817 } 818 819 /// Remove dead stores to stack-allocated locations in the function end block. 820 /// Ex: 821 /// %A = alloca i32 822 /// ... 823 /// store i32 1, i32* %A 824 /// ret void 825 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, 826 MemoryDependenceResults *MD, 827 const TargetLibraryInfo *TLI, 828 InstOverlapIntervalsTy &IOL, 829 MapVector<Instruction *, bool> &ThrowableInst) { 830 bool MadeChange = false; 831 832 // Keep track of all of the stack objects that are dead at the end of the 833 // function. 834 SmallSetVector<const Value*, 16> DeadStackObjects; 835 836 // Find all of the alloca'd pointers in the entry block. 837 BasicBlock &Entry = BB.getParent()->front(); 838 for (Instruction &I : Entry) { 839 if (isa<AllocaInst>(&I)) 840 DeadStackObjects.insert(&I); 841 842 // Okay, so these are dead heap objects, but if the pointer never escapes 843 // then it's leaked by this function anyways. 844 else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) 845 DeadStackObjects.insert(&I); 846 } 847 848 // Treat byval or inalloca arguments the same, stores to them are dead at the 849 // end of the function. 850 for (Argument &AI : BB.getParent()->args()) 851 if (AI.hasPassPointeeByValueAttr()) 852 DeadStackObjects.insert(&AI); 853 854 const DataLayout &DL = BB.getModule()->getDataLayout(); 855 856 // Scan the basic block backwards 857 for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ 858 --BBI; 859 860 // If we find a store, check to see if it points into a dead stack value. 861 if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { 862 // See through pointer-to-pointer bitcasts 863 SmallVector<const Value *, 4> Pointers; 864 GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); 865 866 // Stores to stack values are valid candidates for removal. 867 bool AllDead = true; 868 for (const Value *Pointer : Pointers) 869 if (!DeadStackObjects.count(Pointer)) { 870 AllDead = false; 871 break; 872 } 873 874 if (AllDead) { 875 Instruction *Dead = &*BBI; 876 877 LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " 878 << *Dead << "\n Objects: "; 879 for (SmallVectorImpl<const Value *>::iterator I = 880 Pointers.begin(), 881 E = Pointers.end(); 882 I != E; ++I) { 883 dbgs() << **I; 884 if (std::next(I) != E) 885 dbgs() << ", "; 886 } dbgs() 887 << '\n'); 888 889 // DCE instructions only used to calculate that store. 890 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, ThrowableInst, 891 &DeadStackObjects); 892 ++NumFastStores; 893 MadeChange = true; 894 continue; 895 } 896 } 897 898 // Remove any dead non-memory-mutating instructions. 899 if (isInstructionTriviallyDead(&*BBI, TLI)) { 900 LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " 901 << *&*BBI << '\n'); 902 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, ThrowableInst, 903 &DeadStackObjects); 904 ++NumFastOther; 905 MadeChange = true; 906 continue; 907 } 908 909 if (isa<AllocaInst>(BBI)) { 910 // Remove allocas from the list of dead stack objects; there can't be 911 // any references before the definition. 912 DeadStackObjects.remove(&*BBI); 913 continue; 914 } 915 916 if (auto *Call = dyn_cast<CallBase>(&*BBI)) { 917 // Remove allocation function calls from the list of dead stack objects; 918 // there can't be any references before the definition. 919 if (isAllocLikeFn(&*BBI, TLI)) 920 DeadStackObjects.remove(&*BBI); 921 922 // If this call does not access memory, it can't be loading any of our 923 // pointers. 924 if (AA->doesNotAccessMemory(Call)) 925 continue; 926 927 // If the call might load from any of our allocas, then any store above 928 // the call is live. 929 DeadStackObjects.remove_if([&](const Value *I) { 930 // See if the call site touches the value. 931 return isRefSet(AA->getModRefInfo( 932 Call, I, getPointerSize(I, DL, *TLI, BB.getParent()))); 933 }); 934 935 // If all of the allocas were clobbered by the call then we're not going 936 // to find anything else to process. 937 if (DeadStackObjects.empty()) 938 break; 939 940 continue; 941 } 942 943 // We can remove the dead stores, irrespective of the fence and its ordering 944 // (release/acquire/seq_cst). Fences only constraints the ordering of 945 // already visible stores, it does not make a store visible to other 946 // threads. So, skipping over a fence does not change a store from being 947 // dead. 948 if (isa<FenceInst>(*BBI)) 949 continue; 950 951 MemoryLocation LoadedLoc; 952 953 // If we encounter a use of the pointer, it is no longer considered dead 954 if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { 955 if (!L->isUnordered()) // Be conservative with atomic/volatile load 956 break; 957 LoadedLoc = MemoryLocation::get(L); 958 } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { 959 LoadedLoc = MemoryLocation::get(V); 960 } else if (!BBI->mayReadFromMemory()) { 961 // Instruction doesn't read memory. Note that stores that weren't removed 962 // above will hit this case. 963 continue; 964 } else { 965 // Unknown inst; assume it clobbers everything. 966 break; 967 } 968 969 // Remove any allocas from the DeadPointer set that are loaded, as this 970 // makes any stores above the access live. 971 removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI, BB.getParent()); 972 973 // If all of the allocas were clobbered by the access then we're not going 974 // to find anything else to process. 975 if (DeadStackObjects.empty()) 976 break; 977 } 978 979 return MadeChange; 980 } 981 982 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, 983 int64_t &EarlierSize, int64_t LaterOffset, 984 int64_t LaterSize, bool IsOverwriteEnd) { 985 // TODO: base this on the target vector size so that if the earlier 986 // store was too small to get vector writes anyway then its likely 987 // a good idea to shorten it 988 // Power of 2 vector writes are probably always a bad idea to optimize 989 // as any store/memset/memcpy is likely using vector instructions so 990 // shortening it to not vector size is likely to be slower 991 auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite); 992 unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment(); 993 if (!IsOverwriteEnd) 994 LaterOffset = int64_t(LaterOffset + LaterSize); 995 996 if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) && 997 !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0)) 998 return false; 999 1000 int64_t NewLength = IsOverwriteEnd 1001 ? LaterOffset - EarlierOffset 1002 : EarlierSize - (LaterOffset - EarlierOffset); 1003 1004 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) { 1005 // When shortening an atomic memory intrinsic, the newly shortened 1006 // length must remain an integer multiple of the element size. 1007 const uint32_t ElementSize = AMI->getElementSizeInBytes(); 1008 if (0 != NewLength % ElementSize) 1009 return false; 1010 } 1011 1012 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " 1013 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " 1014 << *EarlierWrite << "\n KILLER (offset " << LaterOffset 1015 << ", " << EarlierSize << ")\n"); 1016 1017 Value *EarlierWriteLength = EarlierIntrinsic->getLength(); 1018 Value *TrimmedLength = 1019 ConstantInt::get(EarlierWriteLength->getType(), NewLength); 1020 EarlierIntrinsic->setLength(TrimmedLength); 1021 1022 EarlierSize = NewLength; 1023 if (!IsOverwriteEnd) { 1024 int64_t OffsetMoved = (LaterOffset - EarlierOffset); 1025 Value *Indices[1] = { 1026 ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)}; 1027 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( 1028 EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(), 1029 EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite); 1030 NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc()); 1031 EarlierIntrinsic->setDest(NewDestGEP); 1032 EarlierOffset = EarlierOffset + OffsetMoved; 1033 } 1034 return true; 1035 } 1036 1037 static bool tryToShortenEnd(Instruction *EarlierWrite, 1038 OverlapIntervalsTy &IntervalMap, 1039 int64_t &EarlierStart, int64_t &EarlierSize) { 1040 if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) 1041 return false; 1042 1043 OverlapIntervalsTy::iterator OII = --IntervalMap.end(); 1044 int64_t LaterStart = OII->second; 1045 int64_t LaterSize = OII->first - LaterStart; 1046 1047 if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize && 1048 LaterStart + LaterSize >= EarlierStart + EarlierSize) { 1049 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1050 LaterSize, true)) { 1051 IntervalMap.erase(OII); 1052 return true; 1053 } 1054 } 1055 return false; 1056 } 1057 1058 static bool tryToShortenBegin(Instruction *EarlierWrite, 1059 OverlapIntervalsTy &IntervalMap, 1060 int64_t &EarlierStart, int64_t &EarlierSize) { 1061 if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) 1062 return false; 1063 1064 OverlapIntervalsTy::iterator OII = IntervalMap.begin(); 1065 int64_t LaterStart = OII->second; 1066 int64_t LaterSize = OII->first - LaterStart; 1067 1068 if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) { 1069 assert(LaterStart + LaterSize < EarlierStart + EarlierSize && 1070 "Should have been handled as OW_Complete"); 1071 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1072 LaterSize, false)) { 1073 IntervalMap.erase(OII); 1074 return true; 1075 } 1076 } 1077 return false; 1078 } 1079 1080 static bool removePartiallyOverlappedStores(AliasAnalysis *AA, 1081 const DataLayout &DL, 1082 InstOverlapIntervalsTy &IOL) { 1083 bool Changed = false; 1084 for (auto OI : IOL) { 1085 Instruction *EarlierWrite = OI.first; 1086 MemoryLocation Loc = getLocForWrite(EarlierWrite); 1087 assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); 1088 1089 const Value *Ptr = Loc.Ptr->stripPointerCasts(); 1090 int64_t EarlierStart = 0; 1091 int64_t EarlierSize = int64_t(Loc.Size.getValue()); 1092 GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); 1093 OverlapIntervalsTy &IntervalMap = OI.second; 1094 Changed |= 1095 tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1096 if (IntervalMap.empty()) 1097 continue; 1098 Changed |= 1099 tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1100 } 1101 return Changed; 1102 } 1103 1104 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, 1105 AliasAnalysis *AA, MemoryDependenceResults *MD, 1106 const DataLayout &DL, 1107 const TargetLibraryInfo *TLI, 1108 InstOverlapIntervalsTy &IOL, 1109 MapVector<Instruction *, bool> &ThrowableInst, 1110 DominatorTree *DT) { 1111 // Must be a store instruction. 1112 StoreInst *SI = dyn_cast<StoreInst>(Inst); 1113 if (!SI) 1114 return false; 1115 1116 // If we're storing the same value back to a pointer that we just loaded from, 1117 // then the store can be removed. 1118 if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { 1119 if (SI->getPointerOperand() == DepLoad->getPointerOperand() && 1120 isRemovable(SI) && 1121 memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) { 1122 1123 LLVM_DEBUG( 1124 dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " 1125 << *DepLoad << "\n STORE: " << *SI << '\n'); 1126 1127 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1128 ++NumRedundantStores; 1129 return true; 1130 } 1131 } 1132 1133 // Remove null stores into the calloc'ed objects 1134 Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); 1135 if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { 1136 Instruction *UnderlyingPointer = 1137 dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL)); 1138 1139 if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && 1140 memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) { 1141 LLVM_DEBUG( 1142 dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " 1143 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); 1144 1145 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1146 ++NumRedundantStores; 1147 return true; 1148 } 1149 } 1150 return false; 1151 } 1152 1153 static Constant * 1154 tryToMergePartialOverlappingStores(StoreInst *Earlier, StoreInst *Later, 1155 int64_t InstWriteOffset, 1156 int64_t DepWriteOffset, const DataLayout &DL, 1157 AliasAnalysis *AA, DominatorTree *DT) { 1158 1159 if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && 1160 DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) && 1161 Later && isa<ConstantInt>(Later->getValueOperand()) && 1162 DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) && 1163 memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) { 1164 // If the store we find is: 1165 // a) partially overwritten by the store to 'Loc' 1166 // b) the later store is fully contained in the earlier one and 1167 // c) they both have a constant value 1168 // d) none of the two stores need padding 1169 // Merge the two stores, replacing the earlier store's value with a 1170 // merge of both values. 1171 // TODO: Deal with other constant types (vectors, etc), and probably 1172 // some mem intrinsics (if needed) 1173 1174 APInt EarlierValue = 1175 cast<ConstantInt>(Earlier->getValueOperand())->getValue(); 1176 APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue(); 1177 unsigned LaterBits = LaterValue.getBitWidth(); 1178 assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); 1179 LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); 1180 1181 // Offset of the smaller store inside the larger store 1182 unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; 1183 unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() - 1184 BitOffsetDiff - LaterBits 1185 : BitOffsetDiff; 1186 APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, 1187 LShiftAmount + LaterBits); 1188 // Clear the bits we'll be replacing, then OR with the smaller 1189 // store, shifted appropriately. 1190 APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); 1191 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier 1192 << "\n Later: " << *Later 1193 << "\n Merged Value: " << Merged << '\n'); 1194 return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged); 1195 } 1196 return nullptr; 1197 } 1198 1199 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, 1200 MemoryDependenceResults *MD, DominatorTree *DT, 1201 const TargetLibraryInfo *TLI) { 1202 const DataLayout &DL = BB.getModule()->getDataLayout(); 1203 bool MadeChange = false; 1204 1205 MapVector<Instruction *, bool> ThrowableInst; 1206 1207 // A map of interval maps representing partially-overwritten value parts. 1208 InstOverlapIntervalsTy IOL; 1209 1210 // Do a top-down walk on the BB. 1211 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { 1212 // Handle 'free' calls specially. 1213 if (CallInst *F = isFreeCall(&*BBI, TLI)) { 1214 MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, ThrowableInst); 1215 // Increment BBI after handleFree has potentially deleted instructions. 1216 // This ensures we maintain a valid iterator. 1217 ++BBI; 1218 continue; 1219 } 1220 1221 Instruction *Inst = &*BBI++; 1222 1223 if (Inst->mayThrow()) { 1224 ThrowableInst[Inst] = true; 1225 continue; 1226 } 1227 1228 // Check to see if Inst writes to memory. If not, continue. 1229 if (!hasAnalyzableMemoryWrite(Inst, *TLI)) 1230 continue; 1231 1232 // eliminateNoopStore will update in iterator, if necessary. 1233 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, 1234 ThrowableInst, DT)) { 1235 MadeChange = true; 1236 continue; 1237 } 1238 1239 // If we find something that writes memory, get its memory dependence. 1240 MemDepResult InstDep = MD->getDependency(Inst); 1241 1242 // Ignore any store where we can't find a local dependence. 1243 // FIXME: cross-block DSE would be fun. :) 1244 if (!InstDep.isDef() && !InstDep.isClobber()) 1245 continue; 1246 1247 // Figure out what location is being stored to. 1248 MemoryLocation Loc = getLocForWrite(Inst); 1249 1250 // If we didn't get a useful location, fail. 1251 if (!Loc.Ptr) 1252 continue; 1253 1254 // Loop until we find a store we can eliminate or a load that 1255 // invalidates the analysis. Without an upper bound on the number of 1256 // instructions examined, this analysis can become very time-consuming. 1257 // However, the potential gain diminishes as we process more instructions 1258 // without eliminating any of them. Therefore, we limit the number of 1259 // instructions we look at. 1260 auto Limit = MD->getDefaultBlockScanLimit(); 1261 while (InstDep.isDef() || InstDep.isClobber()) { 1262 // Get the memory clobbered by the instruction we depend on. MemDep will 1263 // skip any instructions that 'Loc' clearly doesn't interact with. If we 1264 // end up depending on a may- or must-aliased load, then we can't optimize 1265 // away the store and we bail out. However, if we depend on something 1266 // that overwrites the memory location we *can* potentially optimize it. 1267 // 1268 // Find out what memory location the dependent instruction stores. 1269 Instruction *DepWrite = InstDep.getInst(); 1270 if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) 1271 break; 1272 MemoryLocation DepLoc = getLocForWrite(DepWrite); 1273 // If we didn't get a useful location, or if it isn't a size, bail out. 1274 if (!DepLoc.Ptr) 1275 break; 1276 1277 // Find the last throwable instruction not removed by call to 1278 // deleteDeadInstruction. 1279 Instruction *LastThrowing = nullptr; 1280 if (!ThrowableInst.empty()) 1281 LastThrowing = ThrowableInst.back().first; 1282 1283 // Make sure we don't look past a call which might throw. This is an 1284 // issue because MemoryDependenceAnalysis works in the wrong direction: 1285 // it finds instructions which dominate the current instruction, rather than 1286 // instructions which are post-dominated by the current instruction. 1287 // 1288 // If the underlying object is a non-escaping memory allocation, any store 1289 // to it is dead along the unwind edge. Otherwise, we need to preserve 1290 // the store. 1291 if (LastThrowing && DepWrite->comesBefore(LastThrowing)) { 1292 const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL); 1293 bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying); 1294 if (!IsStoreDeadOnUnwind) { 1295 // We're looking for a call to an allocation function 1296 // where the allocation doesn't escape before the last 1297 // throwing instruction; PointerMayBeCaptured 1298 // reasonably fast approximation. 1299 IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) && 1300 !PointerMayBeCaptured(Underlying, false, true); 1301 } 1302 if (!IsStoreDeadOnUnwind) 1303 break; 1304 } 1305 1306 // If we find a write that is a) removable (i.e., non-volatile), b) is 1307 // completely obliterated by the store to 'Loc', and c) which we know that 1308 // 'Inst' doesn't load from, then we can remove it. 1309 // Also try to merge two stores if a later one only touches memory written 1310 // to by the earlier one. 1311 if (isRemovable(DepWrite) && 1312 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { 1313 int64_t InstWriteOffset, DepWriteOffset; 1314 OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, 1315 InstWriteOffset, DepWrite, IOL, *AA, 1316 BB.getParent()); 1317 if (OR == OW_Complete) { 1318 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite 1319 << "\n KILLER: " << *Inst << '\n'); 1320 1321 // Delete the store and now-dead instructions that feed it. 1322 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1323 ThrowableInst); 1324 ++NumFastStores; 1325 MadeChange = true; 1326 1327 // We erased DepWrite; start over. 1328 InstDep = MD->getDependency(Inst); 1329 continue; 1330 } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) || 1331 ((OR == OW_Begin && 1332 isShortenableAtTheBeginning(DepWrite)))) { 1333 assert(!EnablePartialOverwriteTracking && "Do not expect to perform " 1334 "when partial-overwrite " 1335 "tracking is enabled"); 1336 // The overwrite result is known, so these must be known, too. 1337 int64_t EarlierSize = DepLoc.Size.getValue(); 1338 int64_t LaterSize = Loc.Size.getValue(); 1339 bool IsOverwriteEnd = (OR == OW_End); 1340 MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, 1341 InstWriteOffset, LaterSize, IsOverwriteEnd); 1342 } else if (EnablePartialStoreMerging && 1343 OR == OW_PartialEarlierWithFullLater) { 1344 auto *Earlier = dyn_cast<StoreInst>(DepWrite); 1345 auto *Later = dyn_cast<StoreInst>(Inst); 1346 if (Constant *C = tryToMergePartialOverlappingStores( 1347 Earlier, Later, InstWriteOffset, DepWriteOffset, DL, AA, 1348 DT)) { 1349 auto *SI = new StoreInst( 1350 C, Earlier->getPointerOperand(), false, Earlier->getAlign(), 1351 Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); 1352 1353 unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, 1354 LLVMContext::MD_alias_scope, 1355 LLVMContext::MD_noalias, 1356 LLVMContext::MD_nontemporal}; 1357 SI->copyMetadata(*DepWrite, MDToKeep); 1358 ++NumModifiedStores; 1359 1360 // Delete the old stores and now-dead instructions that feed them. 1361 deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, 1362 ThrowableInst); 1363 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1364 ThrowableInst); 1365 MadeChange = true; 1366 1367 // We erased DepWrite and Inst (Loc); start over. 1368 break; 1369 } 1370 } 1371 } 1372 1373 // If this is a may-aliased store that is clobbering the store value, we 1374 // can keep searching past it for another must-aliased pointer that stores 1375 // to the same location. For example, in: 1376 // store -> P 1377 // store -> Q 1378 // store -> P 1379 // we can remove the first store to P even though we don't know if P and Q 1380 // alias. 1381 if (DepWrite == &BB.front()) break; 1382 1383 // Can't look past this instruction if it might read 'Loc'. 1384 if (isRefSet(AA->getModRefInfo(DepWrite, Loc))) 1385 break; 1386 1387 InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false, 1388 DepWrite->getIterator(), &BB, 1389 /*QueryInst=*/ nullptr, &Limit); 1390 } 1391 } 1392 1393 if (EnablePartialOverwriteTracking) 1394 MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL); 1395 1396 // If this block ends in a return, unwind, or unreachable, all allocas are 1397 // dead at its end, which means stores to them are also dead. 1398 if (BB.getTerminator()->getNumSuccessors() == 0) 1399 MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, ThrowableInst); 1400 1401 return MadeChange; 1402 } 1403 1404 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, 1405 MemoryDependenceResults *MD, DominatorTree *DT, 1406 const TargetLibraryInfo *TLI) { 1407 bool MadeChange = false; 1408 for (BasicBlock &BB : F) 1409 // Only check non-dead blocks. Dead blocks may have strange pointer 1410 // cycles that will confuse alias analysis. 1411 if (DT->isReachableFromEntry(&BB)) 1412 MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI); 1413 1414 return MadeChange; 1415 } 1416 1417 namespace { 1418 //============================================================================= 1419 // MemorySSA backed dead store elimination. 1420 // 1421 // The code below implements dead store elimination using MemorySSA. It uses 1422 // the following general approach: given a MemoryDef, walk upwards to find 1423 // clobbering MemoryDefs that may be killed by the starting def. Then check 1424 // that there are no uses that may read the location of the original MemoryDef 1425 // in between both MemoryDefs. A bit more concretely: 1426 // 1427 // For all MemoryDefs StartDef: 1428 // 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking 1429 // upwards. 1430 // 2. Check that there are no reads between DomAccess and the StartDef by 1431 // checking all uses starting at DomAccess and walking until we see StartDef. 1432 // 3. For each found DomDef, check that: 1433 // 1. There are no barrier instructions between DomDef and StartDef (like 1434 // throws or stores with ordering constraints). 1435 // 2. StartDef is executed whenever DomDef is executed. 1436 // 3. StartDef completely overwrites DomDef. 1437 // 4. Erase DomDef from the function and MemorySSA. 1438 1439 // Returns true if \p M is an intrisnic that does not read or write memory. 1440 bool isNoopIntrinsic(MemoryUseOrDef *M) { 1441 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) { 1442 switch (II->getIntrinsicID()) { 1443 case Intrinsic::lifetime_start: 1444 case Intrinsic::lifetime_end: 1445 case Intrinsic::invariant_end: 1446 case Intrinsic::launder_invariant_group: 1447 case Intrinsic::assume: 1448 return true; 1449 case Intrinsic::dbg_addr: 1450 case Intrinsic::dbg_declare: 1451 case Intrinsic::dbg_label: 1452 case Intrinsic::dbg_value: 1453 llvm_unreachable("Intrinsic should not be modeled in MemorySSA"); 1454 default: 1455 return false; 1456 } 1457 } 1458 return false; 1459 } 1460 1461 // Check if we can ignore \p D for DSE. 1462 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { 1463 Instruction *DI = D->getMemoryInst(); 1464 // Calls that only access inaccessible memory cannot read or write any memory 1465 // locations we consider for elimination. 1466 if (auto *CB = dyn_cast<CallBase>(DI)) 1467 if (CB->onlyAccessesInaccessibleMemory()) 1468 return true; 1469 1470 // We can eliminate stores to locations not visible to the caller across 1471 // throwing instructions. 1472 if (DI->mayThrow() && !DefVisibleToCaller) 1473 return true; 1474 1475 // We can remove the dead stores, irrespective of the fence and its ordering 1476 // (release/acquire/seq_cst). Fences only constraints the ordering of 1477 // already visible stores, it does not make a store visible to other 1478 // threads. So, skipping over a fence does not change a store from being 1479 // dead. 1480 if (isa<FenceInst>(DI)) 1481 return true; 1482 1483 // Skip intrinsics that do not really read or modify memory. 1484 if (isNoopIntrinsic(D)) 1485 return true; 1486 1487 return false; 1488 } 1489 1490 struct DSEState { 1491 Function &F; 1492 AliasAnalysis &AA; 1493 MemorySSA &MSSA; 1494 DominatorTree &DT; 1495 PostDominatorTree &PDT; 1496 const TargetLibraryInfo &TLI; 1497 1498 // All MemoryDefs that potentially could kill other MemDefs. 1499 SmallVector<MemoryDef *, 64> MemDefs; 1500 // Any that should be skipped as they are already deleted 1501 SmallPtrSet<MemoryAccess *, 4> SkipStores; 1502 // Keep track of all of the objects that are invisible to the caller before 1503 // the function returns. 1504 SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet; 1505 // Keep track of all of the objects that are invisible to the caller after 1506 // the function returns. 1507 SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet; 1508 // Keep track of blocks with throwing instructions not modeled in MemorySSA. 1509 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks; 1510 // Post-order numbers for each basic block. Used to figure out if memory 1511 // accesses are executed before another access. 1512 DenseMap<BasicBlock *, unsigned> PostOrderNumbers; 1513 1514 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per 1515 /// basic block. 1516 DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs; 1517 1518 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, 1519 PostDominatorTree &PDT, const TargetLibraryInfo &TLI) 1520 : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} 1521 1522 static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, 1523 DominatorTree &DT, PostDominatorTree &PDT, 1524 const TargetLibraryInfo &TLI) { 1525 DSEState State(F, AA, MSSA, DT, PDT, TLI); 1526 // Collect blocks with throwing instructions not modeled in MemorySSA and 1527 // alloc-like objects. 1528 unsigned PO = 0; 1529 for (BasicBlock *BB : post_order(&F)) { 1530 State.PostOrderNumbers[BB] = PO++; 1531 for (Instruction &I : *BB) { 1532 MemoryAccess *MA = MSSA.getMemoryAccess(&I); 1533 if (I.mayThrow() && !MA) 1534 State.ThrowingBlocks.insert(I.getParent()); 1535 1536 auto *MD = dyn_cast_or_null<MemoryDef>(MA); 1537 if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && 1538 hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) 1539 State.MemDefs.push_back(MD); 1540 1541 // Track whether alloca and alloca-like objects are visible in the 1542 // caller before and after the function returns. Alloca objects are 1543 // invalid in the caller, so they are neither visible before or after 1544 // the function returns. 1545 if (isa<AllocaInst>(&I)) { 1546 State.InvisibleToCallerBeforeRet.insert(&I); 1547 State.InvisibleToCallerAfterRet.insert(&I); 1548 } 1549 1550 // For alloca-like objects we need to check if they are captured before 1551 // the function returns and if the return might capture the object. 1552 if (isAllocLikeFn(&I, &TLI)) { 1553 bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true); 1554 if (!CapturesBeforeRet) { 1555 State.InvisibleToCallerBeforeRet.insert(&I); 1556 if (!PointerMayBeCaptured(&I, true, false)) 1557 State.InvisibleToCallerAfterRet.insert(&I); 1558 } 1559 } 1560 } 1561 } 1562 1563 // Treat byval or inalloca arguments the same as Allocas, stores to them are 1564 // dead at the end of the function. 1565 for (Argument &AI : F.args()) 1566 if (AI.hasPassPointeeByValueAttr()) 1567 State.InvisibleToCallerBeforeRet.insert(&AI); 1568 return State; 1569 } 1570 1571 Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const { 1572 if (!I->mayWriteToMemory()) 1573 return None; 1574 1575 if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I)) 1576 return {MemoryLocation::getForDest(MTI)}; 1577 1578 if (auto *CB = dyn_cast<CallBase>(I)) { 1579 if (Function *F = CB->getCalledFunction()) { 1580 StringRef FnName = F->getName(); 1581 if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) 1582 return {MemoryLocation(CB->getArgOperand(0))}; 1583 if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) 1584 return {MemoryLocation(CB->getArgOperand(0))}; 1585 if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) 1586 return {MemoryLocation(CB->getArgOperand(0))}; 1587 if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) 1588 return {MemoryLocation(CB->getArgOperand(0))}; 1589 } 1590 return None; 1591 } 1592 1593 return MemoryLocation::getOrNone(I); 1594 } 1595 1596 /// Returns true if \p Use completely overwrites \p DefLoc. 1597 bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const { 1598 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a 1599 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a 1600 // MemoryDef. 1601 if (!UseInst->mayWriteToMemory()) 1602 return false; 1603 1604 if (auto *CB = dyn_cast<CallBase>(UseInst)) 1605 if (CB->onlyAccessesInaccessibleMemory()) 1606 return false; 1607 1608 int64_t InstWriteOffset, DepWriteOffset; 1609 auto CC = getLocForWriteEx(UseInst); 1610 InstOverlapIntervalsTy IOL; 1611 1612 const DataLayout &DL = F.getParent()->getDataLayout(); 1613 1614 return CC && 1615 isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset, InstWriteOffset, 1616 UseInst, IOL, AA, &F) == OW_Complete; 1617 } 1618 1619 /// Returns true if \p Use may read from \p DefLoc. 1620 bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { 1621 if (!UseInst->mayReadFromMemory()) 1622 return false; 1623 1624 if (auto *CB = dyn_cast<CallBase>(UseInst)) 1625 if (CB->onlyAccessesInaccessibleMemory()) 1626 return false; 1627 1628 ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); 1629 // If necessary, perform additional analysis. 1630 if (isRefSet(MR)) 1631 MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); 1632 return isRefSet(MR); 1633 } 1634 1635 // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no 1636 // read access between them or on any other path to a function exit block if 1637 // \p DefLoc is not accessible after the function returns. If there is no such 1638 // MemoryDef, return None. The returned value may not (completely) overwrite 1639 // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse 1640 // (read). 1641 Optional<MemoryAccess *> 1642 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, 1643 MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet, 1644 bool DefVisibleToCallerAfterRet, int &ScanLimit) const { 1645 MemoryAccess *DomAccess; 1646 bool StepAgain; 1647 LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current 1648 << "\n"); 1649 // Find the next clobbering Mod access for DefLoc, starting at Current. 1650 do { 1651 StepAgain = false; 1652 // Reached TOP. 1653 if (MSSA.isLiveOnEntryDef(Current)) 1654 return None; 1655 1656 if (isa<MemoryPhi>(Current)) { 1657 DomAccess = Current; 1658 break; 1659 } 1660 MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current); 1661 // Look for access that clobber DefLoc. 1662 DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD, 1663 DefLoc); 1664 if (MSSA.isLiveOnEntryDef(DomAccess)) 1665 return None; 1666 1667 if (isa<MemoryPhi>(DomAccess)) 1668 break; 1669 1670 // Check if we can skip DomDef for DSE. 1671 MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess); 1672 if (DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) { 1673 StepAgain = true; 1674 Current = DomDef->getDefiningAccess(); 1675 } 1676 1677 } while (StepAgain); 1678 1679 // Accesses to objects accessible after the function returns can only be 1680 // eliminated if the access is killed along all paths to the exit. Collect 1681 // the blocks with killing (=completely overwriting MemoryDefs) and check if 1682 // they cover all paths from DomAccess to any function exit. 1683 SmallPtrSet<BasicBlock *, 16> KillingBlocks = {KillingDef->getBlock()}; 1684 LLVM_DEBUG({ 1685 dbgs() << " Checking for reads of " << *DomAccess; 1686 if (isa<MemoryDef>(DomAccess)) 1687 dbgs() << " (" << *cast<MemoryDef>(DomAccess)->getMemoryInst() << ")\n"; 1688 else 1689 dbgs() << ")\n"; 1690 }); 1691 1692 SmallSetVector<MemoryAccess *, 32> WorkList; 1693 auto PushMemUses = [&WorkList](MemoryAccess *Acc) { 1694 for (Use &U : Acc->uses()) 1695 WorkList.insert(cast<MemoryAccess>(U.getUser())); 1696 }; 1697 PushMemUses(DomAccess); 1698 1699 // Check if DomDef may be read. 1700 for (unsigned I = 0; I < WorkList.size(); I++) { 1701 MemoryAccess *UseAccess = WorkList[I]; 1702 1703 LLVM_DEBUG(dbgs() << " " << *UseAccess); 1704 if (--ScanLimit == 0) { 1705 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); 1706 return None; 1707 } 1708 1709 if (isa<MemoryPhi>(UseAccess)) { 1710 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n"); 1711 PushMemUses(UseAccess); 1712 continue; 1713 } 1714 1715 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst(); 1716 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); 1717 1718 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) { 1719 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n"); 1720 PushMemUses(UseAccess); 1721 continue; 1722 } 1723 1724 // Uses which may read the original MemoryDef mean we cannot eliminate the 1725 // original MD. Stop walk. 1726 if (isReadClobber(DefLoc, UseInst)) { 1727 LLVM_DEBUG(dbgs() << " ... found read clobber\n"); 1728 return None; 1729 } 1730 1731 // For the KillingDef and DomAccess we only have to check if it reads the 1732 // memory location. 1733 // TODO: It would probably be better to check for self-reads before 1734 // calling the function. 1735 if (KillingDef == UseAccess || DomAccess == UseAccess) { 1736 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); 1737 continue; 1738 } 1739 1740 // Check all uses for MemoryDefs, except for defs completely overwriting 1741 // the original location. Otherwise we have to check uses of *all* 1742 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might 1743 // miss cases like the following 1744 // 1 = Def(LoE) ; <----- DomDef stores [0,1] 1745 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] 1746 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. 1747 // (The Use points to the *first* Def it may alias) 1748 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, 1749 // stores [0,1] 1750 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) { 1751 if (isCompleteOverwrite(DefLoc, UseInst)) { 1752 if (DefVisibleToCallerAfterRet && UseAccess != DomAccess) { 1753 BasicBlock *MaybeKillingBlock = UseInst->getParent(); 1754 if (PostOrderNumbers.find(MaybeKillingBlock)->second < 1755 PostOrderNumbers.find(DomAccess->getBlock())->second) { 1756 1757 LLVM_DEBUG(dbgs() << " ... found killing block " 1758 << MaybeKillingBlock->getName() << "\n"); 1759 KillingBlocks.insert(MaybeKillingBlock); 1760 } 1761 } 1762 } else 1763 PushMemUses(UseDef); 1764 } 1765 } 1766 1767 // For accesses to locations visible after the function returns, make sure 1768 // that the location is killed (=overwritten) along all paths from DomAccess 1769 // to the exit. 1770 if (DefVisibleToCallerAfterRet) { 1771 assert(!KillingBlocks.empty() && 1772 "Expected at least a single killing block"); 1773 // Find the common post-dominator of all killing blocks. 1774 BasicBlock *CommonPred = *KillingBlocks.begin(); 1775 for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end(); 1776 I != E; I++) { 1777 if (!CommonPred) 1778 break; 1779 CommonPred = PDT.findNearestCommonDominator(CommonPred, *I); 1780 } 1781 1782 // If CommonPred is in the set of killing blocks, just check if it 1783 // post-dominates DomAccess. 1784 if (KillingBlocks.count(CommonPred)) { 1785 if (PDT.dominates(CommonPred, DomAccess->getBlock())) 1786 return {DomAccess}; 1787 return None; 1788 } 1789 1790 // If the common post-dominator does not post-dominate DomAccess, there 1791 // is a path from DomAccess to an exit not going through a killing block. 1792 if (PDT.dominates(CommonPred, DomAccess->getBlock())) { 1793 SetVector<BasicBlock *> WorkList; 1794 1795 // DomAccess's post-order number provides an upper bound of the blocks 1796 // on a path starting at DomAccess. 1797 unsigned UpperBound = 1798 PostOrderNumbers.find(DomAccess->getBlock())->second; 1799 1800 // If CommonPred is null, there are multiple exits from the function. 1801 // They all have to be added to the worklist. 1802 if (CommonPred) 1803 WorkList.insert(CommonPred); 1804 else 1805 for (BasicBlock *R : PDT.getRoots()) { 1806 if (!DT.isReachableFromEntry(R)) 1807 continue; 1808 WorkList.insert(R); 1809 } 1810 1811 NumCFGTries++; 1812 // Check if all paths starting from an exit node go through one of the 1813 // killing blocks before reaching DomAccess. 1814 for (unsigned I = 0; I < WorkList.size(); I++) { 1815 NumCFGChecks++; 1816 BasicBlock *Current = WorkList[I]; 1817 if (KillingBlocks.count(Current)) 1818 continue; 1819 if (Current == DomAccess->getBlock()) 1820 return None; 1821 unsigned CPO = PostOrderNumbers.find(Current)->second; 1822 // Current block is not on a path starting at DomAccess. 1823 if (CPO > UpperBound) 1824 continue; 1825 for (BasicBlock *Pred : predecessors(Current)) 1826 WorkList.insert(Pred); 1827 1828 if (WorkList.size() >= MemorySSAPathCheckLimit) 1829 return None; 1830 } 1831 NumCFGSuccess++; 1832 return {DomAccess}; 1833 } 1834 return None; 1835 } 1836 1837 // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. 1838 return {DomAccess}; 1839 } 1840 1841 // Delete dead memory defs 1842 void deleteDeadInstruction(Instruction *SI) { 1843 MemorySSAUpdater Updater(&MSSA); 1844 SmallVector<Instruction *, 32> NowDeadInsts; 1845 NowDeadInsts.push_back(SI); 1846 --NumFastOther; 1847 1848 while (!NowDeadInsts.empty()) { 1849 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 1850 ++NumFastOther; 1851 1852 // Try to preserve debug information attached to the dead instruction. 1853 salvageDebugInfo(*DeadInst); 1854 salvageKnowledge(DeadInst); 1855 1856 // Remove the Instruction from MSSA. 1857 if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { 1858 if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { 1859 SkipStores.insert(MD); 1860 } 1861 Updater.removeMemoryAccess(MA); 1862 } 1863 1864 auto I = IOLs.find(DeadInst->getParent()); 1865 if (I != IOLs.end()) 1866 I->second.erase(DeadInst); 1867 // Remove its operands 1868 for (Use &O : DeadInst->operands()) 1869 if (Instruction *OpI = dyn_cast<Instruction>(O)) { 1870 O = nullptr; 1871 if (isInstructionTriviallyDead(OpI, &TLI)) 1872 NowDeadInsts.push_back(OpI); 1873 } 1874 1875 DeadInst->eraseFromParent(); 1876 } 1877 } 1878 1879 // Check for any extra throws between SI and NI that block DSE. This only 1880 // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may 1881 // throw are handled during the walk from one def to the next. 1882 bool mayThrowBetween(Instruction *SI, Instruction *NI, 1883 const Value *SILocUnd) const { 1884 // First see if we can ignore it by using the fact that SI is an 1885 // alloca/alloca like object that is not visible to the caller during 1886 // execution of the function. 1887 if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd)) 1888 return false; 1889 1890 if (SI->getParent() == NI->getParent()) 1891 return ThrowingBlocks.count(SI->getParent()); 1892 return !ThrowingBlocks.empty(); 1893 } 1894 1895 // Check if \p NI acts as a DSE barrier for \p SI. The following instructions 1896 // act as barriers: 1897 // * A memory instruction that may throw and \p SI accesses a non-stack 1898 // object. 1899 // * Atomic stores stronger that monotonic. 1900 bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc, 1901 const Value *SILocUnd, Instruction *NI, 1902 MemoryLocation &NILoc) const { 1903 // If NI may throw it acts as a barrier, unless we are to an alloca/alloca 1904 // like object that does not escape. 1905 if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd)) 1906 return true; 1907 1908 if (NI->isAtomic()) { 1909 if (auto *NSI = dyn_cast<StoreInst>(NI)) { 1910 if (isStrongerThanMonotonic(NSI->getOrdering())) 1911 return true; 1912 } else 1913 llvm_unreachable( 1914 "Other instructions should be modeled/skipped in MemorySSA"); 1915 } 1916 1917 return false; 1918 } 1919 }; 1920 1921 /// \returns true if \p KillingDef stores the result of \p Load to the source of 1922 /// \p Load. 1923 static bool storeIsNoop(MemorySSA &MSSA, LoadInst *Load, 1924 MemoryDef *KillingDef) { 1925 Instruction *Store = KillingDef->getMemoryInst(); 1926 // If the load's operand isn't the destination of the store, bail. 1927 if (Load->getPointerOperand() != Store->getOperand(1)) 1928 return false; 1929 1930 // Get the defining access for the load. 1931 auto *LoadAccess = MSSA.getMemoryAccess(Load)->getDefiningAccess(); 1932 // The store is dead if the defining accesses are the same. 1933 return LoadAccess == KillingDef->getDefiningAccess(); 1934 } 1935 1936 bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, 1937 MemorySSA &MSSA, DominatorTree &DT, 1938 PostDominatorTree &PDT, 1939 const TargetLibraryInfo &TLI) { 1940 const DataLayout &DL = F.getParent()->getDataLayout(); 1941 bool MadeChange = false; 1942 1943 DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); 1944 // For each store: 1945 for (unsigned I = 0; I < State.MemDefs.size(); I++) { 1946 MemoryDef *KillingDef = State.MemDefs[I]; 1947 if (State.SkipStores.count(KillingDef)) 1948 continue; 1949 Instruction *SI = KillingDef->getMemoryInst(); 1950 1951 // Check if we're storing a value that we just loaded. 1952 if (auto *Load = dyn_cast<LoadInst>(SI->getOperand(0))) { 1953 if (storeIsNoop(MSSA, Load, KillingDef)) { 1954 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *SI 1955 << '\n'); 1956 State.deleteDeadInstruction(SI); 1957 NumNoopStores++; 1958 MadeChange = true; 1959 continue; 1960 } 1961 } 1962 1963 auto MaybeSILoc = State.getLocForWriteEx(SI); 1964 if (!MaybeSILoc) { 1965 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " 1966 << *SI << "\n"); 1967 continue; 1968 } 1969 MemoryLocation SILoc = *MaybeSILoc; 1970 assert(SILoc.Ptr && "SILoc should not be null"); 1971 const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); 1972 Instruction *DefObj = 1973 const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd)); 1974 bool DefVisibleToCallerBeforeRet = 1975 !State.InvisibleToCallerBeforeRet.count(SILocUnd); 1976 bool DefVisibleToCallerAfterRet = 1977 !State.InvisibleToCallerAfterRet.count(SILocUnd); 1978 if (DefObj && isAllocLikeFn(DefObj, &TLI)) { 1979 if (DefVisibleToCallerBeforeRet) 1980 DefVisibleToCallerBeforeRet = 1981 PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT); 1982 } 1983 1984 MemoryAccess *Current = KillingDef; 1985 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " 1986 << *KillingDef << " (" << *SI << ")\n"); 1987 1988 int ScanLimit = MemorySSAScanLimit; 1989 // Worklist of MemoryAccesses that may be killed by KillingDef. 1990 SetVector<MemoryAccess *> ToCheck; 1991 ToCheck.insert(KillingDef->getDefiningAccess()); 1992 1993 // Check if MemoryAccesses in the worklist are killed by KillingDef. 1994 for (unsigned I = 0; I < ToCheck.size(); I++) { 1995 Current = ToCheck[I]; 1996 if (State.SkipStores.count(Current)) 1997 continue; 1998 1999 Optional<MemoryAccess *> Next = State.getDomMemoryDef( 2000 KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet, 2001 DefVisibleToCallerAfterRet, ScanLimit); 2002 2003 if (!Next) { 2004 LLVM_DEBUG(dbgs() << " finished walk\n"); 2005 continue; 2006 } 2007 2008 MemoryAccess *DomAccess = *Next; 2009 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess); 2010 if (isa<MemoryPhi>(DomAccess)) { 2011 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); 2012 for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) { 2013 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V); 2014 BasicBlock *IncomingBlock = IncomingAccess->getBlock(); 2015 BasicBlock *PhiBlock = DomAccess->getBlock(); 2016 2017 // We only consider incoming MemoryAccesses that come before the 2018 // MemoryPhi. Otherwise we could discover candidates that do not 2019 // strictly dominate our starting def. 2020 if (State.PostOrderNumbers[IncomingBlock] > 2021 State.PostOrderNumbers[PhiBlock]) 2022 ToCheck.insert(IncomingAccess); 2023 } 2024 continue; 2025 } 2026 MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess); 2027 Instruction *NI = NextDef->getMemoryInst(); 2028 LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); 2029 2030 if (!hasAnalyzableMemoryWrite(NI, TLI)) { 2031 LLVM_DEBUG(dbgs() << " ... skip, cannot analyze def\n"); 2032 continue; 2033 } 2034 2035 if (!isRemovable(NI)) { 2036 LLVM_DEBUG(dbgs() << " ... skip, cannot remove def\n"); 2037 continue; 2038 } 2039 2040 MemoryLocation NILoc = *State.getLocForWriteEx(NI); 2041 // Check for anything that looks like it will be a barrier to further 2042 // removal 2043 if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) { 2044 LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); 2045 continue; 2046 } 2047 2048 // Before we try to remove anything, check for any extra throwing 2049 // instructions that block us from DSEing 2050 if (State.mayThrowBetween(SI, NI, SILocUnd)) { 2051 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); 2052 break; 2053 } 2054 2055 if (!DebugCounter::shouldExecute(MemorySSACounter)) 2056 continue; 2057 2058 // Check if NI overwrites SI. 2059 int64_t InstWriteOffset, DepWriteOffset; 2060 auto Iter = State.IOLs.insert( 2061 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( 2062 NI->getParent(), InstOverlapIntervalsTy())); 2063 auto &IOL = Iter.first->second; 2064 OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, 2065 InstWriteOffset, NI, IOL, AA, &F); 2066 2067 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { 2068 auto *Earlier = dyn_cast<StoreInst>(NI); 2069 auto *Later = dyn_cast<StoreInst>(SI); 2070 if (Constant *Merged = tryToMergePartialOverlappingStores( 2071 Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA, 2072 &DT)) { 2073 2074 // Update stored value of earlier store to merged constant. 2075 Earlier->setOperand(0, Merged); 2076 ++NumModifiedStores; 2077 MadeChange = true; 2078 2079 // Remove later store and remove any outstanding overlap intervals for 2080 // the updated store. 2081 State.deleteDeadInstruction(Later); 2082 auto I = State.IOLs.find(Earlier->getParent()); 2083 if (I != State.IOLs.end()) 2084 I->second.erase(Earlier); 2085 break; 2086 } 2087 } 2088 2089 ToCheck.insert(NextDef->getDefiningAccess()); 2090 if (OR == OW_Complete) { 2091 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI 2092 << "\n KILLER: " << *SI << '\n'); 2093 State.deleteDeadInstruction(NI); 2094 ++NumFastStores; 2095 MadeChange = true; 2096 } 2097 } 2098 } 2099 2100 if (EnablePartialOverwriteTracking) 2101 for (auto &KV : State.IOLs) 2102 MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second); 2103 2104 return MadeChange; 2105 } 2106 } // end anonymous namespace 2107 2108 //===----------------------------------------------------------------------===// 2109 // DSE Pass 2110 //===----------------------------------------------------------------------===// 2111 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { 2112 AliasAnalysis &AA = AM.getResult<AAManager>(F); 2113 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); 2114 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 2115 2116 bool Changed = false; 2117 if (EnableMemorySSA) { 2118 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); 2119 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 2120 2121 Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 2122 } else { 2123 MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F); 2124 2125 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 2126 } 2127 2128 #ifdef LLVM_ENABLE_STATS 2129 if (AreStatisticsEnabled()) 2130 for (auto &I : instructions(F)) 2131 NumRemainingStores += isa<StoreInst>(&I); 2132 #endif 2133 2134 if (!Changed) 2135 return PreservedAnalyses::all(); 2136 2137 PreservedAnalyses PA; 2138 PA.preserveSet<CFGAnalyses>(); 2139 PA.preserve<GlobalsAA>(); 2140 if (EnableMemorySSA) 2141 PA.preserve<MemorySSAAnalysis>(); 2142 else 2143 PA.preserve<MemoryDependenceAnalysis>(); 2144 return PA; 2145 } 2146 2147 namespace { 2148 2149 /// A legacy pass for the legacy pass manager that wraps \c DSEPass. 2150 class DSELegacyPass : public FunctionPass { 2151 public: 2152 static char ID; // Pass identification, replacement for typeid 2153 2154 DSELegacyPass() : FunctionPass(ID) { 2155 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); 2156 } 2157 2158 bool runOnFunction(Function &F) override { 2159 if (skipFunction(F)) 2160 return false; 2161 2162 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 2163 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2164 const TargetLibraryInfo &TLI = 2165 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2166 2167 bool Changed = false; 2168 if (EnableMemorySSA) { 2169 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 2170 PostDominatorTree &PDT = 2171 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 2172 2173 Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 2174 } else { 2175 MemoryDependenceResults &MD = 2176 getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 2177 2178 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 2179 } 2180 2181 #ifdef LLVM_ENABLE_STATS 2182 if (AreStatisticsEnabled()) 2183 for (auto &I : instructions(F)) 2184 NumRemainingStores += isa<StoreInst>(&I); 2185 #endif 2186 2187 return Changed; 2188 } 2189 2190 void getAnalysisUsage(AnalysisUsage &AU) const override { 2191 AU.setPreservesCFG(); 2192 AU.addRequired<AAResultsWrapperPass>(); 2193 AU.addRequired<TargetLibraryInfoWrapperPass>(); 2194 AU.addPreserved<GlobalsAAWrapperPass>(); 2195 AU.addRequired<DominatorTreeWrapperPass>(); 2196 AU.addPreserved<DominatorTreeWrapperPass>(); 2197 2198 if (EnableMemorySSA) { 2199 AU.addRequired<PostDominatorTreeWrapperPass>(); 2200 AU.addRequired<MemorySSAWrapperPass>(); 2201 AU.addPreserved<PostDominatorTreeWrapperPass>(); 2202 AU.addPreserved<MemorySSAWrapperPass>(); 2203 } else { 2204 AU.addRequired<MemoryDependenceWrapperPass>(); 2205 AU.addPreserved<MemoryDependenceWrapperPass>(); 2206 } 2207 } 2208 }; 2209 2210 } // end anonymous namespace 2211 2212 char DSELegacyPass::ID = 0; 2213 2214 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, 2215 false) 2216 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2217 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 2218 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 2219 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 2220 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 2221 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 2222 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 2223 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, 2224 false) 2225 2226 FunctionPass *llvm::createDeadStoreEliminationPass() { 2227 return new DSELegacyPass(); 2228 } 2229