1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a trivial dead store elimination that only considers 11 // basic-block local redundant stores. 12 // 13 // FIXME: This should eventually be extended to be a post-dominator tree 14 // traversal. Doing so would be pretty trivial. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/SetVector.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/Analysis/AliasAnalysis.h" 27 #include "llvm/Analysis/CaptureTracking.h" 28 #include "llvm/Analysis/GlobalsModRef.h" 29 #include "llvm/Analysis/MemoryBuiltins.h" 30 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 31 #include "llvm/Analysis/MemoryLocation.h" 32 #include "llvm/Analysis/TargetLibraryInfo.h" 33 #include "llvm/Analysis/Utils/Local.h" 34 #include "llvm/Analysis/ValueTracking.h" 35 #include "llvm/IR/Argument.h" 36 #include "llvm/IR/BasicBlock.h" 37 #include "llvm/IR/CallSite.h" 38 #include "llvm/IR/Constant.h" 39 #include "llvm/IR/Constants.h" 40 #include "llvm/IR/DataLayout.h" 41 #include "llvm/IR/Dominators.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/InstrTypes.h" 44 #include "llvm/IR/Instruction.h" 45 #include "llvm/IR/Instructions.h" 46 #include "llvm/IR/IntrinsicInst.h" 47 #include "llvm/IR/Intrinsics.h" 48 #include "llvm/IR/LLVMContext.h" 49 #include "llvm/IR/Module.h" 50 #include "llvm/IR/PassManager.h" 51 #include "llvm/IR/Value.h" 52 #include "llvm/Pass.h" 53 #include "llvm/Support/Casting.h" 54 #include "llvm/Support/CommandLine.h" 55 #include "llvm/Support/Debug.h" 56 #include "llvm/Support/ErrorHandling.h" 57 #include "llvm/Support/MathExtras.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/Transforms/Scalar.h" 60 #include <algorithm> 61 #include <cassert> 62 #include <cstddef> 63 #include <cstdint> 64 #include <iterator> 65 #include <map> 66 #include <utility> 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "dse" 71 72 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 73 STATISTIC(NumFastStores, "Number of stores deleted"); 74 STATISTIC(NumFastOther , "Number of other instrs removed"); 75 STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); 76 STATISTIC(NumModifiedStores, "Number of stores modified"); 77 78 static cl::opt<bool> 79 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", 80 cl::init(true), cl::Hidden, 81 cl::desc("Enable partial-overwrite tracking in DSE")); 82 83 static cl::opt<bool> 84 EnablePartialStoreMerging("enable-dse-partial-store-merging", 85 cl::init(true), cl::Hidden, 86 cl::desc("Enable partial store merging in DSE")); 87 88 //===----------------------------------------------------------------------===// 89 // Helper functions 90 //===----------------------------------------------------------------------===// 91 using OverlapIntervalsTy = std::map<int64_t, int64_t>; 92 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; 93 94 /// 
Delete this instruction. Before we do, go through and zero out all the 95 /// operands of this instruction. If any of them become dead, delete them and 96 /// the computation tree that feeds them. 97 /// If ValueSet is non-null, remove any deleted instructions from it as well. 98 static void 99 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI, 100 MemoryDependenceResults &MD, const TargetLibraryInfo &TLI, 101 InstOverlapIntervalsTy &IOL, 102 DenseMap<Instruction*, size_t> *InstrOrdering, 103 SmallSetVector<Value *, 16> *ValueSet = nullptr) { 104 SmallVector<Instruction*, 32> NowDeadInsts; 105 106 NowDeadInsts.push_back(I); 107 --NumFastOther; 108 109 // Keeping the iterator straight is a pain, so we let this routine tell the 110 // caller what the next instruction is after we're done mucking about. 111 BasicBlock::iterator NewIter = *BBI; 112 113 // Before we touch this instruction, remove it from memdep! 114 do { 115 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 116 ++NumFastOther; 117 118 // Try to preserve debug information attached to the dead instruction. 119 salvageDebugInfo(*DeadInst); 120 121 // This instruction is dead, zap it, in stages. Start by removing it from 122 // MemDep, which needs to know the operands and needs it to be in the 123 // function. 124 MD.removeInstruction(DeadInst); 125 126 for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 127 Value *Op = DeadInst->getOperand(op); 128 DeadInst->setOperand(op, nullptr); 129 130 // If this operand just became dead, add it to the NowDeadInsts list. 131 if (!Op->use_empty()) continue; 132 133 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 134 if (isInstructionTriviallyDead(OpI, &TLI)) 135 NowDeadInsts.push_back(OpI); 136 } 137 138 if (ValueSet) ValueSet->remove(DeadInst); 139 InstrOrdering->erase(DeadInst); 140 IOL.erase(DeadInst); 141 142 if (NewIter == DeadInst->getIterator()) 143 NewIter = DeadInst->eraseFromParent(); 144 else 145 DeadInst->eraseFromParent(); 146 } while (!NowDeadInsts.empty()); 147 *BBI = NewIter; 148 } 149 150 /// Does this instruction write some memory? This only returns true for things 151 /// that we can analyze with other helpers below. 152 static bool hasAnalyzableMemoryWrite(Instruction *I, 153 const TargetLibraryInfo &TLI) { 154 if (isa<StoreInst>(I)) 155 return true; 156 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 157 switch (II->getIntrinsicID()) { 158 default: 159 return false; 160 case Intrinsic::memset: 161 case Intrinsic::memmove: 162 case Intrinsic::memcpy: 163 case Intrinsic::memcpy_element_unordered_atomic: 164 case Intrinsic::memmove_element_unordered_atomic: 165 case Intrinsic::memset_element_unordered_atomic: 166 case Intrinsic::init_trampoline: 167 case Intrinsic::lifetime_end: 168 return true; 169 } 170 } 171 if (auto CS = CallSite(I)) { 172 if (Function *F = CS.getCalledFunction()) { 173 StringRef FnName = F->getName(); 174 if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) 175 return true; 176 if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) 177 return true; 178 if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) 179 return true; 180 if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) 181 return true; 182 } 183 } 184 return false; 185 } 186 187 /// Return a Location stored to by the specified instruction. If isRemovable 188 /// returns true, this function and getLocForRead completely describe the memory 189 /// operations for this instruction. 
190 static MemoryLocation getLocForWrite(Instruction *Inst) { 191 192 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 193 return MemoryLocation::get(SI); 194 195 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) { 196 // memcpy/memmove/memset. 197 MemoryLocation Loc = MemoryLocation::getForDest(MI); 198 return Loc; 199 } 200 201 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 202 switch (II->getIntrinsicID()) { 203 default: 204 return MemoryLocation(); // Unhandled intrinsic. 205 case Intrinsic::init_trampoline: 206 return MemoryLocation(II->getArgOperand(0)); 207 case Intrinsic::lifetime_end: { 208 uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); 209 return MemoryLocation(II->getArgOperand(1), Len); 210 } 211 } 212 } 213 if (auto CS = CallSite(Inst)) 214 // All the supported TLI functions so far happen to have dest as their 215 // first argument. 216 return MemoryLocation(CS.getArgument(0)); 217 return MemoryLocation(); 218 } 219 220 /// Return the location read by the specified "hasAnalyzableMemoryWrite" 221 /// instruction if any. 222 static MemoryLocation getLocForRead(Instruction *Inst, 223 const TargetLibraryInfo &TLI) { 224 assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); 225 226 // The only instructions that both read and write are the mem transfer 227 // instructions (memcpy/memmove). 228 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst)) 229 return MemoryLocation::getForSource(MTI); 230 return MemoryLocation(); 231 } 232 233 /// If the value of this instruction and the memory it writes to is unused, may 234 /// we delete this instruction? 235 static bool isRemovable(Instruction *I) { 236 // Don't remove volatile/atomic stores. 237 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 238 return SI->isUnordered(); 239 240 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 241 switch (II->getIntrinsicID()) { 242 default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); 243 case Intrinsic::lifetime_end: 244 // Never remove dead lifetime_end's, e.g. because it is followed by a 245 // free. 246 return false; 247 case Intrinsic::init_trampoline: 248 // Always safe to remove init_trampoline. 249 return true; 250 case Intrinsic::memset: 251 case Intrinsic::memmove: 252 case Intrinsic::memcpy: 253 // Don't remove volatile memory intrinsics. 254 return !cast<MemIntrinsic>(II)->isVolatile(); 255 case Intrinsic::memcpy_element_unordered_atomic: 256 case Intrinsic::memmove_element_unordered_atomic: 257 case Intrinsic::memset_element_unordered_atomic: 258 return true; 259 } 260 } 261 262 // note: only get here for calls with analyzable writes - i.e. libcalls 263 if (auto CS = CallSite(I)) 264 return CS.getInstruction()->use_empty(); 265 266 return false; 267 } 268 269 /// Returns true if the end of this instruction can be safely shortened in 270 /// length. 271 static bool isShortenableAtTheEnd(Instruction *I) { 272 // Don't shorten stores for now 273 if (isa<StoreInst>(I)) 274 return false; 275 276 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 277 switch (II->getIntrinsicID()) { 278 default: return false; 279 case Intrinsic::memset: 280 case Intrinsic::memcpy: 281 case Intrinsic::memcpy_element_unordered_atomic: 282 case Intrinsic::memset_element_unordered_atomic: 283 // Do shorten memory intrinsics. 284 // FIXME: Add memmove if it's also safe to transform. 285 return true; 286 } 287 } 288 289 // Don't shorten libcalls calls for now. 

  return false;
}

/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  // easily done by offsetting the source address.
  return isa<AnyMemSetInst>(I);
}

/// Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
  // TODO: factor this to reuse getLocForWrite
  MemoryLocation Loc = getLocForWrite(I);
  assert(Loc.Ptr &&
         "unable to find pointer written for analyzable instruction?");
  // TODO: most APIs don't expect const Value *
  return const_cast<Value*>(Loc.Ptr);
}

static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI) {
  uint64_t Size;
  if (getObjectSize(V, Size, DL, &TLI))
    return Size;
  return MemoryLocation::UnknownSize;
}

namespace {

enum OverwriteResult {
  OW_Begin,
  OW_Complete,
  OW_End,
  OW_PartialEarlierWithFullLater,
  OW_Unknown
};

} // end anonymous namespace

/// Return 'OW_Complete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
/// beginning of the 'Earlier' location is overwritten by 'Later'.
/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
/// overwritten by a later (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
                                   const MemoryLocation &Earlier,
                                   const DataLayout &DL,
                                   const TargetLibraryInfo &TLI,
                                   int64_t &EarlierOff, int64_t &LaterOff,
                                   Instruction *DepWrite,
                                   InstOverlapIntervalsTy &IOL,
                                   AliasAnalysis &AA) {
  // If we don't know the sizes of either access, then we can't do a
  // comparison.
  if (Later.Size == MemoryLocation::UnknownSize ||
      Earlier.Size == MemoryLocation::UnknownSize)
    return OW_Unknown;

  const Value *P1 = Earlier.Ptr->stripPointerCasts();
  const Value *P2 = Later.Ptr->stripPointerCasts();

  // If the start pointers are the same, we just have to compare sizes to see
  // if the later store was larger than the earlier store.
  if (P1 == P2 || AA.isMustAlias(P1, P2)) {
    // Make sure that the Later size is >= the Earlier size.
    if (Later.Size >= Earlier.Size)
      return OW_Complete;
  }

  // Check to see if the later store is to the entire object (either a global,
  // an alloca, or a byval/inalloca argument). If so, then it clearly
  // overwrites any other store to the same object.
  const Value *UO1 = GetUnderlyingObject(P1, DL),
              *UO2 = GetUnderlyingObject(P2, DL);

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (UO1 != UO2)
    return OW_Unknown;

  // If the "Later" store is to a recognizable object, get its size.
  uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
  if (ObjectSize != MemoryLocation::UnknownSize)
    if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
      return OW_Complete;

  // Okay, we have stores to two completely different pointers.  Try to
  // decompose the pointer into a "base + constant_offset" form.
If the base 382 // pointers are equal, then we can reason about the two stores. 383 EarlierOff = 0; 384 LaterOff = 0; 385 const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); 386 const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); 387 388 // If the base pointers still differ, we have two completely different stores. 389 if (BP1 != BP2) 390 return OW_Unknown; 391 392 // The later store completely overlaps the earlier store if: 393 // 394 // 1. Both start at the same offset and the later one's size is greater than 395 // or equal to the earlier one's, or 396 // 397 // |--earlier--| 398 // |-- later --| 399 // 400 // 2. The earlier store has an offset greater than the later offset, but which 401 // still lies completely within the later store. 402 // 403 // |--earlier--| 404 // |----- later ------| 405 // 406 // We have to be careful here as *Off is signed while *.Size is unsigned. 407 if (EarlierOff >= LaterOff && 408 Later.Size >= Earlier.Size && 409 uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) 410 return OW_Complete; 411 412 // We may now overlap, although the overlap is not complete. There might also 413 // be other incomplete overlaps, and together, they might cover the complete 414 // earlier write. 415 // Note: The correctness of this logic depends on the fact that this function 416 // is not even called providing DepWrite when there are any intervening reads. 417 if (EnablePartialOverwriteTracking && 418 LaterOff < int64_t(EarlierOff + Earlier.Size) && 419 int64_t(LaterOff + Later.Size) >= EarlierOff) { 420 421 // Insert our part of the overlap into the map. 422 auto &IM = IOL[DepWrite]; 423 DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff << ", " << 424 int64_t(EarlierOff + Earlier.Size) << ") Later [" << 425 LaterOff << ", " << int64_t(LaterOff + Later.Size) << ")\n"); 426 427 // Make sure that we only insert non-overlapping intervals and combine 428 // adjacent intervals. The intervals are stored in the map with the ending 429 // offset as the key (in the half-open sense) and the starting offset as 430 // the value. 431 int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + Later.Size; 432 433 // Find any intervals ending at, or after, LaterIntStart which start 434 // before LaterIntEnd. 435 auto ILI = IM.lower_bound(LaterIntStart); 436 if (ILI != IM.end() && ILI->second <= LaterIntEnd) { 437 // This existing interval is overlapped with the current store somewhere 438 // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing 439 // intervals and adjusting our start and end. 440 LaterIntStart = std::min(LaterIntStart, ILI->second); 441 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 442 ILI = IM.erase(ILI); 443 444 // Continue erasing and adjusting our end in case other previous 445 // intervals are also overlapped with the current store. 
      //
      //     |--- earlier 1 ---|  |--- earlier 2 ---|
      //         |-------- later --------|
      //
      while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
        assert(ILI->second > LaterIntStart && "Unexpected interval");
        LaterIntEnd = std::max(LaterIntEnd, ILI->first);
        ILI = IM.erase(ILI);
      }
    }

    IM[LaterIntEnd] = LaterIntStart;

    ILI = IM.begin();
    if (ILI->second <= EarlierOff &&
        ILI->first >= int64_t(EarlierOff + Earlier.Size)) {
      DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" <<
            EarlierOff << ", " <<
            int64_t(EarlierOff + Earlier.Size) <<
            ") Composite Later [" <<
            ILI->second << ", " << ILI->first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // Check for an earlier store which writes to all the memory locations that
  // the later store writes to.
  if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
      int64_t(EarlierOff + Earlier.Size) > LaterOff &&
      uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
    DEBUG(dbgs() << "DSE: Partial overwrite of an earlier store [" << EarlierOff
                 << ", " << int64_t(EarlierOff + Earlier.Size)
                 << ") by a later store [" << LaterOff << ", "
                 << int64_t(LaterOff + Later.Size) << ")\n");
    // TODO: Maybe come up with a better name?
    return OW_PartialEarlierWithFullLater;
  }

  // Another interesting case is if the later store overwrites the end of the
  // earlier store.
  //
  //      |--earlier--|
  //                |--   later   --|
  //
  // In this case we may want to trim the size of earlier to avoid generating
  // writes to addresses which will definitely be overwritten later.
  if (!EnablePartialOverwriteTracking &&
      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
       int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
    return OW_End;

  // Finally, we also need to check if the later store overwrites the beginning
  // of the earlier store.
  //
  //                |--earlier--|
  //      |--   later   --|
  //
  // In this case we may want to move the destination address and trim the size
  // of earlier to avoid generating writes to addresses which will definitely
  // be overwritten later.
  if (!EnablePartialOverwriteTracking &&
      (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
    assert(int64_t(LaterOff + Later.Size) <
               int64_t(EarlierOff + Earlier.Size) &&
           "Expect to be handled as OW_Complete");
    return OW_Begin;
  }
  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}

/// If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense.  Consider this case:
///
///   memmove(A <- B)
///   memmove(A <- A)
///
/// In this case, the second store to A does not make the first store to A dead.
/// The usual situation isn't an explicit A<-A store like this (which can be
/// trivially removed) but a case where two pointers may alias.
///
/// This function detects when it is unsafe to remove a dependent instruction
/// because the DSE-inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
                               const MemoryLocation &InstStoreLoc,
                               Instruction *DepWrite,
                               const TargetLibraryInfo &TLI,
                               AliasAnalysis &AA) {
  // Self reads can only happen for instructions that read memory.
Get the 537 // location read. 538 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 539 if (!InstReadLoc.Ptr) 540 return false; // Not a reading instruction. 541 542 // If the read and written loc obviously don't alias, it isn't a read. 543 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) 544 return false; 545 546 if (isa<AnyMemCpyInst>(Inst)) { 547 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) 548 // but in practice memcpy(A <- B) either means that A and B are disjoint or 549 // are equal (i.e. there are not partial overlaps). Given that, if we have: 550 // 551 // memcpy/memmove(A <- B) // DepWrite 552 // memcpy(A <- B) // Inst 553 // 554 // with Inst reading/writing a >= size than DepWrite, we can reason as 555 // follows: 556 // 557 // - If A == B then both the copies are no-ops, so the DepWrite can be 558 // removed. 559 // - If A != B then A and B are disjoint locations in Inst. Since 560 // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. 561 // Therefore DepWrite can be removed. 562 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 563 564 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 565 return false; 566 } 567 568 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 569 // then it can't be considered dead. 570 return true; 571 } 572 573 /// Returns true if the memory which is accessed by the second instruction is not 574 /// modified between the first and the second instruction. 575 /// Precondition: Second instruction must be dominated by the first 576 /// instruction. 577 static bool memoryIsNotModifiedBetween(Instruction *FirstI, 578 Instruction *SecondI, 579 AliasAnalysis *AA) { 580 SmallVector<BasicBlock *, 16> WorkList; 581 SmallPtrSet<BasicBlock *, 8> Visited; 582 BasicBlock::iterator FirstBBI(FirstI); 583 ++FirstBBI; 584 BasicBlock::iterator SecondBBI(SecondI); 585 BasicBlock *FirstBB = FirstI->getParent(); 586 BasicBlock *SecondBB = SecondI->getParent(); 587 MemoryLocation MemLoc = MemoryLocation::get(SecondI); 588 589 // Start checking the store-block. 590 WorkList.push_back(SecondBB); 591 bool isFirstBlock = true; 592 593 // Check all blocks going backward until we reach the load-block. 594 while (!WorkList.empty()) { 595 BasicBlock *B = WorkList.pop_back_val(); 596 597 // Ignore instructions before LI if this is the FirstBB. 598 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); 599 600 BasicBlock::iterator EI; 601 if (isFirstBlock) { 602 // Ignore instructions after SI if this is the first visit of SecondBB. 603 assert(B == SecondBB && "first block is not the store block"); 604 EI = SecondBBI; 605 isFirstBlock = false; 606 } else { 607 // It's not SecondBB or (in case of a loop) the second visit of SecondBB. 608 // In this case we also have to look at instructions after SI. 609 EI = B->end(); 610 } 611 for (; BI != EI; ++BI) { 612 Instruction *I = &*BI; 613 if (I->mayWriteToMemory() && I != SecondI) 614 if (isModSet(AA->getModRefInfo(I, MemLoc))) 615 return false; 616 } 617 if (B != FirstBB) { 618 assert(B != &FirstBB->getParent()->getEntryBlock() && 619 "Should not hit the entry block because SI must be dominated by LI"); 620 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { 621 if (!Visited.insert(*PredI).second) 622 continue; 623 WorkList.push_back(*PredI); 624 } 625 } 626 } 627 return true; 628 } 629 630 /// Find all blocks that will unconditionally lead to the block BB and append 631 /// them to F. 
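///
/// Illustration only (not tied to any particular test case): if BB has two
/// predecessors, P1 ending in an unconditional branch to BB and P2 ending in a
/// conditional branch, only P1 is collected, because control leaving P1 always
/// reaches BB while control leaving P2 may not.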
632 static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, 633 BasicBlock *BB, DominatorTree *DT) { 634 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { 635 BasicBlock *Pred = *I; 636 if (Pred == BB) continue; 637 TerminatorInst *PredTI = Pred->getTerminator(); 638 if (PredTI->getNumSuccessors() != 1) 639 continue; 640 641 if (DT->isReachableFromEntry(Pred)) 642 Blocks.push_back(Pred); 643 } 644 } 645 646 /// Handle frees of entire structures whose dependency is a store 647 /// to a field of that structure. 648 static bool handleFree(CallInst *F, AliasAnalysis *AA, 649 MemoryDependenceResults *MD, DominatorTree *DT, 650 const TargetLibraryInfo *TLI, 651 InstOverlapIntervalsTy &IOL, 652 DenseMap<Instruction*, size_t> *InstrOrdering) { 653 bool MadeChange = false; 654 655 MemoryLocation Loc = MemoryLocation(F->getOperand(0)); 656 SmallVector<BasicBlock *, 16> Blocks; 657 Blocks.push_back(F->getParent()); 658 const DataLayout &DL = F->getModule()->getDataLayout(); 659 660 while (!Blocks.empty()) { 661 BasicBlock *BB = Blocks.pop_back_val(); 662 Instruction *InstPt = BB->getTerminator(); 663 if (BB == F->getParent()) InstPt = F; 664 665 MemDepResult Dep = 666 MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); 667 while (Dep.isDef() || Dep.isClobber()) { 668 Instruction *Dependency = Dep.getInst(); 669 if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || 670 !isRemovable(Dependency)) 671 break; 672 673 Value *DepPointer = 674 GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); 675 676 // Check for aliasing. 677 if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) 678 break; 679 680 DEBUG(dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " 681 << *Dependency << '\n'); 682 683 // DCE instructions only used to calculate that store. 684 BasicBlock::iterator BBI(Dependency); 685 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, InstrOrdering); 686 ++NumFastStores; 687 MadeChange = true; 688 689 // Inst's old Dependency is now deleted. Compute the next dependency, 690 // which may also be dead, as in 691 // s[0] = 0; 692 // s[1] = 0; // This has just been deleted. 693 // free(s); 694 Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB); 695 } 696 697 if (Dep.isNonLocal()) 698 findUnconditionalPreds(Blocks, BB, DT); 699 } 700 701 return MadeChange; 702 } 703 704 /// Check to see if the specified location may alias any of the stack objects in 705 /// the DeadStackObjects set. If so, they become live because the location is 706 /// being loaded. 707 static void removeAccessedObjects(const MemoryLocation &LoadedLoc, 708 SmallSetVector<Value *, 16> &DeadStackObjects, 709 const DataLayout &DL, AliasAnalysis *AA, 710 const TargetLibraryInfo *TLI) { 711 const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); 712 713 // A constant can't be in the dead pointer set. 714 if (isa<Constant>(UnderlyingPointer)) 715 return; 716 717 // If the kill pointer can be easily reduced to an alloca, don't bother doing 718 // extraneous AA queries. 719 if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { 720 DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer)); 721 return; 722 } 723 724 // Remove objects that could alias LoadedLoc. 725 DeadStackObjects.remove_if([&](Value *I) { 726 // See if the loaded location could alias the stack location. 
727 MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI)); 728 return !AA->isNoAlias(StackLoc, LoadedLoc); 729 }); 730 } 731 732 /// Remove dead stores to stack-allocated locations in the function end block. 733 /// Ex: 734 /// %A = alloca i32 735 /// ... 736 /// store i32 1, i32* %A 737 /// ret void 738 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, 739 MemoryDependenceResults *MD, 740 const TargetLibraryInfo *TLI, 741 InstOverlapIntervalsTy &IOL, 742 DenseMap<Instruction*, size_t> *InstrOrdering) { 743 bool MadeChange = false; 744 745 // Keep track of all of the stack objects that are dead at the end of the 746 // function. 747 SmallSetVector<Value*, 16> DeadStackObjects; 748 749 // Find all of the alloca'd pointers in the entry block. 750 BasicBlock &Entry = BB.getParent()->front(); 751 for (Instruction &I : Entry) { 752 if (isa<AllocaInst>(&I)) 753 DeadStackObjects.insert(&I); 754 755 // Okay, so these are dead heap objects, but if the pointer never escapes 756 // then it's leaked by this function anyways. 757 else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) 758 DeadStackObjects.insert(&I); 759 } 760 761 // Treat byval or inalloca arguments the same, stores to them are dead at the 762 // end of the function. 763 for (Argument &AI : BB.getParent()->args()) 764 if (AI.hasByValOrInAllocaAttr()) 765 DeadStackObjects.insert(&AI); 766 767 const DataLayout &DL = BB.getModule()->getDataLayout(); 768 769 // Scan the basic block backwards 770 for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ 771 --BBI; 772 773 // If we find a store, check to see if it points into a dead stack value. 774 if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { 775 // See through pointer-to-pointer bitcasts 776 SmallVector<Value *, 4> Pointers; 777 GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); 778 779 // Stores to stack values are valid candidates for removal. 780 bool AllDead = true; 781 for (Value *Pointer : Pointers) 782 if (!DeadStackObjects.count(Pointer)) { 783 AllDead = false; 784 break; 785 } 786 787 if (AllDead) { 788 Instruction *Dead = &*BBI; 789 790 DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " 791 << *Dead << "\n Objects: "; 792 for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), 793 E = Pointers.end(); I != E; ++I) { 794 dbgs() << **I; 795 if (std::next(I) != E) 796 dbgs() << ", "; 797 } 798 dbgs() << '\n'); 799 800 // DCE instructions only used to calculate that store. 801 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects); 802 ++NumFastStores; 803 MadeChange = true; 804 continue; 805 } 806 } 807 808 // Remove any dead non-memory-mutating instructions. 809 if (isInstructionTriviallyDead(&*BBI, TLI)) { 810 DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " 811 << *&*BBI << '\n'); 812 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects); 813 ++NumFastOther; 814 MadeChange = true; 815 continue; 816 } 817 818 if (isa<AllocaInst>(BBI)) { 819 // Remove allocas from the list of dead stack objects; there can't be 820 // any references before the definition. 821 DeadStackObjects.remove(&*BBI); 822 continue; 823 } 824 825 if (auto CS = CallSite(&*BBI)) { 826 // Remove allocation function calls from the list of dead stack objects; 827 // there can't be any references before the definition. 
      if (isAllocLikeFn(&*BBI, TLI))
        DeadStackObjects.remove(&*BBI);

      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      DeadStackObjects.remove_if([&](Value *I) {
        // See if the call site touches the value.
        return isRefSet(AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI)));
      });

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        break;

      continue;
    }

    // We can remove dead stores irrespective of the fence and its ordering
    // (release/acquire/seq_cst). Fences only constrain the ordering of already
    // visible stores; they do not make a store visible to other threads. So
    // skipping over a fence does not change whether a store is dead.
    if (isa<FenceInst>(*BBI))
      continue;

    MemoryLocation LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead.
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load.
        break;
      LoadedLoc = MemoryLocation::get(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = MemoryLocation::get(V);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory. Note that stores that weren't removed
      // above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}

static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
                         int64_t &EarlierSize, int64_t LaterOffset,
                         int64_t LaterSize, bool IsOverwriteEnd) {
  // TODO: Base this on the target vector size so that if the earlier store was
  // too small to get vector writes anyway, then it's likely a good idea to
  // shorten it.
  // Power-of-2 vector writes are probably always a bad idea to shorten, as any
  // store/memset/memcpy is likely using vector instructions, so shortening it
  // to a non-vector size is likely to be slower.
  auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
  unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment();
  if (!IsOverwriteEnd)
    LaterOffset = int64_t(LaterOffset + LaterSize);

  if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
      !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
    return false;

  int64_t NewLength = IsOverwriteEnd
                          ? LaterOffset - EarlierOffset
                          : EarlierSize - (LaterOffset - EarlierOffset);

  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
    // When shortening an atomic memory intrinsic, the newly shortened
    // length must remain an integer multiple of the element size.
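    // A hypothetical example: an atomic memset with element size 4 and length
    // 16 could be shortened to 12 or 8 bytes, but not to 10, since 10 is not
    // a multiple of the element size.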
915 const uint32_t ElementSize = AMI->getElementSizeInBytes(); 916 if (0 != NewLength % ElementSize) 917 return false; 918 } 919 920 DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " 921 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite 922 << "\n KILLER (offset " << LaterOffset << ", " << EarlierSize 923 << ")\n"); 924 925 Value *EarlierWriteLength = EarlierIntrinsic->getLength(); 926 Value *TrimmedLength = 927 ConstantInt::get(EarlierWriteLength->getType(), NewLength); 928 EarlierIntrinsic->setLength(TrimmedLength); 929 930 EarlierSize = NewLength; 931 if (!IsOverwriteEnd) { 932 int64_t OffsetMoved = (LaterOffset - EarlierOffset); 933 Value *Indices[1] = { 934 ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)}; 935 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( 936 EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite); 937 EarlierIntrinsic->setDest(NewDestGEP); 938 EarlierOffset = EarlierOffset + OffsetMoved; 939 } 940 return true; 941 } 942 943 static bool tryToShortenEnd(Instruction *EarlierWrite, 944 OverlapIntervalsTy &IntervalMap, 945 int64_t &EarlierStart, int64_t &EarlierSize) { 946 if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) 947 return false; 948 949 OverlapIntervalsTy::iterator OII = --IntervalMap.end(); 950 int64_t LaterStart = OII->second; 951 int64_t LaterSize = OII->first - LaterStart; 952 953 if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize && 954 LaterStart + LaterSize >= EarlierStart + EarlierSize) { 955 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 956 LaterSize, true)) { 957 IntervalMap.erase(OII); 958 return true; 959 } 960 } 961 return false; 962 } 963 964 static bool tryToShortenBegin(Instruction *EarlierWrite, 965 OverlapIntervalsTy &IntervalMap, 966 int64_t &EarlierStart, int64_t &EarlierSize) { 967 if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) 968 return false; 969 970 OverlapIntervalsTy::iterator OII = IntervalMap.begin(); 971 int64_t LaterStart = OII->second; 972 int64_t LaterSize = OII->first - LaterStart; 973 974 if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) { 975 assert(LaterStart + LaterSize < EarlierStart + EarlierSize && 976 "Should have been handled as OW_Complete"); 977 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 978 LaterSize, false)) { 979 IntervalMap.erase(OII); 980 return true; 981 } 982 } 983 return false; 984 } 985 986 static bool removePartiallyOverlappedStores(AliasAnalysis *AA, 987 const DataLayout &DL, 988 InstOverlapIntervalsTy &IOL) { 989 bool Changed = false; 990 for (auto OI : IOL) { 991 Instruction *EarlierWrite = OI.first; 992 MemoryLocation Loc = getLocForWrite(EarlierWrite); 993 assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); 994 assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc"); 995 996 const Value *Ptr = Loc.Ptr->stripPointerCasts(); 997 int64_t EarlierStart = 0; 998 int64_t EarlierSize = int64_t(Loc.Size); 999 GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); 1000 OverlapIntervalsTy &IntervalMap = OI.second; 1001 Changed |= 1002 tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1003 if (IntervalMap.empty()) 1004 continue; 1005 Changed |= 1006 tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1007 } 1008 return Changed; 1009 } 1010 1011 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, 1012 AliasAnalysis *AA, 
MemoryDependenceResults *MD, 1013 const DataLayout &DL, 1014 const TargetLibraryInfo *TLI, 1015 InstOverlapIntervalsTy &IOL, 1016 DenseMap<Instruction*, size_t> *InstrOrdering) { 1017 // Must be a store instruction. 1018 StoreInst *SI = dyn_cast<StoreInst>(Inst); 1019 if (!SI) 1020 return false; 1021 1022 // If we're storing the same value back to a pointer that we just loaded from, 1023 // then the store can be removed. 1024 if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { 1025 if (SI->getPointerOperand() == DepLoad->getPointerOperand() && 1026 isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) { 1027 1028 DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " 1029 << *DepLoad << "\n STORE: " << *SI << '\n'); 1030 1031 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering); 1032 ++NumRedundantStores; 1033 return true; 1034 } 1035 } 1036 1037 // Remove null stores into the calloc'ed objects 1038 Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); 1039 if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { 1040 Instruction *UnderlyingPointer = 1041 dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL)); 1042 1043 if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && 1044 memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) { 1045 DEBUG( 1046 dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " 1047 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); 1048 1049 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering); 1050 ++NumRedundantStores; 1051 return true; 1052 } 1053 } 1054 return false; 1055 } 1056 1057 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, 1058 MemoryDependenceResults *MD, DominatorTree *DT, 1059 const TargetLibraryInfo *TLI) { 1060 const DataLayout &DL = BB.getModule()->getDataLayout(); 1061 bool MadeChange = false; 1062 1063 // FIXME: Maybe change this to use some abstraction like OrderedBasicBlock? 1064 // The current OrderedBasicBlock can't deal with mutation at the moment. 1065 size_t LastThrowingInstIndex = 0; 1066 DenseMap<Instruction*, size_t> InstrOrdering; 1067 size_t InstrIndex = 1; 1068 1069 // A map of interval maps representing partially-overwritten value parts. 1070 InstOverlapIntervalsTy IOL; 1071 1072 // Do a top-down walk on the BB. 1073 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { 1074 // Handle 'free' calls specially. 1075 if (CallInst *F = isFreeCall(&*BBI, TLI)) { 1076 MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, &InstrOrdering); 1077 // Increment BBI after handleFree has potentially deleted instructions. 1078 // This ensures we maintain a valid iterator. 1079 ++BBI; 1080 continue; 1081 } 1082 1083 Instruction *Inst = &*BBI++; 1084 1085 size_t CurInstNumber = InstrIndex++; 1086 InstrOrdering.insert(std::make_pair(Inst, CurInstNumber)); 1087 if (Inst->mayThrow()) { 1088 LastThrowingInstIndex = CurInstNumber; 1089 continue; 1090 } 1091 1092 // Check to see if Inst writes to memory. If not, continue. 1093 if (!hasAnalyzableMemoryWrite(Inst, *TLI)) 1094 continue; 1095 1096 // eliminateNoopStore will update in iterator, if necessary. 1097 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) { 1098 MadeChange = true; 1099 continue; 1100 } 1101 1102 // If we find something that writes memory, get its memory dependence. 
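    // Illustrative (hypothetical IR, not from a test):
    //   store i32 0, i32* %p    ; DepWrite, returned as the local dependency
    //   store i32 1, i32* %p    ; Inst
    // The loop below walks such dependencies and removes the earlier store
    // when it is provably overwritten before it can be read.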
    MemDepResult InstDep = MD->getDependency(Inst);

    // Ignore any store where we can't find a local dependence.
    // FIXME: cross-block DSE would be fun. :)
    if (!InstDep.isDef() && !InstDep.isClobber())
      continue;

    // Figure out what location is being stored to.
    MemoryLocation Loc = getLocForWrite(Inst);

    // If we didn't get a useful location, fail.
    if (!Loc.Ptr)
      continue;

    // Loop until we find a store we can eliminate or a load that
    // invalidates the analysis. Without an upper bound on the number of
    // instructions examined, this analysis can become very time-consuming.
    // However, the potential gain diminishes as we process more instructions
    // without eliminating any of them. Therefore, we limit the number of
    // instructions we look at.
    auto Limit = MD->getDefaultBlockScanLimit();
    while (InstDep.isDef() || InstDep.isClobber()) {
      // Get the memory clobbered by the instruction we depend on. MemDep will
      // skip any instructions that 'Loc' clearly doesn't interact with. If we
      // end up depending on a may- or must-aliased load, then we can't
      // optimize away the store and we bail out. However, if we depend on
      // something that overwrites the memory location, we *can* potentially
      // optimize it.
      //
      // Find out what memory location the dependent instruction stores.
      Instruction *DepWrite = InstDep.getInst();
      if (!hasAnalyzableMemoryWrite(DepWrite, *TLI))
        break;
      MemoryLocation DepLoc = getLocForWrite(DepWrite);
      // If we didn't get a useful location, or if it doesn't have a known
      // size, bail out.
      if (!DepLoc.Ptr)
        break;

      // Make sure we don't look past a call which might throw. This is an
      // issue because MemoryDependenceAnalysis works in the wrong direction:
      // it finds instructions which dominate the current instruction, rather
      // than instructions which are post-dominated by the current instruction.
      //
      // If the underlying object is a non-escaping memory allocation, any
      // store to it is dead along the unwind edge. Otherwise, we need to
      // preserve the store.
      size_t DepIndex = InstrOrdering.lookup(DepWrite);
      assert(DepIndex && "Unexpected instruction");
      if (DepIndex <= LastThrowingInstIndex) {
        const Value *Underlying = GetUnderlyingObject(DepLoc.Ptr, DL);
        bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
        if (!IsStoreDeadOnUnwind) {
          // We're looking for a call to an allocation function where the
          // allocation doesn't escape before the last throwing instruction;
          // PointerMayBeCaptured is a reasonably fast approximation.
          IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) &&
                                !PointerMayBeCaptured(Underlying, false, true);
        }
        if (!IsStoreDeadOnUnwind)
          break;
      }

      // If we find a write that is a) removable (i.e., non-volatile), b)
      // completely obliterated by the store to 'Loc', and c) one which we know
      // 'Inst' doesn't load from, then we can remove it.
      // Also try to merge two stores if a later one only touches memory
      // written to by the earlier one.
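      // A hypothetical example of the merge case (little-endian target, %p1
      // addressing byte 1 of %p):
      //   store i32 0, i32* %p    ; DepWrite
      //   store i8 1, i8* %p1     ; Inst
      // becomes the single wider store
      //   store i32 256, i32* %p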
1170 if (isRemovable(DepWrite) && 1171 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { 1172 int64_t InstWriteOffset, DepWriteOffset; 1173 OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, 1174 InstWriteOffset, DepWrite, IOL, *AA); 1175 if (OR == OW_Complete) { 1176 DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " 1177 << *DepWrite << "\n KILLER: " << *Inst << '\n'); 1178 1179 // Delete the store and now-dead instructions that feed it. 1180 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, &InstrOrdering); 1181 ++NumFastStores; 1182 MadeChange = true; 1183 1184 // We erased DepWrite; start over. 1185 InstDep = MD->getDependency(Inst); 1186 continue; 1187 } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) || 1188 ((OR == OW_Begin && 1189 isShortenableAtTheBeginning(DepWrite)))) { 1190 assert(!EnablePartialOverwriteTracking && "Do not expect to perform " 1191 "when partial-overwrite " 1192 "tracking is enabled"); 1193 int64_t EarlierSize = DepLoc.Size; 1194 int64_t LaterSize = Loc.Size; 1195 bool IsOverwriteEnd = (OR == OW_End); 1196 MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, 1197 InstWriteOffset, LaterSize, IsOverwriteEnd); 1198 } else if (EnablePartialStoreMerging && 1199 OR == OW_PartialEarlierWithFullLater) { 1200 auto *Earlier = dyn_cast<StoreInst>(DepWrite); 1201 auto *Later = dyn_cast<StoreInst>(Inst); 1202 if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && 1203 Later && isa<ConstantInt>(Later->getValueOperand()) && 1204 memoryIsNotModifiedBetween(Earlier, Later, AA)) { 1205 // If the store we find is: 1206 // a) partially overwritten by the store to 'Loc' 1207 // b) the later store is fully contained in the earlier one and 1208 // c) they both have a constant value 1209 // Merge the two stores, replacing the earlier store's value with a 1210 // merge of both values. 1211 // TODO: Deal with other constant types (vectors, etc), and probably 1212 // some mem intrinsics (if needed) 1213 1214 APInt EarlierValue = 1215 cast<ConstantInt>(Earlier->getValueOperand())->getValue(); 1216 APInt LaterValue = 1217 cast<ConstantInt>(Later->getValueOperand())->getValue(); 1218 unsigned LaterBits = LaterValue.getBitWidth(); 1219 assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); 1220 LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); 1221 1222 // Offset of the smaller store inside the larger store 1223 unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; 1224 unsigned LShiftAmount = 1225 DL.isBigEndian() 1226 ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits 1227 : BitOffsetDiff; 1228 APInt Mask = 1229 APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, 1230 LShiftAmount + LaterBits); 1231 // Clear the bits we'll be replacing, then OR with the smaller 1232 // store, shifted appropriately. 
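          // Worked example with hypothetical values: EarlierValue = 0x11223344
          // (i32) and LaterValue = 0xAA (i8) stored one byte into the earlier
          // store, so BitOffsetDiff = 8 and LaterBits = 8.
          //   Little-endian: LShiftAmount = 8,  Mask = 0x0000FF00,
          //                  Merged      = 0x1122AA44
          //   Big-endian:    LShiftAmount = 16, Mask = 0x00FF0000,
          //                  Merged      = 0x11AA3344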
1233 APInt Merged = 1234 (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); 1235 DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *DepWrite 1236 << "\n Later: " << *Inst 1237 << "\n Merged Value: " << Merged << '\n'); 1238 1239 auto *SI = new StoreInst( 1240 ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), 1241 Earlier->getPointerOperand(), false, Earlier->getAlignment(), 1242 Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); 1243 1244 unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, 1245 LLVMContext::MD_alias_scope, 1246 LLVMContext::MD_noalias, 1247 LLVMContext::MD_nontemporal}; 1248 SI->copyMetadata(*DepWrite, MDToKeep); 1249 ++NumModifiedStores; 1250 1251 // Remove earlier, wider, store 1252 size_t Idx = InstrOrdering.lookup(DepWrite); 1253 InstrOrdering.erase(DepWrite); 1254 InstrOrdering.insert(std::make_pair(SI, Idx)); 1255 1256 // Delete the old stores and now-dead instructions that feed them. 1257 deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering); 1258 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1259 &InstrOrdering); 1260 MadeChange = true; 1261 1262 // We erased DepWrite and Inst (Loc); start over. 1263 break; 1264 } 1265 } 1266 } 1267 1268 // If this is a may-aliased store that is clobbering the store value, we 1269 // can keep searching past it for another must-aliased pointer that stores 1270 // to the same location. For example, in: 1271 // store -> P 1272 // store -> Q 1273 // store -> P 1274 // we can remove the first store to P even though we don't know if P and Q 1275 // alias. 1276 if (DepWrite == &BB.front()) break; 1277 1278 // Can't look past this instruction if it might read 'Loc'. 1279 if (isRefSet(AA->getModRefInfo(DepWrite, Loc))) 1280 break; 1281 1282 InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false, 1283 DepWrite->getIterator(), &BB, 1284 /*QueryInst=*/ nullptr, &Limit); 1285 } 1286 } 1287 1288 if (EnablePartialOverwriteTracking) 1289 MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL); 1290 1291 // If this block ends in a return, unwind, or unreachable, all allocas are 1292 // dead at its end, which means stores to them are also dead. 1293 if (BB.getTerminator()->getNumSuccessors() == 0) 1294 MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, &InstrOrdering); 1295 1296 return MadeChange; 1297 } 1298 1299 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, 1300 MemoryDependenceResults *MD, DominatorTree *DT, 1301 const TargetLibraryInfo *TLI) { 1302 bool MadeChange = false; 1303 for (BasicBlock &BB : F) 1304 // Only check non-dead blocks. Dead blocks may have strange pointer 1305 // cycles that will confuse alias analysis. 
1306 if (DT->isReachableFromEntry(&BB)) 1307 MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI); 1308 1309 return MadeChange; 1310 } 1311 1312 //===----------------------------------------------------------------------===// 1313 // DSE Pass 1314 //===----------------------------------------------------------------------===// 1315 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { 1316 AliasAnalysis *AA = &AM.getResult<AAManager>(F); 1317 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1318 MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F); 1319 const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 1320 1321 if (!eliminateDeadStores(F, AA, MD, DT, TLI)) 1322 return PreservedAnalyses::all(); 1323 1324 PreservedAnalyses PA; 1325 PA.preserveSet<CFGAnalyses>(); 1326 PA.preserve<GlobalsAA>(); 1327 PA.preserve<MemoryDependenceAnalysis>(); 1328 return PA; 1329 } 1330 1331 namespace { 1332 1333 /// A legacy pass for the legacy pass manager that wraps \c DSEPass. 1334 class DSELegacyPass : public FunctionPass { 1335 public: 1336 static char ID; // Pass identification, replacement for typeid 1337 1338 DSELegacyPass() : FunctionPass(ID) { 1339 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); 1340 } 1341 1342 bool runOnFunction(Function &F) override { 1343 if (skipFunction(F)) 1344 return false; 1345 1346 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1347 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 1348 MemoryDependenceResults *MD = 1349 &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 1350 const TargetLibraryInfo *TLI = 1351 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 1352 1353 return eliminateDeadStores(F, AA, MD, DT, TLI); 1354 } 1355 1356 void getAnalysisUsage(AnalysisUsage &AU) const override { 1357 AU.setPreservesCFG(); 1358 AU.addRequired<DominatorTreeWrapperPass>(); 1359 AU.addRequired<AAResultsWrapperPass>(); 1360 AU.addRequired<MemoryDependenceWrapperPass>(); 1361 AU.addRequired<TargetLibraryInfoWrapperPass>(); 1362 AU.addPreserved<DominatorTreeWrapperPass>(); 1363 AU.addPreserved<GlobalsAAWrapperPass>(); 1364 AU.addPreserved<MemoryDependenceWrapperPass>(); 1365 } 1366 }; 1367 1368 } // end anonymous namespace 1369 1370 char DSELegacyPass::ID = 0; 1371 1372 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, 1373 false) 1374 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1375 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1376 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1377 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 1378 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 1379 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, 1380 false) 1381 1382 FunctionPass *llvm::createDeadStoreEliminationPass() { 1383 return new DSELegacyPass(); 1384 } 1385