1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a trivial dead store elimination that only considers 11 // basic-block local redundant stores. 12 // 13 // FIXME: This should eventually be extended to be a post-dominator tree 14 // traversal. Doing so would be pretty trivial. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/SetVector.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/Analysis/AliasAnalysis.h" 27 #include "llvm/Analysis/CaptureTracking.h" 28 #include "llvm/Analysis/GlobalsModRef.h" 29 #include "llvm/Analysis/MemoryBuiltins.h" 30 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 31 #include "llvm/Analysis/MemoryLocation.h" 32 #include "llvm/Analysis/TargetLibraryInfo.h" 33 #include "llvm/Analysis/ValueTracking.h" 34 #include "llvm/IR/Argument.h" 35 #include "llvm/IR/BasicBlock.h" 36 #include "llvm/IR/CallSite.h" 37 #include "llvm/IR/Constant.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/DataLayout.h" 40 #include "llvm/IR/Dominators.h" 41 #include "llvm/IR/Function.h" 42 #include "llvm/IR/InstrTypes.h" 43 #include "llvm/IR/Instruction.h" 44 #include "llvm/IR/Instructions.h" 45 #include "llvm/IR/IntrinsicInst.h" 46 #include "llvm/IR/Intrinsics.h" 47 #include "llvm/IR/LLVMContext.h" 48 #include "llvm/IR/Module.h" 49 #include "llvm/IR/PassManager.h" 50 #include "llvm/IR/Value.h" 51 #include "llvm/Pass.h" 52 #include "llvm/Support/Casting.h" 53 #include "llvm/Support/CommandLine.h" 54 #include "llvm/Support/Debug.h" 55 #include "llvm/Support/ErrorHandling.h" 56 #include "llvm/Support/MathExtras.h" 57 #include "llvm/Support/raw_ostream.h" 58 #include "llvm/Transforms/Scalar.h" 59 #include "llvm/Transforms/Utils/Local.h" 60 #include <algorithm> 61 #include <cassert> 62 #include <cstdint> 63 #include <cstddef> 64 #include <iterator> 65 #include <map> 66 #include <utility> 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "dse" 71 72 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 73 STATISTIC(NumFastStores, "Number of stores deleted"); 74 STATISTIC(NumFastOther , "Number of other instrs removed"); 75 STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); 76 STATISTIC(NumModifiedStores, "Number of stores modified"); 77 78 static cl::opt<bool> 79 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", 80 cl::init(true), cl::Hidden, 81 cl::desc("Enable partial-overwrite tracking in DSE")); 82 83 static cl::opt<bool> 84 EnablePartialStoreMerging("enable-dse-partial-store-merging", 85 cl::init(true), cl::Hidden, 86 cl::desc("Enable partial store merging in DSE")); 87 88 //===----------------------------------------------------------------------===// 89 // Helper functions 90 //===----------------------------------------------------------------------===// 91 using OverlapIntervalsTy = std::map<int64_t, int64_t>; 92 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; 93 94 /// Delete this instruction. Before we do, go through and zero out all the 95 /// operands of this instruction. If any of them become dead, delete them and 96 /// the computation tree that feeds them. 97 /// If ValueSet is non-null, remove any deleted instructions from it as well. 98 static void 99 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI, 100 MemoryDependenceResults &MD, const TargetLibraryInfo &TLI, 101 InstOverlapIntervalsTy &IOL, 102 DenseMap<Instruction*, size_t> *InstrOrdering, 103 SmallSetVector<Value *, 16> *ValueSet = nullptr) { 104 SmallVector<Instruction*, 32> NowDeadInsts; 105 106 NowDeadInsts.push_back(I); 107 --NumFastOther; 108 109 // Keeping the iterator straight is a pain, so we let this routine tell the 110 // caller what the next instruction is after we're done mucking about. 111 BasicBlock::iterator NewIter = *BBI; 112 113 // Before we touch this instruction, remove it from memdep! 114 do { 115 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 116 ++NumFastOther; 117 118 // This instruction is dead, zap it, in stages. Start by removing it from 119 // MemDep, which needs to know the operands and needs it to be in the 120 // function. 121 MD.removeInstruction(DeadInst); 122 123 for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 124 Value *Op = DeadInst->getOperand(op); 125 DeadInst->setOperand(op, nullptr); 126 127 // If this operand just became dead, add it to the NowDeadInsts list. 128 if (!Op->use_empty()) continue; 129 130 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 131 if (isInstructionTriviallyDead(OpI, &TLI)) 132 NowDeadInsts.push_back(OpI); 133 } 134 135 if (ValueSet) ValueSet->remove(DeadInst); 136 InstrOrdering->erase(DeadInst); 137 IOL.erase(DeadInst); 138 139 if (NewIter == DeadInst->getIterator()) 140 NewIter = DeadInst->eraseFromParent(); 141 else 142 DeadInst->eraseFromParent(); 143 } while (!NowDeadInsts.empty()); 144 *BBI = NewIter; 145 } 146 147 /// Does this instruction write some memory? This only returns true for things 148 /// that we can analyze with other helpers below. 149 static bool hasAnalyzableMemoryWrite(Instruction *I, 150 const TargetLibraryInfo &TLI) { 151 if (isa<StoreInst>(I)) 152 return true; 153 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 154 switch (II->getIntrinsicID()) { 155 default: 156 return false; 157 case Intrinsic::memset: 158 case Intrinsic::memmove: 159 case Intrinsic::memcpy: 160 case Intrinsic::init_trampoline: 161 case Intrinsic::lifetime_end: 162 return true; 163 } 164 } 165 if (auto CS = CallSite(I)) { 166 if (Function *F = CS.getCalledFunction()) { 167 StringRef FnName = F->getName(); 168 if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) 169 return true; 170 if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) 171 return true; 172 if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) 173 return true; 174 if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) 175 return true; 176 } 177 } 178 return false; 179 } 180 181 /// Return a Location stored to by the specified instruction. If isRemovable 182 /// returns true, this function and getLocForRead completely describe the memory 183 /// operations for this instruction. 184 static MemoryLocation getLocForWrite(Instruction *Inst) { 185 186 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 187 return MemoryLocation::get(SI); 188 189 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { 190 // memcpy/memmove/memset. 191 MemoryLocation Loc = MemoryLocation::getForDest(MI); 192 return Loc; 193 } 194 195 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 196 switch (II->getIntrinsicID()) { 197 default: 198 return MemoryLocation(); // Unhandled intrinsic. 199 case Intrinsic::init_trampoline: 200 return MemoryLocation(II->getArgOperand(0)); 201 case Intrinsic::lifetime_end: { 202 uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); 203 return MemoryLocation(II->getArgOperand(1), Len); 204 } 205 } 206 } 207 if (auto CS = CallSite(Inst)) 208 // All the supported TLI functions so far happen to have dest as their 209 // first argument. 210 return MemoryLocation(CS.getArgument(0)); 211 return MemoryLocation(); 212 } 213 214 /// Return the location read by the specified "hasAnalyzableMemoryWrite" 215 /// instruction if any. 216 static MemoryLocation getLocForRead(Instruction *Inst, 217 const TargetLibraryInfo &TLI) { 218 assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); 219 220 // The only instructions that both read and write are the mem transfer 221 // instructions (memcpy/memmove). 222 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) 223 return MemoryLocation::getForSource(MTI); 224 return MemoryLocation(); 225 } 226 227 /// If the value of this instruction and the memory it writes to is unused, may 228 /// we delete this instruction? 229 static bool isRemovable(Instruction *I) { 230 // Don't remove volatile/atomic stores. 231 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 232 return SI->isUnordered(); 233 234 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 235 switch (II->getIntrinsicID()) { 236 default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); 237 case Intrinsic::lifetime_end: 238 // Never remove dead lifetime_end's, e.g. because it is followed by a 239 // free. 240 return false; 241 case Intrinsic::init_trampoline: 242 // Always safe to remove init_trampoline. 243 return true; 244 case Intrinsic::memset: 245 case Intrinsic::memmove: 246 case Intrinsic::memcpy: 247 // Don't remove volatile memory intrinsics. 248 return !cast<MemIntrinsic>(II)->isVolatile(); 249 } 250 } 251 252 // note: only get here for calls with analyzable writes - i.e. libcalls 253 if (auto CS = CallSite(I)) 254 return CS.getInstruction()->use_empty(); 255 256 return false; 257 } 258 259 /// Returns true if the end of this instruction can be safely shortened in 260 /// length. 261 static bool isShortenableAtTheEnd(Instruction *I) { 262 // Don't shorten stores for now 263 if (isa<StoreInst>(I)) 264 return false; 265 266 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 267 switch (II->getIntrinsicID()) { 268 default: return false; 269 case Intrinsic::memset: 270 case Intrinsic::memcpy: 271 // Do shorten memory intrinsics. 272 // FIXME: Add memmove if it's also safe to transform. 273 return true; 274 } 275 } 276 277 // Don't shorten libcalls calls for now. 278 279 return false; 280 } 281 282 /// Returns true if the beginning of this instruction can be safely shortened 283 /// in length. 284 static bool isShortenableAtTheBeginning(Instruction *I) { 285 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be 286 // easily done by offsetting the source address. 287 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 288 return II && II->getIntrinsicID() == Intrinsic::memset; 289 } 290 291 /// Return the pointer that is being written to. 292 static Value *getStoredPointerOperand(Instruction *I) { 293 //TODO: factor this to reuse getLocForWrite 294 MemoryLocation Loc = getLocForWrite(I); 295 assert(Loc.Ptr && 296 "unable to find pointer writen for analyzable instruction?"); 297 // TODO: most APIs don't expect const Value * 298 return const_cast<Value*>(Loc.Ptr); 299 } 300 301 static uint64_t getPointerSize(const Value *V, const DataLayout &DL, 302 const TargetLibraryInfo &TLI) { 303 uint64_t Size; 304 if (getObjectSize(V, Size, DL, &TLI)) 305 return Size; 306 return MemoryLocation::UnknownSize; 307 } 308 309 namespace { 310 311 enum OverwriteResult { 312 OW_Begin, 313 OW_Complete, 314 OW_End, 315 OW_PartialEarlierWithFullLater, 316 OW_Unknown 317 }; 318 319 } // end anonymous namespace 320 321 /// Return 'OW_Complete' if a store to the 'Later' location completely 322 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the 323 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the 324 /// beginning of the 'Earlier' location is overwritten by 'Later'. 325 /// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was 326 /// overwritten by a latter (smaller) store which doesn't write outside the big 327 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. 328 static OverwriteResult isOverwrite(const MemoryLocation &Later, 329 const MemoryLocation &Earlier, 330 const DataLayout &DL, 331 const TargetLibraryInfo &TLI, 332 int64_t &EarlierOff, int64_t &LaterOff, 333 Instruction *DepWrite, 334 InstOverlapIntervalsTy &IOL) { 335 // If we don't know the sizes of either access, then we can't do a comparison. 336 if (Later.Size == MemoryLocation::UnknownSize || 337 Earlier.Size == MemoryLocation::UnknownSize) 338 return OW_Unknown; 339 340 const Value *P1 = Earlier.Ptr->stripPointerCasts(); 341 const Value *P2 = Later.Ptr->stripPointerCasts(); 342 343 // If the start pointers are the same, we just have to compare sizes to see if 344 // the later store was larger than the earlier store. 345 if (P1 == P2) { 346 // Make sure that the Later size is >= the Earlier size. 347 if (Later.Size >= Earlier.Size) 348 return OW_Complete; 349 } 350 351 // Check to see if the later store is to the entire object (either a global, 352 // an alloca, or a byval/inalloca argument). If so, then it clearly 353 // overwrites any other store to the same object. 354 const Value *UO1 = GetUnderlyingObject(P1, DL), 355 *UO2 = GetUnderlyingObject(P2, DL); 356 357 // If we can't resolve the same pointers to the same object, then we can't 358 // analyze them at all. 359 if (UO1 != UO2) 360 return OW_Unknown; 361 362 // If the "Later" store is to a recognizable object, get its size. 363 uint64_t ObjectSize = getPointerSize(UO2, DL, TLI); 364 if (ObjectSize != MemoryLocation::UnknownSize) 365 if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size) 366 return OW_Complete; 367 368 // Okay, we have stores to two completely different pointers. Try to 369 // decompose the pointer into a "base + constant_offset" form. If the base 370 // pointers are equal, then we can reason about the two stores. 371 EarlierOff = 0; 372 LaterOff = 0; 373 const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); 374 const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); 375 376 // If the base pointers still differ, we have two completely different stores. 377 if (BP1 != BP2) 378 return OW_Unknown; 379 380 // The later store completely overlaps the earlier store if: 381 // 382 // 1. Both start at the same offset and the later one's size is greater than 383 // or equal to the earlier one's, or 384 // 385 // |--earlier--| 386 // |-- later --| 387 // 388 // 2. The earlier store has an offset greater than the later offset, but which 389 // still lies completely within the later store. 390 // 391 // |--earlier--| 392 // |----- later ------| 393 // 394 // We have to be careful here as *Off is signed while *.Size is unsigned. 395 if (EarlierOff >= LaterOff && 396 Later.Size >= Earlier.Size && 397 uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) 398 return OW_Complete; 399 400 // We may now overlap, although the overlap is not complete. There might also 401 // be other incomplete overlaps, and together, they might cover the complete 402 // earlier write. 403 // Note: The correctness of this logic depends on the fact that this function 404 // is not even called providing DepWrite when there are any intervening reads. 405 if (EnablePartialOverwriteTracking && 406 LaterOff < int64_t(EarlierOff + Earlier.Size) && 407 int64_t(LaterOff + Later.Size) >= EarlierOff) { 408 409 // Insert our part of the overlap into the map. 410 auto &IM = IOL[DepWrite]; 411 DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff << ", " << 412 int64_t(EarlierOff + Earlier.Size) << ") Later [" << 413 LaterOff << ", " << int64_t(LaterOff + Later.Size) << ")\n"); 414 415 // Make sure that we only insert non-overlapping intervals and combine 416 // adjacent intervals. The intervals are stored in the map with the ending 417 // offset as the key (in the half-open sense) and the starting offset as 418 // the value. 419 int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + Later.Size; 420 421 // Find any intervals ending at, or after, LaterIntStart which start 422 // before LaterIntEnd. 423 auto ILI = IM.lower_bound(LaterIntStart); 424 if (ILI != IM.end() && ILI->second <= LaterIntEnd) { 425 // This existing interval is overlapped with the current store somewhere 426 // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing 427 // intervals and adjusting our start and end. 428 LaterIntStart = std::min(LaterIntStart, ILI->second); 429 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 430 ILI = IM.erase(ILI); 431 432 // Continue erasing and adjusting our end in case other previous 433 // intervals are also overlapped with the current store. 434 // 435 // |--- ealier 1 ---| |--- ealier 2 ---| 436 // |------- later---------| 437 // 438 while (ILI != IM.end() && ILI->second <= LaterIntEnd) { 439 assert(ILI->second > LaterIntStart && "Unexpected interval"); 440 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 441 ILI = IM.erase(ILI); 442 } 443 } 444 445 IM[LaterIntEnd] = LaterIntStart; 446 447 ILI = IM.begin(); 448 if (ILI->second <= EarlierOff && 449 ILI->first >= int64_t(EarlierOff + Earlier.Size)) { 450 DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" << 451 EarlierOff << ", " << 452 int64_t(EarlierOff + Earlier.Size) << 453 ") Composite Later [" << 454 ILI->second << ", " << ILI->first << ")\n"); 455 ++NumCompletePartials; 456 return OW_Complete; 457 } 458 } 459 460 // Check for an earlier store which writes to all the memory locations that 461 // the later store writes to. 462 if (EnablePartialStoreMerging && LaterOff >= EarlierOff && 463 int64_t(EarlierOff + Earlier.Size) > LaterOff && 464 uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) { 465 DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" << EarlierOff 466 << ", " << int64_t(EarlierOff + Earlier.Size) 467 << ") by a later store [" << LaterOff << ", " 468 << int64_t(LaterOff + Later.Size) << ")\n"); 469 // TODO: Maybe come up with a better name? 470 return OW_PartialEarlierWithFullLater; 471 } 472 473 // Another interesting case is if the later store overwrites the end of the 474 // earlier store. 475 // 476 // |--earlier--| 477 // |-- later --| 478 // 479 // In this case we may want to trim the size of earlier to avoid generating 480 // writes to addresses which will definitely be overwritten later 481 if (!EnablePartialOverwriteTracking && 482 (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) && 483 int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))) 484 return OW_End; 485 486 // Finally, we also need to check if the later store overwrites the beginning 487 // of the earlier store. 488 // 489 // |--earlier--| 490 // |-- later --| 491 // 492 // In this case we may want to move the destination address and trim the size 493 // of earlier to avoid generating writes to addresses which will definitely 494 // be overwritten later. 495 if (!EnablePartialOverwriteTracking && 496 (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) { 497 assert(int64_t(LaterOff + Later.Size) < 498 int64_t(EarlierOff + Earlier.Size) && 499 "Expect to be handled as OW_Complete"); 500 return OW_Begin; 501 } 502 // Otherwise, they don't completely overlap. 503 return OW_Unknown; 504 } 505 506 /// If 'Inst' might be a self read (i.e. a noop copy of a 507 /// memory region into an identical pointer) then it doesn't actually make its 508 /// input dead in the traditional sense. Consider this case: 509 /// 510 /// memcpy(A <- B) 511 /// memcpy(A <- A) 512 /// 513 /// In this case, the second store to A does not make the first store to A dead. 514 /// The usual situation isn't an explicit A<-A store like this (which can be 515 /// trivially removed) but a case where two pointers may alias. 516 /// 517 /// This function detects when it is unsafe to remove a dependent instruction 518 /// because the DSE inducing instruction may be a self-read. 519 static bool isPossibleSelfRead(Instruction *Inst, 520 const MemoryLocation &InstStoreLoc, 521 Instruction *DepWrite, 522 const TargetLibraryInfo &TLI, 523 AliasAnalysis &AA) { 524 // Self reads can only happen for instructions that read memory. Get the 525 // location read. 526 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 527 if (!InstReadLoc.Ptr) return false; // Not a reading instruction. 528 529 // If the read and written loc obviously don't alias, it isn't a read. 530 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; 531 532 // Okay, 'Inst' may copy over itself. However, we can still remove a the 533 // DepWrite instruction if we can prove that it reads from the same location 534 // as Inst. This handles useful cases like: 535 // memcpy(A <- B) 536 // memcpy(A <- B) 537 // Here we don't know if A/B may alias, but we do know that B/B are must 538 // aliases, so removing the first memcpy is safe (assuming it writes <= # 539 // bytes as the second one. 540 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 541 542 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 543 return false; 544 545 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 546 // then it can't be considered dead. 547 return true; 548 } 549 550 /// Returns true if the memory which is accessed by the second instruction is not 551 /// modified between the first and the second instruction. 552 /// Precondition: Second instruction must be dominated by the first 553 /// instruction. 554 static bool memoryIsNotModifiedBetween(Instruction *FirstI, 555 Instruction *SecondI, 556 AliasAnalysis *AA) { 557 SmallVector<BasicBlock *, 16> WorkList; 558 SmallPtrSet<BasicBlock *, 8> Visited; 559 BasicBlock::iterator FirstBBI(FirstI); 560 ++FirstBBI; 561 BasicBlock::iterator SecondBBI(SecondI); 562 BasicBlock *FirstBB = FirstI->getParent(); 563 BasicBlock *SecondBB = SecondI->getParent(); 564 MemoryLocation MemLoc = MemoryLocation::get(SecondI); 565 566 // Start checking the store-block. 567 WorkList.push_back(SecondBB); 568 bool isFirstBlock = true; 569 570 // Check all blocks going backward until we reach the load-block. 571 while (!WorkList.empty()) { 572 BasicBlock *B = WorkList.pop_back_val(); 573 574 // Ignore instructions before LI if this is the FirstBB. 575 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); 576 577 BasicBlock::iterator EI; 578 if (isFirstBlock) { 579 // Ignore instructions after SI if this is the first visit of SecondBB. 580 assert(B == SecondBB && "first block is not the store block"); 581 EI = SecondBBI; 582 isFirstBlock = false; 583 } else { 584 // It's not SecondBB or (in case of a loop) the second visit of SecondBB. 585 // In this case we also have to look at instructions after SI. 586 EI = B->end(); 587 } 588 for (; BI != EI; ++BI) { 589 Instruction *I = &*BI; 590 if (I->mayWriteToMemory() && I != SecondI) 591 if (isModSet(AA->getModRefInfo(I, MemLoc))) 592 return false; 593 } 594 if (B != FirstBB) { 595 assert(B != &FirstBB->getParent()->getEntryBlock() && 596 "Should not hit the entry block because SI must be dominated by LI"); 597 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { 598 if (!Visited.insert(*PredI).second) 599 continue; 600 WorkList.push_back(*PredI); 601 } 602 } 603 } 604 return true; 605 } 606 607 /// Find all blocks that will unconditionally lead to the block BB and append 608 /// them to F. 609 static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, 610 BasicBlock *BB, DominatorTree *DT) { 611 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { 612 BasicBlock *Pred = *I; 613 if (Pred == BB) continue; 614 TerminatorInst *PredTI = Pred->getTerminator(); 615 if (PredTI->getNumSuccessors() != 1) 616 continue; 617 618 if (DT->isReachableFromEntry(Pred)) 619 Blocks.push_back(Pred); 620 } 621 } 622 623 /// Handle frees of entire structures whose dependency is a store 624 /// to a field of that structure. 625 static bool handleFree(CallInst *F, AliasAnalysis *AA, 626 MemoryDependenceResults *MD, DominatorTree *DT, 627 const TargetLibraryInfo *TLI, 628 InstOverlapIntervalsTy &IOL, 629 DenseMap<Instruction*, size_t> *InstrOrdering) { 630 bool MadeChange = false; 631 632 MemoryLocation Loc = MemoryLocation(F->getOperand(0)); 633 SmallVector<BasicBlock *, 16> Blocks; 634 Blocks.push_back(F->getParent()); 635 const DataLayout &DL = F->getModule()->getDataLayout(); 636 637 while (!Blocks.empty()) { 638 BasicBlock *BB = Blocks.pop_back_val(); 639 Instruction *InstPt = BB->getTerminator(); 640 if (BB == F->getParent()) InstPt = F; 641 642 MemDepResult Dep = 643 MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); 644 while (Dep.isDef() || Dep.isClobber()) { 645 Instruction *Dependency = Dep.getInst(); 646 if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || 647 !isRemovable(Dependency)) 648 break; 649 650 Value *DepPointer = 651 GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); 652 653 // Check for aliasing. 654 if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) 655 break; 656 657 DEBUG(dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " 658 << *Dependency << '\n'); 659 660 // DCE instructions only used to calculate that store. 661 BasicBlock::iterator BBI(Dependency); 662 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, InstrOrdering); 663 ++NumFastStores; 664 MadeChange = true; 665 666 // Inst's old Dependency is now deleted. Compute the next dependency, 667 // which may also be dead, as in 668 // s[0] = 0; 669 // s[1] = 0; // This has just been deleted. 670 // free(s); 671 Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB); 672 } 673 674 if (Dep.isNonLocal()) 675 findUnconditionalPreds(Blocks, BB, DT); 676 } 677 678 return MadeChange; 679 } 680 681 /// Check to see if the specified location may alias any of the stack objects in 682 /// the DeadStackObjects set. If so, they become live because the location is 683 /// being loaded. 684 static void removeAccessedObjects(const MemoryLocation &LoadedLoc, 685 SmallSetVector<Value *, 16> &DeadStackObjects, 686 const DataLayout &DL, AliasAnalysis *AA, 687 const TargetLibraryInfo *TLI) { 688 const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); 689 690 // A constant can't be in the dead pointer set. 691 if (isa<Constant>(UnderlyingPointer)) 692 return; 693 694 // If the kill pointer can be easily reduced to an alloca, don't bother doing 695 // extraneous AA queries. 696 if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { 697 DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer)); 698 return; 699 } 700 701 // Remove objects that could alias LoadedLoc. 702 DeadStackObjects.remove_if([&](Value *I) { 703 // See if the loaded location could alias the stack location. 704 MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI)); 705 return !AA->isNoAlias(StackLoc, LoadedLoc); 706 }); 707 } 708 709 /// Remove dead stores to stack-allocated locations in the function end block. 710 /// Ex: 711 /// %A = alloca i32 712 /// ... 713 /// store i32 1, i32* %A 714 /// ret void 715 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, 716 MemoryDependenceResults *MD, 717 const TargetLibraryInfo *TLI, 718 InstOverlapIntervalsTy &IOL, 719 DenseMap<Instruction*, size_t> *InstrOrdering) { 720 bool MadeChange = false; 721 722 // Keep track of all of the stack objects that are dead at the end of the 723 // function. 724 SmallSetVector<Value*, 16> DeadStackObjects; 725 726 // Find all of the alloca'd pointers in the entry block. 727 BasicBlock &Entry = BB.getParent()->front(); 728 for (Instruction &I : Entry) { 729 if (isa<AllocaInst>(&I)) 730 DeadStackObjects.insert(&I); 731 732 // Okay, so these are dead heap objects, but if the pointer never escapes 733 // then it's leaked by this function anyways. 734 else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) 735 DeadStackObjects.insert(&I); 736 } 737 738 // Treat byval or inalloca arguments the same, stores to them are dead at the 739 // end of the function. 740 for (Argument &AI : BB.getParent()->args()) 741 if (AI.hasByValOrInAllocaAttr()) 742 DeadStackObjects.insert(&AI); 743 744 const DataLayout &DL = BB.getModule()->getDataLayout(); 745 746 // Scan the basic block backwards 747 for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ 748 --BBI; 749 750 // If we find a store, check to see if it points into a dead stack value. 751 if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { 752 // See through pointer-to-pointer bitcasts 753 SmallVector<Value *, 4> Pointers; 754 GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); 755 756 // Stores to stack values are valid candidates for removal. 757 bool AllDead = true; 758 for (Value *Pointer : Pointers) 759 if (!DeadStackObjects.count(Pointer)) { 760 AllDead = false; 761 break; 762 } 763 764 if (AllDead) { 765 Instruction *Dead = &*BBI; 766 767 DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " 768 << *Dead << "\n Objects: "; 769 for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), 770 E = Pointers.end(); I != E; ++I) { 771 dbgs() << **I; 772 if (std::next(I) != E) 773 dbgs() << ", "; 774 } 775 dbgs() << '\n'); 776 777 // DCE instructions only used to calculate that store. 778 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects); 779 ++NumFastStores; 780 MadeChange = true; 781 continue; 782 } 783 } 784 785 // Remove any dead non-memory-mutating instructions. 786 if (isInstructionTriviallyDead(&*BBI, TLI)) { 787 DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " 788 << *&*BBI << '\n'); 789 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects); 790 ++NumFastOther; 791 MadeChange = true; 792 continue; 793 } 794 795 if (isa<AllocaInst>(BBI)) { 796 // Remove allocas from the list of dead stack objects; there can't be 797 // any references before the definition. 798 DeadStackObjects.remove(&*BBI); 799 continue; 800 } 801 802 if (auto CS = CallSite(&*BBI)) { 803 // Remove allocation function calls from the list of dead stack objects; 804 // there can't be any references before the definition. 805 if (isAllocLikeFn(&*BBI, TLI)) 806 DeadStackObjects.remove(&*BBI); 807 808 // If this call does not access memory, it can't be loading any of our 809 // pointers. 810 if (AA->doesNotAccessMemory(CS)) 811 continue; 812 813 // If the call might load from any of our allocas, then any store above 814 // the call is live. 815 DeadStackObjects.remove_if([&](Value *I) { 816 // See if the call site touches the value. 817 return isRefSet(AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI))); 818 }); 819 820 // If all of the allocas were clobbered by the call then we're not going 821 // to find anything else to process. 822 if (DeadStackObjects.empty()) 823 break; 824 825 continue; 826 } 827 828 // We can remove the dead stores, irrespective of the fence and its ordering 829 // (release/acquire/seq_cst). Fences only constraints the ordering of 830 // already visible stores, it does not make a store visible to other 831 // threads. So, skipping over a fence does not change a store from being 832 // dead. 833 if (isa<FenceInst>(*BBI)) 834 continue; 835 836 MemoryLocation LoadedLoc; 837 838 // If we encounter a use of the pointer, it is no longer considered dead 839 if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { 840 if (!L->isUnordered()) // Be conservative with atomic/volatile load 841 break; 842 LoadedLoc = MemoryLocation::get(L); 843 } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { 844 LoadedLoc = MemoryLocation::get(V); 845 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { 846 LoadedLoc = MemoryLocation::getForSource(MTI); 847 } else if (!BBI->mayReadFromMemory()) { 848 // Instruction doesn't read memory. Note that stores that weren't removed 849 // above will hit this case. 850 continue; 851 } else { 852 // Unknown inst; assume it clobbers everything. 853 break; 854 } 855 856 // Remove any allocas from the DeadPointer set that are loaded, as this 857 // makes any stores above the access live. 858 removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI); 859 860 // If all of the allocas were clobbered by the access then we're not going 861 // to find anything else to process. 862 if (DeadStackObjects.empty()) 863 break; 864 } 865 866 return MadeChange; 867 } 868 869 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, 870 int64_t &EarlierSize, int64_t LaterOffset, 871 int64_t LaterSize, bool IsOverwriteEnd) { 872 // TODO: base this on the target vector size so that if the earlier 873 // store was too small to get vector writes anyway then its likely 874 // a good idea to shorten it 875 // Power of 2 vector writes are probably always a bad idea to optimize 876 // as any store/memset/memcpy is likely using vector instructions so 877 // shortening it to not vector size is likely to be slower 878 MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite); 879 unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment(); 880 if (!IsOverwriteEnd) 881 LaterOffset = int64_t(LaterOffset + LaterSize); 882 883 if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) && 884 !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0)) 885 return false; 886 887 DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " 888 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite 889 << "\n KILLER (offset " << LaterOffset << ", " << EarlierSize 890 << ")\n"); 891 892 int64_t NewLength = IsOverwriteEnd 893 ? LaterOffset - EarlierOffset 894 : EarlierSize - (LaterOffset - EarlierOffset); 895 896 Value *EarlierWriteLength = EarlierIntrinsic->getLength(); 897 Value *TrimmedLength = 898 ConstantInt::get(EarlierWriteLength->getType(), NewLength); 899 EarlierIntrinsic->setLength(TrimmedLength); 900 901 EarlierSize = NewLength; 902 if (!IsOverwriteEnd) { 903 int64_t OffsetMoved = (LaterOffset - EarlierOffset); 904 Value *Indices[1] = { 905 ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)}; 906 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( 907 EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite); 908 EarlierIntrinsic->setDest(NewDestGEP); 909 EarlierOffset = EarlierOffset + OffsetMoved; 910 } 911 return true; 912 } 913 914 static bool tryToShortenEnd(Instruction *EarlierWrite, 915 OverlapIntervalsTy &IntervalMap, 916 int64_t &EarlierStart, int64_t &EarlierSize) { 917 if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) 918 return false; 919 920 OverlapIntervalsTy::iterator OII = --IntervalMap.end(); 921 int64_t LaterStart = OII->second; 922 int64_t LaterSize = OII->first - LaterStart; 923 924 if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize && 925 LaterStart + LaterSize >= EarlierStart + EarlierSize) { 926 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 927 LaterSize, true)) { 928 IntervalMap.erase(OII); 929 return true; 930 } 931 } 932 return false; 933 } 934 935 static bool tryToShortenBegin(Instruction *EarlierWrite, 936 OverlapIntervalsTy &IntervalMap, 937 int64_t &EarlierStart, int64_t &EarlierSize) { 938 if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) 939 return false; 940 941 OverlapIntervalsTy::iterator OII = IntervalMap.begin(); 942 int64_t LaterStart = OII->second; 943 int64_t LaterSize = OII->first - LaterStart; 944 945 if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) { 946 assert(LaterStart + LaterSize < EarlierStart + EarlierSize && 947 "Should have been handled as OW_Complete"); 948 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 949 LaterSize, false)) { 950 IntervalMap.erase(OII); 951 return true; 952 } 953 } 954 return false; 955 } 956 957 static bool removePartiallyOverlappedStores(AliasAnalysis *AA, 958 const DataLayout &DL, 959 InstOverlapIntervalsTy &IOL) { 960 bool Changed = false; 961 for (auto OI : IOL) { 962 Instruction *EarlierWrite = OI.first; 963 MemoryLocation Loc = getLocForWrite(EarlierWrite); 964 assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); 965 assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc"); 966 967 const Value *Ptr = Loc.Ptr->stripPointerCasts(); 968 int64_t EarlierStart = 0; 969 int64_t EarlierSize = int64_t(Loc.Size); 970 GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); 971 OverlapIntervalsTy &IntervalMap = OI.second; 972 Changed |= 973 tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 974 if (IntervalMap.empty()) 975 continue; 976 Changed |= 977 tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 978 } 979 return Changed; 980 } 981 982 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, 983 AliasAnalysis *AA, MemoryDependenceResults *MD, 984 const DataLayout &DL, 985 const TargetLibraryInfo *TLI, 986 InstOverlapIntervalsTy &IOL, 987 DenseMap<Instruction*, size_t> *InstrOrdering) { 988 // Must be a store instruction. 989 StoreInst *SI = dyn_cast<StoreInst>(Inst); 990 if (!SI) 991 return false; 992 993 // If we're storing the same value back to a pointer that we just loaded from, 994 // then the store can be removed. 995 if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { 996 if (SI->getPointerOperand() == DepLoad->getPointerOperand() && 997 isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) { 998 999 DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " 1000 << *DepLoad << "\n STORE: " << *SI << '\n'); 1001 1002 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering); 1003 ++NumRedundantStores; 1004 return true; 1005 } 1006 } 1007 1008 // Remove null stores into the calloc'ed objects 1009 Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); 1010 if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { 1011 Instruction *UnderlyingPointer = 1012 dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL)); 1013 1014 if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && 1015 memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) { 1016 DEBUG( 1017 dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " 1018 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); 1019 1020 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering); 1021 ++NumRedundantStores; 1022 return true; 1023 } 1024 } 1025 return false; 1026 } 1027 1028 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, 1029 MemoryDependenceResults *MD, DominatorTree *DT, 1030 const TargetLibraryInfo *TLI) { 1031 const DataLayout &DL = BB.getModule()->getDataLayout(); 1032 bool MadeChange = false; 1033 1034 // FIXME: Maybe change this to use some abstraction like OrderedBasicBlock? 1035 // The current OrderedBasicBlock can't deal with mutation at the moment. 1036 size_t LastThrowingInstIndex = 0; 1037 DenseMap<Instruction*, size_t> InstrOrdering; 1038 size_t InstrIndex = 1; 1039 1040 // A map of interval maps representing partially-overwritten value parts. 1041 InstOverlapIntervalsTy IOL; 1042 1043 // Do a top-down walk on the BB. 1044 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { 1045 // Handle 'free' calls specially. 1046 if (CallInst *F = isFreeCall(&*BBI, TLI)) { 1047 MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, &InstrOrdering); 1048 // Increment BBI after handleFree has potentially deleted instructions. 1049 // This ensures we maintain a valid iterator. 1050 ++BBI; 1051 continue; 1052 } 1053 1054 Instruction *Inst = &*BBI++; 1055 1056 size_t CurInstNumber = InstrIndex++; 1057 InstrOrdering.insert(std::make_pair(Inst, CurInstNumber)); 1058 if (Inst->mayThrow()) { 1059 LastThrowingInstIndex = CurInstNumber; 1060 continue; 1061 } 1062 1063 // Check to see if Inst writes to memory. If not, continue. 1064 if (!hasAnalyzableMemoryWrite(Inst, *TLI)) 1065 continue; 1066 1067 // eliminateNoopStore will update in iterator, if necessary. 1068 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) { 1069 MadeChange = true; 1070 continue; 1071 } 1072 1073 // If we find something that writes memory, get its memory dependence. 1074 MemDepResult InstDep = MD->getDependency(Inst); 1075 1076 // Ignore any store where we can't find a local dependence. 1077 // FIXME: cross-block DSE would be fun. :) 1078 if (!InstDep.isDef() && !InstDep.isClobber()) 1079 continue; 1080 1081 // Figure out what location is being stored to. 1082 MemoryLocation Loc = getLocForWrite(Inst); 1083 1084 // If we didn't get a useful location, fail. 1085 if (!Loc.Ptr) 1086 continue; 1087 1088 // Loop until we find a store we can eliminate or a load that 1089 // invalidates the analysis. Without an upper bound on the number of 1090 // instructions examined, this analysis can become very time-consuming. 1091 // However, the potential gain diminishes as we process more instructions 1092 // without eliminating any of them. Therefore, we limit the number of 1093 // instructions we look at. 1094 auto Limit = MD->getDefaultBlockScanLimit(); 1095 while (InstDep.isDef() || InstDep.isClobber()) { 1096 // Get the memory clobbered by the instruction we depend on. MemDep will 1097 // skip any instructions that 'Loc' clearly doesn't interact with. If we 1098 // end up depending on a may- or must-aliased load, then we can't optimize 1099 // away the store and we bail out. However, if we depend on something 1100 // that overwrites the memory location we *can* potentially optimize it. 1101 // 1102 // Find out what memory location the dependent instruction stores. 1103 Instruction *DepWrite = InstDep.getInst(); 1104 if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) 1105 break; 1106 MemoryLocation DepLoc = getLocForWrite(DepWrite); 1107 // If we didn't get a useful location, or if it isn't a size, bail out. 1108 if (!DepLoc.Ptr) 1109 break; 1110 1111 // Make sure we don't look past a call which might throw. This is an 1112 // issue because MemoryDependenceAnalysis works in the wrong direction: 1113 // it finds instructions which dominate the current instruction, rather than 1114 // instructions which are post-dominated by the current instruction. 1115 // 1116 // If the underlying object is a non-escaping memory allocation, any store 1117 // to it is dead along the unwind edge. Otherwise, we need to preserve 1118 // the store. 1119 size_t DepIndex = InstrOrdering.lookup(DepWrite); 1120 assert(DepIndex && "Unexpected instruction"); 1121 if (DepIndex <= LastThrowingInstIndex) { 1122 const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL); 1123 bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying); 1124 if (!IsStoreDeadOnUnwind) { 1125 // We're looking for a call to an allocation function 1126 // where the allocation doesn't escape before the last 1127 // throwing instruction; PointerMayBeCaptured 1128 // reasonably fast approximation. 1129 IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) && 1130 !PointerMayBeCaptured(Underlying, false, true); 1131 } 1132 if (!IsStoreDeadOnUnwind) 1133 break; 1134 } 1135 1136 // If we find a write that is a) removable (i.e., non-volatile), b) is 1137 // completely obliterated by the store to 'Loc', and c) which we know that 1138 // 'Inst' doesn't load from, then we can remove it. 1139 // Also try to merge two stores if a later one only touches memory written 1140 // to by the earlier one. 1141 if (isRemovable(DepWrite) && 1142 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { 1143 int64_t InstWriteOffset, DepWriteOffset; 1144 OverwriteResult OR = 1145 isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset, 1146 DepWrite, IOL); 1147 if (OR == OW_Complete) { 1148 DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " 1149 << *DepWrite << "\n KILLER: " << *Inst << '\n'); 1150 1151 // Delete the store and now-dead instructions that feed it. 1152 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, &InstrOrdering); 1153 ++NumFastStores; 1154 MadeChange = true; 1155 1156 // We erased DepWrite; start over. 1157 InstDep = MD->getDependency(Inst); 1158 continue; 1159 } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) || 1160 ((OR == OW_Begin && 1161 isShortenableAtTheBeginning(DepWrite)))) { 1162 assert(!EnablePartialOverwriteTracking && "Do not expect to perform " 1163 "when partial-overwrite " 1164 "tracking is enabled"); 1165 int64_t EarlierSize = DepLoc.Size; 1166 int64_t LaterSize = Loc.Size; 1167 bool IsOverwriteEnd = (OR == OW_End); 1168 MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, 1169 InstWriteOffset, LaterSize, IsOverwriteEnd); 1170 } else if (EnablePartialStoreMerging && 1171 OR == OW_PartialEarlierWithFullLater) { 1172 auto *Earlier = dyn_cast<StoreInst>(DepWrite); 1173 auto *Later = dyn_cast<StoreInst>(Inst); 1174 if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && 1175 Later && isa<ConstantInt>(Later->getValueOperand())) { 1176 // If the store we find is: 1177 // a) partially overwritten by the store to 'Loc' 1178 // b) the later store is fully contained in the earlier one and 1179 // c) they both have a constant value 1180 // Merge the two stores, replacing the earlier store's value with a 1181 // merge of both values. 1182 // TODO: Deal with other constant types (vectors, etc), and probably 1183 // some mem intrinsics (if needed) 1184 1185 APInt EarlierValue = 1186 cast<ConstantInt>(Earlier->getValueOperand())->getValue(); 1187 APInt LaterValue = 1188 cast<ConstantInt>(Later->getValueOperand())->getValue(); 1189 unsigned LaterBits = LaterValue.getBitWidth(); 1190 assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); 1191 LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); 1192 1193 // Offset of the smaller store inside the larger store 1194 unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; 1195 unsigned LShiftAmount = 1196 DL.isBigEndian() 1197 ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits 1198 : BitOffsetDiff; 1199 APInt Mask = 1200 APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, 1201 LShiftAmount + LaterBits); 1202 // Clear the bits we'll be replacing, then OR with the smaller 1203 // store, shifted appropriately. 1204 APInt Merged = 1205 (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); 1206 DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *DepWrite 1207 << "\n Later: " << *Inst 1208 << "\n Merged Value: " << Merged << '\n'); 1209 1210 auto *SI = new StoreInst( 1211 ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), 1212 Earlier->getPointerOperand(), false, Earlier->getAlignment(), 1213 Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); 1214 1215 unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, 1216 LLVMContext::MD_alias_scope, 1217 LLVMContext::MD_noalias, 1218 LLVMContext::MD_nontemporal}; 1219 SI->copyMetadata(*DepWrite, MDToKeep); 1220 ++NumModifiedStores; 1221 1222 // Remove earlier, wider, store 1223 size_t Idx = InstrOrdering.lookup(DepWrite); 1224 InstrOrdering.erase(DepWrite); 1225 InstrOrdering.insert(std::make_pair(SI, Idx)); 1226 1227 // Delete the old stores and now-dead instructions that feed them. 1228 deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering); 1229 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1230 &InstrOrdering); 1231 MadeChange = true; 1232 1233 // We erased DepWrite and Inst (Loc); start over. 1234 break; 1235 } 1236 } 1237 } 1238 1239 // If this is a may-aliased store that is clobbering the store value, we 1240 // can keep searching past it for another must-aliased pointer that stores 1241 // to the same location. For example, in: 1242 // store -> P 1243 // store -> Q 1244 // store -> P 1245 // we can remove the first store to P even though we don't know if P and Q 1246 // alias. 1247 if (DepWrite == &BB.front()) break; 1248 1249 // Can't look past this instruction if it might read 'Loc'. 1250 if (isRefSet(AA->getModRefInfo(DepWrite, Loc))) 1251 break; 1252 1253 InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false, 1254 DepWrite->getIterator(), &BB, 1255 /*QueryInst=*/ nullptr, &Limit); 1256 } 1257 } 1258 1259 if (EnablePartialOverwriteTracking) 1260 MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL); 1261 1262 // If this block ends in a return, unwind, or unreachable, all allocas are 1263 // dead at its end, which means stores to them are also dead. 1264 if (BB.getTerminator()->getNumSuccessors() == 0) 1265 MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, &InstrOrdering); 1266 1267 return MadeChange; 1268 } 1269 1270 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, 1271 MemoryDependenceResults *MD, DominatorTree *DT, 1272 const TargetLibraryInfo *TLI) { 1273 bool MadeChange = false; 1274 for (BasicBlock &BB : F) 1275 // Only check non-dead blocks. Dead blocks may have strange pointer 1276 // cycles that will confuse alias analysis. 1277 if (DT->isReachableFromEntry(&BB)) 1278 MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI); 1279 1280 return MadeChange; 1281 } 1282 1283 //===----------------------------------------------------------------------===// 1284 // DSE Pass 1285 //===----------------------------------------------------------------------===// 1286 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { 1287 AliasAnalysis *AA = &AM.getResult<AAManager>(F); 1288 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1289 MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F); 1290 const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 1291 1292 if (!eliminateDeadStores(F, AA, MD, DT, TLI)) 1293 return PreservedAnalyses::all(); 1294 1295 PreservedAnalyses PA; 1296 PA.preserveSet<CFGAnalyses>(); 1297 PA.preserve<GlobalsAA>(); 1298 PA.preserve<MemoryDependenceAnalysis>(); 1299 return PA; 1300 } 1301 1302 namespace { 1303 1304 /// A legacy pass for the legacy pass manager that wraps \c DSEPass. 1305 class DSELegacyPass : public FunctionPass { 1306 public: 1307 static char ID; // Pass identification, replacement for typeid 1308 1309 DSELegacyPass() : FunctionPass(ID) { 1310 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); 1311 } 1312 1313 bool runOnFunction(Function &F) override { 1314 if (skipFunction(F)) 1315 return false; 1316 1317 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1318 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 1319 MemoryDependenceResults *MD = 1320 &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 1321 const TargetLibraryInfo *TLI = 1322 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 1323 1324 return eliminateDeadStores(F, AA, MD, DT, TLI); 1325 } 1326 1327 void getAnalysisUsage(AnalysisUsage &AU) const override { 1328 AU.setPreservesCFG(); 1329 AU.addRequired<DominatorTreeWrapperPass>(); 1330 AU.addRequired<AAResultsWrapperPass>(); 1331 AU.addRequired<MemoryDependenceWrapperPass>(); 1332 AU.addRequired<TargetLibraryInfoWrapperPass>(); 1333 AU.addPreserved<DominatorTreeWrapperPass>(); 1334 AU.addPreserved<GlobalsAAWrapperPass>(); 1335 AU.addPreserved<MemoryDependenceWrapperPass>(); 1336 } 1337 }; 1338 1339 } // end anonymous namespace 1340 1341 char DSELegacyPass::ID = 0; 1342 1343 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, 1344 false) 1345 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1346 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1347 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1348 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 1349 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 1350 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, 1351 false) 1352 1353 FunctionPass *llvm::createDeadStoreEliminationPass() { 1354 return new DSELegacyPass(); 1355 } 1356