1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a trivial dead store elimination that only considers 11 // basic-block local redundant stores. 12 // 13 // FIXME: This should eventually be extended to be a post-dominator tree 14 // traversal. Doing so would be pretty trivial. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/Analysis/CaptureTracking.h" 24 #include "llvm/Analysis/GlobalsModRef.h" 25 #include "llvm/Analysis/MemoryBuiltins.h" 26 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 27 #include "llvm/Analysis/TargetLibraryInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/Constants.h" 30 #include "llvm/IR/DataLayout.h" 31 #include "llvm/IR/Dominators.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/IR/GlobalVariable.h" 34 #include "llvm/IR/Instructions.h" 35 #include "llvm/IR/IntrinsicInst.h" 36 #include "llvm/Pass.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Transforms/Scalar.h" 40 #include "llvm/Transforms/Utils/Local.h" 41 using namespace llvm; 42 43 #define DEBUG_TYPE "dse" 44 45 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 46 STATISTIC(NumFastStores, "Number of stores deleted"); 47 STATISTIC(NumFastOther , "Number of other instrs removed"); 48 49 50 //===----------------------------------------------------------------------===// 51 // Helper functions 52 
//===----------------------------------------------------------------------===// 53 54 /// Delete this instruction. Before we do, go through and zero out all the 55 /// operands of this instruction. If any of them become dead, delete them and 56 /// the computation tree that feeds them. 57 /// If ValueSet is non-null, remove any deleted instructions from it as well. 58 static void 59 deleteDeadInstruction(Instruction *I, MemoryDependenceResults &MD, 60 const TargetLibraryInfo &TLI, 61 SmallSetVector<Value *, 16> *ValueSet = nullptr) { 62 SmallVector<Instruction*, 32> NowDeadInsts; 63 64 NowDeadInsts.push_back(I); 65 --NumFastOther; 66 67 // Before we touch this instruction, remove it from memdep! 68 do { 69 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 70 ++NumFastOther; 71 72 // This instruction is dead, zap it, in stages. Start by removing it from 73 // MemDep, which needs to know the operands and needs it to be in the 74 // function. 75 MD.removeInstruction(DeadInst); 76 77 for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 78 Value *Op = DeadInst->getOperand(op); 79 DeadInst->setOperand(op, nullptr); 80 81 // If this operand just became dead, add it to the NowDeadInsts list. 82 if (!Op->use_empty()) continue; 83 84 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 85 if (isInstructionTriviallyDead(OpI, &TLI)) 86 NowDeadInsts.push_back(OpI); 87 } 88 89 DeadInst->eraseFromParent(); 90 91 if (ValueSet) ValueSet->remove(DeadInst); 92 } while (!NowDeadInsts.empty()); 93 } 94 95 /// Does this instruction write some memory? This only returns true for things 96 /// that we can analyze with other helpers below. 
static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
  // Plain stores always write memory.
  if (isa<StoreInst>(I))
    return true;
  // Memory-writing intrinsics this pass knows how to reason about.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
    case Intrinsic::init_trampoline:
    case Intrinsic::lifetime_end:
      return true;
    }
  }
  // A handful of string libcalls that write through their first argument.
  // TLI.has() guards against environments where the libcall is unavailable
  // (e.g. -fno-builtin); the name is compared against TLI's canonical name.
  if (auto CS = CallSite(I)) {
    if (Function *F = CS.getCalledFunction()) {
      if (TLI.has(LibFunc::strcpy) &&
          F->getName() == TLI.getName(LibFunc::strcpy)) {
        return true;
      }
      if (TLI.has(LibFunc::strncpy) &&
          F->getName() == TLI.getName(LibFunc::strncpy)) {
        return true;
      }
      if (TLI.has(LibFunc::strcat) &&
          F->getName() == TLI.getName(LibFunc::strcat)) {
        return true;
      }
      if (TLI.has(LibFunc::strncat) &&
          F->getName() == TLI.getName(LibFunc::strncat)) {
        return true;
      }
    }
  }
  return false;
}

/// Return a Location stored to by the specified instruction. If isRemovable
/// returns true, this function and getLocForRead completely describe the memory
/// operations for this instruction.
static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
  // NOTE(review): the AA parameter is not used in this function body.
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return MemoryLocation::get(SI);

  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
    // memcpy/memmove/memset.
    MemoryLocation Loc = MemoryLocation::getForDest(MI);
    return Loc;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
  if (!II)
    // Non-intrinsic, non-store writer (e.g. the libcalls above): no precise
    // location is returned; callers check Loc.Ptr for null.
    return MemoryLocation();

  switch (II->getIntrinsicID()) {
  default:
    return MemoryLocation(); // Unhandled intrinsic.
  case Intrinsic::init_trampoline:
    // FIXME: We don't know the size of the trampoline, so we can't really
    // handle it here.
    return MemoryLocation(II->getArgOperand(0));
  case Intrinsic::lifetime_end: {
    // lifetime_end(size, ptr): size is the first operand, pointer the second.
    uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
    return MemoryLocation(II->getArgOperand(1), Len);
  }
  }
}

/// Return the location read by the specified "hasMemoryWrite" instruction if
/// any.
static MemoryLocation getLocForRead(Instruction *Inst,
                                    const TargetLibraryInfo &TLI) {
  assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case");

  // The only instructions that both read and write are the mem transfer
  // instructions (memcpy/memmove).
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
    return MemoryLocation::getForSource(MTI);
  return MemoryLocation();
}

/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
  // Don't remove volatile/atomic stores.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->isUnordered();

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
    case Intrinsic::lifetime_end:
      // Never remove dead lifetime_end's, e.g. because it is followed by a
      // free.
      return false;
    case Intrinsic::init_trampoline:
      // Always safe to remove init_trampoline.
      return true;

    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      // Don't remove volatile memory intrinsics.
      return !cast<MemIntrinsic>(II)->isVolatile();
    }
  }

  // Libcalls (strcpy & co.): removable only if their return value is unused.
  if (auto CS = CallSite(I))
    return CS.getInstruction()->use_empty();

  return false;
}


/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
  // Don't shorten stores for now
  if (isa<StoreInst>(I))
    return false;

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: return false;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
      // Do shorten memory intrinsics.
      // FIXME: Add memmove if it's also safe to transform.
      return true;
    }
  }

  // Don't shorten libcalls calls for now.

  return false;
}

/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  // easily done by offsetting the source address.
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
  return II && II->getIntrinsicID() == Intrinsic::memset;
}

/// Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand();
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
    return MI->getDest();

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::init_trampoline:
      return II->getArgOperand(0);
    }
  }

  CallSite CS(I);
  // All the supported functions so far happen to have dest as their first
  // argument.
  return CS.getArgument(0);
}

/// Return the number of bytes addressable through V, or UnknownSize if it
/// cannot be determined.
static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI) {
  uint64_t Size;
  if (getObjectSize(V, Size, DL, &TLI))
    return Size;
  return MemoryLocation::UnknownSize;
}

namespace {
// Result of asking whether a later store overwrites an earlier one (fully,
// only at the start, only at the end, or indeterminately).
enum OverwriteResult {
  OverwriteBegin,
  OverwriteComplete,
  OverwriteEnd,
  OverwriteUnknown
};
}

/// Return 'OverwriteComplete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
/// the 'Earlier' location is completely overwritten by 'Later',
/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten
/// by 'Later', or 'OverwriteUnknown' if nothing can be determined.
/// On the offset-based paths, EarlierOff/LaterOff are set to the byte offsets
/// of the two accesses from their common base pointer.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
                                   const MemoryLocation &Earlier,
                                   const DataLayout &DL,
                                   const TargetLibraryInfo &TLI,
                                   int64_t &EarlierOff, int64_t &LaterOff) {
  const Value *P1 = Earlier.Ptr->stripPointerCasts();
  const Value *P2 = Later.Ptr->stripPointerCasts();

  // If the start pointers are the same, we just have to compare sizes to see if
  // the later store was larger than the earlier store.
  if (P1 == P2) {
    // If we don't know the sizes of either access, then we can't do a
    // comparison.
    if (Later.Size == MemoryLocation::UnknownSize ||
        Earlier.Size == MemoryLocation::UnknownSize)
      return OverwriteUnknown;

    // Make sure that the Later size is >= the Earlier size.
    if (Later.Size >= Earlier.Size)
      return OverwriteComplete;
  }

  // Otherwise, we have to have size information, and the later store has to be
  // larger than the earlier one.
  if (Later.Size == MemoryLocation::UnknownSize ||
      Earlier.Size == MemoryLocation::UnknownSize)
    return OverwriteUnknown;

  // Check to see if the later store is to the entire object (either a global,
  // an alloca, or a byval/inalloca argument).  If so, then it clearly
  // overwrites any other store to the same object.
  const Value *UO1 = GetUnderlyingObject(P1, DL),
              *UO2 = GetUnderlyingObject(P2, DL);

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (UO1 != UO2)
    return OverwriteUnknown;

  // If the "Later" store is to a recognizable object, get its size.
  uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
  if (ObjectSize != MemoryLocation::UnknownSize)
    if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
      return OverwriteComplete;

  // Okay, we have stores to two completely different pointers.  Try to
  // decompose the pointer into a "base + constant_offset" form.  If the base
  // pointers are equal, then we can reason about the two stores.
  EarlierOff = 0;
  LaterOff = 0;
  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);

  // If the base pointers still differ, we have two completely different stores.
  if (BP1 != BP2)
    return OverwriteUnknown;

  // The later store completely overlaps the earlier store if:
  //
  // 1. Both start at the same offset and the later one's size is greater than
  //    or equal to the earlier one's, or
  //
  //      |--earlier--|
  //      |--   later   --|
  //
  // 2. The earlier store has an offset greater than the later offset, but which
  //    still lies completely within the later store.
  //
  //        |--earlier--|
  //    |-----  later  ------|
  //
  // We have to be careful here as *Off is signed while *.Size is unsigned.
  if (EarlierOff >= LaterOff &&
      Later.Size >= Earlier.Size &&
      uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
    return OverwriteComplete;

  // Another interesting case is if the later store overwrites the end of the
  // earlier store.
  //
  //      |--earlier--|
  //                |--   later   --|
  //
  // In this case we may want to trim the size of earlier to avoid generating
  // writes to addresses which will definitely be overwritten later
  if (LaterOff > EarlierOff &&
      LaterOff < int64_t(EarlierOff + Earlier.Size) &&
      int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
    return OverwriteEnd;

  // Finally, we also need to check if the later store overwrites the beginning
  // of the earlier store.
  //
  //                |--earlier--|
  //      |--   later   --|
  //
  // In this case we may want to move the destination address and trim the size
  // of earlier to avoid generating writes to addresses which will definitely
  // be overwritten later.
  if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
    assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
            && "Expect to be handled as OverwriteComplete" );
    return OverwriteBegin;
  }
  // Otherwise, they don't completely overlap.
  return OverwriteUnknown;
}

/// If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense.  Consider this case:
///
///   memcpy(A <- B)
///   memcpy(A <- A)
///
/// In this case, the second store to A does not make the first store to A dead.
/// The usual situation isn't an explicit A<-A store like this (which can be
/// trivially removed) but a case where two pointers may alias.
///
/// This function detects when it is unsafe to remove a dependent instruction
/// because the DSE inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
                               const MemoryLocation &InstStoreLoc,
                               Instruction *DepWrite,
                               const TargetLibraryInfo &TLI,
                               AliasAnalysis &AA) {
  // Self reads can only happen for instructions that read memory.  Get the
  // location read.
  MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
  if (!InstReadLoc.Ptr) return false;  // Not a reading instruction.

  // If the read and written loc obviously don't alias, it isn't a read.
  if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;

  // Okay, 'Inst' may copy over itself.  However, we can still remove the
  // DepWrite instruction if we can prove that it reads from the same location
  // as Inst.  This handles useful cases like:
  //    memcpy(A <- B)
  //    memcpy(A <- B)
  // Here we don't know if A/B may alias, but we do know that B/B are must
  // aliases, so removing the first memcpy is safe (assuming it writes <= #
  // bytes as the second one).
  MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);

  if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
    return false;

  // If DepWrite doesn't read memory or if we can't prove it is a must alias,
  // then it can't be considered dead.
  return true;
}


/// Returns true if the memory which is accessed by the second instruction is not
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
static bool memoryIsNotModifiedBetween(Instruction *FirstI,
                                       Instruction *SecondI,
                                       AliasAnalysis *AA) {
  // Walk the CFG backwards from SecondI's block toward FirstI's block,
  // checking every instruction that may execute between the two for a write
  // that could modify the location SecondI accesses.
  SmallVector<BasicBlock *, 16> WorkList;
  SmallPtrSet<BasicBlock *, 8> Visited;
  // Start scanning FirstI's block at the instruction *after* FirstI.
  BasicBlock::iterator FirstBBI(FirstI);
  ++FirstBBI;
  BasicBlock::iterator SecondBBI(SecondI);
  BasicBlock *FirstBB = FirstI->getParent();
  BasicBlock *SecondBB = SecondI->getParent();
  MemoryLocation MemLoc = MemoryLocation::get(SecondI);

  // Start checking the store-block.
  WorkList.push_back(SecondBB);
  bool isFirstBlock = true;

  // Check all blocks going backward until we reach the load-block.
  while (!WorkList.empty()) {
    BasicBlock *B = WorkList.pop_back_val();

    // Ignore instructions before LI if this is the FirstBB.
    BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());

    BasicBlock::iterator EI;
    if (isFirstBlock) {
      // Ignore instructions after SI if this is the first visit of SecondBB.
      assert(B == SecondBB && "first block is not the store block");
      EI = SecondBBI;
      isFirstBlock = false;
    } else {
      // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
      // In this case we also have to look at instructions after SI.
      EI = B->end();
    }
    for (; BI != EI; ++BI) {
      Instruction *I = &*BI;
      if (I->mayWriteToMemory() && I != SecondI) {
        // Any Mod (or ModRef) on the location means memory may be modified.
        auto Res = AA->getModRefInfo(I, MemLoc);
        if (Res != MRI_NoModRef)
          return false;
      }
    }
    if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
          "Should not hit the entry block because SI must be dominated by LI");
      for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
        if (!Visited.insert(*PredI).second)
          continue;
        WorkList.push_back(*PredI);
      }
    }
  }
  return true;
}

/// Find all blocks that will unconditionally lead to the block BB and append
/// them to F.
static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
                                   BasicBlock *BB, DominatorTree *DT) {
  for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
    BasicBlock *Pred = *I;
    // Skip self-loops.
    if (Pred == BB) continue;
    TerminatorInst *PredTI = Pred->getTerminator();
    // Only single-successor predecessors lead unconditionally to BB.
    if (PredTI->getNumSuccessors() != 1)
      continue;

    if (DT->isReachableFromEntry(Pred))
      Blocks.push_back(Pred);
  }
}

/// Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
static bool handleFree(CallInst *F, AliasAnalysis *AA,
                       MemoryDependenceResults *MD, DominatorTree *DT,
                       const TargetLibraryInfo *TLI) {
  bool MadeChange = false;

  MemoryLocation Loc = MemoryLocation(F->getOperand(0));
  SmallVector<BasicBlock *, 16> Blocks;
  Blocks.push_back(F->getParent());
  const DataLayout &DL = F->getModule()->getDataLayout();

  // Walk backwards from the free (and, via unconditional predecessors, into
  // earlier blocks) deleting stores whose only observer is the freed memory.
  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.pop_back_val();
    Instruction *InstPt = BB->getTerminator();
    if (BB == F->getParent()) InstPt = F;

    MemDepResult Dep =
        MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB);
    while (Dep.isDef() || Dep.isClobber()) {
      Instruction *Dependency = Dep.getInst();
      if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency))
        break;

      Value *DepPointer =
          GetUnderlyingObject(getStoredPointerOperand(Dependency), DL);

      // Check for aliasing.
      if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
        break;

      // Remember where to resume the dependency query before deleting.
      auto Next = ++Dependency->getIterator();

      // DCE instructions only used to calculate that store
      deleteDeadInstruction(Dependency, *MD, *TLI);
      ++NumFastStores;
      MadeChange = true;

      // Inst's old Dependency is now deleted. Compute the next dependency,
      // which may also be dead, as in
      //    s[0] = 0;
      //    s[1] = 0; // This has just been deleted.
      //    free(s);
      Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB);
    }

    if (Dep.isNonLocal())
      findUnconditionalPreds(Blocks, BB, DT);
  }

  return MadeChange;
}

/// Check to see if the specified location may alias any of the stack objects in
/// the DeadStackObjects set. If so, they become live because the location is
/// being loaded.
static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
                                  SmallSetVector<Value *, 16> &DeadStackObjects,
                                  const DataLayout &DL, AliasAnalysis *AA,
                                  const TargetLibraryInfo *TLI) {
  const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);

  // A constant can't be in the dead pointer set.
  if (isa<Constant>(UnderlyingPointer))
    return;

  // If the kill pointer can be easily reduced to an alloca, don't bother doing
  // extraneous AA queries.
  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
    DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
    return;
  }

  // Remove objects that could alias LoadedLoc.
  DeadStackObjects.remove_if([&](Value *I) {
    // See if the loaded location could alias the stack location.
    MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
    return !AA->isNoAlias(StackLoc, LoadedLoc);
  });
}

/// Remove dead stores to stack-allocated locations in the function end block.
/// Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
                           MemoryDependenceResults *MD,
                           const TargetLibraryInfo *TLI) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallSetVector<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock &Entry = BB.getParent()->front();
  for (Instruction &I : Entry) {
    if (isa<AllocaInst>(&I))
      DeadStackObjects.insert(&I);

    // Okay, so these are dead heap objects, but if the pointer never escapes
    // then it's leaked by this function anyways.
    else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
      DeadStackObjects.insert(&I);
  }

  // Treat byval or inalloca arguments the same, stores to them are dead at the
  // end of the function.
  for (Argument &AI : BB.getParent()->args())
    if (AI.hasByValOrInAllocaAttr())
      DeadStackObjects.insert(&AI);

  const DataLayout &DL = BB.getModule()->getDataLayout();

  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
      // See through pointer-to-pointer bitcasts
      SmallVector<Value *, 4> Pointers;
      GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);

      // Stores to stack values are valid candidates for removal.
      // Every possible underlying object must be dead for the store to be.
      bool AllDead = true;
      for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
           E = Pointers.end(); I != E; ++I)
        if (!DeadStackObjects.count(*I)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        // Advance BBI before erasing so the iterator stays valid.
        Instruction *Dead = &*BBI++;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Objects: ";
              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
                   E = Pointers.end(); I != E; ++I) {
                dbgs() << **I;
                if (std::next(I) != E)
                  dbgs() << ", ";
              }
              dbgs() << '\n');

        // DCE instructions only used to calculate that store.
        deleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(&*BBI, TLI)) {
      Instruction *Inst = &*BBI++;
      deleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (isa<AllocaInst>(BBI)) {
      // Remove allocas from the list of dead stack objects; there can't be
      // any references before the definition.
      DeadStackObjects.remove(&*BBI);
      continue;
    }

    if (auto CS = CallSite(&*BBI)) {
      // Remove allocation function calls from the list of dead stack objects;
      // there can't be any references before the definition.
      if (isAllocLikeFn(&*BBI, TLI))
        DeadStackObjects.remove(&*BBI);

      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      DeadStackObjects.remove_if([&](Value *I) {
        // See if the call site touches the value.
        ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI));

        return A == MRI_ModRef || A == MRI_Ref;
      });

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        break;

      continue;
    }

    MemoryLocation LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load
        break;
      LoadedLoc = MemoryLocation::get(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = MemoryLocation::get(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = MemoryLocation::getForSource(MTI);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't removed
      // above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}

/// Run block-local dead store elimination over a single basic block.
static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
                                MemoryDependenceResults *MD, DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  const DataLayout &DL = BB.getModule()->getDataLayout();
  bool MadeChange = false;

  // Do a top-down walk on the BB.
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
    Instruction *Inst = &*BBI++;

    // Handle 'free' calls specially.
    if (CallInst *F = isFreeCall(Inst, TLI)) {
      MadeChange |= handleFree(F, AA, MD, DT, TLI);
      continue;
    }

    // If we find something that writes memory, get its memory dependence.
    if (!hasMemoryWrite(Inst, *TLI))
      continue;

    // If we're storing the same value back to a pointer that we just
    // loaded from, then the store can be removed.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {

      auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {
        // deleteDeadInstruction can delete the current instruction.  Save BBI
        // in case we need it.
        WeakVH NextInst(&*BBI);

        deleteDeadInstruction(DeadInst, *MD, *TLI);

        if (!NextInst)  // Next instruction deleted.
          BBI = BB.begin();
        else if (BBI != BB.begin())  // Revisit this instruction if possible.
          --BBI;
        ++NumRedundantStores;
        MadeChange = true;
      };

      // Case 1: store of a value just loaded from the same pointer, with no
      // intervening modification: the store is a no-op.
      if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
        if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
            isRemovable(SI) &&
            memoryIsNotModifiedBetween(DepLoad, SI, AA)) {

          DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
                       << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');

          RemoveDeadInstAndUpdateBBI(SI);
          continue;
        }
      }

      // Remove null stores into the calloc'ed objects
      // (calloc already guarantees zeroed memory).
      Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());

      if (StoredConstant && StoredConstant->isNullValue() &&
          isRemovable(SI)) {
        Instruction *UnderlyingPointer = dyn_cast<Instruction>(
            GetUnderlyingObject(SI->getPointerOperand(), DL));

        if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
            memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
          DEBUG(dbgs()
                << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');

          RemoveDeadInstAndUpdateBBI(SI);
          continue;
        }
      }
    }

    MemDepResult InstDep = MD->getDependency(Inst);

    // Ignore any store where we can't find a local dependence.
    // FIXME: cross-block DSE would be fun. :)
    if (!InstDep.isDef() && !InstDep.isClobber())
      continue;

    // Figure out what location is being stored to.
    MemoryLocation Loc = getLocForWrite(Inst, *AA);

    // If we didn't get a useful location, fail.
    if (!Loc.Ptr)
      continue;

    while (InstDep.isDef() || InstDep.isClobber()) {
      // Get the memory clobbered by the instruction we depend on.  MemDep will
      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
      // end up depending on a may- or must-aliased load, then we can't optimize
      // away the store and we bail out.  However, if we depend on something
      // that overwrites the memory location we *can* potentially optimize it.
      //
      // Find out what memory location the dependent instruction stores.
      Instruction *DepWrite = InstDep.getInst();
      MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
      // If we didn't get a useful location, or if it isn't a size, bail out.
      if (!DepLoc.Ptr)
        break;

      // If we find a write that is a) removable (i.e., non-volatile), b) is
      // completely obliterated by the store to 'Loc', and c) which we know that
      // 'Inst' doesn't load from, then we can remove it.
      if (isRemovable(DepWrite) &&
          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
        int64_t InstWriteOffset, DepWriteOffset;
        OverwriteResult OR =
            isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
        if (OR == OverwriteComplete) {
          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                       << *DepWrite << "\n  KILLER: " << *Inst << '\n');

          // Delete the store and now-dead instructions that feed it.
          deleteDeadInstruction(DepWrite, *MD, *TLI);
          ++NumFastStores;
          MadeChange = true;

          // deleteDeadInstruction can delete the current instruction in loop
          // cases, reset BBI.
          BBI = Inst->getIterator();
          if (BBI != BB.begin())
            --BBI;
          break;
        } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
                   ((OR == OverwriteBegin &&
                     isShortenableAtTheBeginning(DepWrite)))) {
          // TODO: base this on the target vector size so that if the earlier
          // store was too small to get vector writes anyway then its likely
          // a good idea to shorten it
          // Power of 2 vector writes are probably always a bad idea to optimize
          // as any store/memset/memcpy is likely using vector instructions so
          // shortening it to not vector size is likely to be slower
          MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
          bool IsOverwriteEnd = (OR == OverwriteEnd);
          // For the "begin" case, reinterpret InstWriteOffset as the end of
          // the later store, i.e. the new start of the earlier one.
          if (!IsOverwriteEnd)
            InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);

          if ((llvm::isPowerOf2_64(InstWriteOffset) &&
               DepWriteAlign <= InstWriteOffset) ||
              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {

            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                         << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
                         << *DepWrite << "\n  KILLER (offset "
                         << InstWriteOffset << ", " << DepLoc.Size << ")"
                         << *Inst << '\n');

            int64_t NewLength =
                IsOverwriteEnd
                    ? InstWriteOffset - DepWriteOffset
                    : DepLoc.Size - (InstWriteOffset - DepWriteOffset);

            Value *DepWriteLength = DepIntrinsic->getLength();
            Value *TrimmedLength =
                ConstantInt::get(DepWriteLength->getType(), NewLength);
            DepIntrinsic->setLength(TrimmedLength);

            if (!IsOverwriteEnd) {
              // Shortening at the beginning also moves the destination
              // pointer forward by the trimmed amount.
              int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
              Value *Indices[1] = {
                  ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
              GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
                  DepIntrinsic->getRawDest(), Indices, "", DepWrite);
              DepIntrinsic->setDest(NewDestGEP);
            }
            MadeChange = true;
          }
        }
      }

      // If this is a may-aliased store that is clobbering the store value, we
      // can keep searching past it for another must-aliased pointer that stores
      // to the same location.  For example, in:
      //   store -> P
      //   store -> Q
      //   store -> P
      // we can remove the first store to P even though we don't know if P and Q
      // alias.
      if (DepWrite == &BB.front()) break;

      // Can't look past this instruction if it might read 'Loc'.
      if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
        break;

      InstDep = MD->getPointerDependencyFrom(Loc, false,
                                             DepWrite->getIterator(), &BB);
    }
  }

  // If this block ends in a return, unwind, or unreachable, all allocas are
  // dead at its end, which means stores to them are also dead.
  if (BB.getTerminator()->getNumSuccessors() == 0)
    MadeChange |= handleEndBlock(BB, AA, MD, TLI);

  return MadeChange;
}

/// Run DSE over every reachable block of the function.
static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
                                MemoryDependenceResults *MD, DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  bool MadeChange = false;
  for (BasicBlock &BB : F)
    // Only check non-dead blocks.  Dead blocks may have strange pointer
    // cycles that will confuse alias analysis.
    if (DT->isReachableFromEntry(&BB))
      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
  return MadeChange;
}

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis *AA = &AM.getResult<AAManager>(F);
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
  const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);

  if (!eliminateDeadStores(F, AA, MD, DT, TLI))
    return PreservedAnalyses::all();
  // We changed instructions but not the CFG, so these analyses survive.
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<GlobalsAA>();
  PA.preserve<MemoryDependenceAnalysis>();
  return PA;
}

/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
class DSELegacyPass : public FunctionPass {
public:
  DSELegacyPass() : FunctionPass(ID) {
    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    MemoryDependenceResults *MD =
        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
    const TargetLibraryInfo *TLI =
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

    return eliminateDeadStores(F, AA, MD, DT, TLI);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>(); 1004 AU.addPreserved<GlobalsAAWrapperPass>(); 1005 AU.addPreserved<MemoryDependenceWrapperPass>(); 1006 } 1007 1008 static char ID; // Pass identification, replacement for typeid 1009 }; 1010 1011 char DSELegacyPass::ID = 0; 1012 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, 1013 false) 1014 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1015 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1016 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1017 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 1018 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 1019 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, 1020 false) 1021 1022 FunctionPass *llvm::createDeadStoreEliminationPass() { 1023 return new DSELegacyPass(); 1024 } 1025