1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a trivial dead store elimination that only considers 11 // basic-block local redundant stores. 12 // 13 // FIXME: This should eventually be extended to be a post-dominator tree 14 // traversal. Doing so would be pretty trivial. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Scalar.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/Analysis/CaptureTracking.h" 24 #include "llvm/Analysis/GlobalsModRef.h" 25 #include "llvm/Analysis/MemoryBuiltins.h" 26 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 27 #include "llvm/Analysis/TargetLibraryInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/Constants.h" 30 #include "llvm/IR/DataLayout.h" 31 #include "llvm/IR/Dominators.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/IR/GlobalVariable.h" 34 #include "llvm/IR/Instructions.h" 35 #include "llvm/IR/IntrinsicInst.h" 36 #include "llvm/Pass.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Transforms/Utils/Local.h" 40 using namespace llvm; 41 42 #define DEBUG_TYPE "dse" 43 44 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 45 STATISTIC(NumFastStores, "Number of stores deleted"); 46 STATISTIC(NumFastOther , "Number of other instrs removed"); 47 48 namespace { 49 struct DSE : public FunctionPass { 50 AliasAnalysis *AA; 51 MemoryDependenceResults *MD; 52 DominatorTree *DT; 53 const TargetLibraryInfo *TLI; 54 55 static char ID; // Pass identification, replacement for typeid 56 DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) { 57 initializeDSEPass(*PassRegistry::getPassRegistry()); 58 } 59 60 bool runOnFunction(Function &F) override { 61 if (skipFunction(F)) 62 return false; 63 64 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 65 MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 66 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 67 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 68 69 bool Changed = false; 70 for (BasicBlock &I : F) 71 // Only check non-dead blocks. Dead blocks may have strange pointer 72 // cycles that will confuse alias analysis. 73 if (DT->isReachableFromEntry(&I)) 74 Changed |= runOnBasicBlock(I); 75 76 AA = nullptr; MD = nullptr; DT = nullptr; 77 return Changed; 78 } 79 80 bool runOnBasicBlock(BasicBlock &BB); 81 bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI); 82 bool HandleFree(CallInst *F); 83 bool handleEndBlock(BasicBlock &BB); 84 void RemoveAccessedObjects(const MemoryLocation &LoadedLoc, 85 SmallSetVector<Value *, 16> &DeadStackObjects, 86 const DataLayout &DL); 87 88 void getAnalysisUsage(AnalysisUsage &AU) const override { 89 AU.setPreservesCFG(); 90 AU.addRequired<DominatorTreeWrapperPass>(); 91 AU.addRequired<AAResultsWrapperPass>(); 92 AU.addRequired<MemoryDependenceWrapperPass>(); 93 AU.addRequired<TargetLibraryInfoWrapperPass>(); 94 AU.addPreserved<DominatorTreeWrapperPass>(); 95 AU.addPreserved<GlobalsAAWrapperPass>(); 96 AU.addPreserved<MemoryDependenceWrapperPass>(); 97 } 98 }; 99 } 100 101 char DSE::ID = 0; 102 INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) 103 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 104 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 105 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 106 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 107 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 108 INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) 109 110 FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } 111 112 //===----------------------------------------------------------------------===// 113 // Helper functions 114 //===----------------------------------------------------------------------===// 115 116 /// DeleteDeadInstruction - Delete this instruction. Before we do, go through 117 /// and zero out all the operands of this instruction. If any of them become 118 /// dead, delete them and the computation tree that feeds them. 119 /// 120 /// If ValueSet is non-null, remove any deleted instructions from it as well. 121 /// 122 static void DeleteDeadInstruction(Instruction *I, 123 MemoryDependenceResults &MD, 124 const TargetLibraryInfo &TLI, 125 SmallSetVector<Value*, 16> *ValueSet = nullptr) { 126 SmallVector<Instruction*, 32> NowDeadInsts; 127 128 NowDeadInsts.push_back(I); 129 --NumFastOther; 130 131 // Before we touch this instruction, remove it from memdep! 132 do { 133 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 134 ++NumFastOther; 135 136 // This instruction is dead, zap it, in stages. Start by removing it from 137 // MemDep, which needs to know the operands and needs it to be in the 138 // function. 139 MD.removeInstruction(DeadInst); 140 141 for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 142 Value *Op = DeadInst->getOperand(op); 143 DeadInst->setOperand(op, nullptr); 144 145 // If this operand just became dead, add it to the NowDeadInsts list. 146 if (!Op->use_empty()) continue; 147 148 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 149 if (isInstructionTriviallyDead(OpI, &TLI)) 150 NowDeadInsts.push_back(OpI); 151 } 152 153 DeadInst->eraseFromParent(); 154 155 if (ValueSet) ValueSet->remove(DeadInst); 156 } while (!NowDeadInsts.empty()); 157 } 158 159 160 /// hasMemoryWrite - Does this instruction write some memory? This only returns 161 /// true for things that we can analyze with other helpers below. 162 static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) { 163 if (isa<StoreInst>(I)) 164 return true; 165 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 166 switch (II->getIntrinsicID()) { 167 default: 168 return false; 169 case Intrinsic::memset: 170 case Intrinsic::memmove: 171 case Intrinsic::memcpy: 172 case Intrinsic::init_trampoline: 173 case Intrinsic::lifetime_end: 174 return true; 175 } 176 } 177 if (auto CS = CallSite(I)) { 178 if (Function *F = CS.getCalledFunction()) { 179 if (TLI.has(LibFunc::strcpy) && 180 F->getName() == TLI.getName(LibFunc::strcpy)) { 181 return true; 182 } 183 if (TLI.has(LibFunc::strncpy) && 184 F->getName() == TLI.getName(LibFunc::strncpy)) { 185 return true; 186 } 187 if (TLI.has(LibFunc::strcat) && 188 F->getName() == TLI.getName(LibFunc::strcat)) { 189 return true; 190 } 191 if (TLI.has(LibFunc::strncat) && 192 F->getName() == TLI.getName(LibFunc::strncat)) { 193 return true; 194 } 195 } 196 } 197 return false; 198 } 199 200 /// getLocForWrite - Return a Location stored to by the specified instruction. 201 /// If isRemovable returns true, this function and getLocForRead completely 202 /// describe the memory operations for this instruction. 203 static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { 204 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 205 return MemoryLocation::get(SI); 206 207 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { 208 // memcpy/memmove/memset. 209 MemoryLocation Loc = MemoryLocation::getForDest(MI); 210 return Loc; 211 } 212 213 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); 214 if (!II) 215 return MemoryLocation(); 216 217 switch (II->getIntrinsicID()) { 218 default: 219 return MemoryLocation(); // Unhandled intrinsic. 220 case Intrinsic::init_trampoline: 221 // FIXME: We don't know the size of the trampoline, so we can't really 222 // handle it here. 223 return MemoryLocation(II->getArgOperand(0)); 224 case Intrinsic::lifetime_end: { 225 uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); 226 return MemoryLocation(II->getArgOperand(1), Len); 227 } 228 } 229 } 230 231 /// getLocForRead - Return the location read by the specified "hasMemoryWrite" 232 /// instruction if any. 233 static MemoryLocation getLocForRead(Instruction *Inst, 234 const TargetLibraryInfo &TLI) { 235 assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case"); 236 237 // The only instructions that both read and write are the mem transfer 238 // instructions (memcpy/memmove). 239 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) 240 return MemoryLocation::getForSource(MTI); 241 return MemoryLocation(); 242 } 243 244 245 /// isRemovable - If the value of this instruction and the memory it writes to 246 /// is unused, may we delete this instruction? 247 static bool isRemovable(Instruction *I) { 248 // Don't remove volatile/atomic stores. 249 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 250 return SI->isUnordered(); 251 252 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 253 switch (II->getIntrinsicID()) { 254 default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); 255 case Intrinsic::lifetime_end: 256 // Never remove dead lifetime_end's, e.g. because it is followed by a 257 // free. 258 return false; 259 case Intrinsic::init_trampoline: 260 // Always safe to remove init_trampoline. 261 return true; 262 263 case Intrinsic::memset: 264 case Intrinsic::memmove: 265 case Intrinsic::memcpy: 266 // Don't remove volatile memory intrinsics. 267 return !cast<MemIntrinsic>(II)->isVolatile(); 268 } 269 } 270 271 if (auto CS = CallSite(I)) 272 return CS.getInstruction()->use_empty(); 273 274 return false; 275 } 276 277 278 /// Returns true if the end of this instruction can be safely shortened in 279 /// length. 280 static bool isShortenableAtTheEnd(Instruction *I) { 281 // Don't shorten stores for now 282 if (isa<StoreInst>(I)) 283 return false; 284 285 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 286 switch (II->getIntrinsicID()) { 287 default: return false; 288 case Intrinsic::memset: 289 case Intrinsic::memcpy: 290 // Do shorten memory intrinsics. 291 // FIXME: Add memmove if it's also safe to transform. 292 return true; 293 } 294 } 295 296 // Don't shorten libcalls calls for now. 297 298 return false; 299 } 300 301 /// Returns true if the beginning of this instruction can be safely shortened 302 /// in length. 303 static bool isShortenableAtTheBeginning(Instruction *I) { 304 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be 305 // easily done by offsetting the source address. 306 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 307 return II && II->getIntrinsicID() == Intrinsic::memset; 308 } 309 310 /// getStoredPointerOperand - Return the pointer that is being written to. 311 static Value *getStoredPointerOperand(Instruction *I) { 312 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 313 return SI->getPointerOperand(); 314 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) 315 return MI->getDest(); 316 317 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 318 switch (II->getIntrinsicID()) { 319 default: llvm_unreachable("Unexpected intrinsic!"); 320 case Intrinsic::init_trampoline: 321 return II->getArgOperand(0); 322 } 323 } 324 325 CallSite CS(I); 326 // All the supported functions so far happen to have dest as their first 327 // argument. 328 return CS.getArgument(0); 329 } 330 331 static uint64_t getPointerSize(const Value *V, const DataLayout &DL, 332 const TargetLibraryInfo &TLI) { 333 uint64_t Size; 334 if (getObjectSize(V, Size, DL, &TLI)) 335 return Size; 336 return MemoryLocation::UnknownSize; 337 } 338 339 namespace { 340 enum OverwriteResult { 341 OverwriteBegin, 342 OverwriteComplete, 343 OverwriteEnd, 344 OverwriteUnknown 345 }; 346 } 347 348 /// Return 'OverwriteComplete' if a store to the 'Later' location completely 349 /// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of 350 /// the 'Earlier' location is completely overwritten by 'Later', 351 /// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten 352 /// by 'Later', or 'OverwriteUnknown' if nothing can be determined. 353 static OverwriteResult isOverwrite(const MemoryLocation &Later, 354 const MemoryLocation &Earlier, 355 const DataLayout &DL, 356 const TargetLibraryInfo &TLI, 357 int64_t &EarlierOff, int64_t &LaterOff) { 358 const Value *P1 = Earlier.Ptr->stripPointerCasts(); 359 const Value *P2 = Later.Ptr->stripPointerCasts(); 360 361 // If the start pointers are the same, we just have to compare sizes to see if 362 // the later store was larger than the earlier store. 363 if (P1 == P2) { 364 // If we don't know the sizes of either access, then we can't do a 365 // comparison. 366 if (Later.Size == MemoryLocation::UnknownSize || 367 Earlier.Size == MemoryLocation::UnknownSize) 368 return OverwriteUnknown; 369 370 // Make sure that the Later size is >= the Earlier size. 371 if (Later.Size >= Earlier.Size) 372 return OverwriteComplete; 373 } 374 375 // Otherwise, we have to have size information, and the later store has to be 376 // larger than the earlier one. 377 if (Later.Size == MemoryLocation::UnknownSize || 378 Earlier.Size == MemoryLocation::UnknownSize) 379 return OverwriteUnknown; 380 381 // Check to see if the later store is to the entire object (either a global, 382 // an alloca, or a byval/inalloca argument). If so, then it clearly 383 // overwrites any other store to the same object. 384 const Value *UO1 = GetUnderlyingObject(P1, DL), 385 *UO2 = GetUnderlyingObject(P2, DL); 386 387 // If we can't resolve the same pointers to the same object, then we can't 388 // analyze them at all. 389 if (UO1 != UO2) 390 return OverwriteUnknown; 391 392 // If the "Later" store is to a recognizable object, get its size. 393 uint64_t ObjectSize = getPointerSize(UO2, DL, TLI); 394 if (ObjectSize != MemoryLocation::UnknownSize) 395 if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size) 396 return OverwriteComplete; 397 398 // Okay, we have stores to two completely different pointers. Try to 399 // decompose the pointer into a "base + constant_offset" form. If the base 400 // pointers are equal, then we can reason about the two stores. 401 EarlierOff = 0; 402 LaterOff = 0; 403 const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); 404 const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); 405 406 // If the base pointers still differ, we have two completely different stores. 407 if (BP1 != BP2) 408 return OverwriteUnknown; 409 410 // The later store completely overlaps the earlier store if: 411 // 412 // 1. Both start at the same offset and the later one's size is greater than 413 // or equal to the earlier one's, or 414 // 415 // |--earlier--| 416 // |-- later --| 417 // 418 // 2. The earlier store has an offset greater than the later offset, but which 419 // still lies completely within the later store. 420 // 421 // |--earlier--| 422 // |----- later ------| 423 // 424 // We have to be careful here as *Off is signed while *.Size is unsigned. 425 if (EarlierOff >= LaterOff && 426 Later.Size >= Earlier.Size && 427 uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) 428 return OverwriteComplete; 429 430 // Another interesting case is if the later store overwrites the end of the 431 // earlier store. 432 // 433 // |--earlier--| 434 // |-- later --| 435 // 436 // In this case we may want to trim the size of earlier to avoid generating 437 // writes to addresses which will definitely be overwritten later 438 if (LaterOff > EarlierOff && 439 LaterOff < int64_t(EarlierOff + Earlier.Size) && 440 int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)) 441 return OverwriteEnd; 442 443 // Finally, we also need to check if the later store overwrites the beginning 444 // of the earlier store. 445 // 446 // |--earlier--| 447 // |-- later --| 448 // 449 // In this case we may want to move the destination address and trim the size 450 // of earlier to avoid generating writes to addresses which will definitely 451 // be overwritten later. 452 if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) { 453 assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size) 454 && "Expect to be handled as OverwriteComplete" ); 455 return OverwriteBegin; 456 } 457 // Otherwise, they don't completely overlap. 458 return OverwriteUnknown; 459 } 460 461 /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a 462 /// memory region into an identical pointer) then it doesn't actually make its 463 /// input dead in the traditional sense. Consider this case: 464 /// 465 /// memcpy(A <- B) 466 /// memcpy(A <- A) 467 /// 468 /// In this case, the second store to A does not make the first store to A dead. 469 /// The usual situation isn't an explicit A<-A store like this (which can be 470 /// trivially removed) but a case where two pointers may alias. 471 /// 472 /// This function detects when it is unsafe to remove a dependent instruction 473 /// because the DSE inducing instruction may be a self-read. 474 static bool isPossibleSelfRead(Instruction *Inst, 475 const MemoryLocation &InstStoreLoc, 476 Instruction *DepWrite, 477 const TargetLibraryInfo &TLI, 478 AliasAnalysis &AA) { 479 // Self reads can only happen for instructions that read memory. Get the 480 // location read. 481 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 482 if (!InstReadLoc.Ptr) return false; // Not a reading instruction. 483 484 // If the read and written loc obviously don't alias, it isn't a read. 485 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; 486 487 // Okay, 'Inst' may copy over itself. However, we can still remove a the 488 // DepWrite instruction if we can prove that it reads from the same location 489 // as Inst. This handles useful cases like: 490 // memcpy(A <- B) 491 // memcpy(A <- B) 492 // Here we don't know if A/B may alias, but we do know that B/B are must 493 // aliases, so removing the first memcpy is safe (assuming it writes <= # 494 // bytes as the second one. 495 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 496 497 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 498 return false; 499 500 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 501 // then it can't be considered dead. 502 return true; 503 } 504 505 506 //===----------------------------------------------------------------------===// 507 // DSE Pass 508 //===----------------------------------------------------------------------===// 509 510 bool DSE::runOnBasicBlock(BasicBlock &BB) { 511 const DataLayout &DL = BB.getModule()->getDataLayout(); 512 bool MadeChange = false; 513 514 // Do a top-down walk on the BB. 515 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { 516 Instruction *Inst = &*BBI++; 517 518 // Handle 'free' calls specially. 519 if (CallInst *F = isFreeCall(Inst, TLI)) { 520 MadeChange |= HandleFree(F); 521 continue; 522 } 523 524 // If we find something that writes memory, get its memory dependence. 525 if (!hasMemoryWrite(Inst, *TLI)) 526 continue; 527 528 // If we're storing the same value back to a pointer that we just 529 // loaded from, then the store can be removed. 530 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 531 532 auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) { 533 // DeleteDeadInstruction can delete the current instruction. Save BBI 534 // in case we need it. 535 WeakVH NextInst(&*BBI); 536 537 DeleteDeadInstruction(DeadInst, *MD, *TLI); 538 539 if (!NextInst) // Next instruction deleted. 540 BBI = BB.begin(); 541 else if (BBI != BB.begin()) // Revisit this instruction if possible. 542 --BBI; 543 ++NumRedundantStores; 544 MadeChange = true; 545 }; 546 547 if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { 548 if (SI->getPointerOperand() == DepLoad->getPointerOperand() && 549 isRemovable(SI) && 550 MemoryIsNotModifiedBetween(DepLoad, SI)) { 551 552 DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " 553 << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); 554 555 RemoveDeadInstAndUpdateBBI(SI); 556 continue; 557 } 558 } 559 560 // Remove null stores into the calloc'ed objects 561 Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); 562 563 if (StoredConstant && StoredConstant->isNullValue() && 564 isRemovable(SI)) { 565 Instruction *UnderlyingPointer = dyn_cast<Instruction>( 566 GetUnderlyingObject(SI->getPointerOperand(), DL)); 567 568 if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && 569 MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) { 570 DEBUG(dbgs() 571 << "DSE: Remove null store to the calloc'ed object:\n DEAD: " 572 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); 573 574 RemoveDeadInstAndUpdateBBI(SI); 575 continue; 576 } 577 } 578 } 579 580 MemDepResult InstDep = MD->getDependency(Inst); 581 582 // Ignore any store where we can't find a local dependence. 583 // FIXME: cross-block DSE would be fun. :) 584 if (!InstDep.isDef() && !InstDep.isClobber()) 585 continue; 586 587 // Figure out what location is being stored to. 588 MemoryLocation Loc = getLocForWrite(Inst, *AA); 589 590 // If we didn't get a useful location, fail. 591 if (!Loc.Ptr) 592 continue; 593 594 while (InstDep.isDef() || InstDep.isClobber()) { 595 // Get the memory clobbered by the instruction we depend on. MemDep will 596 // skip any instructions that 'Loc' clearly doesn't interact with. If we 597 // end up depending on a may- or must-aliased load, then we can't optimize 598 // away the store and we bail out. However, if we depend on on something 599 // that overwrites the memory location we *can* potentially optimize it. 600 // 601 // Find out what memory location the dependent instruction stores. 602 Instruction *DepWrite = InstDep.getInst(); 603 MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); 604 // If we didn't get a useful location, or if it isn't a size, bail out. 605 if (!DepLoc.Ptr) 606 break; 607 608 // If we find a write that is a) removable (i.e., non-volatile), b) is 609 // completely obliterated by the store to 'Loc', and c) which we know that 610 // 'Inst' doesn't load from, then we can remove it. 611 if (isRemovable(DepWrite) && 612 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { 613 int64_t InstWriteOffset, DepWriteOffset; 614 OverwriteResult OR = 615 isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); 616 if (OR == OverwriteComplete) { 617 DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " 618 << *DepWrite << "\n KILLER: " << *Inst << '\n'); 619 620 // Delete the store and now-dead instructions that feed it. 621 DeleteDeadInstruction(DepWrite, *MD, *TLI); 622 ++NumFastStores; 623 MadeChange = true; 624 625 // DeleteDeadInstruction can delete the current instruction in loop 626 // cases, reset BBI. 627 BBI = Inst->getIterator(); 628 if (BBI != BB.begin()) 629 --BBI; 630 break; 631 } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) || 632 ((OR == OverwriteBegin && 633 isShortenableAtTheBeginning(DepWrite)))) { 634 // TODO: base this on the target vector size so that if the earlier 635 // store was too small to get vector writes anyway then its likely 636 // a good idea to shorten it 637 // Power of 2 vector writes are probably always a bad idea to optimize 638 // as any store/memset/memcpy is likely using vector instructions so 639 // shortening it to not vector size is likely to be slower 640 MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite); 641 unsigned DepWriteAlign = DepIntrinsic->getAlignment(); 642 bool IsOverwriteEnd = (OR == OverwriteEnd); 643 if (!IsOverwriteEnd) 644 InstWriteOffset = int64_t(InstWriteOffset + Loc.Size); 645 646 if ((llvm::isPowerOf2_64(InstWriteOffset) && 647 DepWriteAlign <= InstWriteOffset) || 648 ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { 649 650 DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " 651 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " 652 << *DepWrite << "\n KILLER (offset " 653 << InstWriteOffset << ", " << DepLoc.Size << ")" 654 << *Inst << '\n'); 655 656 int64_t NewLength = 657 IsOverwriteEnd 658 ? InstWriteOffset - DepWriteOffset 659 : DepLoc.Size - (InstWriteOffset - DepWriteOffset); 660 661 Value *DepWriteLength = DepIntrinsic->getLength(); 662 Value *TrimmedLength = 663 ConstantInt::get(DepWriteLength->getType(), NewLength); 664 DepIntrinsic->setLength(TrimmedLength); 665 666 if (!IsOverwriteEnd) { 667 int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset); 668 Value *Indices[1] = { 669 ConstantInt::get(DepWriteLength->getType(), OffsetMoved)}; 670 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( 671 DepIntrinsic->getRawDest(), Indices, "", DepWrite); 672 DepIntrinsic->setDest(NewDestGEP); 673 } 674 MadeChange = true; 675 } 676 } 677 } 678 679 // If this is a may-aliased store that is clobbering the store value, we 680 // can keep searching past it for another must-aliased pointer that stores 681 // to the same location. For example, in: 682 // store -> P 683 // store -> Q 684 // store -> P 685 // we can remove the first store to P even though we don't know if P and Q 686 // alias. 687 if (DepWrite == &BB.front()) break; 688 689 // Can't look past this instruction if it might read 'Loc'. 690 if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) 691 break; 692 693 InstDep = MD->getPointerDependencyFrom(Loc, false, 694 DepWrite->getIterator(), &BB); 695 } 696 } 697 698 // If this block ends in a return, unwind, or unreachable, all allocas are 699 // dead at its end, which means stores to them are also dead. 700 if (BB.getTerminator()->getNumSuccessors() == 0) 701 MadeChange |= handleEndBlock(BB); 702 703 return MadeChange; 704 } 705 706 /// Returns true if the memory which is accessed by the second instruction is not 707 /// modified between the first and the second instruction. 708 /// Precondition: Second instruction must be dominated by the first 709 /// instruction. 710 bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI, 711 Instruction *SecondI) { 712 SmallVector<BasicBlock *, 16> WorkList; 713 SmallPtrSet<BasicBlock *, 8> Visited; 714 BasicBlock::iterator FirstBBI(FirstI); 715 ++FirstBBI; 716 BasicBlock::iterator SecondBBI(SecondI); 717 BasicBlock *FirstBB = FirstI->getParent(); 718 BasicBlock *SecondBB = SecondI->getParent(); 719 MemoryLocation MemLoc = MemoryLocation::get(SecondI); 720 721 // Start checking the store-block. 722 WorkList.push_back(SecondBB); 723 bool isFirstBlock = true; 724 725 // Check all blocks going backward until we reach the load-block. 726 while (!WorkList.empty()) { 727 BasicBlock *B = WorkList.pop_back_val(); 728 729 // Ignore instructions before LI if this is the FirstBB. 730 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); 731 732 BasicBlock::iterator EI; 733 if (isFirstBlock) { 734 // Ignore instructions after SI if this is the first visit of SecondBB. 735 assert(B == SecondBB && "first block is not the store block"); 736 EI = SecondBBI; 737 isFirstBlock = false; 738 } else { 739 // It's not SecondBB or (in case of a loop) the second visit of SecondBB. 740 // In this case we also have to look at instructions after SI. 741 EI = B->end(); 742 } 743 for (; BI != EI; ++BI) { 744 Instruction *I = &*BI; 745 if (I->mayWriteToMemory() && I != SecondI) { 746 auto Res = AA->getModRefInfo(I, MemLoc); 747 if (Res != MRI_NoModRef) 748 return false; 749 } 750 } 751 if (B != FirstBB) { 752 assert(B != &FirstBB->getParent()->getEntryBlock() && 753 "Should not hit the entry block because SI must be dominated by LI"); 754 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { 755 if (!Visited.insert(*PredI).second) 756 continue; 757 WorkList.push_back(*PredI); 758 } 759 } 760 } 761 return true; 762 } 763 764 /// Find all blocks that will unconditionally lead to the block BB and append 765 /// them to F. 766 static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, 767 BasicBlock *BB, DominatorTree *DT) { 768 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { 769 BasicBlock *Pred = *I; 770 if (Pred == BB) continue; 771 TerminatorInst *PredTI = Pred->getTerminator(); 772 if (PredTI->getNumSuccessors() != 1) 773 continue; 774 775 if (DT->isReachableFromEntry(Pred)) 776 Blocks.push_back(Pred); 777 } 778 } 779 780 /// HandleFree - Handle frees of entire structures whose dependency is a store 781 /// to a field of that structure. 782 bool DSE::HandleFree(CallInst *F) { 783 bool MadeChange = false; 784 785 MemoryLocation Loc = MemoryLocation(F->getOperand(0)); 786 SmallVector<BasicBlock *, 16> Blocks; 787 Blocks.push_back(F->getParent()); 788 const DataLayout &DL = F->getModule()->getDataLayout(); 789 790 while (!Blocks.empty()) { 791 BasicBlock *BB = Blocks.pop_back_val(); 792 Instruction *InstPt = BB->getTerminator(); 793 if (BB == F->getParent()) InstPt = F; 794 795 MemDepResult Dep = 796 MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); 797 while (Dep.isDef() || Dep.isClobber()) { 798 Instruction *Dependency = Dep.getInst(); 799 if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency)) 800 break; 801 802 Value *DepPointer = 803 GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); 804 805 // Check for aliasing. 806 if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) 807 break; 808 809 auto Next = ++Dependency->getIterator(); 810 811 // DCE instructions only used to calculate that store 812 DeleteDeadInstruction(Dependency, *MD, *TLI); 813 ++NumFastStores; 814 MadeChange = true; 815 816 // Inst's old Dependency is now deleted. Compute the next dependency, 817 // which may also be dead, as in 818 // s[0] = 0; 819 // s[1] = 0; // This has just been deleted. 820 // free(s); 821 Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB); 822 } 823 824 if (Dep.isNonLocal()) 825 FindUnconditionalPreds(Blocks, BB, DT); 826 } 827 828 return MadeChange; 829 } 830 831 /// handleEndBlock - Remove dead stores to stack-allocated locations in the 832 /// function end block. Ex: 833 /// %A = alloca i32 834 /// ... 835 /// store i32 1, i32* %A 836 /// ret void 837 bool DSE::handleEndBlock(BasicBlock &BB) { 838 bool MadeChange = false; 839 840 // Keep track of all of the stack objects that are dead at the end of the 841 // function. 842 SmallSetVector<Value*, 16> DeadStackObjects; 843 844 // Find all of the alloca'd pointers in the entry block. 845 BasicBlock &Entry = BB.getParent()->front(); 846 for (Instruction &I : Entry) { 847 if (isa<AllocaInst>(&I)) 848 DeadStackObjects.insert(&I); 849 850 // Okay, so these are dead heap objects, but if the pointer never escapes 851 // then it's leaked by this function anyways. 852 else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) 853 DeadStackObjects.insert(&I); 854 } 855 856 // Treat byval or inalloca arguments the same, stores to them are dead at the 857 // end of the function. 858 for (Argument &AI : BB.getParent()->args()) 859 if (AI.hasByValOrInAllocaAttr()) 860 DeadStackObjects.insert(&AI); 861 862 const DataLayout &DL = BB.getModule()->getDataLayout(); 863 864 // Scan the basic block backwards 865 for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ 866 --BBI; 867 868 // If we find a store, check to see if it points into a dead stack value. 869 if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { 870 // See through pointer-to-pointer bitcasts 871 SmallVector<Value *, 4> Pointers; 872 GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); 873 874 // Stores to stack values are valid candidates for removal. 875 bool AllDead = true; 876 for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), 877 E = Pointers.end(); I != E; ++I) 878 if (!DeadStackObjects.count(*I)) { 879 AllDead = false; 880 break; 881 } 882 883 if (AllDead) { 884 Instruction *Dead = &*BBI++; 885 886 DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " 887 << *Dead << "\n Objects: "; 888 for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), 889 E = Pointers.end(); I != E; ++I) { 890 dbgs() << **I; 891 if (std::next(I) != E) 892 dbgs() << ", "; 893 } 894 dbgs() << '\n'); 895 896 // DCE instructions only used to calculate that store. 897 DeleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects); 898 ++NumFastStores; 899 MadeChange = true; 900 continue; 901 } 902 } 903 904 // Remove any dead non-memory-mutating instructions. 905 if (isInstructionTriviallyDead(&*BBI, TLI)) { 906 Instruction *Inst = &*BBI++; 907 DeleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects); 908 ++NumFastOther; 909 MadeChange = true; 910 continue; 911 } 912 913 if (isa<AllocaInst>(BBI)) { 914 // Remove allocas from the list of dead stack objects; there can't be 915 // any references before the definition. 916 DeadStackObjects.remove(&*BBI); 917 continue; 918 } 919 920 if (auto CS = CallSite(&*BBI)) { 921 // Remove allocation function calls from the list of dead stack objects; 922 // there can't be any references before the definition. 923 if (isAllocLikeFn(&*BBI, TLI)) 924 DeadStackObjects.remove(&*BBI); 925 926 // If this call does not access memory, it can't be loading any of our 927 // pointers. 928 if (AA->doesNotAccessMemory(CS)) 929 continue; 930 931 // If the call might load from any of our allocas, then any store above 932 // the call is live. 933 DeadStackObjects.remove_if([&](Value *I) { 934 // See if the call site touches the value. 935 ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI)); 936 937 return A == MRI_ModRef || A == MRI_Ref; 938 }); 939 940 // If all of the allocas were clobbered by the call then we're not going 941 // to find anything else to process. 942 if (DeadStackObjects.empty()) 943 break; 944 945 continue; 946 } 947 948 MemoryLocation LoadedLoc; 949 950 // If we encounter a use of the pointer, it is no longer considered dead 951 if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { 952 if (!L->isUnordered()) // Be conservative with atomic/volatile load 953 break; 954 LoadedLoc = MemoryLocation::get(L); 955 } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { 956 LoadedLoc = MemoryLocation::get(V); 957 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { 958 LoadedLoc = MemoryLocation::getForSource(MTI); 959 } else if (!BBI->mayReadFromMemory()) { 960 // Instruction doesn't read memory. Note that stores that weren't removed 961 // above will hit this case. 962 continue; 963 } else { 964 // Unknown inst; assume it clobbers everything. 965 break; 966 } 967 968 // Remove any allocas from the DeadPointer set that are loaded, as this 969 // makes any stores above the access live. 970 RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL); 971 972 // If all of the allocas were clobbered by the access then we're not going 973 // to find anything else to process. 974 if (DeadStackObjects.empty()) 975 break; 976 } 977 978 return MadeChange; 979 } 980 981 /// RemoveAccessedObjects - Check to see if the specified location may alias any 982 /// of the stack objects in the DeadStackObjects set. If so, they become live 983 /// because the location is being loaded. 984 void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc, 985 SmallSetVector<Value *, 16> &DeadStackObjects, 986 const DataLayout &DL) { 987 const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); 988 989 // A constant can't be in the dead pointer set. 990 if (isa<Constant>(UnderlyingPointer)) 991 return; 992 993 // If the kill pointer can be easily reduced to an alloca, don't bother doing 994 // extraneous AA queries. 995 if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { 996 DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer)); 997 return; 998 } 999 1000 // Remove objects that could alias LoadedLoc. 1001 DeadStackObjects.remove_if([&](Value *I) { 1002 // See if the loaded location could alias the stack location. 1003 MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI)); 1004 return !AA->isNoAlias(StackLoc, LoadedLoc); 1005 }); 1006 } 1007