1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a trivial dead store elimination that only considers 10 // basic-block local redundant stores. 11 // 12 // FIXME: This should eventually be extended to be a post-dominator tree 13 // traversal. Doing so would be pretty trivial. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/MapVector.h" 21 #include "llvm/ADT/PostOrderIterator.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/CaptureTracking.h" 29 #include "llvm/Analysis/GlobalsModRef.h" 30 #include "llvm/Analysis/MemoryBuiltins.h" 31 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 32 #include "llvm/Analysis/MemoryLocation.h" 33 #include "llvm/Analysis/MemorySSA.h" 34 #include "llvm/Analysis/MemorySSAUpdater.h" 35 #include "llvm/Analysis/PostDominators.h" 36 #include "llvm/Analysis/TargetLibraryInfo.h" 37 #include "llvm/Analysis/ValueTracking.h" 38 #include "llvm/IR/Argument.h" 39 #include "llvm/IR/BasicBlock.h" 40 #include "llvm/IR/Constant.h" 41 #include "llvm/IR/Constants.h" 42 #include "llvm/IR/DataLayout.h" 43 #include "llvm/IR/Dominators.h" 44 #include "llvm/IR/Function.h" 45 #include "llvm/IR/InstIterator.h" 46 #include "llvm/IR/InstrTypes.h" 47 #include "llvm/IR/Instruction.h" 48 #include "llvm/IR/Instructions.h" 49 #include "llvm/IR/IntrinsicInst.h" 50 #include "llvm/IR/Intrinsics.h" 51 #include "llvm/IR/LLVMContext.h" 52 #include "llvm/IR/Module.h" 53 #include "llvm/IR/PassManager.h" 54 #include "llvm/IR/Value.h" 55 #include "llvm/InitializePasses.h" 56 #include "llvm/Pass.h" 57 #include "llvm/Support/Casting.h" 58 #include "llvm/Support/CommandLine.h" 59 #include "llvm/Support/Debug.h" 60 #include "llvm/Support/DebugCounter.h" 61 #include "llvm/Support/ErrorHandling.h" 62 #include "llvm/Support/MathExtras.h" 63 #include "llvm/Support/raw_ostream.h" 64 #include "llvm/Transforms/Scalar.h" 65 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 66 #include "llvm/Transforms/Utils/Local.h" 67 #include <algorithm> 68 #include <cassert> 69 #include <cstddef> 70 #include <cstdint> 71 #include <iterator> 72 #include <map> 73 #include <utility> 74 75 using namespace llvm; 76 77 #define DEBUG_TYPE "dse" 78 79 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE"); 80 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 81 STATISTIC(NumFastStores, "Number of stores deleted"); 82 STATISTIC(NumFastOther, "Number of other instrs removed"); 83 STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); 84 STATISTIC(NumModifiedStores, "Number of stores modified"); 85 STATISTIC(NumNoopStores, "Number of noop stores deleted"); 86 87 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", 88 "Controls which MemoryDefs are eliminated."); 89 90 static cl::opt<bool> 91 
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
                               cl::init(true), cl::Hidden,
                               cl::desc("Enable partial-overwrite tracking in DSE"));

static cl::opt<bool>
EnablePartialStoreMerging("enable-dse-partial-store-merging",
                          cl::init(true), cl::Hidden,
                          cl::desc("Enable partial store merging in DSE"));

static cl::opt<bool>
    EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden,
                    cl::desc("Use the new MemorySSA-backed DSE."));

static cl::opt<unsigned>
    MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden,
                       cl::desc("The number of memory instructions to scan for "
                                "dead store elimination (default = 100)"));

static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
    "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
    cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
             "other stores per basic block (default = 5000)"));

//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;

/// Delete this instruction. Before we do, go through and zero out all the
/// operands of this instruction. If any of them become dead, delete them and
/// the computation tree that feeds them.
/// If ValueSet is non-null, remove any deleted instructions from it as well.
static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
                      MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
                      InstOverlapIntervalsTy &IOL,
                      MapVector<Instruction *, bool> &ThrowableInst,
                      SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);
  --NumFastOther;

  // Keeping the iterator straight is a pain, so we let this routine tell the
  // caller what the next instruction is after we're done mucking about.
  BasicBlock::iterator NewIter = *BBI;

  // Before we touch this instruction, remove it from memdep!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();
    // Mark the DeadInst as dead in the list of throwable instructions.
    auto It = ThrowableInst.find(DeadInst);
    if (It != ThrowableInst.end())
      ThrowableInst[It->first] = false;
    ++NumFastOther;

    // Try to preserve debug information attached to the dead instruction.
    salvageDebugInfoOrMarkUndef(*DeadInst);
    salvageKnowledge(DeadInst);

    // This instruction is dead, zap it, in stages. Start by removing it from
    // MemDep, which needs to know the operands and needs it to be in the
    // function.
    MD.removeInstruction(DeadInst);

    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, nullptr);

      // If this operand just became dead, add it to the NowDeadInsts list.
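      // Illustrative sketch (assumed IR, not from this pass): given
      //   %add = add i32 %x, 1
      //   store i32 %add, i32* %p     ; DeadInst
      // clearing the store's operands leaves %add with no remaining uses, so
      // it is pushed onto NowDeadInsts and erased on a later iteration.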
162 if (!Op->use_empty()) continue; 163 164 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 165 if (isInstructionTriviallyDead(OpI, &TLI)) 166 NowDeadInsts.push_back(OpI); 167 } 168 169 if (ValueSet) ValueSet->remove(DeadInst); 170 IOL.erase(DeadInst); 171 172 if (NewIter == DeadInst->getIterator()) 173 NewIter = DeadInst->eraseFromParent(); 174 else 175 DeadInst->eraseFromParent(); 176 } while (!NowDeadInsts.empty()); 177 *BBI = NewIter; 178 // Pop dead entries from back of ThrowableInst till we find an alive entry. 179 while (!ThrowableInst.empty() && !ThrowableInst.back().second) 180 ThrowableInst.pop_back(); 181 } 182 183 /// Does this instruction write some memory? This only returns true for things 184 /// that we can analyze with other helpers below. 185 static bool hasAnalyzableMemoryWrite(Instruction *I, 186 const TargetLibraryInfo &TLI) { 187 if (isa<StoreInst>(I)) 188 return true; 189 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 190 switch (II->getIntrinsicID()) { 191 default: 192 return false; 193 case Intrinsic::memset: 194 case Intrinsic::memmove: 195 case Intrinsic::memcpy: 196 case Intrinsic::memcpy_element_unordered_atomic: 197 case Intrinsic::memmove_element_unordered_atomic: 198 case Intrinsic::memset_element_unordered_atomic: 199 case Intrinsic::init_trampoline: 200 case Intrinsic::lifetime_end: 201 return true; 202 } 203 } 204 if (auto *CB = dyn_cast<CallBase>(I)) { 205 if (Function *F = CB->getCalledFunction()) { 206 LibFunc LF; 207 if (TLI.getLibFunc(*F, LF) && TLI.has(LF)) { 208 switch (LF) { 209 case LibFunc_strcpy: 210 case LibFunc_strncpy: 211 case LibFunc_strcat: 212 case LibFunc_strncat: 213 return true; 214 default: 215 return false; 216 } 217 } 218 } 219 } 220 return false; 221 } 222 223 /// Return a Location stored to by the specified instruction. If isRemovable 224 /// returns true, this function and getLocForRead completely describe the memory 225 /// operations for this instruction. 226 static MemoryLocation getLocForWrite(Instruction *Inst) { 227 228 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 229 return MemoryLocation::get(SI); 230 231 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) { 232 // memcpy/memmove/memset. 233 MemoryLocation Loc = MemoryLocation::getForDest(MI); 234 return Loc; 235 } 236 237 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 238 switch (II->getIntrinsicID()) { 239 default: 240 return MemoryLocation(); // Unhandled intrinsic. 241 case Intrinsic::init_trampoline: 242 return MemoryLocation(II->getArgOperand(0)); 243 case Intrinsic::lifetime_end: { 244 uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); 245 return MemoryLocation(II->getArgOperand(1), Len); 246 } 247 } 248 } 249 if (auto *CB = dyn_cast<CallBase>(Inst)) 250 // All the supported TLI functions so far happen to have dest as their 251 // first argument. 252 return MemoryLocation(CB->getArgOperand(0)); 253 return MemoryLocation(); 254 } 255 256 /// Return the location read by the specified "hasAnalyzableMemoryWrite" 257 /// instruction if any. 258 static MemoryLocation getLocForRead(Instruction *Inst, 259 const TargetLibraryInfo &TLI) { 260 assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); 261 262 // The only instructions that both read and write are the mem transfer 263 // instructions (memcpy/memmove). 
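  // For example (illustrative):
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i1 false)
  // reads the location (%src, 16), while getLocForWrite above reports the
  // written location (%dst, 16).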
264 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst)) 265 return MemoryLocation::getForSource(MTI); 266 return MemoryLocation(); 267 } 268 269 /// If the value of this instruction and the memory it writes to is unused, may 270 /// we delete this instruction? 271 static bool isRemovable(Instruction *I) { 272 // Don't remove volatile/atomic stores. 273 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 274 return SI->isUnordered(); 275 276 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 277 switch (II->getIntrinsicID()) { 278 default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); 279 case Intrinsic::lifetime_end: 280 // Never remove dead lifetime_end's, e.g. because it is followed by a 281 // free. 282 return false; 283 case Intrinsic::init_trampoline: 284 // Always safe to remove init_trampoline. 285 return true; 286 case Intrinsic::memset: 287 case Intrinsic::memmove: 288 case Intrinsic::memcpy: 289 // Don't remove volatile memory intrinsics. 290 return !cast<MemIntrinsic>(II)->isVolatile(); 291 case Intrinsic::memcpy_element_unordered_atomic: 292 case Intrinsic::memmove_element_unordered_atomic: 293 case Intrinsic::memset_element_unordered_atomic: 294 return true; 295 } 296 } 297 298 // note: only get here for calls with analyzable writes - i.e. libcalls 299 if (auto *CB = dyn_cast<CallBase>(I)) 300 return CB->use_empty(); 301 302 return false; 303 } 304 305 /// Returns true if the end of this instruction can be safely shortened in 306 /// length. 307 static bool isShortenableAtTheEnd(Instruction *I) { 308 // Don't shorten stores for now 309 if (isa<StoreInst>(I)) 310 return false; 311 312 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 313 switch (II->getIntrinsicID()) { 314 default: return false; 315 case Intrinsic::memset: 316 case Intrinsic::memcpy: 317 case Intrinsic::memcpy_element_unordered_atomic: 318 case Intrinsic::memset_element_unordered_atomic: 319 // Do shorten memory intrinsics. 320 // FIXME: Add memmove if it's also safe to transform. 321 return true; 322 } 323 } 324 325 // Don't shorten libcalls calls for now. 326 327 return false; 328 } 329 330 /// Returns true if the beginning of this instruction can be safely shortened 331 /// in length. 332 static bool isShortenableAtTheBeginning(Instruction *I) { 333 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be 334 // easily done by offsetting the source address. 335 return isa<AnyMemSetInst>(I); 336 } 337 338 /// Return the pointer that is being written to. 
339 static Value *getStoredPointerOperand(Instruction *I) { 340 //TODO: factor this to reuse getLocForWrite 341 MemoryLocation Loc = getLocForWrite(I); 342 assert(Loc.Ptr && 343 "unable to find pointer written for analyzable instruction?"); 344 // TODO: most APIs don't expect const Value * 345 return const_cast<Value*>(Loc.Ptr); 346 } 347 348 static uint64_t getPointerSize(const Value *V, const DataLayout &DL, 349 const TargetLibraryInfo &TLI, 350 const Function *F) { 351 uint64_t Size; 352 ObjectSizeOpts Opts; 353 Opts.NullIsUnknownSize = NullPointerIsDefined(F); 354 355 if (getObjectSize(V, Size, DL, &TLI, Opts)) 356 return Size; 357 return MemoryLocation::UnknownSize; 358 } 359 360 namespace { 361 362 enum OverwriteResult { 363 OW_Begin, 364 OW_Complete, 365 OW_End, 366 OW_PartialEarlierWithFullLater, 367 OW_Unknown 368 }; 369 370 } // end anonymous namespace 371 372 /// Return 'OW_Complete' if a store to the 'Later' location completely 373 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the 374 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the 375 /// beginning of the 'Earlier' location is overwritten by 'Later'. 376 /// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was 377 /// overwritten by a latter (smaller) store which doesn't write outside the big 378 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. 379 static OverwriteResult isOverwrite(const MemoryLocation &Later, 380 const MemoryLocation &Earlier, 381 const DataLayout &DL, 382 const TargetLibraryInfo &TLI, 383 int64_t &EarlierOff, int64_t &LaterOff, 384 Instruction *DepWrite, 385 InstOverlapIntervalsTy &IOL, 386 AliasAnalysis &AA, 387 const Function *F) { 388 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll 389 // get imprecise values here, though (except for unknown sizes). 390 if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) 391 return OW_Unknown; 392 393 const uint64_t LaterSize = Later.Size.getValue(); 394 const uint64_t EarlierSize = Earlier.Size.getValue(); 395 396 const Value *P1 = Earlier.Ptr->stripPointerCasts(); 397 const Value *P2 = Later.Ptr->stripPointerCasts(); 398 399 // If the start pointers are the same, we just have to compare sizes to see if 400 // the later store was larger than the earlier store. 401 if (P1 == P2 || AA.isMustAlias(P1, P2)) { 402 // Make sure that the Later size is >= the Earlier size. 403 if (LaterSize >= EarlierSize) 404 return OW_Complete; 405 } 406 407 // Check to see if the later store is to the entire object (either a global, 408 // an alloca, or a byval/inalloca argument). If so, then it clearly 409 // overwrites any other store to the same object. 410 const Value *UO1 = GetUnderlyingObject(P1, DL), 411 *UO2 = GetUnderlyingObject(P2, DL); 412 413 // If we can't resolve the same pointers to the same object, then we can't 414 // analyze them at all. 415 if (UO1 != UO2) 416 return OW_Unknown; 417 418 // If the "Later" store is to a recognizable object, get its size. 419 uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F); 420 if (ObjectSize != MemoryLocation::UnknownSize) 421 if (ObjectSize == LaterSize && ObjectSize >= EarlierSize) 422 return OW_Complete; 423 424 // Okay, we have stores to two completely different pointers. Try to 425 // decompose the pointer into a "base + constant_offset" form. If the base 426 // pointers are equal, then we can reason about the two stores. 
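  // Illustrative sketch (assumed IR, not from this pass): with
  //   %p1 = getelementptr i8, i8* %buf, i64 0   ; Earlier, writes 8 bytes
  //   %p2 = getelementptr i8, i8* %buf, i64 4   ; Later, writes 4 bytes
  // both pointers decompose to base %buf, giving EarlierOff = 0 and
  // LaterOff = 4, so the interval logic below sees Earlier as [0, 8) and
  // Later as [4, 8).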
427 EarlierOff = 0; 428 LaterOff = 0; 429 const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); 430 const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); 431 432 // If the base pointers still differ, we have two completely different stores. 433 if (BP1 != BP2) 434 return OW_Unknown; 435 436 // The later store completely overlaps the earlier store if: 437 // 438 // 1. Both start at the same offset and the later one's size is greater than 439 // or equal to the earlier one's, or 440 // 441 // |--earlier--| 442 // |-- later --| 443 // 444 // 2. The earlier store has an offset greater than the later offset, but which 445 // still lies completely within the later store. 446 // 447 // |--earlier--| 448 // |----- later ------| 449 // 450 // We have to be careful here as *Off is signed while *.Size is unsigned. 451 if (EarlierOff >= LaterOff && 452 LaterSize >= EarlierSize && 453 uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) 454 return OW_Complete; 455 456 // We may now overlap, although the overlap is not complete. There might also 457 // be other incomplete overlaps, and together, they might cover the complete 458 // earlier write. 459 // Note: The correctness of this logic depends on the fact that this function 460 // is not even called providing DepWrite when there are any intervening reads. 461 if (EnablePartialOverwriteTracking && 462 LaterOff < int64_t(EarlierOff + EarlierSize) && 463 int64_t(LaterOff + LaterSize) >= EarlierOff) { 464 465 // Insert our part of the overlap into the map. 466 auto &IM = IOL[DepWrite]; 467 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff 468 << ", " << int64_t(EarlierOff + EarlierSize) 469 << ") Later [" << LaterOff << ", " 470 << int64_t(LaterOff + LaterSize) << ")\n"); 471 472 // Make sure that we only insert non-overlapping intervals and combine 473 // adjacent intervals. The intervals are stored in the map with the ending 474 // offset as the key (in the half-open sense) and the starting offset as 475 // the value. 476 int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize; 477 478 // Find any intervals ending at, or after, LaterIntStart which start 479 // before LaterIntEnd. 480 auto ILI = IM.lower_bound(LaterIntStart); 481 if (ILI != IM.end() && ILI->second <= LaterIntEnd) { 482 // This existing interval is overlapped with the current store somewhere 483 // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing 484 // intervals and adjusting our start and end. 485 LaterIntStart = std::min(LaterIntStart, ILI->second); 486 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 487 ILI = IM.erase(ILI); 488 489 // Continue erasing and adjusting our end in case other previous 490 // intervals are also overlapped with the current store. 
      //
      //     |--- earlier 1 ---|  |--- earlier 2 ---|
      //         |------- later---------|
      //
      while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
        assert(ILI->second > LaterIntStart && "Unexpected interval");
        LaterIntEnd = std::max(LaterIntEnd, ILI->first);
        ILI = IM.erase(ILI);
      }
    }

    IM[LaterIntEnd] = LaterIntStart;

    ILI = IM.begin();
    if (ILI->second <= EarlierOff &&
        ILI->first >= int64_t(EarlierOff + EarlierSize)) {
      LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
                        << EarlierOff << ", "
                        << int64_t(EarlierOff + EarlierSize)
                        << ") Composite Later [" << ILI->second << ", "
                        << ILI->first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // Check for an earlier store which writes to all the memory locations that
  // the later store writes to.
  if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
      int64_t(EarlierOff + EarlierSize) > LaterOff &&
      uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of an earlier store ["
                      << EarlierOff << ", "
                      << int64_t(EarlierOff + EarlierSize)
                      << ") by a later store [" << LaterOff << ", "
                      << int64_t(LaterOff + LaterSize) << ")\n");
    // TODO: Maybe come up with a better name?
    return OW_PartialEarlierWithFullLater;
  }

  // Another interesting case is if the later store overwrites the end of the
  // earlier store.
  //
  //    |--earlier--|
  //             |--   later   --|
  //
  // In this case we may want to trim the size of earlier to avoid generating
  // writes to addresses which will definitely be overwritten later.
  if (!EnablePartialOverwriteTracking &&
      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
       int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
    return OW_End;

  // Finally, we also need to check if the later store overwrites the beginning
  // of the earlier store.
  //
  //             |--earlier--|
  //    |--   later   --|
  //
  // In this case we may want to move the destination address and trim the size
  // of earlier to avoid generating writes to addresses which will definitely
  // be overwritten later.
  if (!EnablePartialOverwriteTracking &&
      (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
    assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
           "Expect to be handled as OW_Complete");
    return OW_Begin;
  }
  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}

/// If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense. Consider this case:
///
///   memmove(A <- B)
///   memmove(A <- A)
///
/// In this case, the second store to A does not make the first store to A dead.
/// The usual situation isn't an explicit A<-A store like this (which can be
/// trivially removed) but a case where two pointers may alias.
///
/// This function detects when it is unsafe to remove a dependent instruction
/// because the DSE inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
                               const MemoryLocation &InstStoreLoc,
                               Instruction *DepWrite,
                               const TargetLibraryInfo &TLI,
                               AliasAnalysis &AA) {
  // Self reads can only happen for instructions that read memory.
Get the 582 // location read. 583 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 584 if (!InstReadLoc.Ptr) 585 return false; // Not a reading instruction. 586 587 // If the read and written loc obviously don't alias, it isn't a read. 588 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) 589 return false; 590 591 if (isa<AnyMemCpyInst>(Inst)) { 592 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) 593 // but in practice memcpy(A <- B) either means that A and B are disjoint or 594 // are equal (i.e. there are not partial overlaps). Given that, if we have: 595 // 596 // memcpy/memmove(A <- B) // DepWrite 597 // memcpy(A <- B) // Inst 598 // 599 // with Inst reading/writing a >= size than DepWrite, we can reason as 600 // follows: 601 // 602 // - If A == B then both the copies are no-ops, so the DepWrite can be 603 // removed. 604 // - If A != B then A and B are disjoint locations in Inst. Since 605 // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. 606 // Therefore DepWrite can be removed. 607 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 608 609 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 610 return false; 611 } 612 613 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 614 // then it can't be considered dead. 615 return true; 616 } 617 618 /// Returns true if the memory which is accessed by the second instruction is not 619 /// modified between the first and the second instruction. 620 /// Precondition: Second instruction must be dominated by the first 621 /// instruction. 622 static bool memoryIsNotModifiedBetween(Instruction *FirstI, 623 Instruction *SecondI, 624 AliasAnalysis *AA, 625 const DataLayout &DL, 626 DominatorTree *DT) { 627 // Do a backwards scan through the CFG from SecondI to FirstI. Look for 628 // instructions which can modify the memory location accessed by SecondI. 629 // 630 // While doing the walk keep track of the address to check. It might be 631 // different in different basic blocks due to PHI translation. 632 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>; 633 SmallVector<BlockAddressPair, 16> WorkList; 634 // Keep track of the address we visited each block with. Bail out if we 635 // visit a block with different addresses. 636 DenseMap<BasicBlock *, Value *> Visited; 637 638 BasicBlock::iterator FirstBBI(FirstI); 639 ++FirstBBI; 640 BasicBlock::iterator SecondBBI(SecondI); 641 BasicBlock *FirstBB = FirstI->getParent(); 642 BasicBlock *SecondBB = SecondI->getParent(); 643 MemoryLocation MemLoc = MemoryLocation::get(SecondI); 644 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); 645 646 // Start checking the SecondBB. 647 WorkList.push_back( 648 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr))); 649 bool isFirstBlock = true; 650 651 // Check all blocks going backward until we reach the FirstBB. 652 while (!WorkList.empty()) { 653 BlockAddressPair Current = WorkList.pop_back_val(); 654 BasicBlock *B = Current.first; 655 PHITransAddr &Addr = Current.second; 656 Value *Ptr = Addr.getAddr(); 657 658 // Ignore instructions before FirstI if this is the FirstBB. 659 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); 660 661 BasicBlock::iterator EI; 662 if (isFirstBlock) { 663 // Ignore instructions after SecondI if this is the first visit of SecondBB. 
664 assert(B == SecondBB && "first block is not the store block"); 665 EI = SecondBBI; 666 isFirstBlock = false; 667 } else { 668 // It's not SecondBB or (in case of a loop) the second visit of SecondBB. 669 // In this case we also have to look at instructions after SecondI. 670 EI = B->end(); 671 } 672 for (; BI != EI; ++BI) { 673 Instruction *I = &*BI; 674 if (I->mayWriteToMemory() && I != SecondI) 675 if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr)))) 676 return false; 677 } 678 if (B != FirstBB) { 679 assert(B != &FirstBB->getParent()->getEntryBlock() && 680 "Should not hit the entry block because SI must be dominated by LI"); 681 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { 682 PHITransAddr PredAddr = Addr; 683 if (PredAddr.NeedsPHITranslationFromBlock(B)) { 684 if (!PredAddr.IsPotentiallyPHITranslatable()) 685 return false; 686 if (PredAddr.PHITranslateValue(B, *PredI, DT, false)) 687 return false; 688 } 689 Value *TranslatedPtr = PredAddr.getAddr(); 690 auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr)); 691 if (!Inserted.second) { 692 // We already visited this block before. If it was with a different 693 // address - bail out! 694 if (TranslatedPtr != Inserted.first->second) 695 return false; 696 // ... otherwise just skip it. 697 continue; 698 } 699 WorkList.push_back(std::make_pair(*PredI, PredAddr)); 700 } 701 } 702 } 703 return true; 704 } 705 706 /// Find all blocks that will unconditionally lead to the block BB and append 707 /// them to F. 708 static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, 709 BasicBlock *BB, DominatorTree *DT) { 710 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { 711 BasicBlock *Pred = *I; 712 if (Pred == BB) continue; 713 Instruction *PredTI = Pred->getTerminator(); 714 if (PredTI->getNumSuccessors() != 1) 715 continue; 716 717 if (DT->isReachableFromEntry(Pred)) 718 Blocks.push_back(Pred); 719 } 720 } 721 722 /// Handle frees of entire structures whose dependency is a store 723 /// to a field of that structure. 724 static bool handleFree(CallInst *F, AliasAnalysis *AA, 725 MemoryDependenceResults *MD, DominatorTree *DT, 726 const TargetLibraryInfo *TLI, 727 InstOverlapIntervalsTy &IOL, 728 MapVector<Instruction *, bool> &ThrowableInst) { 729 bool MadeChange = false; 730 731 MemoryLocation Loc = MemoryLocation(F->getOperand(0)); 732 SmallVector<BasicBlock *, 16> Blocks; 733 Blocks.push_back(F->getParent()); 734 const DataLayout &DL = F->getModule()->getDataLayout(); 735 736 while (!Blocks.empty()) { 737 BasicBlock *BB = Blocks.pop_back_val(); 738 Instruction *InstPt = BB->getTerminator(); 739 if (BB == F->getParent()) InstPt = F; 740 741 MemDepResult Dep = 742 MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); 743 while (Dep.isDef() || Dep.isClobber()) { 744 Instruction *Dependency = Dep.getInst(); 745 if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || 746 !isRemovable(Dependency)) 747 break; 748 749 Value *DepPointer = 750 GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); 751 752 // Check for aliasing. 753 if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) 754 break; 755 756 LLVM_DEBUG( 757 dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " 758 << *Dependency << '\n'); 759 760 // DCE instructions only used to calculate that store. 
761 BasicBlock::iterator BBI(Dependency); 762 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, 763 ThrowableInst); 764 ++NumFastStores; 765 MadeChange = true; 766 767 // Inst's old Dependency is now deleted. Compute the next dependency, 768 // which may also be dead, as in 769 // s[0] = 0; 770 // s[1] = 0; // This has just been deleted. 771 // free(s); 772 Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB); 773 } 774 775 if (Dep.isNonLocal()) 776 findUnconditionalPreds(Blocks, BB, DT); 777 } 778 779 return MadeChange; 780 } 781 782 /// Check to see if the specified location may alias any of the stack objects in 783 /// the DeadStackObjects set. If so, they become live because the location is 784 /// being loaded. 785 static void removeAccessedObjects(const MemoryLocation &LoadedLoc, 786 SmallSetVector<const Value *, 16> &DeadStackObjects, 787 const DataLayout &DL, AliasAnalysis *AA, 788 const TargetLibraryInfo *TLI, 789 const Function *F) { 790 const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); 791 792 // A constant can't be in the dead pointer set. 793 if (isa<Constant>(UnderlyingPointer)) 794 return; 795 796 // If the kill pointer can be easily reduced to an alloca, don't bother doing 797 // extraneous AA queries. 798 if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { 799 DeadStackObjects.remove(UnderlyingPointer); 800 return; 801 } 802 803 // Remove objects that could alias LoadedLoc. 804 DeadStackObjects.remove_if([&](const Value *I) { 805 // See if the loaded location could alias the stack location. 806 MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F)); 807 return !AA->isNoAlias(StackLoc, LoadedLoc); 808 }); 809 } 810 811 /// Remove dead stores to stack-allocated locations in the function end block. 812 /// Ex: 813 /// %A = alloca i32 814 /// ... 815 /// store i32 1, i32* %A 816 /// ret void 817 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, 818 MemoryDependenceResults *MD, 819 const TargetLibraryInfo *TLI, 820 InstOverlapIntervalsTy &IOL, 821 MapVector<Instruction *, bool> &ThrowableInst) { 822 bool MadeChange = false; 823 824 // Keep track of all of the stack objects that are dead at the end of the 825 // function. 826 SmallSetVector<const Value*, 16> DeadStackObjects; 827 828 // Find all of the alloca'd pointers in the entry block. 829 BasicBlock &Entry = BB.getParent()->front(); 830 for (Instruction &I : Entry) { 831 if (isa<AllocaInst>(&I)) 832 DeadStackObjects.insert(&I); 833 834 // Okay, so these are dead heap objects, but if the pointer never escapes 835 // then it's leaked by this function anyways. 836 else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) 837 DeadStackObjects.insert(&I); 838 } 839 840 // Treat byval or inalloca arguments the same, stores to them are dead at the 841 // end of the function. 842 for (Argument &AI : BB.getParent()->args()) 843 if (AI.hasPassPointeeByValueAttr()) 844 DeadStackObjects.insert(&AI); 845 846 const DataLayout &DL = BB.getModule()->getDataLayout(); 847 848 // Scan the basic block backwards 849 for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ 850 --BBI; 851 852 // If we find a store, check to see if it points into a dead stack value. 
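    // Illustrative counter-example (assumed IR, not from this pass): in
    //   %a = alloca i32
    //   store i32 1, i32* %a
    //   call void @use(i32* %a)
    //   ret void
    // the backwards scan reaches the call first; the call may read %a, so %a
    // is dropped from DeadStackObjects further below and the store is kept.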
853 if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { 854 // See through pointer-to-pointer bitcasts 855 SmallVector<const Value *, 4> Pointers; 856 GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); 857 858 // Stores to stack values are valid candidates for removal. 859 bool AllDead = true; 860 for (const Value *Pointer : Pointers) 861 if (!DeadStackObjects.count(Pointer)) { 862 AllDead = false; 863 break; 864 } 865 866 if (AllDead) { 867 Instruction *Dead = &*BBI; 868 869 LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " 870 << *Dead << "\n Objects: "; 871 for (SmallVectorImpl<const Value *>::iterator I = 872 Pointers.begin(), 873 E = Pointers.end(); 874 I != E; ++I) { 875 dbgs() << **I; 876 if (std::next(I) != E) 877 dbgs() << ", "; 878 } dbgs() 879 << '\n'); 880 881 // DCE instructions only used to calculate that store. 882 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, ThrowableInst, 883 &DeadStackObjects); 884 ++NumFastStores; 885 MadeChange = true; 886 continue; 887 } 888 } 889 890 // Remove any dead non-memory-mutating instructions. 891 if (isInstructionTriviallyDead(&*BBI, TLI)) { 892 LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " 893 << *&*BBI << '\n'); 894 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, ThrowableInst, 895 &DeadStackObjects); 896 ++NumFastOther; 897 MadeChange = true; 898 continue; 899 } 900 901 if (isa<AllocaInst>(BBI)) { 902 // Remove allocas from the list of dead stack objects; there can't be 903 // any references before the definition. 904 DeadStackObjects.remove(&*BBI); 905 continue; 906 } 907 908 if (auto *Call = dyn_cast<CallBase>(&*BBI)) { 909 // Remove allocation function calls from the list of dead stack objects; 910 // there can't be any references before the definition. 911 if (isAllocLikeFn(&*BBI, TLI)) 912 DeadStackObjects.remove(&*BBI); 913 914 // If this call does not access memory, it can't be loading any of our 915 // pointers. 916 if (AA->doesNotAccessMemory(Call)) 917 continue; 918 919 // If the call might load from any of our allocas, then any store above 920 // the call is live. 921 DeadStackObjects.remove_if([&](const Value *I) { 922 // See if the call site touches the value. 923 return isRefSet(AA->getModRefInfo( 924 Call, I, getPointerSize(I, DL, *TLI, BB.getParent()))); 925 }); 926 927 // If all of the allocas were clobbered by the call then we're not going 928 // to find anything else to process. 929 if (DeadStackObjects.empty()) 930 break; 931 932 continue; 933 } 934 935 // We can remove the dead stores, irrespective of the fence and its ordering 936 // (release/acquire/seq_cst). Fences only constraints the ordering of 937 // already visible stores, it does not make a store visible to other 938 // threads. So, skipping over a fence does not change a store from being 939 // dead. 940 if (isa<FenceInst>(*BBI)) 941 continue; 942 943 MemoryLocation LoadedLoc; 944 945 // If we encounter a use of the pointer, it is no longer considered dead 946 if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { 947 if (!L->isUnordered()) // Be conservative with atomic/volatile load 948 break; 949 LoadedLoc = MemoryLocation::get(L); 950 } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { 951 LoadedLoc = MemoryLocation::get(V); 952 } else if (!BBI->mayReadFromMemory()) { 953 // Instruction doesn't read memory. Note that stores that weren't removed 954 // above will hit this case. 955 continue; 956 } else { 957 // Unknown inst; assume it clobbers everything. 
958 break; 959 } 960 961 // Remove any allocas from the DeadPointer set that are loaded, as this 962 // makes any stores above the access live. 963 removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI, BB.getParent()); 964 965 // If all of the allocas were clobbered by the access then we're not going 966 // to find anything else to process. 967 if (DeadStackObjects.empty()) 968 break; 969 } 970 971 return MadeChange; 972 } 973 974 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, 975 int64_t &EarlierSize, int64_t LaterOffset, 976 int64_t LaterSize, bool IsOverwriteEnd) { 977 // TODO: base this on the target vector size so that if the earlier 978 // store was too small to get vector writes anyway then its likely 979 // a good idea to shorten it 980 // Power of 2 vector writes are probably always a bad idea to optimize 981 // as any store/memset/memcpy is likely using vector instructions so 982 // shortening it to not vector size is likely to be slower 983 auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite); 984 unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment(); 985 if (!IsOverwriteEnd) 986 LaterOffset = int64_t(LaterOffset + LaterSize); 987 988 if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) && 989 !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0)) 990 return false; 991 992 int64_t NewLength = IsOverwriteEnd 993 ? LaterOffset - EarlierOffset 994 : EarlierSize - (LaterOffset - EarlierOffset); 995 996 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) { 997 // When shortening an atomic memory intrinsic, the newly shortened 998 // length must remain an integer multiple of the element size. 999 const uint32_t ElementSize = AMI->getElementSizeInBytes(); 1000 if (0 != NewLength % ElementSize) 1001 return false; 1002 } 1003 1004 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " 1005 << (IsOverwriteEnd ? 
"END" : "BEGIN") << ": " 1006 << *EarlierWrite << "\n KILLER (offset " << LaterOffset 1007 << ", " << EarlierSize << ")\n"); 1008 1009 Value *EarlierWriteLength = EarlierIntrinsic->getLength(); 1010 Value *TrimmedLength = 1011 ConstantInt::get(EarlierWriteLength->getType(), NewLength); 1012 EarlierIntrinsic->setLength(TrimmedLength); 1013 1014 EarlierSize = NewLength; 1015 if (!IsOverwriteEnd) { 1016 int64_t OffsetMoved = (LaterOffset - EarlierOffset); 1017 Value *Indices[1] = { 1018 ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)}; 1019 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( 1020 EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(), 1021 EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite); 1022 NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc()); 1023 EarlierIntrinsic->setDest(NewDestGEP); 1024 EarlierOffset = EarlierOffset + OffsetMoved; 1025 } 1026 return true; 1027 } 1028 1029 static bool tryToShortenEnd(Instruction *EarlierWrite, 1030 OverlapIntervalsTy &IntervalMap, 1031 int64_t &EarlierStart, int64_t &EarlierSize) { 1032 if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) 1033 return false; 1034 1035 OverlapIntervalsTy::iterator OII = --IntervalMap.end(); 1036 int64_t LaterStart = OII->second; 1037 int64_t LaterSize = OII->first - LaterStart; 1038 1039 if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize && 1040 LaterStart + LaterSize >= EarlierStart + EarlierSize) { 1041 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1042 LaterSize, true)) { 1043 IntervalMap.erase(OII); 1044 return true; 1045 } 1046 } 1047 return false; 1048 } 1049 1050 static bool tryToShortenBegin(Instruction *EarlierWrite, 1051 OverlapIntervalsTy &IntervalMap, 1052 int64_t &EarlierStart, int64_t &EarlierSize) { 1053 if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) 1054 return false; 1055 1056 OverlapIntervalsTy::iterator OII = IntervalMap.begin(); 1057 int64_t LaterStart = OII->second; 1058 int64_t LaterSize = OII->first - LaterStart; 1059 1060 if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) { 1061 assert(LaterStart + LaterSize < EarlierStart + EarlierSize && 1062 "Should have been handled as OW_Complete"); 1063 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1064 LaterSize, false)) { 1065 IntervalMap.erase(OII); 1066 return true; 1067 } 1068 } 1069 return false; 1070 } 1071 1072 static bool removePartiallyOverlappedStores(AliasAnalysis *AA, 1073 const DataLayout &DL, 1074 InstOverlapIntervalsTy &IOL) { 1075 bool Changed = false; 1076 for (auto OI : IOL) { 1077 Instruction *EarlierWrite = OI.first; 1078 MemoryLocation Loc = getLocForWrite(EarlierWrite); 1079 assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); 1080 1081 const Value *Ptr = Loc.Ptr->stripPointerCasts(); 1082 int64_t EarlierStart = 0; 1083 int64_t EarlierSize = int64_t(Loc.Size.getValue()); 1084 GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); 1085 OverlapIntervalsTy &IntervalMap = OI.second; 1086 Changed |= 1087 tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1088 if (IntervalMap.empty()) 1089 continue; 1090 Changed |= 1091 tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1092 } 1093 return Changed; 1094 } 1095 1096 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, 1097 AliasAnalysis *AA, MemoryDependenceResults *MD, 1098 const DataLayout &DL, 1099 const 
TargetLibraryInfo *TLI, 1100 InstOverlapIntervalsTy &IOL, 1101 MapVector<Instruction *, bool> &ThrowableInst, 1102 DominatorTree *DT) { 1103 // Must be a store instruction. 1104 StoreInst *SI = dyn_cast<StoreInst>(Inst); 1105 if (!SI) 1106 return false; 1107 1108 // If we're storing the same value back to a pointer that we just loaded from, 1109 // then the store can be removed. 1110 if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { 1111 if (SI->getPointerOperand() == DepLoad->getPointerOperand() && 1112 isRemovable(SI) && 1113 memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) { 1114 1115 LLVM_DEBUG( 1116 dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " 1117 << *DepLoad << "\n STORE: " << *SI << '\n'); 1118 1119 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1120 ++NumRedundantStores; 1121 return true; 1122 } 1123 } 1124 1125 // Remove null stores into the calloc'ed objects 1126 Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); 1127 if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { 1128 Instruction *UnderlyingPointer = 1129 dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL)); 1130 1131 if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && 1132 memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) { 1133 LLVM_DEBUG( 1134 dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " 1135 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); 1136 1137 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1138 ++NumRedundantStores; 1139 return true; 1140 } 1141 } 1142 return false; 1143 } 1144 1145 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, 1146 MemoryDependenceResults *MD, DominatorTree *DT, 1147 const TargetLibraryInfo *TLI) { 1148 const DataLayout &DL = BB.getModule()->getDataLayout(); 1149 bool MadeChange = false; 1150 1151 MapVector<Instruction *, bool> ThrowableInst; 1152 1153 // A map of interval maps representing partially-overwritten value parts. 1154 InstOverlapIntervalsTy IOL; 1155 1156 // Do a top-down walk on the BB. 1157 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { 1158 // Handle 'free' calls specially. 1159 if (CallInst *F = isFreeCall(&*BBI, TLI)) { 1160 MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, ThrowableInst); 1161 // Increment BBI after handleFree has potentially deleted instructions. 1162 // This ensures we maintain a valid iterator. 1163 ++BBI; 1164 continue; 1165 } 1166 1167 Instruction *Inst = &*BBI++; 1168 1169 if (Inst->mayThrow()) { 1170 ThrowableInst[Inst] = true; 1171 continue; 1172 } 1173 1174 // Check to see if Inst writes to memory. If not, continue. 1175 if (!hasAnalyzableMemoryWrite(Inst, *TLI)) 1176 continue; 1177 1178 // eliminateNoopStore will update in iterator, if necessary. 1179 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, 1180 ThrowableInst, DT)) { 1181 MadeChange = true; 1182 continue; 1183 } 1184 1185 // If we find something that writes memory, get its memory dependence. 1186 MemDepResult InstDep = MD->getDependency(Inst); 1187 1188 // Ignore any store where we can't find a local dependence. 1189 // FIXME: cross-block DSE would be fun. :) 1190 if (!InstDep.isDef() && !InstDep.isClobber()) 1191 continue; 1192 1193 // Figure out what location is being stored to. 1194 MemoryLocation Loc = getLocForWrite(Inst); 1195 1196 // If we didn't get a useful location, fail. 
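    // (getLocForWrite returns a MemoryLocation with a null Ptr for any write
    // it cannot describe, so such instructions are simply skipped here.)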
1197 if (!Loc.Ptr) 1198 continue; 1199 1200 // Loop until we find a store we can eliminate or a load that 1201 // invalidates the analysis. Without an upper bound on the number of 1202 // instructions examined, this analysis can become very time-consuming. 1203 // However, the potential gain diminishes as we process more instructions 1204 // without eliminating any of them. Therefore, we limit the number of 1205 // instructions we look at. 1206 auto Limit = MD->getDefaultBlockScanLimit(); 1207 while (InstDep.isDef() || InstDep.isClobber()) { 1208 // Get the memory clobbered by the instruction we depend on. MemDep will 1209 // skip any instructions that 'Loc' clearly doesn't interact with. If we 1210 // end up depending on a may- or must-aliased load, then we can't optimize 1211 // away the store and we bail out. However, if we depend on something 1212 // that overwrites the memory location we *can* potentially optimize it. 1213 // 1214 // Find out what memory location the dependent instruction stores. 1215 Instruction *DepWrite = InstDep.getInst(); 1216 if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) 1217 break; 1218 MemoryLocation DepLoc = getLocForWrite(DepWrite); 1219 // If we didn't get a useful location, or if it isn't a size, bail out. 1220 if (!DepLoc.Ptr) 1221 break; 1222 1223 // Find the last throwable instruction not removed by call to 1224 // deleteDeadInstruction. 1225 Instruction *LastThrowing = nullptr; 1226 if (!ThrowableInst.empty()) 1227 LastThrowing = ThrowableInst.back().first; 1228 1229 // Make sure we don't look past a call which might throw. This is an 1230 // issue because MemoryDependenceAnalysis works in the wrong direction: 1231 // it finds instructions which dominate the current instruction, rather than 1232 // instructions which are post-dominated by the current instruction. 1233 // 1234 // If the underlying object is a non-escaping memory allocation, any store 1235 // to it is dead along the unwind edge. Otherwise, we need to preserve 1236 // the store. 1237 if (LastThrowing && DepWrite->comesBefore(LastThrowing)) { 1238 const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL); 1239 bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying); 1240 if (!IsStoreDeadOnUnwind) { 1241 // We're looking for a call to an allocation function 1242 // where the allocation doesn't escape before the last 1243 // throwing instruction; PointerMayBeCaptured 1244 // reasonably fast approximation. 1245 IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) && 1246 !PointerMayBeCaptured(Underlying, false, true); 1247 } 1248 if (!IsStoreDeadOnUnwind) 1249 break; 1250 } 1251 1252 // If we find a write that is a) removable (i.e., non-volatile), b) is 1253 // completely obliterated by the store to 'Loc', and c) which we know that 1254 // 'Inst' doesn't load from, then we can remove it. 1255 // Also try to merge two stores if a later one only touches memory written 1256 // to by the earlier one. 1257 if (isRemovable(DepWrite) && 1258 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { 1259 int64_t InstWriteOffset, DepWriteOffset; 1260 OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, 1261 InstWriteOffset, DepWrite, IOL, *AA, 1262 BB.getParent()); 1263 if (OR == OW_Complete) { 1264 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite 1265 << "\n KILLER: " << *Inst << '\n'); 1266 1267 // Delete the store and now-dead instructions that feed it. 
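          // Illustrative sketch (assumed IR, not from this pass):
          //   store i32 1, i32* %p    ; DepWrite, removed here
          //   store i32 2, i32* %p    ; Inst, its location covers DepLoc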
1268 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1269 ThrowableInst); 1270 ++NumFastStores; 1271 MadeChange = true; 1272 1273 // We erased DepWrite; start over. 1274 InstDep = MD->getDependency(Inst); 1275 continue; 1276 } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) || 1277 ((OR == OW_Begin && 1278 isShortenableAtTheBeginning(DepWrite)))) { 1279 assert(!EnablePartialOverwriteTracking && "Do not expect to perform " 1280 "when partial-overwrite " 1281 "tracking is enabled"); 1282 // The overwrite result is known, so these must be known, too. 1283 int64_t EarlierSize = DepLoc.Size.getValue(); 1284 int64_t LaterSize = Loc.Size.getValue(); 1285 bool IsOverwriteEnd = (OR == OW_End); 1286 MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, 1287 InstWriteOffset, LaterSize, IsOverwriteEnd); 1288 } else if (EnablePartialStoreMerging && 1289 OR == OW_PartialEarlierWithFullLater) { 1290 auto *Earlier = dyn_cast<StoreInst>(DepWrite); 1291 auto *Later = dyn_cast<StoreInst>(Inst); 1292 if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && 1293 DL.typeSizeEqualsStoreSize( 1294 Earlier->getValueOperand()->getType()) && 1295 Later && isa<ConstantInt>(Later->getValueOperand()) && 1296 DL.typeSizeEqualsStoreSize( 1297 Later->getValueOperand()->getType()) && 1298 memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) { 1299 // If the store we find is: 1300 // a) partially overwritten by the store to 'Loc' 1301 // b) the later store is fully contained in the earlier one and 1302 // c) they both have a constant value 1303 // d) none of the two stores need padding 1304 // Merge the two stores, replacing the earlier store's value with a 1305 // merge of both values. 1306 // TODO: Deal with other constant types (vectors, etc), and probably 1307 // some mem intrinsics (if needed) 1308 1309 APInt EarlierValue = 1310 cast<ConstantInt>(Earlier->getValueOperand())->getValue(); 1311 APInt LaterValue = 1312 cast<ConstantInt>(Later->getValueOperand())->getValue(); 1313 unsigned LaterBits = LaterValue.getBitWidth(); 1314 assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); 1315 LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); 1316 1317 // Offset of the smaller store inside the larger store 1318 unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; 1319 unsigned LShiftAmount = 1320 DL.isBigEndian() 1321 ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits 1322 : BitOffsetDiff; 1323 APInt Mask = 1324 APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, 1325 LShiftAmount + LaterBits); 1326 // Clear the bits we'll be replacing, then OR with the smaller 1327 // store, shifted appropriately. 1328 APInt Merged = 1329 (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); 1330 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *DepWrite 1331 << "\n Later: " << *Inst 1332 << "\n Merged Value: " << Merged << '\n'); 1333 1334 auto *SI = new StoreInst( 1335 ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), 1336 Earlier->getPointerOperand(), false, Earlier->getAlign(), 1337 Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); 1338 1339 unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, 1340 LLVMContext::MD_alias_scope, 1341 LLVMContext::MD_noalias, 1342 LLVMContext::MD_nontemporal}; 1343 SI->copyMetadata(*DepWrite, MDToKeep); 1344 ++NumModifiedStores; 1345 1346 // Delete the old stores and now-dead instructions that feed them. 
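            // Illustrative sketch (assumed values, not from this pass): an i16
            // store of 0x0000 partially overwritten by an i8 store of 0xFF at
            // byte offset 1 merges, on a little-endian target, into an i16
            // store of 0xFF00, which is inserted above before both original
            // stores are deleted.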
            deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL,
                                  ThrowableInst);
            deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
                                  ThrowableInst);
            MadeChange = true;

            // We erased DepWrite and Inst (Loc); start over.
            break;
          }
        }
      }

      // If this is a may-aliased store that is clobbering the store value, we
      // can keep searching past it for another must-aliased pointer that
      // stores to the same location. For example, in:
      //   store -> P
      //   store -> Q
      //   store -> P
      // we can remove the first store to P even though we don't know if P and
      // Q alias.
      if (DepWrite == &BB.front()) break;

      // Can't look past this instruction if it might read 'Loc'.
      if (isRefSet(AA->getModRefInfo(DepWrite, Loc)))
        break;

      InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
                                             DepWrite->getIterator(), &BB,
                                             /*QueryInst=*/ nullptr, &Limit);
    }
  }

  if (EnablePartialOverwriteTracking)
    MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);

  // If this block ends in a return, unwind, or unreachable, all allocas are
  // dead at its end, which means stores to them are also dead.
  if (BB.getTerminator()->getNumSuccessors() == 0)
    MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, ThrowableInst);

  return MadeChange;
}

static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
                                MemoryDependenceResults *MD, DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  bool MadeChange = false;
  for (BasicBlock &BB : F)
    // Only check non-dead blocks. Dead blocks may have strange pointer
    // cycles that will confuse alias analysis.
    if (DT->isReachableFromEntry(&BB))
      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);

  return MadeChange;
}

namespace {
//=============================================================================
// MemorySSA backed dead store elimination.
//
// The code below implements dead store elimination using MemorySSA. It uses
// the following general approach: given a MemoryDef, walk upwards to find
// clobbering MemoryDefs that may be killed by the starting def. Then check
// that there are no uses that may read the location of the original MemoryDef
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking
//    upwards.
// 2. Check that there are no reads between DomAccess and the StartDef by
//    checking all uses starting at DomAccess and walking until we see
//    StartDef.
// 3. For each found DomDef (a DomAccess that is a MemoryDef), check that:
//   1. There are no barrier instructions between DomDef and StartDef (like
//      throws or stores with ordering constraints).
//   2. StartDef is executed whenever DomDef is executed.
//   3. StartDef completely overwrites DomDef.
// 4. Erase DomDef from the function and MemorySSA.

// Returns true if \p M is an intrinsic that does not read or write memory.
1426 bool isNoopIntrinsic(MemoryUseOrDef *M) { 1427 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) { 1428 switch (II->getIntrinsicID()) { 1429 case Intrinsic::lifetime_start: 1430 case Intrinsic::lifetime_end: 1431 case Intrinsic::invariant_end: 1432 case Intrinsic::launder_invariant_group: 1433 case Intrinsic::assume: 1434 return true; 1435 case Intrinsic::dbg_addr: 1436 case Intrinsic::dbg_declare: 1437 case Intrinsic::dbg_label: 1438 case Intrinsic::dbg_value: 1439 llvm_unreachable("Intrinsic should not be modeled in MemorySSA"); 1440 default: 1441 return false; 1442 } 1443 } 1444 return false; 1445 } 1446 1447 // Check if we can ignore \p D for DSE. 1448 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { 1449 Instruction *DI = D->getMemoryInst(); 1450 // Calls that only access inaccessible memory cannot read or write any memory 1451 // locations we consider for elimination. 1452 if (auto *CB = dyn_cast<CallBase>(DI)) 1453 if (CB->onlyAccessesInaccessibleMemory()) 1454 return true; 1455 1456 // We can eliminate stores to locations not visible to the caller across 1457 // throwing instructions. 1458 if (DI->mayThrow() && !DefVisibleToCaller) 1459 return true; 1460 1461 // We can remove the dead stores, irrespective of the fence and its ordering 1462 // (release/acquire/seq_cst). Fences only constraints the ordering of 1463 // already visible stores, it does not make a store visible to other 1464 // threads. So, skipping over a fence does not change a store from being 1465 // dead. 1466 if (isa<FenceInst>(DI)) 1467 return true; 1468 1469 // Skip intrinsics that do not really read or modify memory. 1470 if (isNoopIntrinsic(D)) 1471 return true; 1472 1473 return false; 1474 } 1475 1476 struct DSEState { 1477 Function &F; 1478 AliasAnalysis &AA; 1479 MemorySSA &MSSA; 1480 DominatorTree &DT; 1481 PostDominatorTree &PDT; 1482 const TargetLibraryInfo &TLI; 1483 1484 // All MemoryDefs that potentially could kill other MemDefs. 1485 SmallVector<MemoryDef *, 64> MemDefs; 1486 // Any that should be skipped as they are already deleted 1487 SmallPtrSet<MemoryAccess *, 4> SkipStores; 1488 // Keep track of all of the objects that are invisible to the caller before 1489 // the function returns. 1490 SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet; 1491 // Keep track of all of the objects that are invisible to the caller after 1492 // the function returns. 1493 SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet; 1494 // Keep track of blocks with throwing instructions not modeled in MemorySSA. 1495 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks; 1496 // Post-order numbers for each basic block. Used to figure out if memory 1497 // accesses are executed before another access. 1498 DenseMap<BasicBlock *, unsigned> PostOrderNumbers; 1499 1500 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per 1501 /// basic block. 1502 DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs; 1503 1504 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, 1505 PostDominatorTree &PDT, const TargetLibraryInfo &TLI) 1506 : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} 1507 1508 static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, 1509 DominatorTree &DT, PostDominatorTree &PDT, 1510 const TargetLibraryInfo &TLI) { 1511 DSEState State(F, AA, MSSA, DT, PDT, TLI); 1512 // Collect blocks with throwing instructions not modeled in MemorySSA and 1513 // alloc-like objects. 
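    // Illustrative sketch (assumed IR, not from this pass):
    //   %a = alloca i32              ; never visible to the caller, so it is
    //                                ; added to both Invisible* sets below
    //   %m = call i8* @malloc(i64 4) ; only added if the capture queries
    //                                ; below show it does not escape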
    unsigned PO = 0;
    for (BasicBlock *BB : post_order(&F)) {
      State.PostOrderNumbers[BB] = PO++;
      for (Instruction &I : *BB) {
        MemoryAccess *MA = MSSA.getMemoryAccess(&I);
        if (I.mayThrow() && !MA)
          State.ThrowingBlocks.insert(I.getParent());

        auto *MD = dyn_cast_or_null<MemoryDef>(MA);
        if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
            hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
          State.MemDefs.push_back(MD);

        // Track whether alloca and alloca-like objects are visible in the
        // caller before and after the function returns. Alloca objects are
        // invalid in the caller, so they are neither visible before nor after
        // the function returns.
        if (isa<AllocaInst>(&I)) {
          State.InvisibleToCallerBeforeRet.insert(&I);
          State.InvisibleToCallerAfterRet.insert(&I);
        }

        // For alloca-like objects we need to check if they are captured before
        // the function returns and if the return might capture the object.
        if (isAllocLikeFn(&I, &TLI)) {
          bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true);
          if (!CapturesBeforeRet) {
            State.InvisibleToCallerBeforeRet.insert(&I);
            if (!PointerMayBeCaptured(&I, true, false))
              State.InvisibleToCallerAfterRet.insert(&I);
          }
        }
      }
    }

    // Treat byval or inalloca arguments the same as Allocas: stores to them
    // are dead at the end of the function.
    for (Argument &AI : F.args())
      if (AI.hasPassPointeeByValueAttr())
        State.InvisibleToCallerBeforeRet.insert(&AI);
    return State;
  }

  Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
    if (!I->mayWriteToMemory())
      return None;

    if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
      return {MemoryLocation::getForDest(MTI)};

    if (auto *CB = dyn_cast<CallBase>(I)) {
      if (Function *F = CB->getCalledFunction()) {
        StringRef FnName = F->getName();
        if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
          return {MemoryLocation(CB->getArgOperand(0))};
        if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
          return {MemoryLocation(CB->getArgOperand(0))};
        if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
          return {MemoryLocation(CB->getArgOperand(0))};
        if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
          return {MemoryLocation(CB->getArgOperand(0))};
      }
      return None;
    }

    return MemoryLocation::getOrNone(I);
  }

  /// Returns true if \p UseInst completely overwrites \p DefLoc.
  bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const {
    // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
    // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
    // MemoryDef.
    if (!UseInst->mayWriteToMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
    // If necessary, perform additional analysis.
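    // For calls, callCapturesBefore can refine the result when the underlying
    // object of DefLoc has not been captured before the call.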
    if (isModSet(MR) && isa<CallBase>(UseInst))
      MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);

    Optional<MemoryLocation> UseLoc = getLocForWriteEx(UseInst);
    return isModSet(MR) && isMustSet(MR) &&
           (UseLoc->Size.hasValue() && DefLoc.Size.hasValue() &&
            UseLoc->Size.getValue() >= DefLoc.Size.getValue());
  }

  /// Returns true if \p UseInst may read from \p DefLoc.
  bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
    if (!UseInst->mayReadFromMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
    // If necessary, perform additional analysis.
    if (isRefSet(MR))
      MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
    return isRefSet(MR);
  }

  // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no
  // read access between them or on any other path to a function exit block if
  // \p DefLoc is not accessible after the function returns. If there is no
  // such MemoryDef, return None. The returned value may not (completely)
  // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
  // MemoryUse (read).
  Optional<MemoryAccess *>
  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current,
                  MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet,
                  bool DefVisibleToCallerAfterRet, int &ScanLimit) const {
    MemoryAccess *DomAccess;
    bool StepAgain;
    LLVM_DEBUG(dbgs() << "  trying to get dominating access for " << *Current
                      << "\n");
    // Find the next clobbering Mod access for DefLoc, starting at Current.
    do {
      StepAgain = false;
      // Reached TOP.
      if (MSSA.isLiveOnEntryDef(Current))
        return None;

      if (isa<MemoryPhi>(Current)) {
        DomAccess = Current;
        break;
      }
      MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current);
      // Look for accesses that clobber DefLoc.
      DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD,
                                                                      DefLoc);
      if (MSSA.isLiveOnEntryDef(DomAccess))
        return None;

      if (isa<MemoryPhi>(DomAccess))
        break;

      // Check if we can skip DomDef for DSE. For accesses to objects that are
      // accessible after the function returns, KillingDef must execute
      // whenever DomDef executes; we use post-dominance to ensure that.
      MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess);
      if ((DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) ||
          (DefVisibleToCallerAfterRet &&
           !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock()))) {
        StepAgain = true;
        Current = DomDef->getDefiningAccess();
      }

    } while (StepAgain);

    LLVM_DEBUG({
      dbgs() << "  Checking for reads of " << *DomAccess;
      if (isa<MemoryDef>(DomAccess))
        dbgs() << " (" << *cast<MemoryDef>(DomAccess)->getMemoryInst() << ")\n";
      else
        dbgs() << "\n";
    });

    SmallSetVector<MemoryAccess *, 32> WorkList;
    auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
      for (Use &U : Acc->uses())
        WorkList.insert(cast<MemoryAccess>(U.getUser()));
    };
    PushMemUses(DomAccess);

    // Check if DomDef may be read.
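    // The worklist is visited by index so that uses discovered along the way
    // are processed as well; the SmallSetVector de-duplicates accesses that
    // are reachable along multiple paths.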
    for (unsigned I = 0; I < WorkList.size(); I++) {
      MemoryAccess *UseAccess = WorkList[I];

      LLVM_DEBUG(dbgs() << "   " << *UseAccess);
      if (--ScanLimit == 0) {
        LLVM_DEBUG(dbgs() << "\n    ... hit scan limit\n");
        return None;
      }

      if (isa<MemoryPhi>(UseAccess)) {
        LLVM_DEBUG(dbgs() << "\n    ... adding PHI uses\n");
        PushMemUses(UseAccess);
        continue;
      }

      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
      LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");

      if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) {
        LLVM_DEBUG(dbgs() << "    ... adding uses of intrinsic\n");
        PushMemUses(UseAccess);
        continue;
      }

      // Uses which may read the original MemoryDef mean we cannot eliminate
      // the original MD. Stop the walk.
      if (isReadClobber(DefLoc, UseInst)) {
        LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
        return None;
      }

      // For the KillingDef and DomAccess we only have to check if they read
      // the memory location.
      // TODO: It would probably be better to check for self-reads before
      // calling the function.
      if (KillingDef == UseAccess || DomAccess == UseAccess) {
        LLVM_DEBUG(dbgs() << "    ... skipping killing def/dom access\n");
        continue;
      }

      // We also have to check the uses of *all* MemoryDefs we discover,
      // including non-aliasing ones, except for defs completely overwriting
      // the original location (nothing reading behind such a def can observe
      // the original value). Otherwise we might miss cases like the following
      //   1 = Def(LoE) ; <----- DomDef stores [0,1]
      //   2 = Def(1)   ; (2, 1) = NoAlias,   stores [2,3]
      //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].
      //                  (The Use points to the *first* Def it may alias)
      //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
      //                  stores [0,1]
      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
        if (!isCompleteOverwrite(DefLoc, UseInst))
          PushMemUses(UseDef);
      }
    }

    // No aliasing MemoryUses of DomAccess found; DomAccess is potentially
    // dead.
    return {DomAccess};
  }

  // Delete dead memory defs.
  void deleteDeadInstruction(Instruction *SI) {
    MemorySSAUpdater Updater(&MSSA);
    SmallVector<Instruction *, 32> NowDeadInsts;
    NowDeadInsts.push_back(SI);
    --NumFastOther;

    while (!NowDeadInsts.empty()) {
      Instruction *DeadInst = NowDeadInsts.pop_back_val();
      ++NumFastOther;

      // Try to preserve debug information attached to the dead instruction.
      salvageDebugInfo(*DeadInst);
      salvageKnowledge(DeadInst);

      // Remove the Instruction from MSSA.
      if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
        if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
          SkipStores.insert(MD);
        }
        Updater.removeMemoryAccess(MA);
      }

      auto I = IOLs.find(DeadInst->getParent());
      if (I != IOLs.end())
        I->second.erase(DeadInst);
      // Remove its operands.
      for (Use &O : DeadInst->operands())
        if (Instruction *OpI = dyn_cast<Instruction>(O)) {
          O = nullptr;
          if (isInstructionTriviallyDead(OpI, &TLI))
            NowDeadInsts.push_back(OpI);
        }

      DeadInst->eraseFromParent();
    }
  }

  // Check for any extra throws between SI and NI that block DSE. This only
  // checks extra maythrows (those that aren't MemoryDefs). MemoryDefs that may
  // throw are handled during the walk from one def to the next.
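  // Within a single block only that block's throwing instructions matter;
  // otherwise any throwing block in the function is conservatively treated as
  // blocking.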
  bool mayThrowBetween(Instruction *SI, Instruction *NI,
                       const Value *SILocUnd) const {
    // First see if we can ignore it by using the fact that SI is an
    // alloca/alloca-like object that is not visible to the caller during
    // execution of the function.
    if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd))
      return false;

    if (SI->getParent() == NI->getParent())
      return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end();
    return !ThrowingBlocks.empty();
  }

  // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
  // act as barriers:
  //  * A memory instruction that may throw and \p SI accesses a non-stack
  //    object.
  //  * Atomic stores stronger than monotonic.
  bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc,
                    const Value *SILocUnd, Instruction *NI,
                    MemoryLocation &NILoc) const {
    // If NI may throw it acts as a barrier, unless SI accesses an
    // alloca/alloca-like object that does not escape.
    if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd))
      return true;

    if (NI->isAtomic()) {
      if (auto *NSI = dyn_cast<StoreInst>(NI)) {
        if (isStrongerThanMonotonic(NSI->getOrdering()))
          return true;
      } else
        llvm_unreachable(
            "Other instructions should be modeled/skipped in MemorySSA");
    }

    return false;
  }
};

/// \returns true if \p KillingDef stores the result of \p Load to the source
/// of \p Load.
static bool storeIsNoop(MemorySSA &MSSA, LoadInst *Load,
                        MemoryDef *KillingDef) {
  Instruction *Store = KillingDef->getMemoryInst();
  // If the load's operand isn't the destination of the store, bail.
  if (Load->getPointerOperand() != Store->getOperand(1))
    return false;

  // Get the defining access for the load.
  auto *LoadAccess = MSSA.getMemoryAccess(Load)->getDefiningAccess();
  // The store is dead if the defining accesses are the same.
  return LoadAccess == KillingDef->getDefiningAccess();
}

bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
                                  MemorySSA &MSSA, DominatorTree &DT,
                                  PostDominatorTree &PDT,
                                  const TargetLibraryInfo &TLI) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  bool MadeChange = false;

  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
  // For each store:
  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
    MemoryDef *KillingDef = State.MemDefs[I];
    if (State.SkipStores.count(KillingDef))
      continue;
    Instruction *SI = KillingDef->getMemoryInst();

    // Check if we're storing a value that we just loaded.
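    // For example (illustrative IR, not taken from any test):
    //   %v = load i32, i32* %p
    //   store i32 %v, i32* %p
    // The store is a no-op when both instructions share the same defining
    // MemoryAccess, i.e. nothing may have written to %p in between.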
    if (auto *Load = dyn_cast<LoadInst>(SI->getOperand(0))) {
      if (storeIsNoop(MSSA, Load, KillingDef)) {
        State.deleteDeadInstruction(SI);
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *SI
                          << '\n');
        NumNoopStores++;
        continue;
      }
    }

    auto MaybeSILoc = State.getLocForWriteEx(SI);
    if (!MaybeSILoc) {
      LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                        << *SI << "\n");
      continue;
    }
    MemoryLocation SILoc = *MaybeSILoc;
    assert(SILoc.Ptr && "SILoc should not be null");
    const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
    Instruction *DefObj =
        const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
    bool DefVisibleToCallerBeforeRet =
        !State.InvisibleToCallerBeforeRet.count(SILocUnd);
    bool DefVisibleToCallerAfterRet =
        !State.InvisibleToCallerAfterRet.count(SILocUnd);
    if (DefObj && isAllocLikeFn(DefObj, &TLI)) {
      if (DefVisibleToCallerBeforeRet)
        DefVisibleToCallerBeforeRet =
            PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT);
    }

    MemoryAccess *Current = KillingDef;
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
                      << *KillingDef << " (" << *SI << ")\n");

    int ScanLimit = MemorySSAScanLimit;
    // Worklist of MemoryAccesses that may be killed by KillingDef.
    SetVector<MemoryAccess *> ToCheck;
    ToCheck.insert(KillingDef->getDefiningAccess());

    // Check if MemoryAccesses in the worklist are killed by KillingDef.
    for (unsigned I = 0; I < ToCheck.size(); I++) {
      Current = ToCheck[I];
      if (State.SkipStores.count(Current))
        continue;

      Optional<MemoryAccess *> Next = State.getDomMemoryDef(
          KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet,
          DefVisibleToCallerAfterRet, ScanLimit);

      if (!Next) {
        LLVM_DEBUG(dbgs() << "  finished walk\n");
        continue;
      }

      MemoryAccess *DomAccess = *Next;
      LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess);
      if (isa<MemoryPhi>(DomAccess)) {
        LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");
        for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) {
          MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
          BasicBlock *IncomingBlock = IncomingAccess->getBlock();
          BasicBlock *PhiBlock = DomAccess->getBlock();

          // We only consider incoming MemoryAccesses that come before the
          // MemoryPhi. Otherwise we could discover candidates that do not
          // strictly dominate our starting def.
          if (State.PostOrderNumbers[IncomingBlock] >
              State.PostOrderNumbers[PhiBlock])
            ToCheck.insert(IncomingAccess);
        }
        continue;
      }
      MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess);
      Instruction *NI = NextDef->getMemoryInst();
      LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");

      if (!hasAnalyzableMemoryWrite(NI, TLI)) {
        LLVM_DEBUG(dbgs() << "  ... skip, cannot analyze def\n");
        continue;
      }

      if (!isRemovable(NI)) {
        LLVM_DEBUG(dbgs() << "  ... skip, cannot remove def\n");
        continue;
      }

      MemoryLocation NILoc = *State.getLocForWriteEx(NI);
      // Check for anything that looks like it will be a barrier to further
      // removal.
      if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) {
        LLVM_DEBUG(dbgs() << "  ... skip, barrier\n");
        continue;
      }

      // Before we try to remove anything, check for any extra throwing
      // instructions that block us from DSEing.
      if (State.mayThrowBetween(SI, NI, SILocUnd)) {
        LLVM_DEBUG(dbgs() << "  ... skip, may throw!\n");
        break;
      }

      if (!DebugCounter::shouldExecute(MemorySSACounter))
        continue;

      // Check if NI overwrites SI.
      int64_t InstWriteOffset, DepWriteOffset;
      auto Iter = State.IOLs.insert(
          std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
              NI->getParent(), InstOverlapIntervalsTy()));
      auto &IOL = Iter.first->second;
      OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset,
                                       InstWriteOffset, NI, IOL, AA, &F);

      ToCheck.insert(NextDef->getDefiningAccess());
      if (OR == OW_Complete) {
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *NI
                          << "\n  KILLER: " << *SI << '\n');
        State.deleteDeadInstruction(NI);
        ++NumFastStores;
        MadeChange = true;
      }
    }
  }

  if (EnablePartialOverwriteTracking)
    for (auto &KV : State.IOLs)
      MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second);

  return MadeChange;
}
} // end anonymous namespace

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);

  bool Changed = false;
  if (EnableMemorySSA) {
    MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
    PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);

    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
  } else {
    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);

    Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
  }

#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled())
    for (auto &I : instructions(F))
      NumRemainingStores += isa<StoreInst>(&I);
#endif

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  if (EnableMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  else
    PA.preserve<MemoryDependenceAnalysis>();
  return PA;
}

namespace {

/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
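/// Depending on -enable-dse-memoryssa it forwards to either the
/// MemorySSA-based or the MemoryDependenceAnalysis-based implementation.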
class DSELegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  DSELegacyPass() : FunctionPass(ID) {
    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    const TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

    bool Changed = false;
    if (EnableMemorySSA) {
      MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
      PostDominatorTree &PDT =
          getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();

      // Record whether the MemorySSA-based elimination changed anything, so
      // the legacy pass manager is told the truth about modifications.
      Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
    } else {
      MemoryDependenceResults &MD =
          getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

      Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
    }

#ifdef LLVM_ENABLE_STATS
    if (AreStatisticsEnabled())
      for (auto &I : instructions(F))
        NumRemainingStores += isa<StoreInst>(&I);
#endif

    return Changed;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();

    if (EnableMemorySSA) {
      AU.addRequired<PostDominatorTreeWrapperPass>();
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<PostDominatorTreeWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    } else {
      AU.addRequired<MemoryDependenceWrapperPass>();
      AU.addPreserved<MemoryDependenceWrapperPass>();
    }
  }
};

} // end anonymous namespace

char DSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                    false)

FunctionPass *llvm::createDeadStoreEliminationPass() {
  return new DSELegacyPass();
}