1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a trivial dead store elimination that only considers 10 // basic-block local redundant stores. 11 // 12 // FIXME: This should eventually be extended to be a post-dominator tree 13 // traversal. Doing so would be pretty trivial. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/MapVector.h" 21 #include "llvm/ADT/PostOrderIterator.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/CaptureTracking.h" 29 #include "llvm/Analysis/GlobalsModRef.h" 30 #include "llvm/Analysis/MemoryBuiltins.h" 31 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 32 #include "llvm/Analysis/MemoryLocation.h" 33 #include "llvm/Analysis/MemorySSA.h" 34 #include "llvm/Analysis/MemorySSAUpdater.h" 35 #include "llvm/Analysis/PostDominators.h" 36 #include "llvm/Analysis/TargetLibraryInfo.h" 37 #include "llvm/Analysis/ValueTracking.h" 38 #include "llvm/IR/Argument.h" 39 #include "llvm/IR/BasicBlock.h" 40 #include "llvm/IR/Constant.h" 41 #include "llvm/IR/Constants.h" 42 #include "llvm/IR/DataLayout.h" 43 #include "llvm/IR/Dominators.h" 44 #include "llvm/IR/Function.h" 45 #include "llvm/IR/InstIterator.h" 46 #include "llvm/IR/InstrTypes.h" 47 #include "llvm/IR/Instruction.h" 48 #include "llvm/IR/Instructions.h" 49 #include "llvm/IR/IntrinsicInst.h" 50 #include "llvm/IR/Intrinsics.h" 51 #include "llvm/IR/LLVMContext.h" 52 #include "llvm/IR/Module.h" 53 #include "llvm/IR/PassManager.h" 54 #include "llvm/IR/Value.h" 55 #include "llvm/InitializePasses.h" 56 #include "llvm/Pass.h" 57 #include "llvm/Support/Casting.h" 58 #include "llvm/Support/CommandLine.h" 59 #include "llvm/Support/Debug.h" 60 #include "llvm/Support/DebugCounter.h" 61 #include "llvm/Support/ErrorHandling.h" 62 #include "llvm/Support/MathExtras.h" 63 #include "llvm/Support/raw_ostream.h" 64 #include "llvm/Transforms/Scalar.h" 65 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 66 #include "llvm/Transforms/Utils/Local.h" 67 #include <algorithm> 68 #include <cassert> 69 #include <cstddef> 70 #include <cstdint> 71 #include <iterator> 72 #include <map> 73 #include <utility> 74 75 using namespace llvm; 76 77 #define DEBUG_TYPE "dse" 78 79 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE"); 80 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 81 STATISTIC(NumFastStores, "Number of stores deleted"); 82 STATISTIC(NumFastOther, "Number of other instrs removed"); 83 STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); 84 STATISTIC(NumModifiedStores, "Number of stores modified"); 85 86 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", 87 "Controls which MemoryDefs are eliminated."); 88 89 static cl::opt<bool> 90 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", 91 cl::init(true), cl::Hidden, 92 cl::desc("Enable partial-overwrite tracking in DSE")); 93 94 static cl::opt<bool> 95 EnablePartialStoreMerging("enable-dse-partial-store-merging", 96 cl::init(true), cl::Hidden, 97 cl::desc("Enable partial store merging in DSE")); 98 99 static cl::opt<bool> 100 EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, 101 cl::desc("Use the new MemorySSA-backed DSE.")); 102 103 static cl::opt<unsigned> 104 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, 105 cl::desc("The number of memory instructions to scan for " 106 "dead store elimination (default = 100)")); 107 108 static cl::opt<unsigned> MemorySSADefsPerBlockLimit( 109 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, 110 cl::desc("The number of MemoryDefs we consider as candidates to eliminated " 111 "other stores per basic block (default = 5000)")); 112 113 //===----------------------------------------------------------------------===// 114 // Helper functions 115 //===----------------------------------------------------------------------===// 116 using OverlapIntervalsTy = std::map<int64_t, int64_t>; 117 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; 118 119 /// Delete this instruction. Before we do, go through and zero out all the 120 /// operands of this instruction. If any of them become dead, delete them and 121 /// the computation tree that feeds them. 122 /// If ValueSet is non-null, remove any deleted instructions from it as well. 123 static void 124 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI, 125 MemoryDependenceResults &MD, const TargetLibraryInfo &TLI, 126 InstOverlapIntervalsTy &IOL, 127 MapVector<Instruction *, bool> &ThrowableInst, 128 SmallSetVector<const Value *, 16> *ValueSet = nullptr) { 129 SmallVector<Instruction*, 32> NowDeadInsts; 130 131 NowDeadInsts.push_back(I); 132 --NumFastOther; 133 134 // Keeping the iterator straight is a pain, so we let this routine tell the 135 // caller what the next instruction is after we're done mucking about. 136 BasicBlock::iterator NewIter = *BBI; 137 138 // Before we touch this instruction, remove it from memdep! 139 do { 140 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 141 // Mark the DeadInst as dead in the list of throwable instructions. 142 auto It = ThrowableInst.find(DeadInst); 143 if (It != ThrowableInst.end()) 144 ThrowableInst[It->first] = false; 145 ++NumFastOther; 146 147 // Try to preserve debug information attached to the dead instruction. 148 salvageDebugInfoOrMarkUndef(*DeadInst); 149 salvageKnowledge(DeadInst); 150 151 // This instruction is dead, zap it, in stages. Start by removing it from 152 // MemDep, which needs to know the operands and needs it to be in the 153 // function. 154 MD.removeInstruction(DeadInst); 155 156 for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 157 Value *Op = DeadInst->getOperand(op); 158 DeadInst->setOperand(op, nullptr); 159 160 // If this operand just became dead, add it to the NowDeadInsts list. 161 if (!Op->use_empty()) continue; 162 163 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 164 if (isInstructionTriviallyDead(OpI, &TLI)) 165 NowDeadInsts.push_back(OpI); 166 } 167 168 if (ValueSet) ValueSet->remove(DeadInst); 169 IOL.erase(DeadInst); 170 171 if (NewIter == DeadInst->getIterator()) 172 NewIter = DeadInst->eraseFromParent(); 173 else 174 DeadInst->eraseFromParent(); 175 } while (!NowDeadInsts.empty()); 176 *BBI = NewIter; 177 // Pop dead entries from back of ThrowableInst till we find an alive entry. 178 while (!ThrowableInst.empty() && !ThrowableInst.back().second) 179 ThrowableInst.pop_back(); 180 } 181 182 /// Does this instruction write some memory? This only returns true for things 183 /// that we can analyze with other helpers below. 184 static bool hasAnalyzableMemoryWrite(Instruction *I, 185 const TargetLibraryInfo &TLI) { 186 if (isa<StoreInst>(I)) 187 return true; 188 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 189 switch (II->getIntrinsicID()) { 190 default: 191 return false; 192 case Intrinsic::memset: 193 case Intrinsic::memmove: 194 case Intrinsic::memcpy: 195 case Intrinsic::memcpy_element_unordered_atomic: 196 case Intrinsic::memmove_element_unordered_atomic: 197 case Intrinsic::memset_element_unordered_atomic: 198 case Intrinsic::init_trampoline: 199 case Intrinsic::lifetime_end: 200 return true; 201 } 202 } 203 if (auto *CB = dyn_cast<CallBase>(I)) { 204 if (Function *F = CB->getCalledFunction()) { 205 LibFunc LF; 206 if (TLI.getLibFunc(*F, LF) && TLI.has(LF)) { 207 switch (LF) { 208 case LibFunc_strcpy: 209 case LibFunc_strncpy: 210 case LibFunc_strcat: 211 case LibFunc_strncat: 212 return true; 213 default: 214 return false; 215 } 216 } 217 } 218 } 219 return false; 220 } 221 222 /// Return a Location stored to by the specified instruction. If isRemovable 223 /// returns true, this function and getLocForRead completely describe the memory 224 /// operations for this instruction. 225 static MemoryLocation getLocForWrite(Instruction *Inst) { 226 227 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 228 return MemoryLocation::get(SI); 229 230 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) { 231 // memcpy/memmove/memset. 232 MemoryLocation Loc = MemoryLocation::getForDest(MI); 233 return Loc; 234 } 235 236 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 237 switch (II->getIntrinsicID()) { 238 default: 239 return MemoryLocation(); // Unhandled intrinsic. 240 case Intrinsic::init_trampoline: 241 return MemoryLocation(II->getArgOperand(0)); 242 case Intrinsic::lifetime_end: { 243 uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); 244 return MemoryLocation(II->getArgOperand(1), Len); 245 } 246 } 247 } 248 if (auto *CB = dyn_cast<CallBase>(Inst)) 249 // All the supported TLI functions so far happen to have dest as their 250 // first argument. 251 return MemoryLocation(CB->getArgOperand(0)); 252 return MemoryLocation(); 253 } 254 255 /// Return the location read by the specified "hasAnalyzableMemoryWrite" 256 /// instruction if any. 257 static MemoryLocation getLocForRead(Instruction *Inst, 258 const TargetLibraryInfo &TLI) { 259 assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); 260 261 // The only instructions that both read and write are the mem transfer 262 // instructions (memcpy/memmove). 263 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst)) 264 return MemoryLocation::getForSource(MTI); 265 return MemoryLocation(); 266 } 267 268 /// If the value of this instruction and the memory it writes to is unused, may 269 /// we delete this instruction? 270 static bool isRemovable(Instruction *I) { 271 // Don't remove volatile/atomic stores. 272 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 273 return SI->isUnordered(); 274 275 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 276 switch (II->getIntrinsicID()) { 277 default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); 278 case Intrinsic::lifetime_end: 279 // Never remove dead lifetime_end's, e.g. because it is followed by a 280 // free. 281 return false; 282 case Intrinsic::init_trampoline: 283 // Always safe to remove init_trampoline. 284 return true; 285 case Intrinsic::memset: 286 case Intrinsic::memmove: 287 case Intrinsic::memcpy: 288 // Don't remove volatile memory intrinsics. 289 return !cast<MemIntrinsic>(II)->isVolatile(); 290 case Intrinsic::memcpy_element_unordered_atomic: 291 case Intrinsic::memmove_element_unordered_atomic: 292 case Intrinsic::memset_element_unordered_atomic: 293 return true; 294 } 295 } 296 297 // note: only get here for calls with analyzable writes - i.e. libcalls 298 if (auto *CB = dyn_cast<CallBase>(I)) 299 return CB->use_empty(); 300 301 return false; 302 } 303 304 /// Returns true if the end of this instruction can be safely shortened in 305 /// length. 306 static bool isShortenableAtTheEnd(Instruction *I) { 307 // Don't shorten stores for now 308 if (isa<StoreInst>(I)) 309 return false; 310 311 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 312 switch (II->getIntrinsicID()) { 313 default: return false; 314 case Intrinsic::memset: 315 case Intrinsic::memcpy: 316 case Intrinsic::memcpy_element_unordered_atomic: 317 case Intrinsic::memset_element_unordered_atomic: 318 // Do shorten memory intrinsics. 319 // FIXME: Add memmove if it's also safe to transform. 320 return true; 321 } 322 } 323 324 // Don't shorten libcalls calls for now. 325 326 return false; 327 } 328 329 /// Returns true if the beginning of this instruction can be safely shortened 330 /// in length. 331 static bool isShortenableAtTheBeginning(Instruction *I) { 332 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be 333 // easily done by offsetting the source address. 334 return isa<AnyMemSetInst>(I); 335 } 336 337 /// Return the pointer that is being written to. 338 static Value *getStoredPointerOperand(Instruction *I) { 339 //TODO: factor this to reuse getLocForWrite 340 MemoryLocation Loc = getLocForWrite(I); 341 assert(Loc.Ptr && 342 "unable to find pointer written for analyzable instruction?"); 343 // TODO: most APIs don't expect const Value * 344 return const_cast<Value*>(Loc.Ptr); 345 } 346 347 static uint64_t getPointerSize(const Value *V, const DataLayout &DL, 348 const TargetLibraryInfo &TLI, 349 const Function *F) { 350 uint64_t Size; 351 ObjectSizeOpts Opts; 352 Opts.NullIsUnknownSize = NullPointerIsDefined(F); 353 354 if (getObjectSize(V, Size, DL, &TLI, Opts)) 355 return Size; 356 return MemoryLocation::UnknownSize; 357 } 358 359 namespace { 360 361 enum OverwriteResult { 362 OW_Begin, 363 OW_Complete, 364 OW_End, 365 OW_PartialEarlierWithFullLater, 366 OW_Unknown 367 }; 368 369 } // end anonymous namespace 370 371 /// Return 'OW_Complete' if a store to the 'Later' location completely 372 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the 373 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the 374 /// beginning of the 'Earlier' location is overwritten by 'Later'. 375 /// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was 376 /// overwritten by a latter (smaller) store which doesn't write outside the big 377 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. 378 static OverwriteResult isOverwrite(const MemoryLocation &Later, 379 const MemoryLocation &Earlier, 380 const DataLayout &DL, 381 const TargetLibraryInfo &TLI, 382 int64_t &EarlierOff, int64_t &LaterOff, 383 Instruction *DepWrite, 384 InstOverlapIntervalsTy &IOL, 385 AliasAnalysis &AA, 386 const Function *F) { 387 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll 388 // get imprecise values here, though (except for unknown sizes). 389 if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) 390 return OW_Unknown; 391 392 const uint64_t LaterSize = Later.Size.getValue(); 393 const uint64_t EarlierSize = Earlier.Size.getValue(); 394 395 const Value *P1 = Earlier.Ptr->stripPointerCasts(); 396 const Value *P2 = Later.Ptr->stripPointerCasts(); 397 398 // If the start pointers are the same, we just have to compare sizes to see if 399 // the later store was larger than the earlier store. 400 if (P1 == P2 || AA.isMustAlias(P1, P2)) { 401 // Make sure that the Later size is >= the Earlier size. 402 if (LaterSize >= EarlierSize) 403 return OW_Complete; 404 } 405 406 // Check to see if the later store is to the entire object (either a global, 407 // an alloca, or a byval/inalloca argument). If so, then it clearly 408 // overwrites any other store to the same object. 409 const Value *UO1 = GetUnderlyingObject(P1, DL), 410 *UO2 = GetUnderlyingObject(P2, DL); 411 412 // If we can't resolve the same pointers to the same object, then we can't 413 // analyze them at all. 414 if (UO1 != UO2) 415 return OW_Unknown; 416 417 // If the "Later" store is to a recognizable object, get its size. 418 uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F); 419 if (ObjectSize != MemoryLocation::UnknownSize) 420 if (ObjectSize == LaterSize && ObjectSize >= EarlierSize) 421 return OW_Complete; 422 423 // Okay, we have stores to two completely different pointers. Try to 424 // decompose the pointer into a "base + constant_offset" form. If the base 425 // pointers are equal, then we can reason about the two stores. 426 EarlierOff = 0; 427 LaterOff = 0; 428 const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); 429 const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); 430 431 // If the base pointers still differ, we have two completely different stores. 432 if (BP1 != BP2) 433 return OW_Unknown; 434 435 // The later store completely overlaps the earlier store if: 436 // 437 // 1. Both start at the same offset and the later one's size is greater than 438 // or equal to the earlier one's, or 439 // 440 // |--earlier--| 441 // |-- later --| 442 // 443 // 2. The earlier store has an offset greater than the later offset, but which 444 // still lies completely within the later store. 445 // 446 // |--earlier--| 447 // |----- later ------| 448 // 449 // We have to be careful here as *Off is signed while *.Size is unsigned. 450 if (EarlierOff >= LaterOff && 451 LaterSize >= EarlierSize && 452 uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) 453 return OW_Complete; 454 455 // We may now overlap, although the overlap is not complete. There might also 456 // be other incomplete overlaps, and together, they might cover the complete 457 // earlier write. 458 // Note: The correctness of this logic depends on the fact that this function 459 // is not even called providing DepWrite when there are any intervening reads. 460 if (EnablePartialOverwriteTracking && 461 LaterOff < int64_t(EarlierOff + EarlierSize) && 462 int64_t(LaterOff + LaterSize) >= EarlierOff) { 463 464 // Insert our part of the overlap into the map. 465 auto &IM = IOL[DepWrite]; 466 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff 467 << ", " << int64_t(EarlierOff + EarlierSize) 468 << ") Later [" << LaterOff << ", " 469 << int64_t(LaterOff + LaterSize) << ")\n"); 470 471 // Make sure that we only insert non-overlapping intervals and combine 472 // adjacent intervals. The intervals are stored in the map with the ending 473 // offset as the key (in the half-open sense) and the starting offset as 474 // the value. 475 int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize; 476 477 // Find any intervals ending at, or after, LaterIntStart which start 478 // before LaterIntEnd. 479 auto ILI = IM.lower_bound(LaterIntStart); 480 if (ILI != IM.end() && ILI->second <= LaterIntEnd) { 481 // This existing interval is overlapped with the current store somewhere 482 // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing 483 // intervals and adjusting our start and end. 484 LaterIntStart = std::min(LaterIntStart, ILI->second); 485 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 486 ILI = IM.erase(ILI); 487 488 // Continue erasing and adjusting our end in case other previous 489 // intervals are also overlapped with the current store. 490 // 491 // |--- ealier 1 ---| |--- ealier 2 ---| 492 // |------- later---------| 493 // 494 while (ILI != IM.end() && ILI->second <= LaterIntEnd) { 495 assert(ILI->second > LaterIntStart && "Unexpected interval"); 496 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 497 ILI = IM.erase(ILI); 498 } 499 } 500 501 IM[LaterIntEnd] = LaterIntStart; 502 503 ILI = IM.begin(); 504 if (ILI->second <= EarlierOff && 505 ILI->first >= int64_t(EarlierOff + EarlierSize)) { 506 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" 507 << EarlierOff << ", " 508 << int64_t(EarlierOff + EarlierSize) 509 << ") Composite Later [" << ILI->second << ", " 510 << ILI->first << ")\n"); 511 ++NumCompletePartials; 512 return OW_Complete; 513 } 514 } 515 516 // Check for an earlier store which writes to all the memory locations that 517 // the later store writes to. 518 if (EnablePartialStoreMerging && LaterOff >= EarlierOff && 519 int64_t(EarlierOff + EarlierSize) > LaterOff && 520 uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) { 521 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" 522 << EarlierOff << ", " 523 << int64_t(EarlierOff + EarlierSize) 524 << ") by a later store [" << LaterOff << ", " 525 << int64_t(LaterOff + LaterSize) << ")\n"); 526 // TODO: Maybe come up with a better name? 527 return OW_PartialEarlierWithFullLater; 528 } 529 530 // Another interesting case is if the later store overwrites the end of the 531 // earlier store. 532 // 533 // |--earlier--| 534 // |-- later --| 535 // 536 // In this case we may want to trim the size of earlier to avoid generating 537 // writes to addresses which will definitely be overwritten later 538 if (!EnablePartialOverwriteTracking && 539 (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) && 540 int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize))) 541 return OW_End; 542 543 // Finally, we also need to check if the later store overwrites the beginning 544 // of the earlier store. 545 // 546 // |--earlier--| 547 // |-- later --| 548 // 549 // In this case we may want to move the destination address and trim the size 550 // of earlier to avoid generating writes to addresses which will definitely 551 // be overwritten later. 552 if (!EnablePartialOverwriteTracking && 553 (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) { 554 assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) && 555 "Expect to be handled as OW_Complete"); 556 return OW_Begin; 557 } 558 // Otherwise, they don't completely overlap. 559 return OW_Unknown; 560 } 561 562 /// If 'Inst' might be a self read (i.e. a noop copy of a 563 /// memory region into an identical pointer) then it doesn't actually make its 564 /// input dead in the traditional sense. Consider this case: 565 /// 566 /// memmove(A <- B) 567 /// memmove(A <- A) 568 /// 569 /// In this case, the second store to A does not make the first store to A dead. 570 /// The usual situation isn't an explicit A<-A store like this (which can be 571 /// trivially removed) but a case where two pointers may alias. 572 /// 573 /// This function detects when it is unsafe to remove a dependent instruction 574 /// because the DSE inducing instruction may be a self-read. 575 static bool isPossibleSelfRead(Instruction *Inst, 576 const MemoryLocation &InstStoreLoc, 577 Instruction *DepWrite, 578 const TargetLibraryInfo &TLI, 579 AliasAnalysis &AA) { 580 // Self reads can only happen for instructions that read memory. Get the 581 // location read. 582 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 583 if (!InstReadLoc.Ptr) 584 return false; // Not a reading instruction. 585 586 // If the read and written loc obviously don't alias, it isn't a read. 587 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) 588 return false; 589 590 if (isa<AnyMemCpyInst>(Inst)) { 591 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) 592 // but in practice memcpy(A <- B) either means that A and B are disjoint or 593 // are equal (i.e. there are not partial overlaps). Given that, if we have: 594 // 595 // memcpy/memmove(A <- B) // DepWrite 596 // memcpy(A <- B) // Inst 597 // 598 // with Inst reading/writing a >= size than DepWrite, we can reason as 599 // follows: 600 // 601 // - If A == B then both the copies are no-ops, so the DepWrite can be 602 // removed. 603 // - If A != B then A and B are disjoint locations in Inst. Since 604 // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. 605 // Therefore DepWrite can be removed. 606 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 607 608 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 609 return false; 610 } 611 612 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 613 // then it can't be considered dead. 614 return true; 615 } 616 617 /// Returns true if the memory which is accessed by the second instruction is not 618 /// modified between the first and the second instruction. 619 /// Precondition: Second instruction must be dominated by the first 620 /// instruction. 621 static bool memoryIsNotModifiedBetween(Instruction *FirstI, 622 Instruction *SecondI, 623 AliasAnalysis *AA, 624 const DataLayout &DL, 625 DominatorTree *DT) { 626 // Do a backwards scan through the CFG from SecondI to FirstI. Look for 627 // instructions which can modify the memory location accessed by SecondI. 628 // 629 // While doing the walk keep track of the address to check. It might be 630 // different in different basic blocks due to PHI translation. 631 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>; 632 SmallVector<BlockAddressPair, 16> WorkList; 633 // Keep track of the address we visited each block with. Bail out if we 634 // visit a block with different addresses. 635 DenseMap<BasicBlock *, Value *> Visited; 636 637 BasicBlock::iterator FirstBBI(FirstI); 638 ++FirstBBI; 639 BasicBlock::iterator SecondBBI(SecondI); 640 BasicBlock *FirstBB = FirstI->getParent(); 641 BasicBlock *SecondBB = SecondI->getParent(); 642 MemoryLocation MemLoc = MemoryLocation::get(SecondI); 643 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); 644 645 // Start checking the SecondBB. 646 WorkList.push_back( 647 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr))); 648 bool isFirstBlock = true; 649 650 // Check all blocks going backward until we reach the FirstBB. 651 while (!WorkList.empty()) { 652 BlockAddressPair Current = WorkList.pop_back_val(); 653 BasicBlock *B = Current.first; 654 PHITransAddr &Addr = Current.second; 655 Value *Ptr = Addr.getAddr(); 656 657 // Ignore instructions before FirstI if this is the FirstBB. 658 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); 659 660 BasicBlock::iterator EI; 661 if (isFirstBlock) { 662 // Ignore instructions after SecondI if this is the first visit of SecondBB. 663 assert(B == SecondBB && "first block is not the store block"); 664 EI = SecondBBI; 665 isFirstBlock = false; 666 } else { 667 // It's not SecondBB or (in case of a loop) the second visit of SecondBB. 668 // In this case we also have to look at instructions after SecondI. 669 EI = B->end(); 670 } 671 for (; BI != EI; ++BI) { 672 Instruction *I = &*BI; 673 if (I->mayWriteToMemory() && I != SecondI) 674 if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr)))) 675 return false; 676 } 677 if (B != FirstBB) { 678 assert(B != &FirstBB->getParent()->getEntryBlock() && 679 "Should not hit the entry block because SI must be dominated by LI"); 680 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { 681 PHITransAddr PredAddr = Addr; 682 if (PredAddr.NeedsPHITranslationFromBlock(B)) { 683 if (!PredAddr.IsPotentiallyPHITranslatable()) 684 return false; 685 if (PredAddr.PHITranslateValue(B, *PredI, DT, false)) 686 return false; 687 } 688 Value *TranslatedPtr = PredAddr.getAddr(); 689 auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr)); 690 if (!Inserted.second) { 691 // We already visited this block before. If it was with a different 692 // address - bail out! 693 if (TranslatedPtr != Inserted.first->second) 694 return false; 695 // ... otherwise just skip it. 696 continue; 697 } 698 WorkList.push_back(std::make_pair(*PredI, PredAddr)); 699 } 700 } 701 } 702 return true; 703 } 704 705 /// Find all blocks that will unconditionally lead to the block BB and append 706 /// them to F. 707 static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, 708 BasicBlock *BB, DominatorTree *DT) { 709 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { 710 BasicBlock *Pred = *I; 711 if (Pred == BB) continue; 712 Instruction *PredTI = Pred->getTerminator(); 713 if (PredTI->getNumSuccessors() != 1) 714 continue; 715 716 if (DT->isReachableFromEntry(Pred)) 717 Blocks.push_back(Pred); 718 } 719 } 720 721 /// Handle frees of entire structures whose dependency is a store 722 /// to a field of that structure. 723 static bool handleFree(CallInst *F, AliasAnalysis *AA, 724 MemoryDependenceResults *MD, DominatorTree *DT, 725 const TargetLibraryInfo *TLI, 726 InstOverlapIntervalsTy &IOL, 727 MapVector<Instruction *, bool> &ThrowableInst) { 728 bool MadeChange = false; 729 730 MemoryLocation Loc = MemoryLocation(F->getOperand(0)); 731 SmallVector<BasicBlock *, 16> Blocks; 732 Blocks.push_back(F->getParent()); 733 const DataLayout &DL = F->getModule()->getDataLayout(); 734 735 while (!Blocks.empty()) { 736 BasicBlock *BB = Blocks.pop_back_val(); 737 Instruction *InstPt = BB->getTerminator(); 738 if (BB == F->getParent()) InstPt = F; 739 740 MemDepResult Dep = 741 MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); 742 while (Dep.isDef() || Dep.isClobber()) { 743 Instruction *Dependency = Dep.getInst(); 744 if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || 745 !isRemovable(Dependency)) 746 break; 747 748 Value *DepPointer = 749 GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); 750 751 // Check for aliasing. 752 if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) 753 break; 754 755 LLVM_DEBUG( 756 dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " 757 << *Dependency << '\n'); 758 759 // DCE instructions only used to calculate that store. 760 BasicBlock::iterator BBI(Dependency); 761 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, 762 ThrowableInst); 763 ++NumFastStores; 764 MadeChange = true; 765 766 // Inst's old Dependency is now deleted. Compute the next dependency, 767 // which may also be dead, as in 768 // s[0] = 0; 769 // s[1] = 0; // This has just been deleted. 770 // free(s); 771 Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB); 772 } 773 774 if (Dep.isNonLocal()) 775 findUnconditionalPreds(Blocks, BB, DT); 776 } 777 778 return MadeChange; 779 } 780 781 /// Check to see if the specified location may alias any of the stack objects in 782 /// the DeadStackObjects set. If so, they become live because the location is 783 /// being loaded. 784 static void removeAccessedObjects(const MemoryLocation &LoadedLoc, 785 SmallSetVector<const Value *, 16> &DeadStackObjects, 786 const DataLayout &DL, AliasAnalysis *AA, 787 const TargetLibraryInfo *TLI, 788 const Function *F) { 789 const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); 790 791 // A constant can't be in the dead pointer set. 792 if (isa<Constant>(UnderlyingPointer)) 793 return; 794 795 // If the kill pointer can be easily reduced to an alloca, don't bother doing 796 // extraneous AA queries. 797 if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { 798 DeadStackObjects.remove(UnderlyingPointer); 799 return; 800 } 801 802 // Remove objects that could alias LoadedLoc. 803 DeadStackObjects.remove_if([&](const Value *I) { 804 // See if the loaded location could alias the stack location. 805 MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F)); 806 return !AA->isNoAlias(StackLoc, LoadedLoc); 807 }); 808 } 809 810 /// Remove dead stores to stack-allocated locations in the function end block. 811 /// Ex: 812 /// %A = alloca i32 813 /// ... 814 /// store i32 1, i32* %A 815 /// ret void 816 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, 817 MemoryDependenceResults *MD, 818 const TargetLibraryInfo *TLI, 819 InstOverlapIntervalsTy &IOL, 820 MapVector<Instruction *, bool> &ThrowableInst) { 821 bool MadeChange = false; 822 823 // Keep track of all of the stack objects that are dead at the end of the 824 // function. 825 SmallSetVector<const Value*, 16> DeadStackObjects; 826 827 // Find all of the alloca'd pointers in the entry block. 828 BasicBlock &Entry = BB.getParent()->front(); 829 for (Instruction &I : Entry) { 830 if (isa<AllocaInst>(&I)) 831 DeadStackObjects.insert(&I); 832 833 // Okay, so these are dead heap objects, but if the pointer never escapes 834 // then it's leaked by this function anyways. 835 else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) 836 DeadStackObjects.insert(&I); 837 } 838 839 // Treat byval or inalloca arguments the same, stores to them are dead at the 840 // end of the function. 841 for (Argument &AI : BB.getParent()->args()) 842 if (AI.hasPassPointeeByValueAttr()) 843 DeadStackObjects.insert(&AI); 844 845 const DataLayout &DL = BB.getModule()->getDataLayout(); 846 847 // Scan the basic block backwards 848 for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ 849 --BBI; 850 851 // If we find a store, check to see if it points into a dead stack value. 852 if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { 853 // See through pointer-to-pointer bitcasts 854 SmallVector<const Value *, 4> Pointers; 855 GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); 856 857 // Stores to stack values are valid candidates for removal. 858 bool AllDead = true; 859 for (const Value *Pointer : Pointers) 860 if (!DeadStackObjects.count(Pointer)) { 861 AllDead = false; 862 break; 863 } 864 865 if (AllDead) { 866 Instruction *Dead = &*BBI; 867 868 LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " 869 << *Dead << "\n Objects: "; 870 for (SmallVectorImpl<const Value *>::iterator I = 871 Pointers.begin(), 872 E = Pointers.end(); 873 I != E; ++I) { 874 dbgs() << **I; 875 if (std::next(I) != E) 876 dbgs() << ", "; 877 } dbgs() 878 << '\n'); 879 880 // DCE instructions only used to calculate that store. 881 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, ThrowableInst, 882 &DeadStackObjects); 883 ++NumFastStores; 884 MadeChange = true; 885 continue; 886 } 887 } 888 889 // Remove any dead non-memory-mutating instructions. 890 if (isInstructionTriviallyDead(&*BBI, TLI)) { 891 LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " 892 << *&*BBI << '\n'); 893 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, ThrowableInst, 894 &DeadStackObjects); 895 ++NumFastOther; 896 MadeChange = true; 897 continue; 898 } 899 900 if (isa<AllocaInst>(BBI)) { 901 // Remove allocas from the list of dead stack objects; there can't be 902 // any references before the definition. 903 DeadStackObjects.remove(&*BBI); 904 continue; 905 } 906 907 if (auto *Call = dyn_cast<CallBase>(&*BBI)) { 908 // Remove allocation function calls from the list of dead stack objects; 909 // there can't be any references before the definition. 910 if (isAllocLikeFn(&*BBI, TLI)) 911 DeadStackObjects.remove(&*BBI); 912 913 // If this call does not access memory, it can't be loading any of our 914 // pointers. 915 if (AA->doesNotAccessMemory(Call)) 916 continue; 917 918 // If the call might load from any of our allocas, then any store above 919 // the call is live. 920 DeadStackObjects.remove_if([&](const Value *I) { 921 // See if the call site touches the value. 922 return isRefSet(AA->getModRefInfo( 923 Call, I, getPointerSize(I, DL, *TLI, BB.getParent()))); 924 }); 925 926 // If all of the allocas were clobbered by the call then we're not going 927 // to find anything else to process. 928 if (DeadStackObjects.empty()) 929 break; 930 931 continue; 932 } 933 934 // We can remove the dead stores, irrespective of the fence and its ordering 935 // (release/acquire/seq_cst). Fences only constraints the ordering of 936 // already visible stores, it does not make a store visible to other 937 // threads. So, skipping over a fence does not change a store from being 938 // dead. 939 if (isa<FenceInst>(*BBI)) 940 continue; 941 942 MemoryLocation LoadedLoc; 943 944 // If we encounter a use of the pointer, it is no longer considered dead 945 if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { 946 if (!L->isUnordered()) // Be conservative with atomic/volatile load 947 break; 948 LoadedLoc = MemoryLocation::get(L); 949 } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { 950 LoadedLoc = MemoryLocation::get(V); 951 } else if (!BBI->mayReadFromMemory()) { 952 // Instruction doesn't read memory. Note that stores that weren't removed 953 // above will hit this case. 954 continue; 955 } else { 956 // Unknown inst; assume it clobbers everything. 957 break; 958 } 959 960 // Remove any allocas from the DeadPointer set that are loaded, as this 961 // makes any stores above the access live. 962 removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI, BB.getParent()); 963 964 // If all of the allocas were clobbered by the access then we're not going 965 // to find anything else to process. 966 if (DeadStackObjects.empty()) 967 break; 968 } 969 970 return MadeChange; 971 } 972 973 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, 974 int64_t &EarlierSize, int64_t LaterOffset, 975 int64_t LaterSize, bool IsOverwriteEnd) { 976 // TODO: base this on the target vector size so that if the earlier 977 // store was too small to get vector writes anyway then its likely 978 // a good idea to shorten it 979 // Power of 2 vector writes are probably always a bad idea to optimize 980 // as any store/memset/memcpy is likely using vector instructions so 981 // shortening it to not vector size is likely to be slower 982 auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite); 983 unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment(); 984 if (!IsOverwriteEnd) 985 LaterOffset = int64_t(LaterOffset + LaterSize); 986 987 if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) && 988 !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0)) 989 return false; 990 991 int64_t NewLength = IsOverwriteEnd 992 ? LaterOffset - EarlierOffset 993 : EarlierSize - (LaterOffset - EarlierOffset); 994 995 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) { 996 // When shortening an atomic memory intrinsic, the newly shortened 997 // length must remain an integer multiple of the element size. 998 const uint32_t ElementSize = AMI->getElementSizeInBytes(); 999 if (0 != NewLength % ElementSize) 1000 return false; 1001 } 1002 1003 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " 1004 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " 1005 << *EarlierWrite << "\n KILLER (offset " << LaterOffset 1006 << ", " << EarlierSize << ")\n"); 1007 1008 Value *EarlierWriteLength = EarlierIntrinsic->getLength(); 1009 Value *TrimmedLength = 1010 ConstantInt::get(EarlierWriteLength->getType(), NewLength); 1011 EarlierIntrinsic->setLength(TrimmedLength); 1012 1013 EarlierSize = NewLength; 1014 if (!IsOverwriteEnd) { 1015 int64_t OffsetMoved = (LaterOffset - EarlierOffset); 1016 Value *Indices[1] = { 1017 ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)}; 1018 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( 1019 EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(), 1020 EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite); 1021 NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc()); 1022 EarlierIntrinsic->setDest(NewDestGEP); 1023 EarlierOffset = EarlierOffset + OffsetMoved; 1024 } 1025 return true; 1026 } 1027 1028 static bool tryToShortenEnd(Instruction *EarlierWrite, 1029 OverlapIntervalsTy &IntervalMap, 1030 int64_t &EarlierStart, int64_t &EarlierSize) { 1031 if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) 1032 return false; 1033 1034 OverlapIntervalsTy::iterator OII = --IntervalMap.end(); 1035 int64_t LaterStart = OII->second; 1036 int64_t LaterSize = OII->first - LaterStart; 1037 1038 if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize && 1039 LaterStart + LaterSize >= EarlierStart + EarlierSize) { 1040 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1041 LaterSize, true)) { 1042 IntervalMap.erase(OII); 1043 return true; 1044 } 1045 } 1046 return false; 1047 } 1048 1049 static bool tryToShortenBegin(Instruction *EarlierWrite, 1050 OverlapIntervalsTy &IntervalMap, 1051 int64_t &EarlierStart, int64_t &EarlierSize) { 1052 if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) 1053 return false; 1054 1055 OverlapIntervalsTy::iterator OII = IntervalMap.begin(); 1056 int64_t LaterStart = OII->second; 1057 int64_t LaterSize = OII->first - LaterStart; 1058 1059 if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) { 1060 assert(LaterStart + LaterSize < EarlierStart + EarlierSize && 1061 "Should have been handled as OW_Complete"); 1062 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1063 LaterSize, false)) { 1064 IntervalMap.erase(OII); 1065 return true; 1066 } 1067 } 1068 return false; 1069 } 1070 1071 static bool removePartiallyOverlappedStores(AliasAnalysis *AA, 1072 const DataLayout &DL, 1073 InstOverlapIntervalsTy &IOL) { 1074 bool Changed = false; 1075 for (auto OI : IOL) { 1076 Instruction *EarlierWrite = OI.first; 1077 MemoryLocation Loc = getLocForWrite(EarlierWrite); 1078 assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); 1079 1080 const Value *Ptr = Loc.Ptr->stripPointerCasts(); 1081 int64_t EarlierStart = 0; 1082 int64_t EarlierSize = int64_t(Loc.Size.getValue()); 1083 GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); 1084 OverlapIntervalsTy &IntervalMap = OI.second; 1085 Changed |= 1086 tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1087 if (IntervalMap.empty()) 1088 continue; 1089 Changed |= 1090 tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1091 } 1092 return Changed; 1093 } 1094 1095 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, 1096 AliasAnalysis *AA, MemoryDependenceResults *MD, 1097 const DataLayout &DL, 1098 const TargetLibraryInfo *TLI, 1099 InstOverlapIntervalsTy &IOL, 1100 MapVector<Instruction *, bool> &ThrowableInst, 1101 DominatorTree *DT) { 1102 // Must be a store instruction. 1103 StoreInst *SI = dyn_cast<StoreInst>(Inst); 1104 if (!SI) 1105 return false; 1106 1107 // If we're storing the same value back to a pointer that we just loaded from, 1108 // then the store can be removed. 1109 if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { 1110 if (SI->getPointerOperand() == DepLoad->getPointerOperand() && 1111 isRemovable(SI) && 1112 memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) { 1113 1114 LLVM_DEBUG( 1115 dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " 1116 << *DepLoad << "\n STORE: " << *SI << '\n'); 1117 1118 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1119 ++NumRedundantStores; 1120 return true; 1121 } 1122 } 1123 1124 // Remove null stores into the calloc'ed objects 1125 Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); 1126 if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { 1127 Instruction *UnderlyingPointer = 1128 dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL)); 1129 1130 if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && 1131 memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) { 1132 LLVM_DEBUG( 1133 dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " 1134 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); 1135 1136 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1137 ++NumRedundantStores; 1138 return true; 1139 } 1140 } 1141 return false; 1142 } 1143 1144 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, 1145 MemoryDependenceResults *MD, DominatorTree *DT, 1146 const TargetLibraryInfo *TLI) { 1147 const DataLayout &DL = BB.getModule()->getDataLayout(); 1148 bool MadeChange = false; 1149 1150 MapVector<Instruction *, bool> ThrowableInst; 1151 1152 // A map of interval maps representing partially-overwritten value parts. 1153 InstOverlapIntervalsTy IOL; 1154 1155 // Do a top-down walk on the BB. 1156 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { 1157 // Handle 'free' calls specially. 1158 if (CallInst *F = isFreeCall(&*BBI, TLI)) { 1159 MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, ThrowableInst); 1160 // Increment BBI after handleFree has potentially deleted instructions. 1161 // This ensures we maintain a valid iterator. 1162 ++BBI; 1163 continue; 1164 } 1165 1166 Instruction *Inst = &*BBI++; 1167 1168 if (Inst->mayThrow()) { 1169 ThrowableInst[Inst] = true; 1170 continue; 1171 } 1172 1173 // Check to see if Inst writes to memory. If not, continue. 1174 if (!hasAnalyzableMemoryWrite(Inst, *TLI)) 1175 continue; 1176 1177 // eliminateNoopStore will update in iterator, if necessary. 1178 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, 1179 ThrowableInst, DT)) { 1180 MadeChange = true; 1181 continue; 1182 } 1183 1184 // If we find something that writes memory, get its memory dependence. 1185 MemDepResult InstDep = MD->getDependency(Inst); 1186 1187 // Ignore any store where we can't find a local dependence. 1188 // FIXME: cross-block DSE would be fun. :) 1189 if (!InstDep.isDef() && !InstDep.isClobber()) 1190 continue; 1191 1192 // Figure out what location is being stored to. 1193 MemoryLocation Loc = getLocForWrite(Inst); 1194 1195 // If we didn't get a useful location, fail. 1196 if (!Loc.Ptr) 1197 continue; 1198 1199 // Loop until we find a store we can eliminate or a load that 1200 // invalidates the analysis. Without an upper bound on the number of 1201 // instructions examined, this analysis can become very time-consuming. 1202 // However, the potential gain diminishes as we process more instructions 1203 // without eliminating any of them. Therefore, we limit the number of 1204 // instructions we look at. 1205 auto Limit = MD->getDefaultBlockScanLimit(); 1206 while (InstDep.isDef() || InstDep.isClobber()) { 1207 // Get the memory clobbered by the instruction we depend on. MemDep will 1208 // skip any instructions that 'Loc' clearly doesn't interact with. If we 1209 // end up depending on a may- or must-aliased load, then we can't optimize 1210 // away the store and we bail out. However, if we depend on something 1211 // that overwrites the memory location we *can* potentially optimize it. 1212 // 1213 // Find out what memory location the dependent instruction stores. 1214 Instruction *DepWrite = InstDep.getInst(); 1215 if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) 1216 break; 1217 MemoryLocation DepLoc = getLocForWrite(DepWrite); 1218 // If we didn't get a useful location, or if it isn't a size, bail out. 1219 if (!DepLoc.Ptr) 1220 break; 1221 1222 // Find the last throwable instruction not removed by call to 1223 // deleteDeadInstruction. 1224 Instruction *LastThrowing = nullptr; 1225 if (!ThrowableInst.empty()) 1226 LastThrowing = ThrowableInst.back().first; 1227 1228 // Make sure we don't look past a call which might throw. This is an 1229 // issue because MemoryDependenceAnalysis works in the wrong direction: 1230 // it finds instructions which dominate the current instruction, rather than 1231 // instructions which are post-dominated by the current instruction. 1232 // 1233 // If the underlying object is a non-escaping memory allocation, any store 1234 // to it is dead along the unwind edge. Otherwise, we need to preserve 1235 // the store. 1236 if (LastThrowing && DepWrite->comesBefore(LastThrowing)) { 1237 const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL); 1238 bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying); 1239 if (!IsStoreDeadOnUnwind) { 1240 // We're looking for a call to an allocation function 1241 // where the allocation doesn't escape before the last 1242 // throwing instruction; PointerMayBeCaptured 1243 // reasonably fast approximation. 1244 IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) && 1245 !PointerMayBeCaptured(Underlying, false, true); 1246 } 1247 if (!IsStoreDeadOnUnwind) 1248 break; 1249 } 1250 1251 // If we find a write that is a) removable (i.e., non-volatile), b) is 1252 // completely obliterated by the store to 'Loc', and c) which we know that 1253 // 'Inst' doesn't load from, then we can remove it. 1254 // Also try to merge two stores if a later one only touches memory written 1255 // to by the earlier one. 1256 if (isRemovable(DepWrite) && 1257 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { 1258 int64_t InstWriteOffset, DepWriteOffset; 1259 OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, 1260 InstWriteOffset, DepWrite, IOL, *AA, 1261 BB.getParent()); 1262 if (OR == OW_Complete) { 1263 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite 1264 << "\n KILLER: " << *Inst << '\n'); 1265 1266 // Delete the store and now-dead instructions that feed it. 1267 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1268 ThrowableInst); 1269 ++NumFastStores; 1270 MadeChange = true; 1271 1272 // We erased DepWrite; start over. 1273 InstDep = MD->getDependency(Inst); 1274 continue; 1275 } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) || 1276 ((OR == OW_Begin && 1277 isShortenableAtTheBeginning(DepWrite)))) { 1278 assert(!EnablePartialOverwriteTracking && "Do not expect to perform " 1279 "when partial-overwrite " 1280 "tracking is enabled"); 1281 // The overwrite result is known, so these must be known, too. 1282 int64_t EarlierSize = DepLoc.Size.getValue(); 1283 int64_t LaterSize = Loc.Size.getValue(); 1284 bool IsOverwriteEnd = (OR == OW_End); 1285 MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, 1286 InstWriteOffset, LaterSize, IsOverwriteEnd); 1287 } else if (EnablePartialStoreMerging && 1288 OR == OW_PartialEarlierWithFullLater) { 1289 auto *Earlier = dyn_cast<StoreInst>(DepWrite); 1290 auto *Later = dyn_cast<StoreInst>(Inst); 1291 if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && 1292 DL.typeSizeEqualsStoreSize( 1293 Earlier->getValueOperand()->getType()) && 1294 Later && isa<ConstantInt>(Later->getValueOperand()) && 1295 DL.typeSizeEqualsStoreSize( 1296 Later->getValueOperand()->getType()) && 1297 memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) { 1298 // If the store we find is: 1299 // a) partially overwritten by the store to 'Loc' 1300 // b) the later store is fully contained in the earlier one and 1301 // c) they both have a constant value 1302 // d) none of the two stores need padding 1303 // Merge the two stores, replacing the earlier store's value with a 1304 // merge of both values. 1305 // TODO: Deal with other constant types (vectors, etc), and probably 1306 // some mem intrinsics (if needed) 1307 1308 APInt EarlierValue = 1309 cast<ConstantInt>(Earlier->getValueOperand())->getValue(); 1310 APInt LaterValue = 1311 cast<ConstantInt>(Later->getValueOperand())->getValue(); 1312 unsigned LaterBits = LaterValue.getBitWidth(); 1313 assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); 1314 LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); 1315 1316 // Offset of the smaller store inside the larger store 1317 unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; 1318 unsigned LShiftAmount = 1319 DL.isBigEndian() 1320 ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits 1321 : BitOffsetDiff; 1322 APInt Mask = 1323 APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, 1324 LShiftAmount + LaterBits); 1325 // Clear the bits we'll be replacing, then OR with the smaller 1326 // store, shifted appropriately. 1327 APInt Merged = 1328 (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); 1329 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *DepWrite 1330 << "\n Later: " << *Inst 1331 << "\n Merged Value: " << Merged << '\n'); 1332 1333 auto *SI = new StoreInst( 1334 ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), 1335 Earlier->getPointerOperand(), false, Earlier->getAlign(), 1336 Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); 1337 1338 unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, 1339 LLVMContext::MD_alias_scope, 1340 LLVMContext::MD_noalias, 1341 LLVMContext::MD_nontemporal}; 1342 SI->copyMetadata(*DepWrite, MDToKeep); 1343 ++NumModifiedStores; 1344 1345 // Delete the old stores and now-dead instructions that feed them. 1346 deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, 1347 ThrowableInst); 1348 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1349 ThrowableInst); 1350 MadeChange = true; 1351 1352 // We erased DepWrite and Inst (Loc); start over. 1353 break; 1354 } 1355 } 1356 } 1357 1358 // If this is a may-aliased store that is clobbering the store value, we 1359 // can keep searching past it for another must-aliased pointer that stores 1360 // to the same location. For example, in: 1361 // store -> P 1362 // store -> Q 1363 // store -> P 1364 // we can remove the first store to P even though we don't know if P and Q 1365 // alias. 1366 if (DepWrite == &BB.front()) break; 1367 1368 // Can't look past this instruction if it might read 'Loc'. 1369 if (isRefSet(AA->getModRefInfo(DepWrite, Loc))) 1370 break; 1371 1372 InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false, 1373 DepWrite->getIterator(), &BB, 1374 /*QueryInst=*/ nullptr, &Limit); 1375 } 1376 } 1377 1378 if (EnablePartialOverwriteTracking) 1379 MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL); 1380 1381 // If this block ends in a return, unwind, or unreachable, all allocas are 1382 // dead at its end, which means stores to them are also dead. 1383 if (BB.getTerminator()->getNumSuccessors() == 0) 1384 MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, ThrowableInst); 1385 1386 return MadeChange; 1387 } 1388 1389 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, 1390 MemoryDependenceResults *MD, DominatorTree *DT, 1391 const TargetLibraryInfo *TLI) { 1392 bool MadeChange = false; 1393 for (BasicBlock &BB : F) 1394 // Only check non-dead blocks. Dead blocks may have strange pointer 1395 // cycles that will confuse alias analysis. 1396 if (DT->isReachableFromEntry(&BB)) 1397 MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI); 1398 1399 return MadeChange; 1400 } 1401 1402 namespace { 1403 //============================================================================= 1404 // MemorySSA backed dead store elimination. 1405 // 1406 // The code below implements dead store elimination using MemorySSA. It uses 1407 // the following general approach: given a MemoryDef, walk upwards to find 1408 // clobbering MemoryDefs that may be killed by the starting def. Then check 1409 // that there are no uses that may read the location of the original MemoryDef 1410 // in between both MemoryDefs. A bit more concretely: 1411 // 1412 // For all MemoryDefs StartDef: 1413 // 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking 1414 // upwards. 1415 // 2. Check that there are no reads between DomAccess and the StartDef by 1416 // checking all uses starting at DomAccess and walking until we see StartDef. 1417 // 3. For each found DomDef, check that: 1418 // 1. There are no barrier instructions between DomDef and StartDef (like 1419 // throws or stores with ordering constraints). 1420 // 2. StartDef is executed whenever DomDef is executed. 1421 // 3. StartDef completely overwrites DomDef. 1422 // 4. Erase DomDef from the function and MemorySSA. 1423 1424 // Returns true if \p M is an intrisnic that does not read or write memory. 1425 bool isNoopIntrinsic(MemoryUseOrDef *M) { 1426 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) { 1427 switch (II->getIntrinsicID()) { 1428 case Intrinsic::lifetime_start: 1429 case Intrinsic::lifetime_end: 1430 case Intrinsic::invariant_end: 1431 case Intrinsic::launder_invariant_group: 1432 case Intrinsic::assume: 1433 return true; 1434 case Intrinsic::dbg_addr: 1435 case Intrinsic::dbg_declare: 1436 case Intrinsic::dbg_label: 1437 case Intrinsic::dbg_value: 1438 llvm_unreachable("Intrinsic should not be modeled in MemorySSA"); 1439 default: 1440 return false; 1441 } 1442 } 1443 return false; 1444 } 1445 1446 // Check if we can ignore \p D for DSE. 1447 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { 1448 Instruction *DI = D->getMemoryInst(); 1449 // Calls that only access inaccessible memory cannot read or write any memory 1450 // locations we consider for elimination. 1451 if (auto *CB = dyn_cast<CallBase>(DI)) 1452 if (CB->onlyAccessesInaccessibleMemory()) 1453 return true; 1454 1455 // We can eliminate stores to locations not visible to the caller across 1456 // throwing instructions. 1457 if (DI->mayThrow() && !DefVisibleToCaller) 1458 return true; 1459 1460 // We can remove the dead stores, irrespective of the fence and its ordering 1461 // (release/acquire/seq_cst). Fences only constraints the ordering of 1462 // already visible stores, it does not make a store visible to other 1463 // threads. So, skipping over a fence does not change a store from being 1464 // dead. 1465 if (isa<FenceInst>(DI)) 1466 return true; 1467 1468 // Skip intrinsics that do not really read or modify memory. 1469 if (isNoopIntrinsic(D)) 1470 return true; 1471 1472 return false; 1473 } 1474 1475 struct DSEState { 1476 Function &F; 1477 AliasAnalysis &AA; 1478 MemorySSA &MSSA; 1479 DominatorTree &DT; 1480 PostDominatorTree &PDT; 1481 const TargetLibraryInfo &TLI; 1482 1483 // All MemoryDefs that potentially could kill other MemDefs. 1484 SmallVector<MemoryDef *, 64> MemDefs; 1485 // Any that should be skipped as they are already deleted 1486 SmallPtrSet<MemoryAccess *, 4> SkipStores; 1487 // Keep track of all of the objects that are invisible to the caller before 1488 // the function returns. 1489 SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet; 1490 // Keep track of all of the objects that are invisible to the caller after 1491 // the function returns. 1492 SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet; 1493 // Keep track of blocks with throwing instructions not modeled in MemorySSA. 1494 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks; 1495 // Post-order numbers for each basic block. Used to figure out if memory 1496 // accesses are executed before another access. 1497 DenseMap<BasicBlock *, unsigned> PostOrderNumbers; 1498 1499 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per 1500 /// basic block. 1501 DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs; 1502 1503 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, 1504 PostDominatorTree &PDT, const TargetLibraryInfo &TLI) 1505 : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} 1506 1507 static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, 1508 DominatorTree &DT, PostDominatorTree &PDT, 1509 const TargetLibraryInfo &TLI) { 1510 DSEState State(F, AA, MSSA, DT, PDT, TLI); 1511 // Collect blocks with throwing instructions not modeled in MemorySSA and 1512 // alloc-like objects. 1513 unsigned PO = 0; 1514 for (BasicBlock *BB : post_order(&F)) { 1515 State.PostOrderNumbers[BB] = PO++; 1516 for (Instruction &I : *BB) { 1517 MemoryAccess *MA = MSSA.getMemoryAccess(&I); 1518 if (I.mayThrow() && !MA) 1519 State.ThrowingBlocks.insert(I.getParent()); 1520 1521 auto *MD = dyn_cast_or_null<MemoryDef>(MA); 1522 if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && 1523 hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) 1524 State.MemDefs.push_back(MD); 1525 1526 // Track whether alloca and alloca-like objects are visible in the 1527 // caller before and after the function returns. Alloca objects are 1528 // invalid in the caller, so they are neither visible before or after 1529 // the function returns. 1530 if (isa<AllocaInst>(&I)) { 1531 State.InvisibleToCallerBeforeRet.insert(&I); 1532 State.InvisibleToCallerAfterRet.insert(&I); 1533 } 1534 1535 // For alloca-like objects we need to check if they are captured before 1536 // the function returns and if the return might capture the object. 1537 if (isAllocLikeFn(&I, &TLI)) { 1538 bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true); 1539 if (!CapturesBeforeRet) { 1540 State.InvisibleToCallerBeforeRet.insert(&I); 1541 if (!PointerMayBeCaptured(&I, true, false)) 1542 State.InvisibleToCallerAfterRet.insert(&I); 1543 } 1544 } 1545 } 1546 } 1547 1548 // Treat byval or inalloca arguments the same as Allocas, stores to them are 1549 // dead at the end of the function. 1550 for (Argument &AI : F.args()) 1551 if (AI.hasPassPointeeByValueAttr()) 1552 State.InvisibleToCallerBeforeRet.insert(&AI); 1553 return State; 1554 } 1555 1556 Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const { 1557 if (!I->mayWriteToMemory()) 1558 return None; 1559 1560 if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I)) 1561 return {MemoryLocation::getForDest(MTI)}; 1562 1563 if (auto *CB = dyn_cast<CallBase>(I)) { 1564 if (Function *F = CB->getCalledFunction()) { 1565 StringRef FnName = F->getName(); 1566 if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) 1567 return {MemoryLocation(CB->getArgOperand(0))}; 1568 if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) 1569 return {MemoryLocation(CB->getArgOperand(0))}; 1570 if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) 1571 return {MemoryLocation(CB->getArgOperand(0))}; 1572 if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) 1573 return {MemoryLocation(CB->getArgOperand(0))}; 1574 } 1575 return None; 1576 } 1577 1578 return MemoryLocation::getOrNone(I); 1579 } 1580 1581 /// Returns true if \p Use completely overwrites \p DefLoc. 1582 bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const { 1583 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a 1584 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a 1585 // MemoryDef. 1586 if (!UseInst->mayWriteToMemory()) 1587 return false; 1588 1589 if (auto *CB = dyn_cast<CallBase>(UseInst)) 1590 if (CB->onlyAccessesInaccessibleMemory()) 1591 return false; 1592 1593 ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); 1594 // If necessary, perform additional analysis. 1595 if (isModSet(MR) && isa<CallBase>(UseInst)) 1596 MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); 1597 1598 Optional<MemoryLocation> UseLoc = getLocForWriteEx(UseInst); 1599 return isModSet(MR) && isMustSet(MR) && 1600 (UseLoc->Size.hasValue() && DefLoc.Size.hasValue() && 1601 UseLoc->Size.getValue() >= DefLoc.Size.getValue()); 1602 } 1603 1604 /// Returns true if \p Use may read from \p DefLoc. 1605 bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { 1606 if (!UseInst->mayReadFromMemory()) 1607 return false; 1608 1609 if (auto *CB = dyn_cast<CallBase>(UseInst)) 1610 if (CB->onlyAccessesInaccessibleMemory()) 1611 return false; 1612 1613 ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); 1614 // If necessary, perform additional analysis. 1615 if (isRefSet(MR)) 1616 MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); 1617 return isRefSet(MR); 1618 } 1619 1620 // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no 1621 // read access between them or on any other path to a function exit block if 1622 // \p DefLoc is not accessible after the function returns. If there is no such 1623 // MemoryDef, return None. The returned value may not (completely) overwrite 1624 // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse 1625 // (read). 1626 Optional<MemoryAccess *> 1627 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, 1628 MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet, 1629 bool DefVisibleToCallerAfterRet, int &ScanLimit) const { 1630 MemoryAccess *DomAccess; 1631 bool StepAgain; 1632 LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current 1633 << "\n"); 1634 // Find the next clobbering Mod access for DefLoc, starting at Current. 1635 do { 1636 StepAgain = false; 1637 // Reached TOP. 1638 if (MSSA.isLiveOnEntryDef(Current)) 1639 return None; 1640 1641 if (isa<MemoryPhi>(Current)) { 1642 DomAccess = Current; 1643 break; 1644 } 1645 MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current); 1646 // Look for access that clobber DefLoc. 1647 DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD, 1648 DefLoc); 1649 if (MSSA.isLiveOnEntryDef(DomAccess)) 1650 return None; 1651 1652 if (isa<MemoryPhi>(DomAccess)) 1653 break; 1654 1655 // Check if we can skip DomDef for DSE. For accesses to objects that are 1656 // accessible after the function returns, KillingDef must execute whenever 1657 // DomDef executes and use post-dominance to ensure that. 1658 MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess); 1659 if ((DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) || 1660 (DefVisibleToCallerAfterRet && 1661 !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock()))) { 1662 StepAgain = true; 1663 Current = DomDef->getDefiningAccess(); 1664 } 1665 1666 } while (StepAgain); 1667 1668 LLVM_DEBUG({ 1669 dbgs() << " Checking for reads of " << *DomAccess; 1670 if (isa<MemoryDef>(DomAccess)) 1671 dbgs() << " (" << *cast<MemoryDef>(DomAccess)->getMemoryInst() << ")\n"; 1672 else 1673 dbgs() << ")\n"; 1674 }); 1675 1676 SmallSetVector<MemoryAccess *, 32> WorkList; 1677 auto PushMemUses = [&WorkList](MemoryAccess *Acc) { 1678 for (Use &U : Acc->uses()) 1679 WorkList.insert(cast<MemoryAccess>(U.getUser())); 1680 }; 1681 PushMemUses(DomAccess); 1682 1683 // Check if DomDef may be read. 1684 for (unsigned I = 0; I < WorkList.size(); I++) { 1685 MemoryAccess *UseAccess = WorkList[I]; 1686 1687 LLVM_DEBUG(dbgs() << " " << *UseAccess); 1688 if (--ScanLimit == 0) { 1689 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); 1690 return None; 1691 } 1692 1693 if (isa<MemoryPhi>(UseAccess)) { 1694 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n"); 1695 PushMemUses(UseAccess); 1696 continue; 1697 } 1698 1699 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst(); 1700 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); 1701 1702 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) { 1703 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n"); 1704 PushMemUses(UseAccess); 1705 continue; 1706 } 1707 1708 // Uses which may read the original MemoryDef mean we cannot eliminate the 1709 // original MD. Stop walk. 1710 if (isReadClobber(DefLoc, UseInst)) { 1711 LLVM_DEBUG(dbgs() << " ... found read clobber\n"); 1712 return None; 1713 } 1714 1715 // For the KillingDef and DomAccess we only have to check if it reads the 1716 // memory location. 1717 // TODO: It would probably be better to check for self-reads before 1718 // calling the function. 1719 if (KillingDef == UseAccess || DomAccess == UseAccess) { 1720 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); 1721 continue; 1722 } 1723 1724 // Check all uses for MemoryDefs, except for defs completely overwriting 1725 // the original location. Otherwise we have to check uses of *all* 1726 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might 1727 // miss cases like the following 1728 // 1 = Def(LoE) ; <----- DomDef stores [0,1] 1729 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] 1730 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. 1731 // (The Use points to the *first* Def it may alias) 1732 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, 1733 // stores [0,1] 1734 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) { 1735 if (!isCompleteOverwrite(DefLoc, UseInst)) 1736 PushMemUses(UseDef); 1737 } 1738 } 1739 1740 // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. 1741 return {DomAccess}; 1742 } 1743 1744 // Delete dead memory defs 1745 void deleteDeadInstruction(Instruction *SI) { 1746 MemorySSAUpdater Updater(&MSSA); 1747 SmallVector<Instruction *, 32> NowDeadInsts; 1748 NowDeadInsts.push_back(SI); 1749 --NumFastOther; 1750 1751 while (!NowDeadInsts.empty()) { 1752 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 1753 ++NumFastOther; 1754 1755 // Try to preserve debug information attached to the dead instruction. 1756 salvageDebugInfo(*DeadInst); 1757 salvageKnowledge(DeadInst); 1758 1759 // Remove the Instruction from MSSA. 1760 if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { 1761 if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { 1762 SkipStores.insert(MD); 1763 } 1764 Updater.removeMemoryAccess(MA); 1765 } 1766 1767 auto I = IOLs.find(DeadInst->getParent()); 1768 if (I != IOLs.end()) 1769 I->second.erase(DeadInst); 1770 // Remove its operands 1771 for (Use &O : DeadInst->operands()) 1772 if (Instruction *OpI = dyn_cast<Instruction>(O)) { 1773 O = nullptr; 1774 if (isInstructionTriviallyDead(OpI, &TLI)) 1775 NowDeadInsts.push_back(OpI); 1776 } 1777 1778 DeadInst->eraseFromParent(); 1779 } 1780 } 1781 1782 // Check for any extra throws between SI and NI that block DSE. This only 1783 // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may 1784 // throw are handled during the walk from one def to the next. 1785 bool mayThrowBetween(Instruction *SI, Instruction *NI, 1786 const Value *SILocUnd) const { 1787 // First see if we can ignore it by using the fact that SI is an 1788 // alloca/alloca like object that is not visible to the caller during 1789 // execution of the function. 1790 if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd)) 1791 return false; 1792 1793 if (SI->getParent() == NI->getParent()) 1794 return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end(); 1795 return !ThrowingBlocks.empty(); 1796 } 1797 1798 // Check if \p NI acts as a DSE barrier for \p SI. The following instructions 1799 // act as barriers: 1800 // * A memory instruction that may throw and \p SI accesses a non-stack 1801 // object. 1802 // * Atomic stores stronger that monotonic. 1803 bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc, 1804 const Value *SILocUnd, Instruction *NI, 1805 MemoryLocation &NILoc) const { 1806 // If NI may throw it acts as a barrier, unless we are to an alloca/alloca 1807 // like object that does not escape. 1808 if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd)) 1809 return true; 1810 1811 if (NI->isAtomic()) { 1812 if (auto *NSI = dyn_cast<StoreInst>(NI)) { 1813 if (isStrongerThanMonotonic(NSI->getOrdering())) 1814 return true; 1815 } else 1816 llvm_unreachable( 1817 "Other instructions should be modeled/skipped in MemorySSA"); 1818 } 1819 1820 return false; 1821 } 1822 }; 1823 1824 bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, 1825 MemorySSA &MSSA, DominatorTree &DT, 1826 PostDominatorTree &PDT, 1827 const TargetLibraryInfo &TLI) { 1828 const DataLayout &DL = F.getParent()->getDataLayout(); 1829 bool MadeChange = false; 1830 1831 DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); 1832 // For each store: 1833 for (unsigned I = 0; I < State.MemDefs.size(); I++) { 1834 MemoryDef *KillingDef = State.MemDefs[I]; 1835 if (State.SkipStores.count(KillingDef)) 1836 continue; 1837 Instruction *SI = KillingDef->getMemoryInst(); 1838 auto MaybeSILoc = State.getLocForWriteEx(SI); 1839 if (!MaybeSILoc) { 1840 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " 1841 << *SI << "\n"); 1842 continue; 1843 } 1844 MemoryLocation SILoc = *MaybeSILoc; 1845 assert(SILoc.Ptr && "SILoc should not be null"); 1846 const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); 1847 Instruction *DefObj = 1848 const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd)); 1849 bool DefVisibleToCallerBeforeRet = 1850 !State.InvisibleToCallerBeforeRet.count(SILocUnd); 1851 bool DefVisibleToCallerAfterRet = 1852 !State.InvisibleToCallerAfterRet.count(SILocUnd); 1853 if (DefObj && isAllocLikeFn(DefObj, &TLI)) { 1854 if (DefVisibleToCallerBeforeRet) 1855 DefVisibleToCallerBeforeRet = 1856 PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT); 1857 } 1858 1859 MemoryAccess *Current = KillingDef; 1860 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " 1861 << *KillingDef << " (" << *SI << ")\n"); 1862 1863 int ScanLimit = MemorySSAScanLimit; 1864 // Worklist of MemoryAccesses that may be killed by KillingDef. 1865 SetVector<MemoryAccess *> ToCheck; 1866 ToCheck.insert(KillingDef->getDefiningAccess()); 1867 1868 // Check if MemoryAccesses in the worklist are killed by KillingDef. 1869 for (unsigned I = 0; I < ToCheck.size(); I++) { 1870 Current = ToCheck[I]; 1871 if (State.SkipStores.count(Current)) 1872 continue; 1873 1874 Optional<MemoryAccess *> Next = State.getDomMemoryDef( 1875 KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet, 1876 DefVisibleToCallerAfterRet, ScanLimit); 1877 1878 if (!Next) { 1879 LLVM_DEBUG(dbgs() << " finished walk\n"); 1880 continue; 1881 } 1882 1883 MemoryAccess *DomAccess = *Next; 1884 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess); 1885 if (isa<MemoryPhi>(DomAccess)) { 1886 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); 1887 for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) { 1888 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V); 1889 BasicBlock *IncomingBlock = IncomingAccess->getBlock(); 1890 BasicBlock *PhiBlock = DomAccess->getBlock(); 1891 1892 // We only consider incoming MemoryAccesses that come before the 1893 // MemoryPhi. Otherwise we could discover candidates that do not 1894 // strictly dominate our starting def. 1895 if (State.PostOrderNumbers[IncomingBlock] > 1896 State.PostOrderNumbers[PhiBlock]) 1897 ToCheck.insert(IncomingAccess); 1898 } 1899 continue; 1900 } 1901 MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess); 1902 Instruction *NI = NextDef->getMemoryInst(); 1903 LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); 1904 1905 if (!hasAnalyzableMemoryWrite(NI, TLI)) { 1906 LLVM_DEBUG(dbgs() << " ... skip, cannot analyze def\n"); 1907 continue; 1908 } 1909 1910 if (!isRemovable(NI)) { 1911 LLVM_DEBUG(dbgs() << " ... skip, cannot remove def\n"); 1912 continue; 1913 } 1914 1915 MemoryLocation NILoc = *State.getLocForWriteEx(NI); 1916 // Check for anything that looks like it will be a barrier to further 1917 // removal 1918 if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) { 1919 LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); 1920 continue; 1921 } 1922 1923 // Before we try to remove anything, check for any extra throwing 1924 // instructions that block us from DSEing 1925 if (State.mayThrowBetween(SI, NI, SILocUnd)) { 1926 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); 1927 break; 1928 } 1929 1930 if (!DebugCounter::shouldExecute(MemorySSACounter)) 1931 continue; 1932 1933 // Check if NI overwrites SI. 1934 int64_t InstWriteOffset, DepWriteOffset; 1935 auto Iter = State.IOLs.insert( 1936 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( 1937 NI->getParent(), InstOverlapIntervalsTy())); 1938 auto &IOL = Iter.first->second; 1939 OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, 1940 InstWriteOffset, NI, IOL, AA, &F); 1941 1942 ToCheck.insert(NextDef->getDefiningAccess()); 1943 if (OR == OW_Complete) { 1944 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI 1945 << "\n KILLER: " << *SI << '\n'); 1946 State.deleteDeadInstruction(NI); 1947 ++NumFastStores; 1948 MadeChange = true; 1949 } 1950 } 1951 } 1952 1953 if (EnablePartialOverwriteTracking) 1954 for (auto &KV : State.IOLs) 1955 MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second); 1956 1957 return MadeChange; 1958 } 1959 } // end anonymous namespace 1960 1961 //===----------------------------------------------------------------------===// 1962 // DSE Pass 1963 //===----------------------------------------------------------------------===// 1964 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { 1965 AliasAnalysis &AA = AM.getResult<AAManager>(F); 1966 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); 1967 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1968 1969 bool Changed = false; 1970 if (EnableMemorySSA) { 1971 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); 1972 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 1973 1974 Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 1975 } else { 1976 MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F); 1977 1978 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 1979 } 1980 1981 #ifdef LLVM_ENABLE_STATS 1982 if (AreStatisticsEnabled()) 1983 for (auto &I : instructions(F)) 1984 NumRemainingStores += isa<StoreInst>(&I); 1985 #endif 1986 1987 if (!Changed) 1988 return PreservedAnalyses::all(); 1989 1990 PreservedAnalyses PA; 1991 PA.preserveSet<CFGAnalyses>(); 1992 PA.preserve<GlobalsAA>(); 1993 if (EnableMemorySSA) 1994 PA.preserve<MemorySSAAnalysis>(); 1995 else 1996 PA.preserve<MemoryDependenceAnalysis>(); 1997 return PA; 1998 } 1999 2000 namespace { 2001 2002 /// A legacy pass for the legacy pass manager that wraps \c DSEPass. 2003 class DSELegacyPass : public FunctionPass { 2004 public: 2005 static char ID; // Pass identification, replacement for typeid 2006 2007 DSELegacyPass() : FunctionPass(ID) { 2008 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); 2009 } 2010 2011 bool runOnFunction(Function &F) override { 2012 if (skipFunction(F)) 2013 return false; 2014 2015 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 2016 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2017 const TargetLibraryInfo &TLI = 2018 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2019 2020 bool Changed = false; 2021 if (EnableMemorySSA) { 2022 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 2023 PostDominatorTree &PDT = 2024 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 2025 2026 eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 2027 } else { 2028 MemoryDependenceResults &MD = 2029 getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 2030 2031 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 2032 } 2033 2034 #ifdef LLVM_ENABLE_STATS 2035 if (AreStatisticsEnabled()) 2036 for (auto &I : instructions(F)) 2037 NumRemainingStores += isa<StoreInst>(&I); 2038 #endif 2039 2040 return Changed; 2041 } 2042 2043 void getAnalysisUsage(AnalysisUsage &AU) const override { 2044 AU.setPreservesCFG(); 2045 AU.addRequired<AAResultsWrapperPass>(); 2046 AU.addRequired<TargetLibraryInfoWrapperPass>(); 2047 AU.addPreserved<GlobalsAAWrapperPass>(); 2048 AU.addRequired<DominatorTreeWrapperPass>(); 2049 AU.addPreserved<DominatorTreeWrapperPass>(); 2050 2051 if (EnableMemorySSA) { 2052 AU.addRequired<PostDominatorTreeWrapperPass>(); 2053 AU.addRequired<MemorySSAWrapperPass>(); 2054 AU.addPreserved<PostDominatorTreeWrapperPass>(); 2055 AU.addPreserved<MemorySSAWrapperPass>(); 2056 } else { 2057 AU.addRequired<MemoryDependenceWrapperPass>(); 2058 AU.addPreserved<MemoryDependenceWrapperPass>(); 2059 } 2060 } 2061 }; 2062 2063 } // end anonymous namespace 2064 2065 char DSELegacyPass::ID = 0; 2066 2067 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, 2068 false) 2069 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2070 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 2071 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 2072 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 2073 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 2074 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 2075 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 2076 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, 2077 false) 2078 2079 FunctionPass *llvm::createDeadStoreEliminationPass() { 2080 return new DSELegacyPass(); 2081 } 2082