1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a trivial dead store elimination that only considers 10 // basic-block local redundant stores. 11 // 12 // FIXME: This should eventually be extended to be a post-dominator tree 13 // traversal. Doing so would be pretty trivial. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/MapVector.h" 21 #include "llvm/ADT/PostOrderIterator.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/CaptureTracking.h" 29 #include "llvm/Analysis/GlobalsModRef.h" 30 #include "llvm/Analysis/MemoryBuiltins.h" 31 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 32 #include "llvm/Analysis/MemoryLocation.h" 33 #include "llvm/Analysis/MemorySSA.h" 34 #include "llvm/Analysis/MemorySSAUpdater.h" 35 #include "llvm/Analysis/PostDominators.h" 36 #include "llvm/Analysis/TargetLibraryInfo.h" 37 #include "llvm/Analysis/ValueTracking.h" 38 #include "llvm/IR/Argument.h" 39 #include "llvm/IR/BasicBlock.h" 40 #include "llvm/IR/Constant.h" 41 #include "llvm/IR/Constants.h" 42 #include "llvm/IR/DataLayout.h" 43 #include "llvm/IR/Dominators.h" 44 #include "llvm/IR/Function.h" 45 #include "llvm/IR/InstIterator.h" 46 #include "llvm/IR/InstrTypes.h" 47 #include "llvm/IR/Instruction.h" 48 #include 
"llvm/IR/Instructions.h" 49 #include "llvm/IR/IntrinsicInst.h" 50 #include "llvm/IR/Intrinsics.h" 51 #include "llvm/IR/LLVMContext.h" 52 #include "llvm/IR/Module.h" 53 #include "llvm/IR/PassManager.h" 54 #include "llvm/IR/PatternMatch.h" 55 #include "llvm/IR/Value.h" 56 #include "llvm/InitializePasses.h" 57 #include "llvm/Pass.h" 58 #include "llvm/Support/Casting.h" 59 #include "llvm/Support/CommandLine.h" 60 #include "llvm/Support/Debug.h" 61 #include "llvm/Support/DebugCounter.h" 62 #include "llvm/Support/ErrorHandling.h" 63 #include "llvm/Support/MathExtras.h" 64 #include "llvm/Support/raw_ostream.h" 65 #include "llvm/Transforms/Scalar.h" 66 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 67 #include "llvm/Transforms/Utils/Local.h" 68 #include <algorithm> 69 #include <cassert> 70 #include <cstddef> 71 #include <cstdint> 72 #include <iterator> 73 #include <map> 74 #include <utility> 75 76 using namespace llvm; 77 using namespace PatternMatch; 78 79 #define DEBUG_TYPE "dse" 80 81 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE"); 82 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 83 STATISTIC(NumFastStores, "Number of stores deleted"); 84 STATISTIC(NumFastOther, "Number of other instrs removed"); 85 STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); 86 STATISTIC(NumModifiedStores, "Number of stores modified"); 87 STATISTIC(NumNoopStores, "Number of noop stores deleted"); 88 STATISTIC(NumCFGChecks, "Number of stores modified"); 89 STATISTIC(NumCFGTries, "Number of stores modified"); 90 STATISTIC(NumCFGSuccess, "Number of stores modified"); 91 92 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", 93 "Controls which MemoryDefs are eliminated."); 94 95 static cl::opt<bool> 96 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", 97 cl::init(true), cl::Hidden, 98 cl::desc("Enable partial-overwrite tracking in DSE")); 99 100 static cl::opt<bool> 101 
EnablePartialStoreMerging("enable-dse-partial-store-merging", 102 cl::init(true), cl::Hidden, 103 cl::desc("Enable partial store merging in DSE")); 104 105 static cl::opt<bool> 106 EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, 107 cl::desc("Use the new MemorySSA-backed DSE.")); 108 109 static cl::opt<unsigned> 110 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, 111 cl::desc("The number of memory instructions to scan for " 112 "dead store elimination (default = 100)")); 113 114 static cl::opt<unsigned> MemorySSADefsPerBlockLimit( 115 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, 116 cl::desc("The number of MemoryDefs we consider as candidates to eliminated " 117 "other stores per basic block (default = 5000)")); 118 119 static cl::opt<unsigned> MemorySSAPathCheckLimit( 120 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, 121 cl::desc("The maximum number of blocks to check when trying to prove that " 122 "all paths to an exit go through a killing block (default = 50)")); 123 124 //===----------------------------------------------------------------------===// 125 // Helper functions 126 //===----------------------------------------------------------------------===// 127 using OverlapIntervalsTy = std::map<int64_t, int64_t>; 128 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; 129 130 /// Delete this instruction. Before we do, go through and zero out all the 131 /// operands of this instruction. If any of them become dead, delete them and 132 /// the computation tree that feeds them. 133 /// If ValueSet is non-null, remove any deleted instructions from it as well. 
static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
                      MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
                      InstOverlapIntervalsTy &IOL,
                      MapVector<Instruction *, bool> &ThrowableInst,
                      SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
  // Worklist of instructions that are dead and must be erased; seeded with I,
  // grown with operands whose last use disappears during the deletion.
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);
  // Pre-decrement so the ++NumFastOther executed for I itself on the first
  // loop iteration cancels out: only the *extra* instructions DCE'd here are
  // counted; I is accounted for separately by the caller's statistics.
  --NumFastOther;

  // Keeping the iterator straight is a pain, so we let this routine tell the
  // caller what the next instruction is after we're done mucking about.
  BasicBlock::iterator NewIter = *BBI;

  // Before we touch this instruction, remove it from memdep!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();
    // Mark the DeadInst as dead in the list of throwable instructions.
    auto It = ThrowableInst.find(DeadInst);
    if (It != ThrowableInst.end())
      ThrowableInst[It->first] = false;
    ++NumFastOther;

    // Try to preserve debug information attached to the dead instruction.
    salvageDebugInfo(*DeadInst);
    salvageKnowledge(DeadInst);

    // This instruction is dead, zap it, in stages.  Start by removing it from
    // MemDep, which needs to know the operands and needs it to be in the
    // function.
    MD.removeInstruction(DeadInst);

    // Null out operands one by one; an operand whose last use this was may
    // itself have become trivially dead and joins the worklist.
    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, nullptr);

      // If this operand just became dead, add it to the NowDeadInsts list.
      if (!Op->use_empty()) continue;

      if (Instruction *OpI = dyn_cast<Instruction>(Op))
        if (isInstructionTriviallyDead(OpI, &TLI))
          NowDeadInsts.push_back(OpI);
    }

    // Drop side-table bookkeeping for the instruction being erased.
    if (ValueSet) ValueSet->remove(DeadInst);
    IOL.erase(DeadInst);

    // If the caller's iterator pointed at the instruction being erased,
    // advance it past the erasure so the caller can keep iterating safely.
    if (NewIter == DeadInst->getIterator())
      NewIter = DeadInst->eraseFromParent();
    else
      DeadInst->eraseFromParent();
  } while (!NowDeadInsts.empty());
  *BBI = NewIter;
  // Pop dead entries from back of ThrowableInst till we find an alive entry.
  while (!ThrowableInst.empty() && !ThrowableInst.back().second)
    ThrowableInst.pop_back();
}

/// Does this instruction write some memory?  This only returns true for things
/// that we can analyze with other helpers below.
static bool hasAnalyzableMemoryWrite(Instruction *I,
                                     const TargetLibraryInfo &TLI) {
  if (isa<StoreInst>(I))
    return true;
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
    case Intrinsic::memcpy_element_unordered_atomic:
    case Intrinsic::memmove_element_unordered_atomic:
    case Intrinsic::memset_element_unordered_atomic:
    case Intrinsic::init_trampoline:
    case Intrinsic::lifetime_end:
      return true;
    }
  }
  if (auto *CB = dyn_cast<CallBase>(I)) {
    LibFunc LF;
    // Recognized string libcalls write through their first argument; only
    // trust the classification if TLI says the libcall is actually available.
    if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
      switch (LF) {
      case LibFunc_strcpy:
      case LibFunc_strncpy:
      case LibFunc_strcat:
      case LibFunc_strncat:
        return true;
      default:
        return false;
      }
    }
  }
  return false;
}

/// Return a Location stored to by the specified instruction. If isRemovable
/// returns true, this function and getLocForRead completely describe the memory
/// operations for this instruction.
static MemoryLocation getLocForWrite(Instruction *Inst) {

  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return MemoryLocation::get(SI);

  if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
    // memcpy/memmove/memset.
    MemoryLocation Loc = MemoryLocation::getForDest(MI);
    return Loc;
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    default:
      return MemoryLocation(); // Unhandled intrinsic.
    case Intrinsic::init_trampoline:
      return MemoryLocation(II->getArgOperand(0));
    case Intrinsic::lifetime_end: {
      // lifetime_end(len, ptr): the "written" location is the ptr operand
      // with the constant length from operand 0.
      uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
      return MemoryLocation(II->getArgOperand(1), Len);
    }
    }
  }
  if (auto *CB = dyn_cast<CallBase>(Inst))
    // All the supported TLI functions so far happen to have dest as their
    // first argument.
    return MemoryLocation(CB->getArgOperand(0));
  return MemoryLocation();
}

/// Return the location read by the specified "hasAnalyzableMemoryWrite"
/// instruction if any.
static MemoryLocation getLocForRead(Instruction *Inst,
                                    const TargetLibraryInfo &TLI) {
  assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case");

  // The only instructions that both read and write are the mem transfer
  // instructions (memcpy/memmove).
  if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst))
    return MemoryLocation::getForSource(MTI);
  return MemoryLocation();
}

/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
  // Don't remove volatile/atomic stores.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->isUnordered();

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate");
    case Intrinsic::lifetime_end:
      // Never remove dead lifetime_end's, e.g. because it is followed by a
      // free.
      return false;
    case Intrinsic::init_trampoline:
      // Always safe to remove init_trampoline.
      return true;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      // Don't remove volatile memory intrinsics.
      return !cast<MemIntrinsic>(II)->isVolatile();
    case Intrinsic::memcpy_element_unordered_atomic:
    case Intrinsic::memmove_element_unordered_atomic:
    case Intrinsic::memset_element_unordered_atomic:
      return true;
    }
  }

  // note: only get here for calls with analyzable writes - i.e. libcalls
  if (auto *CB = dyn_cast<CallBase>(I))
    return CB->use_empty();

  return false;
}

/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
  // Don't shorten stores for now
  if (isa<StoreInst>(I))
    return false;

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: return false;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memcpy_element_unordered_atomic:
    case Intrinsic::memset_element_unordered_atomic:
      // Do shorten memory intrinsics.
      // FIXME: Add memmove if it's also safe to transform.
      return true;
    }
  }

  // Don't shorten libcalls calls for now.

  return false;
}

/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  // easily done by offsetting the source address.
  return isa<AnyMemSetInst>(I);
}

/// Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
  //TODO: factor this to reuse getLocForWrite
  MemoryLocation Loc = getLocForWrite(I);
  assert(Loc.Ptr &&
         "unable to find pointer written for analyzable instruction?");
  // TODO: most APIs don't expect const Value *
  return const_cast<Value*>(Loc.Ptr);
}

/// Return the allocation size of the object V points to, or UnknownSize if it
/// cannot be determined.
static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
                               const Function *F) {
  uint64_t Size;
  ObjectSizeOpts Opts;
  // In functions where null is a valid address, a null base does not imply an
  // unknown (zero) object size.
  Opts.NullIsUnknownSize = NullPointerIsDefined(F);

  if (getObjectSize(V, Size, DL, &TLI, Opts))
    return Size;
  return MemoryLocation::UnknownSize;
}

namespace {

enum OverwriteResult {
  OW_Begin,
  OW_Complete,
  OW_End,
  OW_PartialEarlierWithFullLater,
  OW_Unknown
};

} // end anonymous namespace

/// Return 'OW_Complete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
/// beginning of the 'Earlier' location is overwritten by 'Later'.
/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
/// overwritten by a latter (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
                                   const MemoryLocation &Earlier,
                                   const DataLayout &DL,
                                   const TargetLibraryInfo &TLI,
                                   int64_t &EarlierOff, int64_t &LaterOff,
                                   Instruction *DepWrite,
                                   InstOverlapIntervalsTy &IOL,
                                   AliasAnalysis &AA,
                                   const Function *F) {
  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
  // get imprecise values here, though (except for unknown sizes).
  if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise())
    return OW_Unknown;

  const uint64_t LaterSize = Later.Size.getValue();
  const uint64_t EarlierSize = Earlier.Size.getValue();

  const Value *P1 = Earlier.Ptr->stripPointerCasts();
  const Value *P2 = Later.Ptr->stripPointerCasts();

  // If the start pointers are the same, we just have to compare sizes to see if
  // the later store was larger than the earlier store.
  if (P1 == P2 || AA.isMustAlias(P1, P2)) {
    // Make sure that the Later size is >= the Earlier size.
    if (LaterSize >= EarlierSize)
      return OW_Complete;
  }

  // Check to see if the later store is to the entire object (either a global,
  // an alloca, or a byval/inalloca argument).  If so, then it clearly
  // overwrites any other store to the same object.
  const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (UO1 != UO2)
    return OW_Unknown;

  // If the "Later" store is to a recognizable object, get its size.
  uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
  if (ObjectSize != MemoryLocation::UnknownSize)
    if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
      return OW_Complete;

  // Okay, we have stores to two completely different pointers.  Try to
  // decompose the pointer into a "base + constant_offset" form.  If the base
  // pointers are equal, then we can reason about the two stores.
  EarlierOff = 0;
  LaterOff = 0;
  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);

  // If the base pointers still differ, we have two completely different stores.
  if (BP1 != BP2)
    return OW_Unknown;

  // The later store completely overlaps the earlier store if:
  //
  // 1. Both start at the same offset and the later one's size is greater than
  //    or equal to the earlier one's, or
  //
  //      |--earlier--|
  //      |--   later   --|
  //
  // 2. The earlier store has an offset greater than the later offset, but which
  //    still lies completely within the later store.
  //
  //        |--earlier--|
  //    |-----  later  ------|
  //
  // We have to be careful here as *Off is signed while *.Size is unsigned.
  if (EarlierOff >= LaterOff &&
      LaterSize >= EarlierSize &&
      uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
    return OW_Complete;

  // We may now overlap, although the overlap is not complete. There might also
  // be other incomplete overlaps, and together, they might cover the complete
  // earlier write.
  // Note: The correctness of this logic depends on the fact that this function
  // is not even called providing DepWrite when there are any intervening reads.
  if (EnablePartialOverwriteTracking &&
      LaterOff < int64_t(EarlierOff + EarlierSize) &&
      int64_t(LaterOff + LaterSize) >= EarlierOff) {

    // Insert our part of the overlap into the map.
    auto &IM = IOL[DepWrite];
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
                      << ", " << int64_t(EarlierOff + EarlierSize)
                      << ") Later [" << LaterOff << ", "
                      << int64_t(LaterOff + LaterSize) << ")\n");

    // Make sure that we only insert non-overlapping intervals and combine
    // adjacent intervals. The intervals are stored in the map with the ending
    // offset as the key (in the half-open sense) and the starting offset as
    // the value.
    int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;

    // Find any intervals ending at, or after, LaterIntStart which start
    // before LaterIntEnd.
    auto ILI = IM.lower_bound(LaterIntStart);
    if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
      // This existing interval is overlapped with the current store somewhere
      // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
      // intervals and adjusting our start and end.
      LaterIntStart = std::min(LaterIntStart, ILI->second);
      LaterIntEnd = std::max(LaterIntEnd, ILI->first);
      ILI = IM.erase(ILI);

      // Continue erasing and adjusting our end in case other previous
      // intervals are also overlapped with the current store.
      //
      // |--- ealier 1 ---|  |--- ealier 2 ---|
      //     |------- later---------|
      //
      while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
        assert(ILI->second > LaterIntStart && "Unexpected interval");
        LaterIntEnd = std::max(LaterIntEnd, ILI->first);
        ILI = IM.erase(ILI);
      }
    }

    IM[LaterIntEnd] = LaterIntStart;

    // The first interval is the one with the smallest end offset; if it now
    // spans the whole earlier store, the earlier store is completely covered
    // by the accumulated partial overwrites.
    ILI = IM.begin();
    if (ILI->second <= EarlierOff &&
        ILI->first >= int64_t(EarlierOff + EarlierSize)) {
      LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
                        << EarlierOff << ", "
                        << int64_t(EarlierOff + EarlierSize)
                        << ") Composite Later [" << ILI->second << ", "
                        << ILI->first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // Check for an earlier store which writes to all the memory locations that
  // the later store writes to.
  if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
      int64_t(EarlierOff + EarlierSize) > LaterOff &&
      uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
                      << EarlierOff << ", "
                      << int64_t(EarlierOff + EarlierSize)
                      << ") by a later store [" << LaterOff << ", "
                      << int64_t(LaterOff + LaterSize) << ")\n");
    // TODO: Maybe come up with a better name?
    return OW_PartialEarlierWithFullLater;
  }

  // Another interesting case is if the later store overwrites the end of the
  // earlier store.
  //
  //      |--earlier--|
  //                |--   later   --|
  //
  // In this case we may want to trim the size of earlier to avoid generating
  // writes to addresses which will definitely be overwritten later
  if (!EnablePartialOverwriteTracking &&
      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
       int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
    return OW_End;

  // Finally, we also need to check if the later store overwrites the beginning
  // of the earlier store.
  //
  //                |--earlier--|
  //      |--   later   --|
  //
  // In this case we may want to move the destination address and trim the size
  // of earlier to avoid generating writes to addresses which will definitely
  // be overwritten later.
  if (!EnablePartialOverwriteTracking &&
      (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
    assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
           "Expect to be handled as OW_Complete");
    return OW_Begin;
  }
  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}

/// If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense.  Consider this case:
///
///   memmove(A <- B)
///   memmove(A <- A)
///
/// In this case, the second store to A does not make the first store to A dead.
/// The usual situation isn't an explicit A<-A store like this (which can be
/// trivially removed) but a case where two pointers may alias.
///
/// This function detects when it is unsafe to remove a dependent instruction
/// because the DSE inducing instruction may be a self-read.
583 static bool isPossibleSelfRead(Instruction *Inst, 584 const MemoryLocation &InstStoreLoc, 585 Instruction *DepWrite, 586 const TargetLibraryInfo &TLI, 587 AliasAnalysis &AA) { 588 // Self reads can only happen for instructions that read memory. Get the 589 // location read. 590 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 591 if (!InstReadLoc.Ptr) 592 return false; // Not a reading instruction. 593 594 // If the read and written loc obviously don't alias, it isn't a read. 595 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) 596 return false; 597 598 if (isa<AnyMemCpyInst>(Inst)) { 599 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) 600 // but in practice memcpy(A <- B) either means that A and B are disjoint or 601 // are equal (i.e. there are not partial overlaps). Given that, if we have: 602 // 603 // memcpy/memmove(A <- B) // DepWrite 604 // memcpy(A <- B) // Inst 605 // 606 // with Inst reading/writing a >= size than DepWrite, we can reason as 607 // follows: 608 // 609 // - If A == B then both the copies are no-ops, so the DepWrite can be 610 // removed. 611 // - If A != B then A and B are disjoint locations in Inst. Since 612 // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. 613 // Therefore DepWrite can be removed. 614 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 615 616 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 617 return false; 618 } 619 620 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 621 // then it can't be considered dead. 622 return true; 623 } 624 625 /// Returns true if the memory which is accessed by the second instruction is not 626 /// modified between the first and the second instruction. 627 /// Precondition: Second instruction must be dominated by the first 628 /// instruction. 
static bool memoryIsNotModifiedBetween(Instruction *FirstI,
                                       Instruction *SecondI,
                                       AliasAnalysis *AA,
                                       const DataLayout &DL,
                                       DominatorTree *DT) {
  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
  // instructions which can modify the memory location accessed by SecondI.
  //
  // While doing the walk keep track of the address to check. It might be
  // different in different basic blocks due to PHI translation.
  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
  SmallVector<BlockAddressPair, 16> WorkList;
  // Keep track of the address we visited each block with. Bail out if we
  // visit a block with different addresses.
  DenseMap<BasicBlock *, Value *> Visited;

  // FirstBBI points just *after* FirstI: in FirstI's block we only need to
  // inspect the instructions between FirstI and the block's end.
  BasicBlock::iterator FirstBBI(FirstI);
  ++FirstBBI;
  BasicBlock::iterator SecondBBI(SecondI);
  BasicBlock *FirstBB = FirstI->getParent();
  BasicBlock *SecondBB = SecondI->getParent();
  MemoryLocation MemLoc = MemoryLocation::get(SecondI);
  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);

  // Start checking the SecondBB.
  WorkList.push_back(
      std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
  bool isFirstBlock = true;

  // Check all blocks going backward until we reach the FirstBB.
  while (!WorkList.empty()) {
    BlockAddressPair Current = WorkList.pop_back_val();
    BasicBlock *B = Current.first;
    PHITransAddr &Addr = Current.second;
    Value *Ptr = Addr.getAddr();

    // Ignore instructions before FirstI if this is the FirstBB.
    BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());

    BasicBlock::iterator EI;
    if (isFirstBlock) {
      // Ignore instructions after SecondI if this is the first visit of SecondBB.
      assert(B == SecondBB && "first block is not the store block");
      EI = SecondBBI;
      isFirstBlock = false;
    } else {
      // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
      // In this case we also have to look at instructions after SecondI.
      EI = B->end();
    }
    // Scan the chosen instruction range: any mod of the (PHI-translated)
    // location means memory IS modified in between.
    for (; BI != EI; ++BI) {
      Instruction *I = &*BI;
      if (I->mayWriteToMemory() && I != SecondI)
        if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
          return false;
    }
    if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
          "Should not hit the entry block because SI must be dominated by LI");
      // Queue all predecessors, PHI-translating the address into each one.
      for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
        PHITransAddr PredAddr = Addr;
        if (PredAddr.NeedsPHITranslationFromBlock(B)) {
          if (!PredAddr.IsPotentiallyPHITranslatable())
            return false;
          if (PredAddr.PHITranslateValue(B, *PredI, DT, false))
            return false;
        }
        Value *TranslatedPtr = PredAddr.getAddr();
        auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr));
        if (!Inserted.second) {
          // We already visited this block before. If it was with a different
          // address - bail out!
          if (TranslatedPtr != Inserted.first->second)
            return false;
          // ... otherwise just skip it.
          continue;
        }
        WorkList.push_back(std::make_pair(*PredI, PredAddr));
      }
    }
  }
  // No path from FirstI to SecondI modifies the location.
  return true;
}

/// Find all blocks that will unconditionally lead to the block BB and append
/// them to F.
static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
                                   BasicBlock *BB, DominatorTree *DT) {
  for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
    BasicBlock *Pred = *I;
    // Ignore self-loops.
    if (Pred == BB) continue;
    // Only predecessors whose sole successor is BB lead to it unconditionally.
    Instruction *PredTI = Pred->getTerminator();
    if (PredTI->getNumSuccessors() != 1)
      continue;

    if (DT->isReachableFromEntry(Pred))
      Blocks.push_back(Pred);
  }
}

/// Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
static bool handleFree(CallInst *F, AliasAnalysis *AA,
                       MemoryDependenceResults *MD, DominatorTree *DT,
                       const TargetLibraryInfo *TLI,
                       InstOverlapIntervalsTy &IOL,
                       MapVector<Instruction *, bool> &ThrowableInst) {
  bool MadeChange = false;

  MemoryLocation Loc = MemoryLocation(F->getOperand(0));
  // Worklist of blocks to scan, starting at the free itself and extended
  // backwards through unconditional predecessors.
  SmallVector<BasicBlock *, 16> Blocks;
  Blocks.push_back(F->getParent());

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.pop_back_val();
    // In the free's own block scan only up to the free; elsewhere scan the
    // whole block (from its terminator backwards).
    Instruction *InstPt = BB->getTerminator();
    if (BB == F->getParent()) InstPt = F;

    MemDepResult Dep =
        MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB);
    while (Dep.isDef() || Dep.isClobber()) {
      Instruction *Dependency = Dep.getInst();
      if (!hasAnalyzableMemoryWrite(Dependency, *TLI) ||
          !isRemovable(Dependency))
        break;

      Value *DepPointer =
          getUnderlyingObject(getStoredPointerOperand(Dependency));

      // Check for aliasing.
      // Only stores provably into the freed object itself are dead.
      if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
        break;

      LLVM_DEBUG(
          dbgs() << "DSE: Dead Store to soon to be freed memory:\n  DEAD: "
                 << *Dependency << '\n');

      // DCE instructions only used to calculate that store.
      BasicBlock::iterator BBI(Dependency);
      deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL,
                            ThrowableInst);
      ++NumFastStores;
      MadeChange = true;

      // Inst's old Dependency is now deleted. Compute the next dependency,
      // which may also be dead, as in
      //    s[0] = 0;
      //    s[1] = 0; // This has just been deleted.
      //    free(s);
      Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB);
    }

    // If the dependency is outside this block, keep walking backwards through
    // blocks that unconditionally reach this one.
    if (Dep.isNonLocal())
      findUnconditionalPreds(Blocks, BB, DT);
  }

  return MadeChange;
}

/// Check to see if the specified location may alias any of the stack objects in
/// the DeadStackObjects set. If so, they become live because the location is
/// being loaded.
static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
                                  SmallSetVector<const Value *, 16> &DeadStackObjects,
                                  const DataLayout &DL, AliasAnalysis *AA,
                                  const TargetLibraryInfo *TLI,
                                  const Function *F) {
  const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr);

  // A constant can't be in the dead pointer set.
  if (isa<Constant>(UnderlyingPointer))
    return;

  // If the kill pointer can be easily reduced to an alloca, don't bother doing
  // extraneous AA queries; only that exact object can be made live.
  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
    DeadStackObjects.remove(UnderlyingPointer);
    return;
  }

  // Remove objects that could alias LoadedLoc.
  DeadStackObjects.remove_if([&](const Value *I) {
    // See if the loaded location could alias the stack location.
    MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F));
    return !AA->isNoAlias(StackLoc, LoadedLoc);
  });
}

/// Remove dead stores to stack-allocated locations in the function end block.
/// Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
///
/// Scans \p BB (a block with no successors) backwards, maintaining the set of
/// stack objects that are provably never read again; removable stores into
/// such objects are deleted. Returns true if anything was removed.
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
                           MemoryDependenceResults *MD,
                           const TargetLibraryInfo *TLI,
                           InstOverlapIntervalsTy &IOL,
                           MapVector<Instruction *, bool> &ThrowableInst) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallSetVector<const Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock &Entry = BB.getParent()->front();
  for (Instruction &I : Entry) {
    if (isa<AllocaInst>(&I))
      DeadStackObjects.insert(&I);

    // Okay, so these are dead heap objects, but if the pointer never escapes
    // then it's leaked by this function anyways.
    else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
      DeadStackObjects.insert(&I);
  }

  // Treat byval or inalloca arguments the same, stores to them are dead at the
  // end of the function.
  for (Argument &AI : BB.getParent()->args())
    if (AI.hasPassPointeeByValueCopyAttr())
      DeadStackObjects.insert(&AI);

  const DataLayout &DL = BB.getModule()->getDataLayout();

  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
      // See through pointer-to-pointer bitcasts
      SmallVector<const Value *, 4> Pointers;
      getUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers);

      // Stores to stack values are valid candidates for removal.
      // Every possible underlying object must be in the dead set.
      bool AllDead = true;
      for (const Value *Pointer : Pointers)
        if (!DeadStackObjects.count(Pointer)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        Instruction *Dead = &*BBI;

        LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                          << *Dead << "\n  Objects: ";
                   for (SmallVectorImpl<const Value *>::iterator I =
                            Pointers.begin(),
                            E = Pointers.end();
                        I != E; ++I) {
                     dbgs() << **I;
                     if (std::next(I) != E)
                       dbgs() << ", ";
                   } dbgs()
                   << '\n');

        // DCE instructions only used to calculate that store.
        deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, ThrowableInst,
                              &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(&*BBI, TLI)) {
      LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n  DEAD: "
                        << *&*BBI << '\n');
      deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, ThrowableInst,
                            &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (isa<AllocaInst>(BBI)) {
      // Remove allocas from the list of dead stack objects; there can't be
      // any references before the definition.
      DeadStackObjects.remove(&*BBI);
      continue;
    }

    if (auto *Call = dyn_cast<CallBase>(&*BBI)) {
      // Remove allocation function calls from the list of dead stack objects;
      // there can't be any references before the definition.
      if (isAllocLikeFn(&*BBI, TLI))
        DeadStackObjects.remove(&*BBI);

      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(Call))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      DeadStackObjects.remove_if([&](const Value *I) {
        // See if the call site touches the value.
        return isRefSet(AA->getModRefInfo(
            Call, I, getPointerSize(I, DL, *TLI, BB.getParent())));
      });

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        break;

      continue;
    }

    // We can remove the dead stores, irrespective of the fence and its ordering
    // (release/acquire/seq_cst). Fences only constrain the ordering of
    // already visible stores, it does not make a store visible to other
    // threads. So, skipping over a fence does not change a store from being
    // dead.
    if (isa<FenceInst>(*BBI))
      continue;

    MemoryLocation LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load
        break;
      LoadedLoc = MemoryLocation::get(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = MemoryLocation::get(V);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't removed
      // above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI, BB.getParent());

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}

/// Try to shorten the earlier (partially overwritten) mem intrinsic
/// \p EarlierWrite so that it no longer writes the bytes covered by the later
/// store described by [LaterOffset, LaterOffset + LaterSize).
/// \p IsOverwriteEnd selects whether the overwritten region is at the end or
/// the beginning of the earlier write. Updates \p EarlierOffset /
/// \p EarlierSize in place on success and returns true.
static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
                         int64_t &EarlierSize, int64_t LaterOffset,
                         int64_t LaterSize, bool IsOverwriteEnd) {
  // TODO: base this on the target vector size so that if the earlier
  // store was too small to get vector writes anyway then its likely
  // a good idea to shorten it
  // Power of 2 vector writes are probably always a bad idea to optimize
  // as any store/memset/memcpy is likely using vector instructions so
  // shortening it to not vector size is likely to be slower
  auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
  unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment();
  // When trimming from the front, the new start of the earlier write is the
  // end of the later write.
  if (!IsOverwriteEnd)
    LaterOffset = int64_t(LaterOffset + LaterSize);

  // Only shorten if the resulting split point keeps the destination
  // reasonably aligned.
  if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
      !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
    return false;

  int64_t NewLength = IsOverwriteEnd
                          ? LaterOffset - EarlierOffset
                          : EarlierSize - (LaterOffset - EarlierOffset);

  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
    // When shortening an atomic memory intrinsic, the newly shortened
    // length must remain an integer multiple of the element size.
    const uint32_t ElementSize = AMI->getElementSizeInBytes();
    if (0 != NewLength % ElementSize)
      return false;
  }

  // NOTE(review): this prints EarlierSize after the KILLER offset; LaterSize
  // might have been intended — confirm against upstream before changing.
  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                    << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
                    << *EarlierWrite << "\n  KILLER (offset " << LaterOffset
                    << ", " << EarlierSize << ")\n");

  Value *EarlierWriteLength = EarlierIntrinsic->getLength();
  Value *TrimmedLength =
      ConstantInt::get(EarlierWriteLength->getType(), NewLength);
  EarlierIntrinsic->setLength(TrimmedLength);

  EarlierSize = NewLength;
  if (!IsOverwriteEnd) {
    // Trimming from the front: advance the destination pointer past the
    // bytes now covered by the later store.
    int64_t OffsetMoved = (LaterOffset - EarlierOffset);
    Value *Indices[1] = {
        ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
    GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
        EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(),
        EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
    NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
    EarlierIntrinsic->setDest(NewDestGEP);
    EarlierOffset = EarlierOffset + OffsetMoved;
  }
  return true;
}

/// Try to trim the tail of \p EarlierWrite using the highest interval in
/// \p IntervalMap (the overlap region closest to the end of the write).
static bool tryToShortenEnd(Instruction *EarlierWrite,
                            OverlapIntervalsTy &IntervalMap,
                            int64_t &EarlierStart, int64_t &EarlierSize) {
  if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
    return false;

  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
  int64_t LaterStart = OII->second;
  int64_t LaterSize = OII->first - LaterStart;

  // The later interval must start inside the earlier write and cover it
  // through to (at least) its end.
  if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
      LaterStart + LaterSize >= EarlierStart + EarlierSize) {
    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
                     LaterSize, true)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}

/// Try to trim the head of \p EarlierWrite using the lowest interval in
/// \p IntervalMap (the overlap region closest to the start of the write).
static bool tryToShortenBegin(Instruction *EarlierWrite,
                              OverlapIntervalsTy &IntervalMap,
                              int64_t &EarlierStart, int64_t &EarlierSize) {
  if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
    return false;

  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  int64_t LaterStart = OII->second;
  int64_t LaterSize = OII->first - LaterStart;

  // The later interval must cover the start of the earlier write.
  if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
    assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
           "Should have been handled as OW_Complete");
    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
                     LaterSize, false)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}

/// For each tracked write in \p IOL, try to shorten it at the end and then at
/// the beginning using the recorded overlap intervals. Returns true if any
/// write was shortened.
static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
                                            const DataLayout &DL,
                                            InstOverlapIntervalsTy &IOL) {
  bool Changed = false;
  for (auto OI : IOL) {
    Instruction *EarlierWrite = OI.first;
    MemoryLocation Loc = getLocForWrite(EarlierWrite);
    assert(isRemovable(EarlierWrite) && "Expect only removable instruction");

    const Value *Ptr = Loc.Ptr->stripPointerCasts();
    int64_t EarlierStart = 0;
    int64_t EarlierSize = int64_t(Loc.Size.getValue());
    GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
    OverlapIntervalsTy &IntervalMap = OI.second;
    Changed |=
        tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
    if (IntervalMap.empty())
      continue;
    Changed |=
        tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
  }
  return Changed;
}

/// Delete \p Inst if it is a no-op store: either it stores back a value just
/// loaded from the same unmodified pointer, or it stores zero into memory
/// freshly returned by a calloc-like allocation. Returns true on deletion;
/// \p BBI is updated as needed to stay valid.
static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
                               AliasAnalysis *AA, MemoryDependenceResults *MD,
                               const DataLayout &DL,
                               const TargetLibraryInfo *TLI,
                               InstOverlapIntervalsTy &IOL,
                               MapVector<Instruction *, bool> &ThrowableInst,
                               DominatorTree *DT) {
  // Must be a store instruction.
  StoreInst *SI = dyn_cast<StoreInst>(Inst);
  if (!SI)
    return false;

  // If we're storing the same value back to a pointer that we just loaded from,
  // then the store can be removed.
  if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
    if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
        isRemovable(SI) &&
        memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) {

      LLVM_DEBUG(
          dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
                 << *DepLoad << "\n  STORE: " << *SI << '\n');

      deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst);
      ++NumRedundantStores;
      return true;
    }
  }

  // Remove null stores into the calloc'ed objects
  // (calloc already zero-initialized the memory).
  Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
  if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
    Instruction *UnderlyingPointer =
        dyn_cast<Instruction>(getUnderlyingObject(SI->getPointerOperand()));

    if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
        memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) {
      LLVM_DEBUG(
          dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                 << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');

      deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst);
      ++NumRedundantStores;
      return true;
    }
  }
  return false;
}

/// If the later store is fully contained inside the earlier store and both
/// store ConstantInt values, compute the merged constant that results from
/// applying the later store on top of the earlier one. Returns the merged
/// constant, or nullptr if the stores cannot be merged.
static Constant *
tryToMergePartialOverlappingStores(StoreInst *Earlier, StoreInst *Later,
                                   int64_t InstWriteOffset,
                                   int64_t DepWriteOffset, const DataLayout &DL,
                                   AliasAnalysis *AA, DominatorTree *DT) {

  if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
      Later && isa<ConstantInt>(Later->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) &&
      memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
    // If the store we find is:
    //   a) partially overwritten by the store to 'Loc'
    //   b) the later store is fully contained in the earlier one and
    //   c) they both have a constant value
    //   d) none of the two stores need padding
    // Merge the two stores, replacing the earlier store's value with a
    // merge of both values.
    // TODO: Deal with other constant types (vectors, etc), and probably
    // some mem intrinsics (if needed)

    APInt EarlierValue =
        cast<ConstantInt>(Earlier->getValueOperand())->getValue();
    APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
    unsigned LaterBits = LaterValue.getBitWidth();
    assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
    LaterValue = LaterValue.zext(EarlierValue.getBitWidth());

    // Offset of the smaller store inside the larger store
    unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
    // On big-endian targets the byte offset counts from the most significant
    // end, so mirror the shift amount.
    unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() -
                                                   BitOffsetDiff - LaterBits
                                             : BitOffsetDiff;
    APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
                                   LShiftAmount + LaterBits);
    // Clear the bits we'll be replacing, then OR with the smaller
    // store, shifted appropriately.
    APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
    LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Earlier: " << *Earlier
                      << "\n  Later: " << *Later
                      << "\n  Merged Value: " << Merged << '\n');
    return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged);
  }
  return nullptr;
}

/// MemDep-based DSE driver for a single basic block: walks the block
/// top-down, deleting stores that are dead because a later store completely
/// overwrites them (or shortening/merging partial overwrites), and finally
/// handles free calls and the function end block. Returns true on change.
static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
                                MemoryDependenceResults *MD, DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  const DataLayout &DL = BB.getModule()->getDataLayout();
  bool MadeChange = false;

  // Throwing instructions seen so far, in program order (used as barriers).
  MapVector<Instruction *, bool> ThrowableInst;

  // A map of interval maps representing partially-overwritten value parts.
  InstOverlapIntervalsTy IOL;

  // Do a top-down walk on the BB.
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
    // Handle 'free' calls specially.
    if (CallInst *F = isFreeCall(&*BBI, TLI)) {
      MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, ThrowableInst);
      // Increment BBI after handleFree has potentially deleted instructions.
      // This ensures we maintain a valid iterator.
      // NOTE(review): handleFree deletes dependencies of the free, not the
      // free call itself (which BBI points at); confirm ordering vs. upstream,
      // which advances BBI before the call.
      ++BBI;
      continue;
    }

    Instruction *Inst = &*BBI++;

    // Record throw-capable instructions; they act as barriers for removing
    // stores to caller-visible memory.
    if (Inst->mayThrow()) {
      ThrowableInst[Inst] = true;
      continue;
    }

    // Check to see if Inst writes to memory.  If not, continue.
    if (!hasAnalyzableMemoryWrite(Inst, *TLI))
      continue;

    // eliminateNoopStore will update in iterator, if necessary.
    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL,
                           ThrowableInst, DT)) {
      MadeChange = true;
      continue;
    }

    // If we find something that writes memory, get its memory dependence.
    MemDepResult InstDep = MD->getDependency(Inst);

    // Ignore any store where we can't find a local dependence.
    // FIXME: cross-block DSE would be fun. :)
    if (!InstDep.isDef() && !InstDep.isClobber())
      continue;

    // Figure out what location is being stored to.
    MemoryLocation Loc = getLocForWrite(Inst);

    // If we didn't get a useful location, fail.
    if (!Loc.Ptr)
      continue;

    // Loop until we find a store we can eliminate or a load that
    // invalidates the analysis. Without an upper bound on the number of
    // instructions examined, this analysis can become very time-consuming.
    // However, the potential gain diminishes as we process more instructions
    // without eliminating any of them. Therefore, we limit the number of
    // instructions we look at.
    auto Limit = MD->getDefaultBlockScanLimit();
    while (InstDep.isDef() || InstDep.isClobber()) {
      // Get the memory clobbered by the instruction we depend on.  MemDep will
      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
      // end up depending on a may- or must-aliased load, then we can't optimize
      // away the store and we bail out.  However, if we depend on something
      // that overwrites the memory location we *can* potentially optimize it.
      //
      // Find out what memory location the dependent instruction stores.
      Instruction *DepWrite = InstDep.getInst();
      if (!hasAnalyzableMemoryWrite(DepWrite, *TLI))
        break;
      MemoryLocation DepLoc = getLocForWrite(DepWrite);
      // If we didn't get a useful location, or if it isn't a size, bail out.
      if (!DepLoc.Ptr)
        break;

      // Find the last throwable instruction not removed by call to
      // deleteDeadInstruction.
      Instruction *LastThrowing = nullptr;
      if (!ThrowableInst.empty())
        LastThrowing = ThrowableInst.back().first;

      // Make sure we don't look past a call which might throw. This is an
      // issue because MemoryDependenceAnalysis works in the wrong direction:
      // it finds instructions which dominate the current instruction, rather than
      // instructions which are post-dominated by the current instruction.
      //
      // If the underlying object is a non-escaping memory allocation, any store
      // to it is dead along the unwind edge. Otherwise, we need to preserve
      // the store.
      if (LastThrowing && DepWrite->comesBefore(LastThrowing)) {
        const Value *Underlying = getUnderlyingObject(DepLoc.Ptr);
        bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
        if (!IsStoreDeadOnUnwind) {
          // We're looking for a call to an allocation function
          // where the allocation doesn't escape before the last
          // throwing instruction; PointerMayBeCaptured
          // reasonably fast approximation.
          IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) &&
                                !PointerMayBeCaptured(Underlying, false, true);
        }
        // The store may be observable on the unwind path: keep it.
        if (!IsStoreDeadOnUnwind)
          break;
      }

      // If we find a write that is a) removable (i.e., non-volatile), b) is
      // completely obliterated by the store to 'Loc', and c) which we know that
      // 'Inst' doesn't load from, then we can remove it.
      // Also try to merge two stores if a later one only touches memory written
      // to by the earlier one.
      if (isRemovable(DepWrite) &&
          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
        int64_t InstWriteOffset, DepWriteOffset;
        OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset,
                                         InstWriteOffset, DepWrite, IOL, *AA,
                                         BB.getParent());
        if (OR == OW_Complete) {
          // The earlier write is fully covered by the later one: delete it.
          LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *DepWrite
                            << "\n  KILLER: " << *Inst << '\n');

          // Delete the store and now-dead instructions that feed it.
          deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
                                ThrowableInst);
          ++NumFastStores;
          MadeChange = true;

          // We erased DepWrite; start over.
          InstDep = MD->getDependency(Inst);
          continue;
        } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) ||
                   ((OR == OW_Begin &&
                     isShortenableAtTheBeginning(DepWrite)))) {
          // Partial tail/head overwrite: trim the earlier write instead of
          // deleting it. Only reachable when partial-overwrite tracking is
          // off; otherwise shortening happens in
          // removePartiallyOverlappedStores.
          assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
                                                    "when partial-overwrite "
                                                    "tracking is enabled");
          // The overwrite result is known, so these must be known, too.
          int64_t EarlierSize = DepLoc.Size.getValue();
          int64_t LaterSize = Loc.Size.getValue();
          bool IsOverwriteEnd = (OR == OW_End);
          MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
                                     InstWriteOffset, LaterSize, IsOverwriteEnd);
        } else if (EnablePartialStoreMerging &&
                   OR == OW_PartialEarlierWithFullLater) {
          // The later store lies fully inside the earlier one: try to fold
          // both constants into a single replacement store.
          auto *Earlier = dyn_cast<StoreInst>(DepWrite);
          auto *Later = dyn_cast<StoreInst>(Inst);
          if (Constant *C = tryToMergePartialOverlappingStores(
                  Earlier, Later, InstWriteOffset, DepWriteOffset, DL, AA,
                  DT)) {
            auto *SI = new StoreInst(
                C, Earlier->getPointerOperand(), false, Earlier->getAlign(),
                Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite);

            unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa,
                                   LLVMContext::MD_alias_scope,
                                   LLVMContext::MD_noalias,
                                   LLVMContext::MD_nontemporal};
            SI->copyMetadata(*DepWrite, MDToKeep);
            ++NumModifiedStores;

            // Delete the old stores and now-dead instructions that feed them.
            deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL,
                                  ThrowableInst);
            deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
                                  ThrowableInst);
            MadeChange = true;

            // We erased DepWrite and Inst (Loc); start over.
            break;
          }
        }
      }

      // If this is a may-aliased store that is clobbering the store value, we
      // can keep searching past it for another must-aliased pointer that stores
      // to the same location.  For example, in:
      //   store -> P
      //   store -> Q
      //   store -> P
      // we can remove the first store to P even though we don't know if P and Q
      // alias.
      if (DepWrite == &BB.front()) break;

      // Can't look past this instruction if it might read 'Loc'.
      if (isRefSet(AA->getModRefInfo(DepWrite, Loc)))
        break;

      // Keep scanning upwards past this may-aliased write for another
      // dependency, honoring the remaining scan limit.
      InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
                                             DepWrite->getIterator(), &BB,
                                             /*QueryInst=*/ nullptr, &Limit);
    }
  }

  if (EnablePartialOverwriteTracking)
    MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);

  // If this block ends in a return, unwind, or unreachable, all allocas are
  // dead at its end, which means stores to them are also dead.
  if (BB.getTerminator()->getNumSuccessors() == 0)
    MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, ThrowableInst);

  return MadeChange;
}

/// Run the MemDep-based DSE over every reachable block of \p F.
/// Returns true if any block was changed.
static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
                                MemoryDependenceResults *MD, DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  bool MadeChange = false;
  for (BasicBlock &BB : F)
    // Only check non-dead blocks.  Dead blocks may have strange pointer
    // cycles that will confuse alias analysis.
    if (DT->isReachableFromEntry(&BB))
      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);

  return MadeChange;
}

namespace {
//=============================================================================
// MemorySSA backed dead store elimination.
//
// The code below implements dead store elimination using MemorySSA. It uses
// the following general approach: given a MemoryDef, walk upwards to find
// clobbering MemoryDefs that may be killed by the starting def. Then check
// that there are no uses that may read the location of the original MemoryDef
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking
//    upwards.
// 2. Check that there are no reads between DomAccess and the StartDef by
//    checking all uses starting at DomAccess and walking until we see StartDef.
// 3. For each found DomDef, check that:
//   1. There are no barrier instructions between DomDef and StartDef (like
//      throws or stores with ordering constraints).
//   2. StartDef is executed whenever DomDef is executed.
//   3. StartDef completely overwrites DomDef.
// 4. Erase DomDef from the function and MemorySSA.

// Returns true if \p M is an intrinsic that does not read or write memory.
bool isNoopIntrinsic(MemoryUseOrDef *M) {
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::assume:
      return true;
    case Intrinsic::dbg_addr:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_label:
    case Intrinsic::dbg_value:
      llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
    default:
      return false;
    }
  }
  return false;
}

// Check if we can ignore \p D for DSE.
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
  Instruction *DI = D->getMemoryInst();
  // Calls that only access inaccessible memory cannot read or write any memory
  // locations we consider for elimination.
  if (auto *CB = dyn_cast<CallBase>(DI))
    if (CB->onlyAccessesInaccessibleMemory())
      return true;

  // We can eliminate stores to locations not visible to the caller across
  // throwing instructions.
  if (DI->mayThrow() && !DefVisibleToCaller)
    return true;

  // We can remove the dead stores, irrespective of the fence and its ordering
  // (release/acquire/seq_cst). Fences only constrain the ordering of
  // already visible stores, it does not make a store visible to other
  // threads. So, skipping over a fence does not change a store from being
  // dead.
  if (isa<FenceInst>(DI))
    return true;

  // Skip intrinsics that do not really read or modify memory.
  if (isNoopIntrinsic(D))
    return true;

  return false;
}

/// Shared state for the MemorySSA-backed DSE implementation: the analyses it
/// queries plus per-function caches built once in DSEState::get().
struct DSEState {
  Function &F;
  AliasAnalysis &AA;
  MemorySSA &MSSA;
  DominatorTree &DT;
  PostDominatorTree &PDT;
  const TargetLibraryInfo &TLI;

  // All MemoryDefs that potentially could kill other MemDefs.
  SmallVector<MemoryDef *, 64> MemDefs;
  // Any that should be skipped as they are already deleted
  SmallPtrSet<MemoryAccess *, 4> SkipStores;
  // Keep track of all of the objects that are invisible to the caller before
  // the function returns.
  SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
  // Keep track of all of the objects that are invisible to the caller after
  // the function returns.
  SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet;
  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  // Post-order numbers for each basic block. Used to figure out if memory
  // accesses are executed before another access.
  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;

  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
  /// basic block.
  DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;

  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
           PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
      : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {}

  /// Factory: builds a DSEState and populates the per-function caches
  /// (post-order numbers, candidate MemoryDefs, caller-visibility sets).
  static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                      DominatorTree &DT, PostDominatorTree &PDT,
                      const TargetLibraryInfo &TLI) {
    DSEState State(F, AA, MSSA, DT, PDT, TLI);
    // Collect blocks with throwing instructions not modeled in MemorySSA and
    // alloc-like objects.
    unsigned PO = 0;
    for (BasicBlock *BB : post_order(&F)) {
      State.PostOrderNumbers[BB] = PO++;
      for (Instruction &I : *BB) {
        MemoryAccess *MA = MSSA.getMemoryAccess(&I);
        // Throwing instructions without a MemoryAccess are invisible to the
        // MemorySSA walk, so remember their blocks explicitly.
        if (I.mayThrow() && !MA)
          State.ThrowingBlocks.insert(I.getParent());

        auto *MD = dyn_cast_or_null<MemoryDef>(MA);
        // Only consider defs we can reason about (analyzable write location
        // or a memory terminator), bounded by MemorySSADefsPerBlockLimit.
        if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
            (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
          State.MemDefs.push_back(MD);

        // Track whether alloca and alloca-like objects are visible in the
        // caller before and after the function returns. Alloca objects are
        // invalid in the caller, so they are neither visible before or after
        // the function returns.
        if (isa<AllocaInst>(&I)) {
          State.InvisibleToCallerBeforeRet.insert(&I);
          State.InvisibleToCallerAfterRet.insert(&I);
        }

        // For alloca-like objects we need to check if they are captured before
        // the function returns and if the return might capture the object.
        if (isAllocLikeFn(&I, &TLI)) {
          bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true);
          if (!CapturesBeforeRet) {
            State.InvisibleToCallerBeforeRet.insert(&I);
            if (!PointerMayBeCaptured(&I, true, false))
              State.InvisibleToCallerAfterRet.insert(&I);
          }
        }
      }
    }

    // Treat byval or inalloca arguments the same as Allocas, stores to them are
    // dead at the end of the function.
    for (Argument &AI : F.args())
      if (AI.hasPassPointeeByValueCopyAttr()) {
        // For byval, the caller doesn't know the address of the allocation.
        if (AI.hasByValAttr())
          State.InvisibleToCallerBeforeRet.insert(&AI);
        State.InvisibleToCallerAfterRet.insert(&AI);
      }

    return State;
  }

  /// Return the memory location written by \p I, if DSE can analyze it:
  /// handles mem intrinsics, a few recognized libcalls (str*cpy/str*cat,
  /// init.trampoline) and plain store-like instructions; None otherwise.
  Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
    if (!I->mayWriteToMemory())
      return None;

    if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
      return {MemoryLocation::getForDest(MTI)};

    if (auto *CB = dyn_cast<CallBase>(I)) {
      LibFunc LF;
      if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
        switch (LF) {
        case LibFunc_strcpy:
        case LibFunc_strncpy:
        case LibFunc_strcat:
        case LibFunc_strncat:
          // These write through their first argument (size unknown).
          return {MemoryLocation(CB->getArgOperand(0))};
        default:
          break;
        }
      }
      switch (CB->getIntrinsicID()) {
      case Intrinsic::init_trampoline:
        return {MemoryLocation(CB->getArgOperand(0))};
      default:
        break;
      }
      return None;
    }

    return MemoryLocation::getOrNone(I);
  }

  /// Returns true if \p Use completely overwrites \p DefLoc.
  bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const {
    // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
    // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
    // MemoryDef.
    if (!UseInst->mayWriteToMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    int64_t InstWriteOffset, DepWriteOffset;
    auto CC = getLocForWriteEx(UseInst);
    // Throwaway interval map: we only care about the OW_Complete verdict here.
    InstOverlapIntervalsTy IOL;

    const DataLayout &DL = F.getParent()->getDataLayout();

    return CC &&
           isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset, InstWriteOffset,
                       UseInst, IOL, AA, &F) == OW_Complete;
  }

  /// Returns true if \p Def is not read before returning from the function.
  bool isWriteAtEndOfFunction(MemoryDef *Def) {
    LLVM_DEBUG(dbgs() << "  Check if def " << *Def << " ("
                      << *Def->getMemoryInst()
                      << ") is at the end the function \n");

    auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
    if (!MaybeLoc) {
      LLVM_DEBUG(dbgs() << "  ... could not get location for write.\n");
      return false;
    }

    // BFS over the MemorySSA use lists starting at Def, looking for any
    // access that may read the written location.
    SmallVector<MemoryAccess *, 4> WorkList;
    SmallPtrSet<MemoryAccess *, 8> Visited;
    auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
      if (!Visited.insert(Acc).second)
        return;
      for (Use &U : Acc->uses())
        WorkList.push_back(cast<MemoryAccess>(U.getUser()));
    };
    PushMemUses(Def);
    for (unsigned I = 0; I < WorkList.size(); I++) {
      // Bound the search; give up conservatively past the scan limit.
      if (WorkList.size() >= MemorySSAScanLimit) {
        LLVM_DEBUG(dbgs() << "  ... hit exploration limit.\n");
        return false;
      }

      MemoryAccess *UseAccess = WorkList[I];
      if (isa<MemoryPhi>(UseAccess)) {
        PushMemUses(UseAccess);
        continue;
      }

      // TODO: Checking for aliasing is expensive. Consider reducing the amount
      // of times this is called and/or caching it.
      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
      if (isReadClobber(*MaybeLoc, UseInst)) {
        LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
        return false;
      }

      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
        PushMemUses(UseDef);
    }
    // No read of the location was found on any path to the exit.
    return true;
  }

  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
  /// pair with the MemoryLocation terminated by \p I and a boolean flag
  /// indicating whether \p I is a free-like call.
1679 Optional<std::pair<MemoryLocation, bool>> 1680 getLocForTerminator(Instruction *I) const { 1681 uint64_t Len; 1682 Value *Ptr; 1683 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len), 1684 m_Value(Ptr)))) 1685 return {std::make_pair(MemoryLocation(Ptr, Len), false)}; 1686 1687 if (auto *CB = dyn_cast<CallBase>(I)) { 1688 if (isFreeCall(I, &TLI)) 1689 return {std::make_pair(MemoryLocation(CB->getArgOperand(0)), true)}; 1690 } 1691 1692 return None; 1693 } 1694 1695 /// Returns true if \p I is a memory terminator instruction like 1696 /// llvm.lifetime.end or free. 1697 bool isMemTerminatorInst(Instruction *I) const { 1698 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 1699 return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || 1700 isFreeCall(I, &TLI); 1701 } 1702 1703 /// Returns true if \p MaybeTerm is a memory terminator for the same 1704 /// underlying object as \p DefLoc. 1705 bool isMemTerminator(MemoryLocation DefLoc, Instruction *MaybeTerm) const { 1706 Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc = 1707 getLocForTerminator(MaybeTerm); 1708 1709 if (!MaybeTermLoc) 1710 return false; 1711 1712 // If the terminator is a free-like call, all accesses to the underlying 1713 // object can be considered terminated. 1714 if (MaybeTermLoc->second) { 1715 DataLayout DL = MaybeTerm->getParent()->getModule()->getDataLayout(); 1716 DefLoc = MemoryLocation(getUnderlyingObject(DefLoc.Ptr)); 1717 } 1718 return AA.isMustAlias(MaybeTermLoc->first, DefLoc); 1719 } 1720 1721 // Returns true if \p Use may read from \p DefLoc. 1722 bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { 1723 if (!UseInst->mayReadFromMemory()) 1724 return false; 1725 1726 if (auto *CB = dyn_cast<CallBase>(UseInst)) 1727 if (CB->onlyAccessesInaccessibleMemory()) 1728 return false; 1729 1730 ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); 1731 // If necessary, perform additional analysis. 
1732 if (isRefSet(MR)) 1733 MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); 1734 return isRefSet(MR); 1735 } 1736 1737 // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no 1738 // read access between them or on any other path to a function exit block if 1739 // \p DefLoc is not accessible after the function returns. If there is no such 1740 // MemoryDef, return None. The returned value may not (completely) overwrite 1741 // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse 1742 // (read). 1743 Optional<MemoryAccess *> 1744 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, 1745 MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet, 1746 bool DefVisibleToCallerAfterRet, int &ScanLimit) const { 1747 MemoryAccess *DomAccess; 1748 bool StepAgain; 1749 LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current 1750 << "\n"); 1751 // Find the next clobbering Mod access for DefLoc, starting at Current. 1752 do { 1753 StepAgain = false; 1754 // Reached TOP. 1755 if (MSSA.isLiveOnEntryDef(Current)) 1756 return None; 1757 1758 if (isa<MemoryPhi>(Current)) { 1759 DomAccess = Current; 1760 break; 1761 } 1762 MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current); 1763 // Look for access that clobber DefLoc. 1764 DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD, 1765 DefLoc); 1766 if (MSSA.isLiveOnEntryDef(DomAccess)) 1767 return None; 1768 1769 if (isa<MemoryPhi>(DomAccess)) 1770 break; 1771 1772 // Check if we can skip DomDef for DSE. 1773 MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess); 1774 if (DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) { 1775 StepAgain = true; 1776 Current = DomDef->getDefiningAccess(); 1777 } 1778 1779 } while (StepAgain); 1780 1781 // Accesses to objects accessible after the function returns can only be 1782 // eliminated if the access is killed along all paths to the exit. 
Collect 1783 // the blocks with killing (=completely overwriting MemoryDefs) and check if 1784 // they cover all paths from DomAccess to any function exit. 1785 SmallPtrSet<BasicBlock *, 16> KillingBlocks = {KillingDef->getBlock()}; 1786 LLVM_DEBUG({ 1787 dbgs() << " Checking for reads of " << *DomAccess; 1788 if (isa<MemoryDef>(DomAccess)) 1789 dbgs() << " (" << *cast<MemoryDef>(DomAccess)->getMemoryInst() << ")\n"; 1790 else 1791 dbgs() << ")\n"; 1792 }); 1793 1794 SmallSetVector<MemoryAccess *, 32> WorkList; 1795 auto PushMemUses = [&WorkList](MemoryAccess *Acc) { 1796 for (Use &U : Acc->uses()) 1797 WorkList.insert(cast<MemoryAccess>(U.getUser())); 1798 }; 1799 PushMemUses(DomAccess); 1800 1801 // Check if DomDef may be read. 1802 for (unsigned I = 0; I < WorkList.size(); I++) { 1803 MemoryAccess *UseAccess = WorkList[I]; 1804 1805 LLVM_DEBUG(dbgs() << " " << *UseAccess); 1806 if (--ScanLimit == 0) { 1807 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); 1808 return None; 1809 } 1810 1811 if (isa<MemoryPhi>(UseAccess)) { 1812 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n"); 1813 PushMemUses(UseAccess); 1814 continue; 1815 } 1816 1817 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst(); 1818 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); 1819 1820 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) { 1821 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n"); 1822 PushMemUses(UseAccess); 1823 continue; 1824 } 1825 1826 // A memory terminator kills all preceeding MemoryDefs and all succeeding 1827 // MemoryAccesses. We do not have to check it's users. 1828 if (isMemTerminator(DefLoc, UseInst)) 1829 continue; 1830 1831 // Uses which may read the original MemoryDef mean we cannot eliminate the 1832 // original MD. Stop walk. 1833 if (isReadClobber(DefLoc, UseInst)) { 1834 LLVM_DEBUG(dbgs() << " ... 
found read clobber\n"); 1835 return None; 1836 } 1837 1838 // For the KillingDef and DomAccess we only have to check if it reads the 1839 // memory location. 1840 // TODO: It would probably be better to check for self-reads before 1841 // calling the function. 1842 if (KillingDef == UseAccess || DomAccess == UseAccess) { 1843 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); 1844 continue; 1845 } 1846 1847 // Check all uses for MemoryDefs, except for defs completely overwriting 1848 // the original location. Otherwise we have to check uses of *all* 1849 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might 1850 // miss cases like the following 1851 // 1 = Def(LoE) ; <----- DomDef stores [0,1] 1852 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] 1853 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. 1854 // (The Use points to the *first* Def it may alias) 1855 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, 1856 // stores [0,1] 1857 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) { 1858 if (isCompleteOverwrite(DefLoc, UseInst)) { 1859 if (DefVisibleToCallerAfterRet && UseAccess != DomAccess) { 1860 BasicBlock *MaybeKillingBlock = UseInst->getParent(); 1861 if (PostOrderNumbers.find(MaybeKillingBlock)->second < 1862 PostOrderNumbers.find(DomAccess->getBlock())->second) { 1863 1864 LLVM_DEBUG(dbgs() << " ... found killing block " 1865 << MaybeKillingBlock->getName() << "\n"); 1866 KillingBlocks.insert(MaybeKillingBlock); 1867 } 1868 } 1869 } else 1870 PushMemUses(UseDef); 1871 } 1872 } 1873 1874 // For accesses to locations visible after the function returns, make sure 1875 // that the location is killed (=overwritten) along all paths from DomAccess 1876 // to the exit. 1877 if (DefVisibleToCallerAfterRet) { 1878 assert(!KillingBlocks.empty() && 1879 "Expected at least a single killing block"); 1880 // Find the common post-dominator of all killing blocks. 
1881 BasicBlock *CommonPred = *KillingBlocks.begin(); 1882 for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end(); 1883 I != E; I++) { 1884 if (!CommonPred) 1885 break; 1886 CommonPred = PDT.findNearestCommonDominator(CommonPred, *I); 1887 } 1888 1889 // If CommonPred is in the set of killing blocks, just check if it 1890 // post-dominates DomAccess. 1891 if (KillingBlocks.count(CommonPred)) { 1892 if (PDT.dominates(CommonPred, DomAccess->getBlock())) 1893 return {DomAccess}; 1894 return None; 1895 } 1896 1897 // If the common post-dominator does not post-dominate DomAccess, there 1898 // is a path from DomAccess to an exit not going through a killing block. 1899 if (PDT.dominates(CommonPred, DomAccess->getBlock())) { 1900 SetVector<BasicBlock *> WorkList; 1901 1902 // DomAccess's post-order number provides an upper bound of the blocks 1903 // on a path starting at DomAccess. 1904 unsigned UpperBound = 1905 PostOrderNumbers.find(DomAccess->getBlock())->second; 1906 1907 // If CommonPred is null, there are multiple exits from the function. 1908 // They all have to be added to the worklist. 1909 if (CommonPred) 1910 WorkList.insert(CommonPred); 1911 else 1912 for (BasicBlock *R : PDT.roots()) 1913 WorkList.insert(R); 1914 1915 NumCFGTries++; 1916 // Check if all paths starting from an exit node go through one of the 1917 // killing blocks before reaching DomAccess. 1918 for (unsigned I = 0; I < WorkList.size(); I++) { 1919 NumCFGChecks++; 1920 BasicBlock *Current = WorkList[I]; 1921 if (KillingBlocks.count(Current)) 1922 continue; 1923 if (Current == DomAccess->getBlock()) 1924 return None; 1925 1926 // DomAccess is reachable from the entry, so we don't have to explore 1927 // unreachable blocks further. 1928 if (!DT.isReachableFromEntry(Current)) 1929 continue; 1930 1931 unsigned CPO = PostOrderNumbers.find(Current)->second; 1932 // Current block is not on a path starting at DomAccess. 
1933 if (CPO > UpperBound) 1934 continue; 1935 for (BasicBlock *Pred : predecessors(Current)) 1936 WorkList.insert(Pred); 1937 1938 if (WorkList.size() >= MemorySSAPathCheckLimit) 1939 return None; 1940 } 1941 NumCFGSuccess++; 1942 return {DomAccess}; 1943 } 1944 return None; 1945 } 1946 1947 // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. 1948 return {DomAccess}; 1949 } 1950 1951 // Delete dead memory defs 1952 void deleteDeadInstruction(Instruction *SI) { 1953 MemorySSAUpdater Updater(&MSSA); 1954 SmallVector<Instruction *, 32> NowDeadInsts; 1955 NowDeadInsts.push_back(SI); 1956 --NumFastOther; 1957 1958 while (!NowDeadInsts.empty()) { 1959 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 1960 ++NumFastOther; 1961 1962 // Try to preserve debug information attached to the dead instruction. 1963 salvageDebugInfo(*DeadInst); 1964 salvageKnowledge(DeadInst); 1965 1966 // Remove the Instruction from MSSA. 1967 if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { 1968 if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { 1969 SkipStores.insert(MD); 1970 } 1971 Updater.removeMemoryAccess(MA); 1972 } 1973 1974 auto I = IOLs.find(DeadInst->getParent()); 1975 if (I != IOLs.end()) 1976 I->second.erase(DeadInst); 1977 // Remove its operands 1978 for (Use &O : DeadInst->operands()) 1979 if (Instruction *OpI = dyn_cast<Instruction>(O)) { 1980 O = nullptr; 1981 if (isInstructionTriviallyDead(OpI, &TLI)) 1982 NowDeadInsts.push_back(OpI); 1983 } 1984 1985 DeadInst->eraseFromParent(); 1986 } 1987 } 1988 1989 // Check for any extra throws between SI and NI that block DSE. This only 1990 // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may 1991 // throw are handled during the walk from one def to the next. 
1992 bool mayThrowBetween(Instruction *SI, Instruction *NI, 1993 const Value *SILocUnd) const { 1994 // First see if we can ignore it by using the fact that SI is an 1995 // alloca/alloca like object that is not visible to the caller during 1996 // execution of the function. 1997 if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd)) 1998 return false; 1999 2000 if (SI->getParent() == NI->getParent()) 2001 return ThrowingBlocks.count(SI->getParent()); 2002 return !ThrowingBlocks.empty(); 2003 } 2004 2005 // Check if \p NI acts as a DSE barrier for \p SI. The following instructions 2006 // act as barriers: 2007 // * A memory instruction that may throw and \p SI accesses a non-stack 2008 // object. 2009 // * Atomic stores stronger that monotonic. 2010 bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) const { 2011 // If NI may throw it acts as a barrier, unless we are to an alloca/alloca 2012 // like object that does not escape. 2013 if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd)) 2014 return true; 2015 2016 // If NI is an atomic load/store stronger than monotonic, do not try to 2017 // eliminate/reorder it. 2018 if (NI->isAtomic()) { 2019 if (auto *LI = dyn_cast<LoadInst>(NI)) 2020 return isStrongerThanMonotonic(LI->getOrdering()); 2021 if (auto *SI = dyn_cast<StoreInst>(NI)) 2022 return isStrongerThanMonotonic(SI->getOrdering()); 2023 llvm_unreachable("other instructions should be skipped in MemorySSA"); 2024 } 2025 return false; 2026 } 2027 2028 /// Eliminate writes to objects that are not visible in the caller and are not 2029 /// accessed before returning from the function. 
2030 bool eliminateDeadWritesAtEndOfFunction() { 2031 bool MadeChange = false; 2032 LLVM_DEBUG( 2033 dbgs() 2034 << "Trying to eliminate MemoryDefs at the end of the function\n"); 2035 for (int I = MemDefs.size() - 1; I >= 0; I--) { 2036 MemoryDef *Def = MemDefs[I]; 2037 if (SkipStores.find(Def) != SkipStores.end() || 2038 !isRemovable(Def->getMemoryInst())) 2039 continue; 2040 2041 // TODO: Consider doing the underlying object check first, if it is 2042 // beneficial compile-time wise. 2043 if (isWriteAtEndOfFunction(Def)) { 2044 Instruction *DefI = Def->getMemoryInst(); 2045 // See through pointer-to-pointer bitcasts 2046 SmallVector<const Value *, 4> Pointers; 2047 getUnderlyingObjects(getLocForWriteEx(DefI)->Ptr, Pointers); 2048 2049 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " 2050 "of the function\n"); 2051 bool CanKill = true; 2052 for (const Value *Pointer : Pointers) { 2053 if (!InvisibleToCallerAfterRet.count(Pointer)) { 2054 CanKill = false; 2055 break; 2056 } 2057 } 2058 2059 if (CanKill) { 2060 deleteDeadInstruction(DefI); 2061 ++NumFastStores; 2062 MadeChange = true; 2063 } 2064 } 2065 } 2066 return MadeChange; 2067 } 2068 2069 /// \returns true if \p Def is a no-op store, either because it 2070 /// directly stores back a loaded value or stores zero to a calloced object. 2071 bool storeIsNoop(MemoryDef *Def, MemoryLocation DefLoc, const Value *DefUO) { 2072 StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst()); 2073 if (!Store) 2074 return false; 2075 2076 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) { 2077 if (LoadI->getPointerOperand() == Store->getOperand(1)) { 2078 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess(); 2079 // If both accesses share the same defining access, no instructions 2080 // between them can modify the memory location. 
2081 return LoadAccess == Def->getDefiningAccess(); 2082 } 2083 } 2084 2085 Constant *StoredConstant = dyn_cast<Constant>(Store->getOperand(0)); 2086 if (StoredConstant && StoredConstant->isNullValue()) { 2087 auto *DefUOInst = dyn_cast<Instruction>(DefUO); 2088 if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) { 2089 auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); 2090 // If UnderlyingDef is the clobbering access of Def, no instructions 2091 // between them can modify the memory location. 2092 auto *ClobberDef = 2093 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); 2094 return UnderlyingDef == ClobberDef; 2095 } 2096 } 2097 return false; 2098 } 2099 }; 2100 2101 bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, 2102 MemorySSA &MSSA, DominatorTree &DT, 2103 PostDominatorTree &PDT, 2104 const TargetLibraryInfo &TLI) { 2105 const DataLayout &DL = F.getParent()->getDataLayout(); 2106 bool MadeChange = false; 2107 2108 DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); 2109 // For each store: 2110 for (unsigned I = 0; I < State.MemDefs.size(); I++) { 2111 MemoryDef *KillingDef = State.MemDefs[I]; 2112 if (State.SkipStores.count(KillingDef)) 2113 continue; 2114 Instruction *SI = KillingDef->getMemoryInst(); 2115 2116 auto MaybeSILoc = State.getLocForWriteEx(SI); 2117 if (State.isMemTerminatorInst(SI)) 2118 MaybeSILoc = State.getLocForTerminator(SI).map( 2119 [](const std::pair<MemoryLocation, bool> &P) { return P.first; }); 2120 else 2121 MaybeSILoc = State.getLocForWriteEx(SI); 2122 2123 if (!MaybeSILoc) { 2124 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " 2125 << *SI << "\n"); 2126 continue; 2127 } 2128 MemoryLocation SILoc = *MaybeSILoc; 2129 assert(SILoc.Ptr && "SILoc should not be null"); 2130 const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); 2131 2132 // Check if the store is a no-op. 
2133 if (isRemovable(SI) && State.storeIsNoop(KillingDef, SILoc, SILocUnd)) { 2134 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n'); 2135 State.deleteDeadInstruction(SI); 2136 NumNoopStores++; 2137 MadeChange = true; 2138 continue; 2139 } 2140 2141 Instruction *DefObj = 2142 const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd)); 2143 bool DefVisibleToCallerBeforeRet = 2144 !State.InvisibleToCallerBeforeRet.count(SILocUnd); 2145 bool DefVisibleToCallerAfterRet = 2146 !State.InvisibleToCallerAfterRet.count(SILocUnd); 2147 if (DefObj && isAllocLikeFn(DefObj, &TLI)) { 2148 if (DefVisibleToCallerBeforeRet) 2149 DefVisibleToCallerBeforeRet = 2150 PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT); 2151 } 2152 2153 MemoryAccess *Current = KillingDef; 2154 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " 2155 << *KillingDef << " (" << *SI << ")\n"); 2156 2157 int ScanLimit = MemorySSAScanLimit; 2158 // Worklist of MemoryAccesses that may be killed by KillingDef. 2159 SetVector<MemoryAccess *> ToCheck; 2160 ToCheck.insert(KillingDef->getDefiningAccess()); 2161 2162 // Check if MemoryAccesses in the worklist are killed by KillingDef. 2163 for (unsigned I = 0; I < ToCheck.size(); I++) { 2164 Current = ToCheck[I]; 2165 if (State.SkipStores.count(Current)) 2166 continue; 2167 2168 Optional<MemoryAccess *> Next = State.getDomMemoryDef( 2169 KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet, 2170 DefVisibleToCallerAfterRet, ScanLimit); 2171 2172 if (!Next) { 2173 LLVM_DEBUG(dbgs() << " finished walk\n"); 2174 continue; 2175 } 2176 2177 MemoryAccess *DomAccess = *Next; 2178 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess); 2179 if (isa<MemoryPhi>(DomAccess)) { 2180 LLVM_DEBUG(dbgs() << "\n ... 
adding incoming values to worklist\n"); 2181 for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) { 2182 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V); 2183 BasicBlock *IncomingBlock = IncomingAccess->getBlock(); 2184 BasicBlock *PhiBlock = DomAccess->getBlock(); 2185 2186 // We only consider incoming MemoryAccesses that come before the 2187 // MemoryPhi. Otherwise we could discover candidates that do not 2188 // strictly dominate our starting def. 2189 if (State.PostOrderNumbers[IncomingBlock] > 2190 State.PostOrderNumbers[PhiBlock]) 2191 ToCheck.insert(IncomingAccess); 2192 } 2193 continue; 2194 } 2195 MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess); 2196 Instruction *NI = NextDef->getMemoryInst(); 2197 LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); 2198 2199 // Before we try to remove anything, check for any extra throwing 2200 // instructions that block us from DSEing 2201 if (State.mayThrowBetween(SI, NI, SILocUnd)) { 2202 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); 2203 break; 2204 } 2205 2206 // Check for anything that looks like it will be a barrier to further 2207 // removal 2208 if (State.isDSEBarrier(SILocUnd, NI)) { 2209 LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); 2210 continue; 2211 } 2212 2213 ToCheck.insert(NextDef->getDefiningAccess()); 2214 2215 if (!hasAnalyzableMemoryWrite(NI, TLI)) { 2216 LLVM_DEBUG(dbgs() << " ... skip, cannot analyze def\n"); 2217 continue; 2218 } 2219 2220 if (!isRemovable(NI)) { 2221 LLVM_DEBUG(dbgs() << " ... 
skip, cannot remove def\n"); 2222 continue; 2223 } 2224 2225 if (!DebugCounter::shouldExecute(MemorySSACounter)) 2226 continue; 2227 2228 MemoryLocation NILoc = *State.getLocForWriteEx(NI); 2229 2230 if (State.isMemTerminatorInst(SI)) { 2231 const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); 2232 if (!SILocUnd || SILocUnd != NIUnd) 2233 continue; 2234 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI 2235 << "\n KILLER: " << *SI << '\n'); 2236 State.deleteDeadInstruction(NI); 2237 ++NumFastStores; 2238 MadeChange = true; 2239 } else { 2240 // Check if NI overwrites SI. 2241 int64_t InstWriteOffset, DepWriteOffset; 2242 auto Iter = State.IOLs.insert( 2243 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( 2244 NI->getParent(), InstOverlapIntervalsTy())); 2245 auto &IOL = Iter.first->second; 2246 OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, 2247 InstWriteOffset, NI, IOL, AA, &F); 2248 2249 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { 2250 auto *Earlier = dyn_cast<StoreInst>(NI); 2251 auto *Later = dyn_cast<StoreInst>(SI); 2252 if (Constant *Merged = tryToMergePartialOverlappingStores( 2253 Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA, 2254 &DT)) { 2255 2256 // Update stored value of earlier store to merged constant. 2257 Earlier->setOperand(0, Merged); 2258 ++NumModifiedStores; 2259 MadeChange = true; 2260 2261 // Remove later store and remove any outstanding overlap intervals 2262 // for the updated store. 
2263 State.deleteDeadInstruction(Later); 2264 auto I = State.IOLs.find(Earlier->getParent()); 2265 if (I != State.IOLs.end()) 2266 I->second.erase(Earlier); 2267 break; 2268 } 2269 } 2270 2271 if (OR == OW_Complete) { 2272 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI 2273 << "\n KILLER: " << *SI << '\n'); 2274 State.deleteDeadInstruction(NI); 2275 ++NumFastStores; 2276 MadeChange = true; 2277 } 2278 } 2279 } 2280 } 2281 2282 if (EnablePartialOverwriteTracking) 2283 for (auto &KV : State.IOLs) 2284 MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second); 2285 2286 MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); 2287 return MadeChange; 2288 } 2289 } // end anonymous namespace 2290 2291 //===----------------------------------------------------------------------===// 2292 // DSE Pass 2293 //===----------------------------------------------------------------------===// 2294 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { 2295 AliasAnalysis &AA = AM.getResult<AAManager>(F); 2296 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); 2297 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 2298 2299 bool Changed = false; 2300 if (EnableMemorySSA) { 2301 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); 2302 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 2303 2304 Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 2305 } else { 2306 MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F); 2307 2308 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 2309 } 2310 2311 #ifdef LLVM_ENABLE_STATS 2312 if (AreStatisticsEnabled()) 2313 for (auto &I : instructions(F)) 2314 NumRemainingStores += isa<StoreInst>(&I); 2315 #endif 2316 2317 if (!Changed) 2318 return PreservedAnalyses::all(); 2319 2320 PreservedAnalyses PA; 2321 PA.preserveSet<CFGAnalyses>(); 2322 PA.preserve<GlobalsAA>(); 2323 if (EnableMemorySSA) 2324 
PA.preserve<MemorySSAAnalysis>(); 2325 else 2326 PA.preserve<MemoryDependenceAnalysis>(); 2327 return PA; 2328 } 2329 2330 namespace { 2331 2332 /// A legacy pass for the legacy pass manager that wraps \c DSEPass. 2333 class DSELegacyPass : public FunctionPass { 2334 public: 2335 static char ID; // Pass identification, replacement for typeid 2336 2337 DSELegacyPass() : FunctionPass(ID) { 2338 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); 2339 } 2340 2341 bool runOnFunction(Function &F) override { 2342 if (skipFunction(F)) 2343 return false; 2344 2345 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 2346 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2347 const TargetLibraryInfo &TLI = 2348 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2349 2350 bool Changed = false; 2351 if (EnableMemorySSA) { 2352 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 2353 PostDominatorTree &PDT = 2354 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 2355 2356 Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 2357 } else { 2358 MemoryDependenceResults &MD = 2359 getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 2360 2361 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 2362 } 2363 2364 #ifdef LLVM_ENABLE_STATS 2365 if (AreStatisticsEnabled()) 2366 for (auto &I : instructions(F)) 2367 NumRemainingStores += isa<StoreInst>(&I); 2368 #endif 2369 2370 return Changed; 2371 } 2372 2373 void getAnalysisUsage(AnalysisUsage &AU) const override { 2374 AU.setPreservesCFG(); 2375 AU.addRequired<AAResultsWrapperPass>(); 2376 AU.addRequired<TargetLibraryInfoWrapperPass>(); 2377 AU.addPreserved<GlobalsAAWrapperPass>(); 2378 AU.addRequired<DominatorTreeWrapperPass>(); 2379 AU.addPreserved<DominatorTreeWrapperPass>(); 2380 2381 if (EnableMemorySSA) { 2382 AU.addRequired<PostDominatorTreeWrapperPass>(); 2383 AU.addRequired<MemorySSAWrapperPass>(); 2384 
AU.addPreserved<PostDominatorTreeWrapperPass>(); 2385 AU.addPreserved<MemorySSAWrapperPass>(); 2386 } else { 2387 AU.addRequired<MemoryDependenceWrapperPass>(); 2388 AU.addPreserved<MemoryDependenceWrapperPass>(); 2389 } 2390 } 2391 }; 2392 2393 } // end anonymous namespace 2394 2395 char DSELegacyPass::ID = 0; 2396 2397 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, 2398 false) 2399 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2400 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 2401 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 2402 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 2403 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 2404 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 2405 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 2406 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, 2407 false) 2408 2409 FunctionPass *llvm::createDeadStoreEliminationPass() { 2410 return new DSELegacyPass(); 2411 } 2412