1 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a trivial dead store elimination that only considers 10 // basic-block local redundant stores. 11 // 12 // FIXME: This should eventually be extended to be a post-dominator tree 13 // traversal. Doing so would be pretty trivial. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/MapVector.h" 21 #include "llvm/ADT/PostOrderIterator.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/CaptureTracking.h" 29 #include "llvm/Analysis/GlobalsModRef.h" 30 #include "llvm/Analysis/MemoryBuiltins.h" 31 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 32 #include "llvm/Analysis/MemoryLocation.h" 33 #include "llvm/Analysis/MemorySSA.h" 34 #include "llvm/Analysis/MemorySSAUpdater.h" 35 #include "llvm/Analysis/PostDominators.h" 36 #include "llvm/Analysis/TargetLibraryInfo.h" 37 #include "llvm/Analysis/ValueTracking.h" 38 #include "llvm/IR/Argument.h" 39 #include "llvm/IR/BasicBlock.h" 40 #include "llvm/IR/Constant.h" 41 #include "llvm/IR/Constants.h" 42 #include "llvm/IR/DataLayout.h" 43 #include "llvm/IR/Dominators.h" 44 #include "llvm/IR/Function.h" 45 #include "llvm/IR/InstIterator.h" 46 #include "llvm/IR/InstrTypes.h" 47 #include "llvm/IR/Instruction.h" 48 #include "llvm/IR/Instructions.h" 49 #include "llvm/IR/IntrinsicInst.h" 50 #include "llvm/IR/Intrinsics.h" 51 #include "llvm/IR/LLVMContext.h" 52 #include "llvm/IR/Module.h" 53 #include "llvm/IR/PassManager.h" 54 #include "llvm/IR/PatternMatch.h" 55 #include "llvm/IR/Value.h" 56 #include "llvm/InitializePasses.h" 57 #include "llvm/Pass.h" 58 #include "llvm/Support/Casting.h" 59 #include "llvm/Support/CommandLine.h" 60 #include "llvm/Support/Debug.h" 61 #include "llvm/Support/DebugCounter.h" 62 #include "llvm/Support/ErrorHandling.h" 63 #include "llvm/Support/MathExtras.h" 64 #include "llvm/Support/raw_ostream.h" 65 #include "llvm/Transforms/Scalar.h" 66 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 67 #include "llvm/Transforms/Utils/Local.h" 68 #include <algorithm> 69 #include <cassert> 70 #include <cstddef> 71 #include <cstdint> 72 #include <iterator> 73 #include <map> 74 #include <utility> 75 76 using namespace llvm; 77 using namespace PatternMatch; 78 79 #define DEBUG_TYPE "dse" 80 81 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE"); 82 STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); 83 STATISTIC(NumFastStores, "Number of stores deleted"); 84 STATISTIC(NumFastOther, "Number of other instrs removed"); 85 STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); 86 STATISTIC(NumModifiedStores, "Number of stores modified"); 87 STATISTIC(NumCFGChecks, "Number of stores modified"); 88 STATISTIC(NumCFGTries, "Number of stores modified"); 89 STATISTIC(NumCFGSuccess, "Number of stores modified"); 90 91 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", 92 "Controls which MemoryDefs are eliminated."); 93 94 static cl::opt<bool> 95 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", 96 cl::init(true), cl::Hidden, 97 cl::desc("Enable partial-overwrite tracking in DSE")); 98 99 static cl::opt<bool> 100 EnablePartialStoreMerging("enable-dse-partial-store-merging", 101 cl::init(true), cl::Hidden, 102 cl::desc("Enable partial store merging in DSE")); 103 104 static cl::opt<bool> 105 EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, 106 cl::desc("Use the new MemorySSA-backed DSE.")); 107 108 static cl::opt<unsigned> 109 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, 110 cl::desc("The number of memory instructions to scan for " 111 "dead store elimination (default = 100)")); 112 113 static cl::opt<unsigned> MemorySSADefsPerBlockLimit( 114 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, 115 cl::desc("The number of MemoryDefs we consider as candidates to eliminated " 116 "other stores per basic block (default = 5000)")); 117 118 static cl::opt<unsigned> MemorySSAPathCheckLimit( 119 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, 120 cl::desc("The maximum number of blocks to check when trying to prove that " 121 "all paths to an exit go through a killing block (default = 50)")); 122 123 //===----------------------------------------------------------------------===// 124 // Helper functions 125 //===----------------------------------------------------------------------===// 126 using OverlapIntervalsTy = std::map<int64_t, int64_t>; 127 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; 128 129 /// Delete this instruction. Before we do, go through and zero out all the 130 /// operands of this instruction. If any of them become dead, delete them and 131 /// the computation tree that feeds them. 132 /// If ValueSet is non-null, remove any deleted instructions from it as well. 133 static void 134 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI, 135 MemoryDependenceResults &MD, const TargetLibraryInfo &TLI, 136 InstOverlapIntervalsTy &IOL, 137 MapVector<Instruction *, bool> &ThrowableInst, 138 SmallSetVector<const Value *, 16> *ValueSet = nullptr) { 139 SmallVector<Instruction*, 32> NowDeadInsts; 140 141 NowDeadInsts.push_back(I); 142 --NumFastOther; 143 144 // Keeping the iterator straight is a pain, so we let this routine tell the 145 // caller what the next instruction is after we're done mucking about. 146 BasicBlock::iterator NewIter = *BBI; 147 148 // Before we touch this instruction, remove it from memdep! 149 do { 150 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 151 // Mark the DeadInst as dead in the list of throwable instructions. 152 auto It = ThrowableInst.find(DeadInst); 153 if (It != ThrowableInst.end()) 154 ThrowableInst[It->first] = false; 155 ++NumFastOther; 156 157 // Try to preserve debug information attached to the dead instruction. 158 salvageDebugInfo(*DeadInst); 159 salvageKnowledge(DeadInst); 160 161 // This instruction is dead, zap it, in stages. Start by removing it from 162 // MemDep, which needs to know the operands and needs it to be in the 163 // function. 164 MD.removeInstruction(DeadInst); 165 166 for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 167 Value *Op = DeadInst->getOperand(op); 168 DeadInst->setOperand(op, nullptr); 169 170 // If this operand just became dead, add it to the NowDeadInsts list. 171 if (!Op->use_empty()) continue; 172 173 if (Instruction *OpI = dyn_cast<Instruction>(Op)) 174 if (isInstructionTriviallyDead(OpI, &TLI)) 175 NowDeadInsts.push_back(OpI); 176 } 177 178 if (ValueSet) ValueSet->remove(DeadInst); 179 IOL.erase(DeadInst); 180 181 if (NewIter == DeadInst->getIterator()) 182 NewIter = DeadInst->eraseFromParent(); 183 else 184 DeadInst->eraseFromParent(); 185 } while (!NowDeadInsts.empty()); 186 *BBI = NewIter; 187 // Pop dead entries from back of ThrowableInst till we find an alive entry. 188 while (!ThrowableInst.empty() && !ThrowableInst.back().second) 189 ThrowableInst.pop_back(); 190 } 191 192 /// Does this instruction write some memory? This only returns true for things 193 /// that we can analyze with other helpers below. 194 static bool hasAnalyzableMemoryWrite(Instruction *I, 195 const TargetLibraryInfo &TLI) { 196 if (isa<StoreInst>(I)) 197 return true; 198 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 199 switch (II->getIntrinsicID()) { 200 default: 201 return false; 202 case Intrinsic::memset: 203 case Intrinsic::memmove: 204 case Intrinsic::memcpy: 205 case Intrinsic::memcpy_element_unordered_atomic: 206 case Intrinsic::memmove_element_unordered_atomic: 207 case Intrinsic::memset_element_unordered_atomic: 208 case Intrinsic::init_trampoline: 209 case Intrinsic::lifetime_end: 210 return true; 211 } 212 } 213 if (auto *CB = dyn_cast<CallBase>(I)) { 214 LibFunc LF; 215 if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) { 216 switch (LF) { 217 case LibFunc_strcpy: 218 case LibFunc_strncpy: 219 case LibFunc_strcat: 220 case LibFunc_strncat: 221 return true; 222 default: 223 return false; 224 } 225 } 226 } 227 return false; 228 } 229 230 /// Return a Location stored to by the specified instruction. If isRemovable 231 /// returns true, this function and getLocForRead completely describe the memory 232 /// operations for this instruction. 233 static MemoryLocation getLocForWrite(Instruction *Inst) { 234 235 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 236 return MemoryLocation::get(SI); 237 238 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) { 239 // memcpy/memmove/memset. 240 MemoryLocation Loc = MemoryLocation::getForDest(MI); 241 return Loc; 242 } 243 244 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 245 switch (II->getIntrinsicID()) { 246 default: 247 return MemoryLocation(); // Unhandled intrinsic. 248 case Intrinsic::init_trampoline: 249 return MemoryLocation(II->getArgOperand(0)); 250 case Intrinsic::lifetime_end: { 251 uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); 252 return MemoryLocation(II->getArgOperand(1), Len); 253 } 254 } 255 } 256 if (auto *CB = dyn_cast<CallBase>(Inst)) 257 // All the supported TLI functions so far happen to have dest as their 258 // first argument. 259 return MemoryLocation(CB->getArgOperand(0)); 260 return MemoryLocation(); 261 } 262 263 /// Return the location read by the specified "hasAnalyzableMemoryWrite" 264 /// instruction if any. 265 static MemoryLocation getLocForRead(Instruction *Inst, 266 const TargetLibraryInfo &TLI) { 267 assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); 268 269 // The only instructions that both read and write are the mem transfer 270 // instructions (memcpy/memmove). 271 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst)) 272 return MemoryLocation::getForSource(MTI); 273 return MemoryLocation(); 274 } 275 276 /// If the value of this instruction and the memory it writes to is unused, may 277 /// we delete this instruction? 278 static bool isRemovable(Instruction *I) { 279 // Don't remove volatile/atomic stores. 280 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 281 return SI->isUnordered(); 282 283 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 284 switch (II->getIntrinsicID()) { 285 default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); 286 case Intrinsic::lifetime_end: 287 // Never remove dead lifetime_end's, e.g. because it is followed by a 288 // free. 289 return false; 290 case Intrinsic::init_trampoline: 291 // Always safe to remove init_trampoline. 292 return true; 293 case Intrinsic::memset: 294 case Intrinsic::memmove: 295 case Intrinsic::memcpy: 296 // Don't remove volatile memory intrinsics. 297 return !cast<MemIntrinsic>(II)->isVolatile(); 298 case Intrinsic::memcpy_element_unordered_atomic: 299 case Intrinsic::memmove_element_unordered_atomic: 300 case Intrinsic::memset_element_unordered_atomic: 301 return true; 302 } 303 } 304 305 // note: only get here for calls with analyzable writes - i.e. libcalls 306 if (auto *CB = dyn_cast<CallBase>(I)) 307 return CB->use_empty(); 308 309 return false; 310 } 311 312 /// Returns true if the end of this instruction can be safely shortened in 313 /// length. 314 static bool isShortenableAtTheEnd(Instruction *I) { 315 // Don't shorten stores for now 316 if (isa<StoreInst>(I)) 317 return false; 318 319 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 320 switch (II->getIntrinsicID()) { 321 default: return false; 322 case Intrinsic::memset: 323 case Intrinsic::memcpy: 324 case Intrinsic::memcpy_element_unordered_atomic: 325 case Intrinsic::memset_element_unordered_atomic: 326 // Do shorten memory intrinsics. 327 // FIXME: Add memmove if it's also safe to transform. 328 return true; 329 } 330 } 331 332 // Don't shorten libcalls calls for now. 333 334 return false; 335 } 336 337 /// Returns true if the beginning of this instruction can be safely shortened 338 /// in length. 339 static bool isShortenableAtTheBeginning(Instruction *I) { 340 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be 341 // easily done by offsetting the source address. 342 return isa<AnyMemSetInst>(I); 343 } 344 345 /// Return the pointer that is being written to. 346 static Value *getStoredPointerOperand(Instruction *I) { 347 //TODO: factor this to reuse getLocForWrite 348 MemoryLocation Loc = getLocForWrite(I); 349 assert(Loc.Ptr && 350 "unable to find pointer written for analyzable instruction?"); 351 // TODO: most APIs don't expect const Value * 352 return const_cast<Value*>(Loc.Ptr); 353 } 354 355 static uint64_t getPointerSize(const Value *V, const DataLayout &DL, 356 const TargetLibraryInfo &TLI, 357 const Function *F) { 358 uint64_t Size; 359 ObjectSizeOpts Opts; 360 Opts.NullIsUnknownSize = NullPointerIsDefined(F); 361 362 if (getObjectSize(V, Size, DL, &TLI, Opts)) 363 return Size; 364 return MemoryLocation::UnknownSize; 365 } 366 367 namespace { 368 369 enum OverwriteResult { 370 OW_Begin, 371 OW_Complete, 372 OW_End, 373 OW_PartialEarlierWithFullLater, 374 OW_Unknown 375 }; 376 377 } // end anonymous namespace 378 379 /// Return 'OW_Complete' if a store to the 'Later' location completely 380 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the 381 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the 382 /// beginning of the 'Earlier' location is overwritten by 'Later'. 383 /// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was 384 /// overwritten by a latter (smaller) store which doesn't write outside the big 385 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. 386 static OverwriteResult isOverwrite(const MemoryLocation &Later, 387 const MemoryLocation &Earlier, 388 const DataLayout &DL, 389 const TargetLibraryInfo &TLI, 390 int64_t &EarlierOff, int64_t &LaterOff, 391 Instruction *DepWrite, 392 InstOverlapIntervalsTy &IOL, 393 AliasAnalysis &AA, 394 const Function *F) { 395 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll 396 // get imprecise values here, though (except for unknown sizes). 397 if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) 398 return OW_Unknown; 399 400 const uint64_t LaterSize = Later.Size.getValue(); 401 const uint64_t EarlierSize = Earlier.Size.getValue(); 402 403 const Value *P1 = Earlier.Ptr->stripPointerCasts(); 404 const Value *P2 = Later.Ptr->stripPointerCasts(); 405 406 // If the start pointers are the same, we just have to compare sizes to see if 407 // the later store was larger than the earlier store. 408 if (P1 == P2 || AA.isMustAlias(P1, P2)) { 409 // Make sure that the Later size is >= the Earlier size. 410 if (LaterSize >= EarlierSize) 411 return OW_Complete; 412 } 413 414 // Check to see if the later store is to the entire object (either a global, 415 // an alloca, or a byval/inalloca argument). If so, then it clearly 416 // overwrites any other store to the same object. 417 const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2); 418 419 // If we can't resolve the same pointers to the same object, then we can't 420 // analyze them at all. 421 if (UO1 != UO2) 422 return OW_Unknown; 423 424 // If the "Later" store is to a recognizable object, get its size. 425 uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F); 426 if (ObjectSize != MemoryLocation::UnknownSize) 427 if (ObjectSize == LaterSize && ObjectSize >= EarlierSize) 428 return OW_Complete; 429 430 // Okay, we have stores to two completely different pointers. Try to 431 // decompose the pointer into a "base + constant_offset" form. If the base 432 // pointers are equal, then we can reason about the two stores. 433 EarlierOff = 0; 434 LaterOff = 0; 435 const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); 436 const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); 437 438 // If the base pointers still differ, we have two completely different stores. 439 if (BP1 != BP2) 440 return OW_Unknown; 441 442 // The later store completely overlaps the earlier store if: 443 // 444 // 1. Both start at the same offset and the later one's size is greater than 445 // or equal to the earlier one's, or 446 // 447 // |--earlier--| 448 // |-- later --| 449 // 450 // 2. The earlier store has an offset greater than the later offset, but which 451 // still lies completely within the later store. 452 // 453 // |--earlier--| 454 // |----- later ------| 455 // 456 // We have to be careful here as *Off is signed while *.Size is unsigned. 457 if (EarlierOff >= LaterOff && 458 LaterSize >= EarlierSize && 459 uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) 460 return OW_Complete; 461 462 // We may now overlap, although the overlap is not complete. There might also 463 // be other incomplete overlaps, and together, they might cover the complete 464 // earlier write. 465 // Note: The correctness of this logic depends on the fact that this function 466 // is not even called providing DepWrite when there are any intervening reads. 467 if (EnablePartialOverwriteTracking && 468 LaterOff < int64_t(EarlierOff + EarlierSize) && 469 int64_t(LaterOff + LaterSize) >= EarlierOff) { 470 471 // Insert our part of the overlap into the map. 472 auto &IM = IOL[DepWrite]; 473 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff 474 << ", " << int64_t(EarlierOff + EarlierSize) 475 << ") Later [" << LaterOff << ", " 476 << int64_t(LaterOff + LaterSize) << ")\n"); 477 478 // Make sure that we only insert non-overlapping intervals and combine 479 // adjacent intervals. The intervals are stored in the map with the ending 480 // offset as the key (in the half-open sense) and the starting offset as 481 // the value. 482 int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize; 483 484 // Find any intervals ending at, or after, LaterIntStart which start 485 // before LaterIntEnd. 486 auto ILI = IM.lower_bound(LaterIntStart); 487 if (ILI != IM.end() && ILI->second <= LaterIntEnd) { 488 // This existing interval is overlapped with the current store somewhere 489 // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing 490 // intervals and adjusting our start and end. 491 LaterIntStart = std::min(LaterIntStart, ILI->second); 492 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 493 ILI = IM.erase(ILI); 494 495 // Continue erasing and adjusting our end in case other previous 496 // intervals are also overlapped with the current store. 497 // 498 // |--- ealier 1 ---| |--- ealier 2 ---| 499 // |------- later---------| 500 // 501 while (ILI != IM.end() && ILI->second <= LaterIntEnd) { 502 assert(ILI->second > LaterIntStart && "Unexpected interval"); 503 LaterIntEnd = std::max(LaterIntEnd, ILI->first); 504 ILI = IM.erase(ILI); 505 } 506 } 507 508 IM[LaterIntEnd] = LaterIntStart; 509 510 ILI = IM.begin(); 511 if (ILI->second <= EarlierOff && 512 ILI->first >= int64_t(EarlierOff + EarlierSize)) { 513 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" 514 << EarlierOff << ", " 515 << int64_t(EarlierOff + EarlierSize) 516 << ") Composite Later [" << ILI->second << ", " 517 << ILI->first << ")\n"); 518 ++NumCompletePartials; 519 return OW_Complete; 520 } 521 } 522 523 // Check for an earlier store which writes to all the memory locations that 524 // the later store writes to. 525 if (EnablePartialStoreMerging && LaterOff >= EarlierOff && 526 int64_t(EarlierOff + EarlierSize) > LaterOff && 527 uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) { 528 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" 529 << EarlierOff << ", " 530 << int64_t(EarlierOff + EarlierSize) 531 << ") by a later store [" << LaterOff << ", " 532 << int64_t(LaterOff + LaterSize) << ")\n"); 533 // TODO: Maybe come up with a better name? 534 return OW_PartialEarlierWithFullLater; 535 } 536 537 // Another interesting case is if the later store overwrites the end of the 538 // earlier store. 539 // 540 // |--earlier--| 541 // |-- later --| 542 // 543 // In this case we may want to trim the size of earlier to avoid generating 544 // writes to addresses which will definitely be overwritten later 545 if (!EnablePartialOverwriteTracking && 546 (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) && 547 int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize))) 548 return OW_End; 549 550 // Finally, we also need to check if the later store overwrites the beginning 551 // of the earlier store. 552 // 553 // |--earlier--| 554 // |-- later --| 555 // 556 // In this case we may want to move the destination address and trim the size 557 // of earlier to avoid generating writes to addresses which will definitely 558 // be overwritten later. 559 if (!EnablePartialOverwriteTracking && 560 (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) { 561 assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) && 562 "Expect to be handled as OW_Complete"); 563 return OW_Begin; 564 } 565 // Otherwise, they don't completely overlap. 566 return OW_Unknown; 567 } 568 569 /// If 'Inst' might be a self read (i.e. a noop copy of a 570 /// memory region into an identical pointer) then it doesn't actually make its 571 /// input dead in the traditional sense. Consider this case: 572 /// 573 /// memmove(A <- B) 574 /// memmove(A <- A) 575 /// 576 /// In this case, the second store to A does not make the first store to A dead. 577 /// The usual situation isn't an explicit A<-A store like this (which can be 578 /// trivially removed) but a case where two pointers may alias. 579 /// 580 /// This function detects when it is unsafe to remove a dependent instruction 581 /// because the DSE inducing instruction may be a self-read. 582 static bool isPossibleSelfRead(Instruction *Inst, 583 const MemoryLocation &InstStoreLoc, 584 Instruction *DepWrite, 585 const TargetLibraryInfo &TLI, 586 AliasAnalysis &AA) { 587 // Self reads can only happen for instructions that read memory. Get the 588 // location read. 589 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); 590 if (!InstReadLoc.Ptr) 591 return false; // Not a reading instruction. 592 593 // If the read and written loc obviously don't alias, it isn't a read. 594 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) 595 return false; 596 597 if (isa<AnyMemCpyInst>(Inst)) { 598 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) 599 // but in practice memcpy(A <- B) either means that A and B are disjoint or 600 // are equal (i.e. there are not partial overlaps). Given that, if we have: 601 // 602 // memcpy/memmove(A <- B) // DepWrite 603 // memcpy(A <- B) // Inst 604 // 605 // with Inst reading/writing a >= size than DepWrite, we can reason as 606 // follows: 607 // 608 // - If A == B then both the copies are no-ops, so the DepWrite can be 609 // removed. 610 // - If A != B then A and B are disjoint locations in Inst. Since 611 // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. 612 // Therefore DepWrite can be removed. 613 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); 614 615 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) 616 return false; 617 } 618 619 // If DepWrite doesn't read memory or if we can't prove it is a must alias, 620 // then it can't be considered dead. 621 return true; 622 } 623 624 /// Returns true if the memory which is accessed by the second instruction is not 625 /// modified between the first and the second instruction. 626 /// Precondition: Second instruction must be dominated by the first 627 /// instruction. 628 static bool memoryIsNotModifiedBetween(Instruction *FirstI, 629 Instruction *SecondI, 630 AliasAnalysis *AA, 631 const DataLayout &DL, 632 DominatorTree *DT) { 633 // Do a backwards scan through the CFG from SecondI to FirstI. Look for 634 // instructions which can modify the memory location accessed by SecondI. 635 // 636 // While doing the walk keep track of the address to check. It might be 637 // different in different basic blocks due to PHI translation. 638 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>; 639 SmallVector<BlockAddressPair, 16> WorkList; 640 // Keep track of the address we visited each block with. Bail out if we 641 // visit a block with different addresses. 642 DenseMap<BasicBlock *, Value *> Visited; 643 644 BasicBlock::iterator FirstBBI(FirstI); 645 ++FirstBBI; 646 BasicBlock::iterator SecondBBI(SecondI); 647 BasicBlock *FirstBB = FirstI->getParent(); 648 BasicBlock *SecondBB = SecondI->getParent(); 649 MemoryLocation MemLoc = MemoryLocation::get(SecondI); 650 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); 651 652 // Start checking the SecondBB. 653 WorkList.push_back( 654 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr))); 655 bool isFirstBlock = true; 656 657 // Check all blocks going backward until we reach the FirstBB. 658 while (!WorkList.empty()) { 659 BlockAddressPair Current = WorkList.pop_back_val(); 660 BasicBlock *B = Current.first; 661 PHITransAddr &Addr = Current.second; 662 Value *Ptr = Addr.getAddr(); 663 664 // Ignore instructions before FirstI if this is the FirstBB. 665 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); 666 667 BasicBlock::iterator EI; 668 if (isFirstBlock) { 669 // Ignore instructions after SecondI if this is the first visit of SecondBB. 670 assert(B == SecondBB && "first block is not the store block"); 671 EI = SecondBBI; 672 isFirstBlock = false; 673 } else { 674 // It's not SecondBB or (in case of a loop) the second visit of SecondBB. 675 // In this case we also have to look at instructions after SecondI. 676 EI = B->end(); 677 } 678 for (; BI != EI; ++BI) { 679 Instruction *I = &*BI; 680 if (I->mayWriteToMemory() && I != SecondI) 681 if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr)))) 682 return false; 683 } 684 if (B != FirstBB) { 685 assert(B != &FirstBB->getParent()->getEntryBlock() && 686 "Should not hit the entry block because SI must be dominated by LI"); 687 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { 688 PHITransAddr PredAddr = Addr; 689 if (PredAddr.NeedsPHITranslationFromBlock(B)) { 690 if (!PredAddr.IsPotentiallyPHITranslatable()) 691 return false; 692 if (PredAddr.PHITranslateValue(B, *PredI, DT, false)) 693 return false; 694 } 695 Value *TranslatedPtr = PredAddr.getAddr(); 696 auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr)); 697 if (!Inserted.second) { 698 // We already visited this block before. If it was with a different 699 // address - bail out! 700 if (TranslatedPtr != Inserted.first->second) 701 return false; 702 // ... otherwise just skip it. 703 continue; 704 } 705 WorkList.push_back(std::make_pair(*PredI, PredAddr)); 706 } 707 } 708 } 709 return true; 710 } 711 712 /// Find all blocks that will unconditionally lead to the block BB and append 713 /// them to F. 714 static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, 715 BasicBlock *BB, DominatorTree *DT) { 716 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { 717 BasicBlock *Pred = *I; 718 if (Pred == BB) continue; 719 Instruction *PredTI = Pred->getTerminator(); 720 if (PredTI->getNumSuccessors() != 1) 721 continue; 722 723 if (DT->isReachableFromEntry(Pred)) 724 Blocks.push_back(Pred); 725 } 726 } 727 728 /// Handle frees of entire structures whose dependency is a store 729 /// to a field of that structure. 730 static bool handleFree(CallInst *F, AliasAnalysis *AA, 731 MemoryDependenceResults *MD, DominatorTree *DT, 732 const TargetLibraryInfo *TLI, 733 InstOverlapIntervalsTy &IOL, 734 MapVector<Instruction *, bool> &ThrowableInst) { 735 bool MadeChange = false; 736 737 MemoryLocation Loc = MemoryLocation(F->getOperand(0)); 738 SmallVector<BasicBlock *, 16> Blocks; 739 Blocks.push_back(F->getParent()); 740 741 while (!Blocks.empty()) { 742 BasicBlock *BB = Blocks.pop_back_val(); 743 Instruction *InstPt = BB->getTerminator(); 744 if (BB == F->getParent()) InstPt = F; 745 746 MemDepResult Dep = 747 MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); 748 while (Dep.isDef() || Dep.isClobber()) { 749 Instruction *Dependency = Dep.getInst(); 750 if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || 751 !isRemovable(Dependency)) 752 break; 753 754 Value *DepPointer = 755 getUnderlyingObject(getStoredPointerOperand(Dependency)); 756 757 // Check for aliasing. 758 if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) 759 break; 760 761 LLVM_DEBUG( 762 dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " 763 << *Dependency << '\n'); 764 765 // DCE instructions only used to calculate that store. 766 BasicBlock::iterator BBI(Dependency); 767 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, 768 ThrowableInst); 769 ++NumFastStores; 770 MadeChange = true; 771 772 // Inst's old Dependency is now deleted. Compute the next dependency, 773 // which may also be dead, as in 774 // s[0] = 0; 775 // s[1] = 0; // This has just been deleted. 776 // free(s); 777 Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB); 778 } 779 780 if (Dep.isNonLocal()) 781 findUnconditionalPreds(Blocks, BB, DT); 782 } 783 784 return MadeChange; 785 } 786 787 /// Check to see if the specified location may alias any of the stack objects in 788 /// the DeadStackObjects set. If so, they become live because the location is 789 /// being loaded. 790 static void removeAccessedObjects(const MemoryLocation &LoadedLoc, 791 SmallSetVector<const Value *, 16> &DeadStackObjects, 792 const DataLayout &DL, AliasAnalysis *AA, 793 const TargetLibraryInfo *TLI, 794 const Function *F) { 795 const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr); 796 797 // A constant can't be in the dead pointer set. 798 if (isa<Constant>(UnderlyingPointer)) 799 return; 800 801 // If the kill pointer can be easily reduced to an alloca, don't bother doing 802 // extraneous AA queries. 803 if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { 804 DeadStackObjects.remove(UnderlyingPointer); 805 return; 806 } 807 808 // Remove objects that could alias LoadedLoc. 809 DeadStackObjects.remove_if([&](const Value *I) { 810 // See if the loaded location could alias the stack location. 811 MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F)); 812 return !AA->isNoAlias(StackLoc, LoadedLoc); 813 }); 814 } 815 816 /// Remove dead stores to stack-allocated locations in the function end block. 817 /// Ex: 818 /// %A = alloca i32 819 /// ... 820 /// store i32 1, i32* %A 821 /// ret void 822 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, 823 MemoryDependenceResults *MD, 824 const TargetLibraryInfo *TLI, 825 InstOverlapIntervalsTy &IOL, 826 MapVector<Instruction *, bool> &ThrowableInst) { 827 bool MadeChange = false; 828 829 // Keep track of all of the stack objects that are dead at the end of the 830 // function. 831 SmallSetVector<const Value*, 16> DeadStackObjects; 832 833 // Find all of the alloca'd pointers in the entry block. 834 BasicBlock &Entry = BB.getParent()->front(); 835 for (Instruction &I : Entry) { 836 if (isa<AllocaInst>(&I)) 837 DeadStackObjects.insert(&I); 838 839 // Okay, so these are dead heap objects, but if the pointer never escapes 840 // then it's leaked by this function anyways. 841 else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) 842 DeadStackObjects.insert(&I); 843 } 844 845 // Treat byval or inalloca arguments the same, stores to them are dead at the 846 // end of the function. 847 for (Argument &AI : BB.getParent()->args()) 848 if (AI.hasPassPointeeByValueCopyAttr()) 849 DeadStackObjects.insert(&AI); 850 851 const DataLayout &DL = BB.getModule()->getDataLayout(); 852 853 // Scan the basic block backwards 854 for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ 855 --BBI; 856 857 // If we find a store, check to see if it points into a dead stack value. 858 if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { 859 // See through pointer-to-pointer bitcasts 860 SmallVector<const Value *, 4> Pointers; 861 getUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers); 862 863 // Stores to stack values are valid candidates for removal. 864 bool AllDead = true; 865 for (const Value *Pointer : Pointers) 866 if (!DeadStackObjects.count(Pointer)) { 867 AllDead = false; 868 break; 869 } 870 871 if (AllDead) { 872 Instruction *Dead = &*BBI; 873 874 LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " 875 << *Dead << "\n Objects: "; 876 for (SmallVectorImpl<const Value *>::iterator I = 877 Pointers.begin(), 878 E = Pointers.end(); 879 I != E; ++I) { 880 dbgs() << **I; 881 if (std::next(I) != E) 882 dbgs() << ", "; 883 } dbgs() 884 << '\n'); 885 886 // DCE instructions only used to calculate that store. 887 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, ThrowableInst, 888 &DeadStackObjects); 889 ++NumFastStores; 890 MadeChange = true; 891 continue; 892 } 893 } 894 895 // Remove any dead non-memory-mutating instructions. 896 if (isInstructionTriviallyDead(&*BBI, TLI)) { 897 LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " 898 << *&*BBI << '\n'); 899 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, ThrowableInst, 900 &DeadStackObjects); 901 ++NumFastOther; 902 MadeChange = true; 903 continue; 904 } 905 906 if (isa<AllocaInst>(BBI)) { 907 // Remove allocas from the list of dead stack objects; there can't be 908 // any references before the definition. 909 DeadStackObjects.remove(&*BBI); 910 continue; 911 } 912 913 if (auto *Call = dyn_cast<CallBase>(&*BBI)) { 914 // Remove allocation function calls from the list of dead stack objects; 915 // there can't be any references before the definition. 916 if (isAllocLikeFn(&*BBI, TLI)) 917 DeadStackObjects.remove(&*BBI); 918 919 // If this call does not access memory, it can't be loading any of our 920 // pointers. 921 if (AA->doesNotAccessMemory(Call)) 922 continue; 923 924 // If the call might load from any of our allocas, then any store above 925 // the call is live. 926 DeadStackObjects.remove_if([&](const Value *I) { 927 // See if the call site touches the value. 928 return isRefSet(AA->getModRefInfo( 929 Call, I, getPointerSize(I, DL, *TLI, BB.getParent()))); 930 }); 931 932 // If all of the allocas were clobbered by the call then we're not going 933 // to find anything else to process. 934 if (DeadStackObjects.empty()) 935 break; 936 937 continue; 938 } 939 940 // We can remove the dead stores, irrespective of the fence and its ordering 941 // (release/acquire/seq_cst). Fences only constraints the ordering of 942 // already visible stores, it does not make a store visible to other 943 // threads. So, skipping over a fence does not change a store from being 944 // dead. 945 if (isa<FenceInst>(*BBI)) 946 continue; 947 948 MemoryLocation LoadedLoc; 949 950 // If we encounter a use of the pointer, it is no longer considered dead 951 if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { 952 if (!L->isUnordered()) // Be conservative with atomic/volatile load 953 break; 954 LoadedLoc = MemoryLocation::get(L); 955 } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { 956 LoadedLoc = MemoryLocation::get(V); 957 } else if (!BBI->mayReadFromMemory()) { 958 // Instruction doesn't read memory. Note that stores that weren't removed 959 // above will hit this case. 960 continue; 961 } else { 962 // Unknown inst; assume it clobbers everything. 963 break; 964 } 965 966 // Remove any allocas from the DeadPointer set that are loaded, as this 967 // makes any stores above the access live. 968 removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI, BB.getParent()); 969 970 // If all of the allocas were clobbered by the access then we're not going 971 // to find anything else to process. 972 if (DeadStackObjects.empty()) 973 break; 974 } 975 976 return MadeChange; 977 } 978 979 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, 980 int64_t &EarlierSize, int64_t LaterOffset, 981 int64_t LaterSize, bool IsOverwriteEnd) { 982 // TODO: base this on the target vector size so that if the earlier 983 // store was too small to get vector writes anyway then its likely 984 // a good idea to shorten it 985 // Power of 2 vector writes are probably always a bad idea to optimize 986 // as any store/memset/memcpy is likely using vector instructions so 987 // shortening it to not vector size is likely to be slower 988 auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite); 989 unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment(); 990 if (!IsOverwriteEnd) 991 LaterOffset = int64_t(LaterOffset + LaterSize); 992 993 if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) && 994 !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0)) 995 return false; 996 997 int64_t NewLength = IsOverwriteEnd 998 ? LaterOffset - EarlierOffset 999 : EarlierSize - (LaterOffset - EarlierOffset); 1000 1001 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) { 1002 // When shortening an atomic memory intrinsic, the newly shortened 1003 // length must remain an integer multiple of the element size. 1004 const uint32_t ElementSize = AMI->getElementSizeInBytes(); 1005 if (0 != NewLength % ElementSize) 1006 return false; 1007 } 1008 1009 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " 1010 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " 1011 << *EarlierWrite << "\n KILLER (offset " << LaterOffset 1012 << ", " << EarlierSize << ")\n"); 1013 1014 Value *EarlierWriteLength = EarlierIntrinsic->getLength(); 1015 Value *TrimmedLength = 1016 ConstantInt::get(EarlierWriteLength->getType(), NewLength); 1017 EarlierIntrinsic->setLength(TrimmedLength); 1018 1019 EarlierSize = NewLength; 1020 if (!IsOverwriteEnd) { 1021 int64_t OffsetMoved = (LaterOffset - EarlierOffset); 1022 Value *Indices[1] = { 1023 ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)}; 1024 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( 1025 EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(), 1026 EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite); 1027 NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc()); 1028 EarlierIntrinsic->setDest(NewDestGEP); 1029 EarlierOffset = EarlierOffset + OffsetMoved; 1030 } 1031 return true; 1032 } 1033 1034 static bool tryToShortenEnd(Instruction *EarlierWrite, 1035 OverlapIntervalsTy &IntervalMap, 1036 int64_t &EarlierStart, int64_t &EarlierSize) { 1037 if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) 1038 return false; 1039 1040 OverlapIntervalsTy::iterator OII = --IntervalMap.end(); 1041 int64_t LaterStart = OII->second; 1042 int64_t LaterSize = OII->first - LaterStart; 1043 1044 if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize && 1045 LaterStart + LaterSize >= EarlierStart + EarlierSize) { 1046 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1047 LaterSize, true)) { 1048 IntervalMap.erase(OII); 1049 return true; 1050 } 1051 } 1052 return false; 1053 } 1054 1055 static bool tryToShortenBegin(Instruction *EarlierWrite, 1056 OverlapIntervalsTy &IntervalMap, 1057 int64_t &EarlierStart, int64_t &EarlierSize) { 1058 if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) 1059 return false; 1060 1061 OverlapIntervalsTy::iterator OII = IntervalMap.begin(); 1062 int64_t LaterStart = OII->second; 1063 int64_t LaterSize = OII->first - LaterStart; 1064 1065 if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) { 1066 assert(LaterStart + LaterSize < EarlierStart + EarlierSize && 1067 "Should have been handled as OW_Complete"); 1068 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, 1069 LaterSize, false)) { 1070 IntervalMap.erase(OII); 1071 return true; 1072 } 1073 } 1074 return false; 1075 } 1076 1077 static bool removePartiallyOverlappedStores(const DataLayout &DL, 1078 InstOverlapIntervalsTy &IOL) { 1079 bool Changed = false; 1080 for (auto OI : IOL) { 1081 Instruction *EarlierWrite = OI.first; 1082 MemoryLocation Loc = getLocForWrite(EarlierWrite); 1083 assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); 1084 1085 const Value *Ptr = Loc.Ptr->stripPointerCasts(); 1086 int64_t EarlierStart = 0; 1087 int64_t EarlierSize = int64_t(Loc.Size.getValue()); 1088 GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); 1089 OverlapIntervalsTy &IntervalMap = OI.second; 1090 Changed |= 1091 tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1092 if (IntervalMap.empty()) 1093 continue; 1094 Changed |= 1095 tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); 1096 } 1097 return Changed; 1098 } 1099 1100 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, 1101 AliasAnalysis *AA, MemoryDependenceResults *MD, 1102 const DataLayout &DL, 1103 const TargetLibraryInfo *TLI, 1104 InstOverlapIntervalsTy &IOL, 1105 MapVector<Instruction *, bool> &ThrowableInst, 1106 DominatorTree *DT) { 1107 // Must be a store instruction. 1108 StoreInst *SI = dyn_cast<StoreInst>(Inst); 1109 if (!SI) 1110 return false; 1111 1112 // If we're storing the same value back to a pointer that we just loaded from, 1113 // then the store can be removed. 1114 if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) { 1115 if (SI->getPointerOperand() == DepLoad->getPointerOperand() && 1116 isRemovable(SI) && 1117 memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) { 1118 1119 LLVM_DEBUG( 1120 dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " 1121 << *DepLoad << "\n STORE: " << *SI << '\n'); 1122 1123 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1124 ++NumRedundantStores; 1125 return true; 1126 } 1127 } 1128 1129 // Remove null stores into the calloc'ed objects 1130 Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand()); 1131 if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { 1132 Instruction *UnderlyingPointer = 1133 dyn_cast<Instruction>(getUnderlyingObject(SI->getPointerOperand())); 1134 1135 if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && 1136 memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) { 1137 LLVM_DEBUG( 1138 dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " 1139 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); 1140 1141 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); 1142 ++NumRedundantStores; 1143 return true; 1144 } 1145 } 1146 return false; 1147 } 1148 1149 static Constant * 1150 tryToMergePartialOverlappingStores(StoreInst *Earlier, StoreInst *Later, 1151 int64_t InstWriteOffset, 1152 int64_t DepWriteOffset, const DataLayout &DL, 1153 AliasAnalysis *AA, DominatorTree *DT) { 1154 1155 if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && 1156 DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) && 1157 Later && isa<ConstantInt>(Later->getValueOperand()) && 1158 DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) && 1159 memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) { 1160 // If the store we find is: 1161 // a) partially overwritten by the store to 'Loc' 1162 // b) the later store is fully contained in the earlier one and 1163 // c) they both have a constant value 1164 // d) none of the two stores need padding 1165 // Merge the two stores, replacing the earlier store's value with a 1166 // merge of both values. 1167 // TODO: Deal with other constant types (vectors, etc), and probably 1168 // some mem intrinsics (if needed) 1169 1170 APInt EarlierValue = 1171 cast<ConstantInt>(Earlier->getValueOperand())->getValue(); 1172 APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue(); 1173 unsigned LaterBits = LaterValue.getBitWidth(); 1174 assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); 1175 LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); 1176 1177 // Offset of the smaller store inside the larger store 1178 unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; 1179 unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() - 1180 BitOffsetDiff - LaterBits 1181 : BitOffsetDiff; 1182 APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, 1183 LShiftAmount + LaterBits); 1184 // Clear the bits we'll be replacing, then OR with the smaller 1185 // store, shifted appropriately. 1186 APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); 1187 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier 1188 << "\n Later: " << *Later 1189 << "\n Merged Value: " << Merged << '\n'); 1190 return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged); 1191 } 1192 return nullptr; 1193 } 1194 1195 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, 1196 MemoryDependenceResults *MD, DominatorTree *DT, 1197 const TargetLibraryInfo *TLI) { 1198 const DataLayout &DL = BB.getModule()->getDataLayout(); 1199 bool MadeChange = false; 1200 1201 MapVector<Instruction *, bool> ThrowableInst; 1202 1203 // A map of interval maps representing partially-overwritten value parts. 1204 InstOverlapIntervalsTy IOL; 1205 1206 // Do a top-down walk on the BB. 1207 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { 1208 // Handle 'free' calls specially. 1209 if (CallInst *F = isFreeCall(&*BBI, TLI)) { 1210 MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, ThrowableInst); 1211 // Increment BBI after handleFree has potentially deleted instructions. 1212 // This ensures we maintain a valid iterator. 1213 ++BBI; 1214 continue; 1215 } 1216 1217 Instruction *Inst = &*BBI++; 1218 1219 if (Inst->mayThrow()) { 1220 ThrowableInst[Inst] = true; 1221 continue; 1222 } 1223 1224 // Check to see if Inst writes to memory. If not, continue. 1225 if (!hasAnalyzableMemoryWrite(Inst, *TLI)) 1226 continue; 1227 1228 // eliminateNoopStore will update in iterator, if necessary. 1229 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, 1230 ThrowableInst, DT)) { 1231 MadeChange = true; 1232 continue; 1233 } 1234 1235 // If we find something that writes memory, get its memory dependence. 1236 MemDepResult InstDep = MD->getDependency(Inst); 1237 1238 // Ignore any store where we can't find a local dependence. 1239 // FIXME: cross-block DSE would be fun. :) 1240 if (!InstDep.isDef() && !InstDep.isClobber()) 1241 continue; 1242 1243 // Figure out what location is being stored to. 1244 MemoryLocation Loc = getLocForWrite(Inst); 1245 1246 // If we didn't get a useful location, fail. 1247 if (!Loc.Ptr) 1248 continue; 1249 1250 // Loop until we find a store we can eliminate or a load that 1251 // invalidates the analysis. Without an upper bound on the number of 1252 // instructions examined, this analysis can become very time-consuming. 1253 // However, the potential gain diminishes as we process more instructions 1254 // without eliminating any of them. Therefore, we limit the number of 1255 // instructions we look at. 1256 auto Limit = MD->getDefaultBlockScanLimit(); 1257 while (InstDep.isDef() || InstDep.isClobber()) { 1258 // Get the memory clobbered by the instruction we depend on. MemDep will 1259 // skip any instructions that 'Loc' clearly doesn't interact with. If we 1260 // end up depending on a may- or must-aliased load, then we can't optimize 1261 // away the store and we bail out. However, if we depend on something 1262 // that overwrites the memory location we *can* potentially optimize it. 1263 // 1264 // Find out what memory location the dependent instruction stores. 1265 Instruction *DepWrite = InstDep.getInst(); 1266 if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) 1267 break; 1268 MemoryLocation DepLoc = getLocForWrite(DepWrite); 1269 // If we didn't get a useful location, or if it isn't a size, bail out. 1270 if (!DepLoc.Ptr) 1271 break; 1272 1273 // Find the last throwable instruction not removed by call to 1274 // deleteDeadInstruction. 1275 Instruction *LastThrowing = nullptr; 1276 if (!ThrowableInst.empty()) 1277 LastThrowing = ThrowableInst.back().first; 1278 1279 // Make sure we don't look past a call which might throw. This is an 1280 // issue because MemoryDependenceAnalysis works in the wrong direction: 1281 // it finds instructions which dominate the current instruction, rather than 1282 // instructions which are post-dominated by the current instruction. 1283 // 1284 // If the underlying object is a non-escaping memory allocation, any store 1285 // to it is dead along the unwind edge. Otherwise, we need to preserve 1286 // the store. 1287 if (LastThrowing && DepWrite->comesBefore(LastThrowing)) { 1288 const Value *Underlying = getUnderlyingObject(DepLoc.Ptr); 1289 bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying); 1290 if (!IsStoreDeadOnUnwind) { 1291 // We're looking for a call to an allocation function 1292 // where the allocation doesn't escape before the last 1293 // throwing instruction; PointerMayBeCaptured 1294 // reasonably fast approximation. 1295 IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) && 1296 !PointerMayBeCaptured(Underlying, false, true); 1297 } 1298 if (!IsStoreDeadOnUnwind) 1299 break; 1300 } 1301 1302 // If we find a write that is a) removable (i.e., non-volatile), b) is 1303 // completely obliterated by the store to 'Loc', and c) which we know that 1304 // 'Inst' doesn't load from, then we can remove it. 1305 // Also try to merge two stores if a later one only touches memory written 1306 // to by the earlier one. 1307 if (isRemovable(DepWrite) && 1308 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { 1309 int64_t InstWriteOffset, DepWriteOffset; 1310 OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, 1311 InstWriteOffset, DepWrite, IOL, *AA, 1312 BB.getParent()); 1313 if (OR == OW_Complete) { 1314 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite 1315 << "\n KILLER: " << *Inst << '\n'); 1316 1317 // Delete the store and now-dead instructions that feed it. 1318 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1319 ThrowableInst); 1320 ++NumFastStores; 1321 MadeChange = true; 1322 1323 // We erased DepWrite; start over. 1324 InstDep = MD->getDependency(Inst); 1325 continue; 1326 } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) || 1327 ((OR == OW_Begin && 1328 isShortenableAtTheBeginning(DepWrite)))) { 1329 assert(!EnablePartialOverwriteTracking && "Do not expect to perform " 1330 "when partial-overwrite " 1331 "tracking is enabled"); 1332 // The overwrite result is known, so these must be known, too. 1333 int64_t EarlierSize = DepLoc.Size.getValue(); 1334 int64_t LaterSize = Loc.Size.getValue(); 1335 bool IsOverwriteEnd = (OR == OW_End); 1336 MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, 1337 InstWriteOffset, LaterSize, IsOverwriteEnd); 1338 } else if (EnablePartialStoreMerging && 1339 OR == OW_PartialEarlierWithFullLater) { 1340 auto *Earlier = dyn_cast<StoreInst>(DepWrite); 1341 auto *Later = dyn_cast<StoreInst>(Inst); 1342 if (Constant *C = tryToMergePartialOverlappingStores( 1343 Earlier, Later, InstWriteOffset, DepWriteOffset, DL, AA, 1344 DT)) { 1345 auto *SI = new StoreInst( 1346 C, Earlier->getPointerOperand(), false, Earlier->getAlign(), 1347 Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); 1348 1349 unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, 1350 LLVMContext::MD_alias_scope, 1351 LLVMContext::MD_noalias, 1352 LLVMContext::MD_nontemporal}; 1353 SI->copyMetadata(*DepWrite, MDToKeep); 1354 ++NumModifiedStores; 1355 1356 // Delete the old stores and now-dead instructions that feed them. 1357 deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, 1358 ThrowableInst); 1359 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, 1360 ThrowableInst); 1361 MadeChange = true; 1362 1363 // We erased DepWrite and Inst (Loc); start over. 1364 break; 1365 } 1366 } 1367 } 1368 1369 // If this is a may-aliased store that is clobbering the store value, we 1370 // can keep searching past it for another must-aliased pointer that stores 1371 // to the same location. For example, in: 1372 // store -> P 1373 // store -> Q 1374 // store -> P 1375 // we can remove the first store to P even though we don't know if P and Q 1376 // alias. 1377 if (DepWrite == &BB.front()) break; 1378 1379 // Can't look past this instruction if it might read 'Loc'. 1380 if (isRefSet(AA->getModRefInfo(DepWrite, Loc))) 1381 break; 1382 1383 InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false, 1384 DepWrite->getIterator(), &BB, 1385 /*QueryInst=*/ nullptr, &Limit); 1386 } 1387 } 1388 1389 if (EnablePartialOverwriteTracking) 1390 MadeChange |= removePartiallyOverlappedStores(DL, IOL); 1391 1392 // If this block ends in a return, unwind, or unreachable, all allocas are 1393 // dead at its end, which means stores to them are also dead. 1394 if (BB.getTerminator()->getNumSuccessors() == 0) 1395 MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, ThrowableInst); 1396 1397 return MadeChange; 1398 } 1399 1400 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, 1401 MemoryDependenceResults *MD, DominatorTree *DT, 1402 const TargetLibraryInfo *TLI) { 1403 bool MadeChange = false; 1404 for (BasicBlock &BB : F) 1405 // Only check non-dead blocks. Dead blocks may have strange pointer 1406 // cycles that will confuse alias analysis. 1407 if (DT->isReachableFromEntry(&BB)) 1408 MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI); 1409 1410 return MadeChange; 1411 } 1412 1413 namespace { 1414 //============================================================================= 1415 // MemorySSA backed dead store elimination. 1416 // 1417 // The code below implements dead store elimination using MemorySSA. It uses 1418 // the following general approach: given a MemoryDef, walk upwards to find 1419 // clobbering MemoryDefs that may be killed by the starting def. Then check 1420 // that there are no uses that may read the location of the original MemoryDef 1421 // in between both MemoryDefs. A bit more concretely: 1422 // 1423 // For all MemoryDefs StartDef: 1424 // 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking 1425 // upwards. 1426 // 2. Check that there are no reads between DomAccess and the StartDef by 1427 // checking all uses starting at DomAccess and walking until we see StartDef. 1428 // 3. For each found DomDef, check that: 1429 // 1. There are no barrier instructions between DomDef and StartDef (like 1430 // throws or stores with ordering constraints). 1431 // 2. StartDef is executed whenever DomDef is executed. 1432 // 3. StartDef completely overwrites DomDef. 1433 // 4. Erase DomDef from the function and MemorySSA. 1434 1435 // Returns true if \p M is an intrisnic that does not read or write memory. 1436 bool isNoopIntrinsic(MemoryUseOrDef *M) { 1437 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) { 1438 switch (II->getIntrinsicID()) { 1439 case Intrinsic::lifetime_start: 1440 case Intrinsic::lifetime_end: 1441 case Intrinsic::invariant_end: 1442 case Intrinsic::launder_invariant_group: 1443 case Intrinsic::assume: 1444 return true; 1445 case Intrinsic::dbg_addr: 1446 case Intrinsic::dbg_declare: 1447 case Intrinsic::dbg_label: 1448 case Intrinsic::dbg_value: 1449 llvm_unreachable("Intrinsic should not be modeled in MemorySSA"); 1450 default: 1451 return false; 1452 } 1453 } 1454 return false; 1455 } 1456 1457 // Check if we can ignore \p D for DSE. 1458 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { 1459 Instruction *DI = D->getMemoryInst(); 1460 // Calls that only access inaccessible memory cannot read or write any memory 1461 // locations we consider for elimination. 1462 if (auto *CB = dyn_cast<CallBase>(DI)) 1463 if (CB->onlyAccessesInaccessibleMemory()) 1464 return true; 1465 1466 // We can eliminate stores to locations not visible to the caller across 1467 // throwing instructions. 1468 if (DI->mayThrow() && !DefVisibleToCaller) 1469 return true; 1470 1471 // We can remove the dead stores, irrespective of the fence and its ordering 1472 // (release/acquire/seq_cst). Fences only constraints the ordering of 1473 // already visible stores, it does not make a store visible to other 1474 // threads. So, skipping over a fence does not change a store from being 1475 // dead. 1476 if (isa<FenceInst>(DI)) 1477 return true; 1478 1479 // Skip intrinsics that do not really read or modify memory. 1480 if (isNoopIntrinsic(D)) 1481 return true; 1482 1483 return false; 1484 } 1485 1486 struct DSEState { 1487 Function &F; 1488 AliasAnalysis &AA; 1489 MemorySSA &MSSA; 1490 DominatorTree &DT; 1491 PostDominatorTree &PDT; 1492 const TargetLibraryInfo &TLI; 1493 1494 // All MemoryDefs that potentially could kill other MemDefs. 1495 SmallVector<MemoryDef *, 64> MemDefs; 1496 // Any that should be skipped as they are already deleted 1497 SmallPtrSet<MemoryAccess *, 4> SkipStores; 1498 // Keep track of all of the objects that are invisible to the caller before 1499 // the function returns. 1500 SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet; 1501 // Keep track of all of the objects that are invisible to the caller after 1502 // the function returns. 1503 SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet; 1504 // Keep track of blocks with throwing instructions not modeled in MemorySSA. 1505 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks; 1506 // Post-order numbers for each basic block. Used to figure out if memory 1507 // accesses are executed before another access. 1508 DenseMap<BasicBlock *, unsigned> PostOrderNumbers; 1509 1510 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per 1511 /// basic block. 1512 DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs; 1513 1514 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, 1515 PostDominatorTree &PDT, const TargetLibraryInfo &TLI) 1516 : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} 1517 1518 static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, 1519 DominatorTree &DT, PostDominatorTree &PDT, 1520 const TargetLibraryInfo &TLI) { 1521 DSEState State(F, AA, MSSA, DT, PDT, TLI); 1522 // Collect blocks with throwing instructions not modeled in MemorySSA and 1523 // alloc-like objects. 1524 unsigned PO = 0; 1525 for (BasicBlock *BB : post_order(&F)) { 1526 State.PostOrderNumbers[BB] = PO++; 1527 for (Instruction &I : *BB) { 1528 MemoryAccess *MA = MSSA.getMemoryAccess(&I); 1529 if (I.mayThrow() && !MA) 1530 State.ThrowingBlocks.insert(I.getParent()); 1531 1532 auto *MD = dyn_cast_or_null<MemoryDef>(MA); 1533 if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && 1534 (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I))) 1535 State.MemDefs.push_back(MD); 1536 1537 // Track whether alloca and alloca-like objects are visible in the 1538 // caller before and after the function returns. Alloca objects are 1539 // invalid in the caller, so they are neither visible before or after 1540 // the function returns. 1541 if (isa<AllocaInst>(&I)) { 1542 State.InvisibleToCallerBeforeRet.insert(&I); 1543 State.InvisibleToCallerAfterRet.insert(&I); 1544 } 1545 1546 // For alloca-like objects we need to check if they are captured before 1547 // the function returns and if the return might capture the object. 1548 if (isAllocLikeFn(&I, &TLI)) { 1549 bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true); 1550 if (!CapturesBeforeRet) { 1551 State.InvisibleToCallerBeforeRet.insert(&I); 1552 if (!PointerMayBeCaptured(&I, true, false)) 1553 State.InvisibleToCallerAfterRet.insert(&I); 1554 } 1555 } 1556 } 1557 } 1558 1559 // Treat byval or inalloca arguments the same as Allocas, stores to them are 1560 // dead at the end of the function. 1561 for (Argument &AI : F.args()) 1562 if (AI.hasPassPointeeByValueCopyAttr()) { 1563 // For byval, the caller doesn't know the address of the allocation. 1564 if (AI.hasByValAttr()) 1565 State.InvisibleToCallerBeforeRet.insert(&AI); 1566 State.InvisibleToCallerAfterRet.insert(&AI); 1567 } 1568 1569 return State; 1570 } 1571 1572 Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const { 1573 if (!I->mayWriteToMemory()) 1574 return None; 1575 1576 if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I)) 1577 return {MemoryLocation::getForDest(MTI)}; 1578 1579 if (auto *CB = dyn_cast<CallBase>(I)) { 1580 LibFunc LF; 1581 if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) { 1582 switch (LF) { 1583 case LibFunc_strcpy: 1584 case LibFunc_strncpy: 1585 case LibFunc_strcat: 1586 case LibFunc_strncat: 1587 return {MemoryLocation(CB->getArgOperand(0))}; 1588 default: 1589 break; 1590 } 1591 } 1592 switch (CB->getIntrinsicID()) { 1593 case Intrinsic::init_trampoline: 1594 return {MemoryLocation(CB->getArgOperand(0))}; 1595 default: 1596 break; 1597 } 1598 return None; 1599 } 1600 1601 return MemoryLocation::getOrNone(I); 1602 } 1603 1604 /// Returns true if \p Use completely overwrites \p DefLoc. 1605 bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const { 1606 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a 1607 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a 1608 // MemoryDef. 1609 if (!UseInst->mayWriteToMemory()) 1610 return false; 1611 1612 if (auto *CB = dyn_cast<CallBase>(UseInst)) 1613 if (CB->onlyAccessesInaccessibleMemory()) 1614 return false; 1615 1616 int64_t InstWriteOffset, DepWriteOffset; 1617 auto CC = getLocForWriteEx(UseInst); 1618 InstOverlapIntervalsTy IOL; 1619 1620 const DataLayout &DL = F.getParent()->getDataLayout(); 1621 1622 return CC && 1623 isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset, InstWriteOffset, 1624 UseInst, IOL, AA, &F) == OW_Complete; 1625 } 1626 1627 /// Returns true if \p Def is not read before returning from the function. 1628 bool isWriteAtEndOfFunction(MemoryDef *Def) { 1629 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " (" 1630 << *Def->getMemoryInst() 1631 << ") is at the end the function \n"); 1632 1633 auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst()); 1634 if (!MaybeLoc) { 1635 LLVM_DEBUG(dbgs() << " ... could not get location for write.\n"); 1636 return false; 1637 } 1638 1639 SmallVector<MemoryAccess *, 4> WorkList; 1640 SmallPtrSet<MemoryAccess *, 8> Visited; 1641 auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) { 1642 if (!Visited.insert(Acc).second) 1643 return; 1644 for (Use &U : Acc->uses()) 1645 WorkList.push_back(cast<MemoryAccess>(U.getUser())); 1646 }; 1647 PushMemUses(Def); 1648 for (unsigned I = 0; I < WorkList.size(); I++) { 1649 if (WorkList.size() >= MemorySSAScanLimit) { 1650 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n"); 1651 return false; 1652 } 1653 1654 MemoryAccess *UseAccess = WorkList[I]; 1655 if (isa<MemoryPhi>(UseAccess)) { 1656 PushMemUses(UseAccess); 1657 continue; 1658 } 1659 1660 // TODO: Checking for aliasing is expensive. Consider reducing the amount 1661 // of times this is called and/or caching it. 1662 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst(); 1663 if (isReadClobber(*MaybeLoc, UseInst)) { 1664 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n"); 1665 return false; 1666 } 1667 1668 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) 1669 PushMemUses(UseDef); 1670 } 1671 return true; 1672 } 1673 1674 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a 1675 /// pair with the MemoryLocation terminated by \p I and a boolean flag 1676 /// indicating whether \p I is a free-like call. 1677 Optional<std::pair<MemoryLocation, bool>> 1678 getLocForTerminator(Instruction *I) const { 1679 uint64_t Len; 1680 Value *Ptr; 1681 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len), 1682 m_Value(Ptr)))) 1683 return {std::make_pair(MemoryLocation(Ptr, Len), false)}; 1684 1685 if (auto *CB = dyn_cast<CallBase>(I)) { 1686 if (isFreeCall(I, &TLI)) 1687 return {std::make_pair(MemoryLocation(CB->getArgOperand(0)), true)}; 1688 } 1689 1690 return None; 1691 } 1692 1693 /// Returns true if \p I is a memory terminator instruction like 1694 /// llvm.lifetime.end or free. 1695 bool isMemTerminatorInst(Instruction *I) const { 1696 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 1697 return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || 1698 isFreeCall(I, &TLI); 1699 } 1700 1701 /// Returns true if \p MaybeTerm is a memory terminator for the same 1702 /// underlying object as \p DefLoc. 1703 bool isMemTerminator(MemoryLocation DefLoc, Instruction *MaybeTerm) const { 1704 Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc = 1705 getLocForTerminator(MaybeTerm); 1706 1707 if (!MaybeTermLoc) 1708 return false; 1709 1710 // If the terminator is a free-like call, all accesses to the underlying 1711 // object can be considered terminated. 1712 if (MaybeTermLoc->second) { 1713 DataLayout DL = MaybeTerm->getParent()->getModule()->getDataLayout(); 1714 DefLoc = MemoryLocation(getUnderlyingObject(DefLoc.Ptr)); 1715 } 1716 return AA.isMustAlias(MaybeTermLoc->first, DefLoc); 1717 } 1718 1719 // Returns true if \p Use may read from \p DefLoc. 1720 bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { 1721 if (!UseInst->mayReadFromMemory()) 1722 return false; 1723 1724 if (auto *CB = dyn_cast<CallBase>(UseInst)) 1725 if (CB->onlyAccessesInaccessibleMemory()) 1726 return false; 1727 1728 ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); 1729 // If necessary, perform additional analysis. 1730 if (isRefSet(MR)) 1731 MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); 1732 return isRefSet(MR); 1733 } 1734 1735 // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no 1736 // read access between them or on any other path to a function exit block if 1737 // \p DefLoc is not accessible after the function returns. If there is no such 1738 // MemoryDef, return None. The returned value may not (completely) overwrite 1739 // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse 1740 // (read). 1741 Optional<MemoryAccess *> 1742 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, 1743 MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet, 1744 bool DefVisibleToCallerAfterRet, unsigned &ScanLimit) const { 1745 if (ScanLimit == 0) { 1746 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); 1747 return None; 1748 } 1749 1750 MemoryAccess *DomAccess; 1751 bool StepAgain; 1752 LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current 1753 << "\n"); 1754 // Find the next clobbering Mod access for DefLoc, starting at Current. 1755 do { 1756 StepAgain = false; 1757 // Reached TOP. 1758 if (MSSA.isLiveOnEntryDef(Current)) 1759 return None; 1760 1761 if (isa<MemoryPhi>(Current)) { 1762 DomAccess = Current; 1763 break; 1764 } 1765 MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current); 1766 // Look for access that clobber DefLoc. 1767 DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD, 1768 DefLoc); 1769 if (MSSA.isLiveOnEntryDef(DomAccess)) 1770 return None; 1771 1772 if (isa<MemoryPhi>(DomAccess)) 1773 break; 1774 1775 // Check if we can skip DomDef for DSE. 1776 MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess); 1777 if (DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) { 1778 StepAgain = true; 1779 Current = DomDef->getDefiningAccess(); 1780 } 1781 1782 } while (StepAgain); 1783 1784 // Accesses to objects accessible after the function returns can only be 1785 // eliminated if the access is killed along all paths to the exit. Collect 1786 // the blocks with killing (=completely overwriting MemoryDefs) and check if 1787 // they cover all paths from DomAccess to any function exit. 1788 SmallPtrSet<Instruction *, 16> KillingDefs; 1789 KillingDefs.insert(KillingDef->getMemoryInst()); 1790 LLVM_DEBUG({ 1791 dbgs() << " Checking for reads of " << *DomAccess; 1792 if (isa<MemoryDef>(DomAccess)) 1793 dbgs() << " (" << *cast<MemoryDef>(DomAccess)->getMemoryInst() << ")\n"; 1794 else 1795 dbgs() << ")\n"; 1796 }); 1797 1798 SmallSetVector<MemoryAccess *, 32> WorkList; 1799 auto PushMemUses = [&WorkList](MemoryAccess *Acc) { 1800 for (Use &U : Acc->uses()) 1801 WorkList.insert(cast<MemoryAccess>(U.getUser())); 1802 }; 1803 PushMemUses(DomAccess); 1804 1805 // Check if DomDef may be read. 1806 for (unsigned I = 0; I < WorkList.size(); I++) { 1807 MemoryAccess *UseAccess = WorkList[I]; 1808 1809 LLVM_DEBUG(dbgs() << " " << *UseAccess); 1810 // Bail out if the number of accesses to check exceeds the scan limit. 1811 if (ScanLimit < (WorkList.size() - I)) { 1812 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); 1813 return None; 1814 } 1815 --ScanLimit; 1816 1817 if (isa<MemoryPhi>(UseAccess)) { 1818 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) { 1819 return DT.properlyDominates(KI->getParent(), 1820 UseAccess->getBlock()); 1821 })) { 1822 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n"); 1823 continue; 1824 } 1825 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n"); 1826 PushMemUses(UseAccess); 1827 continue; 1828 } 1829 1830 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst(); 1831 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); 1832 1833 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) { 1834 return DT.dominates(KI, UseInst); 1835 })) { 1836 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n"); 1837 continue; 1838 } 1839 1840 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) { 1841 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n"); 1842 PushMemUses(UseAccess); 1843 continue; 1844 } 1845 1846 // A memory terminator kills all preceeding MemoryDefs and all succeeding 1847 // MemoryAccesses. We do not have to check it's users. 1848 if (isMemTerminator(DefLoc, UseInst)) 1849 continue; 1850 1851 // Uses which may read the original MemoryDef mean we cannot eliminate the 1852 // original MD. Stop walk. 1853 if (isReadClobber(DefLoc, UseInst)) { 1854 LLVM_DEBUG(dbgs() << " ... found read clobber\n"); 1855 return None; 1856 } 1857 1858 // For the KillingDef and DomAccess we only have to check if it reads the 1859 // memory location. 1860 // TODO: It would probably be better to check for self-reads before 1861 // calling the function. 1862 if (KillingDef == UseAccess || DomAccess == UseAccess) { 1863 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); 1864 continue; 1865 } 1866 1867 // Check all uses for MemoryDefs, except for defs completely overwriting 1868 // the original location. Otherwise we have to check uses of *all* 1869 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might 1870 // miss cases like the following 1871 // 1 = Def(LoE) ; <----- DomDef stores [0,1] 1872 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] 1873 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. 1874 // (The Use points to the *first* Def it may alias) 1875 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, 1876 // stores [0,1] 1877 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) { 1878 if (isCompleteOverwrite(DefLoc, UseInst)) { 1879 if (DefVisibleToCallerAfterRet && UseAccess != DomAccess) { 1880 BasicBlock *MaybeKillingBlock = UseInst->getParent(); 1881 if (PostOrderNumbers.find(MaybeKillingBlock)->second < 1882 PostOrderNumbers.find(DomAccess->getBlock())->second) { 1883 1884 LLVM_DEBUG(dbgs() 1885 << " ... found killing def " << *UseInst << "\n"); 1886 KillingDefs.insert(UseInst); 1887 } 1888 } 1889 } else 1890 PushMemUses(UseDef); 1891 } 1892 } 1893 1894 // For accesses to locations visible after the function returns, make sure 1895 // that the location is killed (=overwritten) along all paths from DomAccess 1896 // to the exit. 1897 if (DefVisibleToCallerAfterRet) { 1898 SmallPtrSet<BasicBlock *, 16> KillingBlocks; 1899 for (Instruction *KD : KillingDefs) 1900 KillingBlocks.insert(KD->getParent()); 1901 assert(!KillingBlocks.empty() && 1902 "Expected at least a single killing block"); 1903 1904 // Find the common post-dominator of all killing blocks. 1905 BasicBlock *CommonPred = *KillingBlocks.begin(); 1906 for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end(); 1907 I != E; I++) { 1908 if (!CommonPred) 1909 break; 1910 CommonPred = PDT.findNearestCommonDominator(CommonPred, *I); 1911 } 1912 1913 // If CommonPred is in the set of killing blocks, just check if it 1914 // post-dominates DomAccess. 1915 if (KillingBlocks.count(CommonPred)) { 1916 if (PDT.dominates(CommonPred, DomAccess->getBlock())) 1917 return {DomAccess}; 1918 return None; 1919 } 1920 1921 // If the common post-dominator does not post-dominate DomAccess, there 1922 // is a path from DomAccess to an exit not going through a killing block. 1923 if (PDT.dominates(CommonPred, DomAccess->getBlock())) { 1924 SetVector<BasicBlock *> WorkList; 1925 1926 // DomAccess's post-order number provides an upper bound of the blocks 1927 // on a path starting at DomAccess. 1928 unsigned UpperBound = 1929 PostOrderNumbers.find(DomAccess->getBlock())->second; 1930 1931 // If CommonPred is null, there are multiple exits from the function. 1932 // They all have to be added to the worklist. 1933 if (CommonPred) 1934 WorkList.insert(CommonPred); 1935 else 1936 for (BasicBlock *R : PDT.roots()) 1937 WorkList.insert(R); 1938 1939 NumCFGTries++; 1940 // Check if all paths starting from an exit node go through one of the 1941 // killing blocks before reaching DomAccess. 1942 for (unsigned I = 0; I < WorkList.size(); I++) { 1943 NumCFGChecks++; 1944 BasicBlock *Current = WorkList[I]; 1945 if (KillingBlocks.count(Current)) 1946 continue; 1947 if (Current == DomAccess->getBlock()) 1948 return None; 1949 1950 // DomAccess is reachable from the entry, so we don't have to explore 1951 // unreachable blocks further. 1952 if (!DT.isReachableFromEntry(Current)) 1953 continue; 1954 1955 unsigned CPO = PostOrderNumbers.find(Current)->second; 1956 // Current block is not on a path starting at DomAccess. 1957 if (CPO > UpperBound) 1958 continue; 1959 for (BasicBlock *Pred : predecessors(Current)) 1960 WorkList.insert(Pred); 1961 1962 if (WorkList.size() >= MemorySSAPathCheckLimit) 1963 return None; 1964 } 1965 NumCFGSuccess++; 1966 return {DomAccess}; 1967 } 1968 return None; 1969 } 1970 1971 // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. 1972 return {DomAccess}; 1973 } 1974 1975 // Delete dead memory defs 1976 void deleteDeadInstruction(Instruction *SI) { 1977 MemorySSAUpdater Updater(&MSSA); 1978 SmallVector<Instruction *, 32> NowDeadInsts; 1979 NowDeadInsts.push_back(SI); 1980 --NumFastOther; 1981 1982 while (!NowDeadInsts.empty()) { 1983 Instruction *DeadInst = NowDeadInsts.pop_back_val(); 1984 ++NumFastOther; 1985 1986 // Try to preserve debug information attached to the dead instruction. 1987 salvageDebugInfo(*DeadInst); 1988 salvageKnowledge(DeadInst); 1989 1990 // Remove the Instruction from MSSA. 1991 if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { 1992 if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { 1993 SkipStores.insert(MD); 1994 } 1995 Updater.removeMemoryAccess(MA); 1996 } 1997 1998 auto I = IOLs.find(DeadInst->getParent()); 1999 if (I != IOLs.end()) 2000 I->second.erase(DeadInst); 2001 // Remove its operands 2002 for (Use &O : DeadInst->operands()) 2003 if (Instruction *OpI = dyn_cast<Instruction>(O)) { 2004 O = nullptr; 2005 if (isInstructionTriviallyDead(OpI, &TLI)) 2006 NowDeadInsts.push_back(OpI); 2007 } 2008 2009 DeadInst->eraseFromParent(); 2010 } 2011 } 2012 2013 // Check for any extra throws between SI and NI that block DSE. This only 2014 // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may 2015 // throw are handled during the walk from one def to the next. 2016 bool mayThrowBetween(Instruction *SI, Instruction *NI, 2017 const Value *SILocUnd) const { 2018 // First see if we can ignore it by using the fact that SI is an 2019 // alloca/alloca like object that is not visible to the caller during 2020 // execution of the function. 2021 if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd)) 2022 return false; 2023 2024 if (SI->getParent() == NI->getParent()) 2025 return ThrowingBlocks.count(SI->getParent()); 2026 return !ThrowingBlocks.empty(); 2027 } 2028 2029 // Check if \p NI acts as a DSE barrier for \p SI. The following instructions 2030 // act as barriers: 2031 // * A memory instruction that may throw and \p SI accesses a non-stack 2032 // object. 2033 // * Atomic stores stronger that monotonic. 2034 bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) const { 2035 // If NI may throw it acts as a barrier, unless we are to an alloca/alloca 2036 // like object that does not escape. 2037 if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd)) 2038 return true; 2039 2040 // If NI is an atomic load/store stronger than monotonic, do not try to 2041 // eliminate/reorder it. 2042 if (NI->isAtomic()) { 2043 if (auto *LI = dyn_cast<LoadInst>(NI)) 2044 return isStrongerThanMonotonic(LI->getOrdering()); 2045 if (auto *SI = dyn_cast<StoreInst>(NI)) 2046 return isStrongerThanMonotonic(SI->getOrdering()); 2047 llvm_unreachable("other instructions should be skipped in MemorySSA"); 2048 } 2049 return false; 2050 } 2051 2052 /// Eliminate writes to objects that are not visible in the caller and are not 2053 /// accessed before returning from the function. 2054 bool eliminateDeadWritesAtEndOfFunction() { 2055 bool MadeChange = false; 2056 LLVM_DEBUG( 2057 dbgs() 2058 << "Trying to eliminate MemoryDefs at the end of the function\n"); 2059 for (int I = MemDefs.size() - 1; I >= 0; I--) { 2060 MemoryDef *Def = MemDefs[I]; 2061 if (SkipStores.find(Def) != SkipStores.end() || 2062 !isRemovable(Def->getMemoryInst())) 2063 continue; 2064 2065 Instruction *DefI = Def->getMemoryInst(); 2066 SmallVector<const Value *, 4> Pointers; 2067 auto DefLoc = getLocForWriteEx(DefI); 2068 if (!DefLoc) 2069 continue; 2070 getUnderlyingObjects(DefLoc->Ptr, Pointers); 2071 2072 bool CanKill = true; 2073 for (const Value *Pointer : Pointers) { 2074 if (!InvisibleToCallerAfterRet.count(Pointer)) { 2075 CanKill = false; 2076 break; 2077 } 2078 } 2079 2080 if (CanKill && isWriteAtEndOfFunction(Def)) { 2081 // See through pointer-to-pointer bitcasts 2082 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " 2083 "of the function\n"); 2084 deleteDeadInstruction(DefI); 2085 ++NumFastStores; 2086 MadeChange = true; 2087 } 2088 } 2089 return MadeChange; 2090 } 2091 2092 /// \returns true if \p Def is a no-op store, either because it 2093 /// directly stores back a loaded value or stores zero to a calloced object. 2094 bool storeIsNoop(MemoryDef *Def, MemoryLocation DefLoc, const Value *DefUO) { 2095 StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst()); 2096 if (!Store) 2097 return false; 2098 2099 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) { 2100 if (LoadI->getPointerOperand() == Store->getOperand(1)) { 2101 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess(); 2102 // If both accesses share the same defining access, no instructions 2103 // between them can modify the memory location. 2104 return LoadAccess == Def->getDefiningAccess(); 2105 } 2106 } 2107 2108 Constant *StoredConstant = dyn_cast<Constant>(Store->getOperand(0)); 2109 if (StoredConstant && StoredConstant->isNullValue()) { 2110 auto *DefUOInst = dyn_cast<Instruction>(DefUO); 2111 if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) { 2112 auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); 2113 // If UnderlyingDef is the clobbering access of Def, no instructions 2114 // between them can modify the memory location. 2115 auto *ClobberDef = 2116 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); 2117 return UnderlyingDef == ClobberDef; 2118 } 2119 } 2120 return false; 2121 } 2122 }; 2123 2124 bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, 2125 MemorySSA &MSSA, DominatorTree &DT, 2126 PostDominatorTree &PDT, 2127 const TargetLibraryInfo &TLI) { 2128 const DataLayout &DL = F.getParent()->getDataLayout(); 2129 bool MadeChange = false; 2130 2131 DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); 2132 // For each store: 2133 for (unsigned I = 0; I < State.MemDefs.size(); I++) { 2134 MemoryDef *KillingDef = State.MemDefs[I]; 2135 if (State.SkipStores.count(KillingDef)) 2136 continue; 2137 Instruction *SI = KillingDef->getMemoryInst(); 2138 2139 auto MaybeSILoc = State.getLocForWriteEx(SI); 2140 if (State.isMemTerminatorInst(SI)) 2141 MaybeSILoc = State.getLocForTerminator(SI).map( 2142 [](const std::pair<MemoryLocation, bool> &P) { return P.first; }); 2143 else 2144 MaybeSILoc = State.getLocForWriteEx(SI); 2145 2146 if (!MaybeSILoc) { 2147 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " 2148 << *SI << "\n"); 2149 continue; 2150 } 2151 MemoryLocation SILoc = *MaybeSILoc; 2152 assert(SILoc.Ptr && "SILoc should not be null"); 2153 const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); 2154 2155 // Check if the store is a no-op. 2156 if (isRemovable(SI) && State.storeIsNoop(KillingDef, SILoc, SILocUnd)) { 2157 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n'); 2158 State.deleteDeadInstruction(SI); 2159 NumRedundantStores++; 2160 MadeChange = true; 2161 continue; 2162 } 2163 2164 Instruction *DefObj = 2165 const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd)); 2166 bool DefVisibleToCallerBeforeRet = 2167 !State.InvisibleToCallerBeforeRet.count(SILocUnd); 2168 bool DefVisibleToCallerAfterRet = 2169 !State.InvisibleToCallerAfterRet.count(SILocUnd); 2170 if (DefObj && isAllocLikeFn(DefObj, &TLI)) { 2171 if (DefVisibleToCallerBeforeRet) 2172 DefVisibleToCallerBeforeRet = 2173 PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT); 2174 } 2175 2176 MemoryAccess *Current = KillingDef; 2177 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " 2178 << *KillingDef << " (" << *SI << ")\n"); 2179 2180 unsigned ScanLimit = MemorySSAScanLimit; 2181 // Worklist of MemoryAccesses that may be killed by KillingDef. 2182 SetVector<MemoryAccess *> ToCheck; 2183 ToCheck.insert(KillingDef->getDefiningAccess()); 2184 2185 // Check if MemoryAccesses in the worklist are killed by KillingDef. 2186 for (unsigned I = 0; I < ToCheck.size(); I++) { 2187 Current = ToCheck[I]; 2188 if (State.SkipStores.count(Current)) 2189 continue; 2190 2191 Optional<MemoryAccess *> Next = State.getDomMemoryDef( 2192 KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet, 2193 DefVisibleToCallerAfterRet, ScanLimit); 2194 2195 if (!Next) { 2196 LLVM_DEBUG(dbgs() << " finished walk\n"); 2197 continue; 2198 } 2199 2200 MemoryAccess *DomAccess = *Next; 2201 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess); 2202 if (isa<MemoryPhi>(DomAccess)) { 2203 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); 2204 for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) { 2205 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V); 2206 BasicBlock *IncomingBlock = IncomingAccess->getBlock(); 2207 BasicBlock *PhiBlock = DomAccess->getBlock(); 2208 2209 // We only consider incoming MemoryAccesses that come before the 2210 // MemoryPhi. Otherwise we could discover candidates that do not 2211 // strictly dominate our starting def. 2212 if (State.PostOrderNumbers[IncomingBlock] > 2213 State.PostOrderNumbers[PhiBlock]) 2214 ToCheck.insert(IncomingAccess); 2215 } 2216 continue; 2217 } 2218 MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess); 2219 Instruction *NI = NextDef->getMemoryInst(); 2220 LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); 2221 2222 // Before we try to remove anything, check for any extra throwing 2223 // instructions that block us from DSEing 2224 if (State.mayThrowBetween(SI, NI, SILocUnd)) { 2225 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); 2226 break; 2227 } 2228 2229 // Check for anything that looks like it will be a barrier to further 2230 // removal 2231 if (State.isDSEBarrier(SILocUnd, NI)) { 2232 LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); 2233 continue; 2234 } 2235 2236 ToCheck.insert(NextDef->getDefiningAccess()); 2237 2238 if (!hasAnalyzableMemoryWrite(NI, TLI)) { 2239 LLVM_DEBUG(dbgs() << " ... skip, cannot analyze def\n"); 2240 continue; 2241 } 2242 2243 if (!isRemovable(NI)) { 2244 LLVM_DEBUG(dbgs() << " ... skip, cannot remove def\n"); 2245 continue; 2246 } 2247 2248 if (!DebugCounter::shouldExecute(MemorySSACounter)) 2249 continue; 2250 2251 MemoryLocation NILoc = *State.getLocForWriteEx(NI); 2252 2253 if (State.isMemTerminatorInst(SI)) { 2254 const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); 2255 if (!SILocUnd || SILocUnd != NIUnd) 2256 continue; 2257 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI 2258 << "\n KILLER: " << *SI << '\n'); 2259 State.deleteDeadInstruction(NI); 2260 ++NumFastStores; 2261 MadeChange = true; 2262 } else { 2263 // Check if NI overwrites SI. 2264 int64_t InstWriteOffset, DepWriteOffset; 2265 auto Iter = State.IOLs.insert( 2266 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( 2267 NI->getParent(), InstOverlapIntervalsTy())); 2268 auto &IOL = Iter.first->second; 2269 OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, 2270 InstWriteOffset, NI, IOL, AA, &F); 2271 2272 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { 2273 auto *Earlier = dyn_cast<StoreInst>(NI); 2274 auto *Later = dyn_cast<StoreInst>(SI); 2275 // We are re-using tryToMergePartialOverlappingStores, which requires 2276 // Earlier to domiante Later. 2277 // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. 2278 if (Earlier && Later && DT.dominates(Earlier, Later)) { 2279 if (Constant *Merged = tryToMergePartialOverlappingStores( 2280 Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA, 2281 &DT)) { 2282 2283 // Update stored value of earlier store to merged constant. 2284 Earlier->setOperand(0, Merged); 2285 ++NumModifiedStores; 2286 MadeChange = true; 2287 2288 // Remove later store and remove any outstanding overlap intervals 2289 // for the updated store. 2290 State.deleteDeadInstruction(Later); 2291 auto I = State.IOLs.find(Earlier->getParent()); 2292 if (I != State.IOLs.end()) 2293 I->second.erase(Earlier); 2294 break; 2295 } 2296 } 2297 } 2298 2299 if (OR == OW_Complete) { 2300 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI 2301 << "\n KILLER: " << *SI << '\n'); 2302 State.deleteDeadInstruction(NI); 2303 ++NumFastStores; 2304 MadeChange = true; 2305 } 2306 } 2307 } 2308 } 2309 2310 if (EnablePartialOverwriteTracking) 2311 for (auto &KV : State.IOLs) 2312 MadeChange |= removePartiallyOverlappedStores(DL, KV.second); 2313 2314 MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); 2315 return MadeChange; 2316 } 2317 } // end anonymous namespace 2318 2319 //===----------------------------------------------------------------------===// 2320 // DSE Pass 2321 //===----------------------------------------------------------------------===// 2322 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { 2323 AliasAnalysis &AA = AM.getResult<AAManager>(F); 2324 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); 2325 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 2326 2327 bool Changed = false; 2328 if (EnableMemorySSA) { 2329 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); 2330 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 2331 2332 Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 2333 } else { 2334 MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F); 2335 2336 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 2337 } 2338 2339 #ifdef LLVM_ENABLE_STATS 2340 if (AreStatisticsEnabled()) 2341 for (auto &I : instructions(F)) 2342 NumRemainingStores += isa<StoreInst>(&I); 2343 #endif 2344 2345 if (!Changed) 2346 return PreservedAnalyses::all(); 2347 2348 PreservedAnalyses PA; 2349 PA.preserveSet<CFGAnalyses>(); 2350 PA.preserve<GlobalsAA>(); 2351 if (EnableMemorySSA) 2352 PA.preserve<MemorySSAAnalysis>(); 2353 else 2354 PA.preserve<MemoryDependenceAnalysis>(); 2355 return PA; 2356 } 2357 2358 namespace { 2359 2360 /// A legacy pass for the legacy pass manager that wraps \c DSEPass. 2361 class DSELegacyPass : public FunctionPass { 2362 public: 2363 static char ID; // Pass identification, replacement for typeid 2364 2365 DSELegacyPass() : FunctionPass(ID) { 2366 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); 2367 } 2368 2369 bool runOnFunction(Function &F) override { 2370 if (skipFunction(F)) 2371 return false; 2372 2373 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 2374 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2375 const TargetLibraryInfo &TLI = 2376 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2377 2378 bool Changed = false; 2379 if (EnableMemorySSA) { 2380 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 2381 PostDominatorTree &PDT = 2382 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 2383 2384 Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); 2385 } else { 2386 MemoryDependenceResults &MD = 2387 getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 2388 2389 Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI); 2390 } 2391 2392 #ifdef LLVM_ENABLE_STATS 2393 if (AreStatisticsEnabled()) 2394 for (auto &I : instructions(F)) 2395 NumRemainingStores += isa<StoreInst>(&I); 2396 #endif 2397 2398 return Changed; 2399 } 2400 2401 void getAnalysisUsage(AnalysisUsage &AU) const override { 2402 AU.setPreservesCFG(); 2403 AU.addRequired<AAResultsWrapperPass>(); 2404 AU.addRequired<TargetLibraryInfoWrapperPass>(); 2405 AU.addPreserved<GlobalsAAWrapperPass>(); 2406 AU.addRequired<DominatorTreeWrapperPass>(); 2407 AU.addPreserved<DominatorTreeWrapperPass>(); 2408 2409 if (EnableMemorySSA) { 2410 AU.addRequired<PostDominatorTreeWrapperPass>(); 2411 AU.addRequired<MemorySSAWrapperPass>(); 2412 AU.addPreserved<PostDominatorTreeWrapperPass>(); 2413 AU.addPreserved<MemorySSAWrapperPass>(); 2414 } else { 2415 AU.addRequired<MemoryDependenceWrapperPass>(); 2416 AU.addPreserved<MemoryDependenceWrapperPass>(); 2417 } 2418 } 2419 }; 2420 2421 } // end anonymous namespace 2422 2423 char DSELegacyPass::ID = 0; 2424 2425 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, 2426 false) 2427 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2428 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 2429 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 2430 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 2431 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 2432 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 2433 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 2434 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, 2435 false) 2436 2437 FunctionPass *llvm::createDeadStoreEliminationPass() { 2438 return new DSELegacyPass(); 2439 } 2440