//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the MemorySSA class.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "memoryssa"

INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                      true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                    true)

INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
                      "Memory SSA Printer", false, false)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
                    "Memory SSA Printer", false, false)

static cl::opt<unsigned> MaxCheckLimit(
    "memssa-check-limit", cl::Hidden, cl::init(100),
    cl::desc("The maximum number of stores/phis MemorySSA "
             "will consider trying to walk past (default = 100)"));

// Always verify MemorySSA if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
bool llvm::VerifyMemorySSA = true;
#else
bool llvm::VerifyMemorySSA = false;
#endif
static cl::opt<bool, true>
    VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
                     cl::Hidden, cl::desc("Enable verification of MemorySSA."));

namespace llvm {

/// An assembly annotator class to print Memory SSA information in
/// comments.
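/// As an illustrative sketch only (the exact text comes from
/// MemoryAccess::print and may differ slightly), the annotated output for a
/// simple store/load pair looks roughly like:
///
///   ; 1 = MemoryDef(liveOnEntry)
///   store i32 4, i32* %a
///   ; MemoryUse(1)
///   %v = load i32, i32* %a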
94 class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { 95 friend class MemorySSA; 96 97 const MemorySSA *MSSA; 98 99 public: 100 MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} 101 102 void emitBasicBlockStartAnnot(const BasicBlock *BB, 103 formatted_raw_ostream &OS) override { 104 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) 105 OS << "; " << *MA << "\n"; 106 } 107 108 void emitInstructionAnnot(const Instruction *I, 109 formatted_raw_ostream &OS) override { 110 if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) 111 OS << "; " << *MA << "\n"; 112 } 113 }; 114 115 } // end namespace llvm 116 117 namespace { 118 119 /// Our current alias analysis API differentiates heavily between calls and 120 /// non-calls, and functions called on one usually assert on the other. 121 /// This class encapsulates the distinction to simplify other code that wants 122 /// "Memory affecting instructions and related data" to use as a key. 123 /// For example, this class is used as a densemap key in the use optimizer. 124 class MemoryLocOrCall { 125 public: 126 bool IsCall = false; 127 128 MemoryLocOrCall() = default; 129 MemoryLocOrCall(MemoryUseOrDef *MUD) 130 : MemoryLocOrCall(MUD->getMemoryInst()) {} 131 MemoryLocOrCall(const MemoryUseOrDef *MUD) 132 : MemoryLocOrCall(MUD->getMemoryInst()) {} 133 134 MemoryLocOrCall(Instruction *Inst) { 135 if (ImmutableCallSite(Inst)) { 136 IsCall = true; 137 CS = ImmutableCallSite(Inst); 138 } else { 139 IsCall = false; 140 // There is no such thing as a memorylocation for a fence inst, and it is 141 // unique in that regard. 142 if (!isa<FenceInst>(Inst)) 143 Loc = MemoryLocation::get(Inst); 144 } 145 } 146 147 explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {} 148 149 ImmutableCallSite getCS() const { 150 assert(IsCall); 151 return CS; 152 } 153 154 MemoryLocation getLoc() const { 155 assert(!IsCall); 156 return Loc; 157 } 158 159 bool operator==(const MemoryLocOrCall &Other) const { 160 if (IsCall != Other.IsCall) 161 return false; 162 163 if (!IsCall) 164 return Loc == Other.Loc; 165 166 if (CS.getCalledValue() != Other.CS.getCalledValue()) 167 return false; 168 169 return CS.arg_size() == Other.CS.arg_size() && 170 std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin()); 171 } 172 173 private: 174 union { 175 ImmutableCallSite CS; 176 MemoryLocation Loc; 177 }; 178 }; 179 180 } // end anonymous namespace 181 182 namespace llvm { 183 184 template <> struct DenseMapInfo<MemoryLocOrCall> { 185 static inline MemoryLocOrCall getEmptyKey() { 186 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); 187 } 188 189 static inline MemoryLocOrCall getTombstoneKey() { 190 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); 191 } 192 193 static unsigned getHashValue(const MemoryLocOrCall &MLOC) { 194 if (!MLOC.IsCall) 195 return hash_combine( 196 MLOC.IsCall, 197 DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); 198 199 hash_code hash = 200 hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue( 201 MLOC.getCS().getCalledValue())); 202 203 for (const Value *Arg : MLOC.getCS().args()) 204 hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg)); 205 return hash; 206 } 207 208 static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { 209 return LHS == RHS; 210 } 211 }; 212 213 } // end namespace llvm 214 215 /// This does one-way checks to see if Use could theoretically be hoisted above 216 /// MayClobber. 
This will not check the other way around. 217 /// 218 /// This assumes that, for the purposes of MemorySSA, Use comes directly after 219 /// MayClobber, with no potentially clobbering operations in between them. 220 /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) 221 static bool areLoadsReorderable(const LoadInst *Use, 222 const LoadInst *MayClobber) { 223 bool VolatileUse = Use->isVolatile(); 224 bool VolatileClobber = MayClobber->isVolatile(); 225 // Volatile operations may never be reordered with other volatile operations. 226 if (VolatileUse && VolatileClobber) 227 return false; 228 // Otherwise, volatile doesn't matter here. From the language reference: 229 // 'optimizers may change the order of volatile operations relative to 230 // non-volatile operations.'" 231 232 // If a load is seq_cst, it cannot be moved above other loads. If its ordering 233 // is weaker, it can be moved above other loads. We just need to be sure that 234 // MayClobber isn't an acquire load, because loads can't be moved above 235 // acquire loads. 236 // 237 // Note that this explicitly *does* allow the free reordering of monotonic (or 238 // weaker) loads of the same address. 239 bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; 240 bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), 241 AtomicOrdering::Acquire); 242 return !(SeqCstUse || MayClobberIsAcquire); 243 } 244 245 namespace { 246 247 struct ClobberAlias { 248 bool IsClobber; 249 Optional<AliasResult> AR; 250 }; 251 252 } // end anonymous namespace 253 254 // Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being 255 // ignored if IsClobber = false. 256 static ClobberAlias instructionClobbersQuery(const MemoryDef *MD, 257 const MemoryLocation &UseLoc, 258 const Instruction *UseInst, 259 AliasAnalysis &AA) { 260 Instruction *DefInst = MD->getMemoryInst(); 261 assert(DefInst && "Defining instruction not actually an instruction"); 262 ImmutableCallSite UseCS(UseInst); 263 Optional<AliasResult> AR; 264 265 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { 266 // These intrinsics will show up as affecting memory, but they are just 267 // markers, mostly. 268 // 269 // FIXME: We probably don't actually want MemorySSA to model these at all 270 // (including creating MemoryAccesses for them): we just end up inventing 271 // clobbers where they don't really exist at all. Please see D43269 for 272 // context. 273 switch (II->getIntrinsicID()) { 274 case Intrinsic::lifetime_start: 275 if (UseCS) 276 return {false, NoAlias}; 277 AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc); 278 return {AR != NoAlias, AR}; 279 case Intrinsic::lifetime_end: 280 case Intrinsic::invariant_start: 281 case Intrinsic::invariant_end: 282 case Intrinsic::assume: 283 return {false, NoAlias}; 284 default: 285 break; 286 } 287 } 288 289 if (UseCS) { 290 ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); 291 AR = isMustSet(I) ? MustAlias : MayAlias; 292 return {isModOrRefSet(I), AR}; 293 } 294 295 if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) 296 if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) 297 return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias}; 298 299 ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc); 300 AR = isMustSet(I) ? 
MustAlias : MayAlias;
  return {isModSet(I), AR};
}

static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
                                             const MemoryUseOrDef *MU,
                                             const MemoryLocOrCall &UseMLOC,
                                             AliasAnalysis &AA) {
  // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
  // to exist while MemoryLocOrCall is pushed through places.
  if (UseMLOC.IsCall)
    return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
                                    AA);
  return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
                                  AA);
}

// Return true when MD may alias MU, return false otherwise.
bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
                                        AliasAnalysis &AA) {
  return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber;
}

namespace {

struct UpwardsMemoryQuery {
  // True if our original query started off as a call.
  bool IsCall = false;
  // The pointer location we started the query with. This will be empty if
  // IsCall is true.
  MemoryLocation StartingLoc;
  // This is the instruction we were querying about.
  const Instruction *Inst = nullptr;
  // The MemoryAccess we actually got called with, used to test local
  // domination.
  const MemoryAccess *OriginalAccess = nullptr;
  Optional<AliasResult> AR = MayAlias;

  UpwardsMemoryQuery() = default;

  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
      : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
    if (!IsCall)
      StartingLoc = MemoryLocation::get(Inst);
  }
};

} // end anonymous namespace

static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
                           AliasAnalysis &AA) {
  Instruction *Inst = MD->getMemoryInst();
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_end:
      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
    default:
      return false;
    }
  }
  return false;
}

static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
                                                   const Instruction *I) {
  // If the memory can't be changed, then loads of the memory can't be
  // clobbered.
  return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
                              AA.pointsToConstantMemory(
                                  cast<LoadInst>(I)->getPointerOperand()));
}

/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
/// in between `Start` and `ClobberAt` can clobber `Start`.
///
/// This is meant to be as simple and self-contained as possible. Because it
/// uses no cache, etc., it can be relatively expensive.
///
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
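///
/// A minimal sketch of the property being checked (hypothetical IR, invented
/// for illustration only):
///
///   ; 1 = MemoryDef(liveOnEntry)
///   store i32 0, i32* %q        ; assumed NoAlias with %p
///   ; MemoryUse(liveOnEntry)    ; Start, optimized past the store
///   %v = load i32, i32* %p
///
/// With ClobberAt = liveOnEntry, the walk upward from Start must reach
/// ClobberAt without finding any MemoryDef that clobbers %p.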
384 static void 385 checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, 386 const MemoryLocation &StartLoc, const MemorySSA &MSSA, 387 const UpwardsMemoryQuery &Query, AliasAnalysis &AA, 388 bool AllowImpreciseClobber = false) { 389 assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?"); 390 391 if (MSSA.isLiveOnEntryDef(Start)) { 392 assert(MSSA.isLiveOnEntryDef(ClobberAt) && 393 "liveOnEntry must clobber itself"); 394 return; 395 } 396 397 bool FoundClobber = false; 398 DenseSet<ConstMemoryAccessPair> VisitedPhis; 399 SmallVector<ConstMemoryAccessPair, 8> Worklist; 400 Worklist.emplace_back(Start, StartLoc); 401 // Walk all paths from Start to ClobberAt, while looking for clobbers. If one 402 // is found, complain. 403 while (!Worklist.empty()) { 404 auto MAP = Worklist.pop_back_val(); 405 // All we care about is that nothing from Start to ClobberAt clobbers Start. 406 // We learn nothing from revisiting nodes. 407 if (!VisitedPhis.insert(MAP).second) 408 continue; 409 410 for (const auto *MA : def_chain(MAP.first)) { 411 if (MA == ClobberAt) { 412 if (const auto *MD = dyn_cast<MemoryDef>(MA)) { 413 // instructionClobbersQuery isn't essentially free, so don't use `|=`, 414 // since it won't let us short-circuit. 415 // 416 // Also, note that this can't be hoisted out of the `Worklist` loop, 417 // since MD may only act as a clobber for 1 of N MemoryLocations. 418 FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD); 419 if (!FoundClobber) { 420 ClobberAlias CA = 421 instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); 422 if (CA.IsClobber) { 423 FoundClobber = true; 424 // Not used: CA.AR; 425 } 426 } 427 } 428 break; 429 } 430 431 // We should never hit liveOnEntry, unless it's the clobber. 432 assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); 433 434 if (const auto *MD = dyn_cast<MemoryDef>(MA)) { 435 // If Start is a Def, skip self. 436 if (MD == Start) 437 continue; 438 439 assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) 440 .IsClobber && 441 "Found clobber before reaching ClobberAt!"); 442 continue; 443 } 444 445 if (const auto *MU = dyn_cast<MemoryUse>(MA)) { 446 (void)MU; 447 assert (MU == Start && 448 "Can only find use in def chain if Start is a use"); 449 continue; 450 } 451 452 assert(isa<MemoryPhi>(MA)); 453 Worklist.append( 454 upward_defs_begin({const_cast<MemoryAccess *>(MA), MAP.second}), 455 upward_defs_end()); 456 } 457 } 458 459 // If the verify is done following an optimization, it's possible that 460 // ClobberAt was a conservative clobbering, that we can now infer is not a 461 // true clobbering access. Don't fail the verify if that's the case. 462 // We do have accesses that claim they're optimized, but could be optimized 463 // further. Updating all these can be expensive, so allow it for now (FIXME). 464 if (AllowImpreciseClobber) 465 return; 466 467 // If ClobberAt is a MemoryPhi, we can assume something above it acted as a 468 // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. 469 assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && 470 "ClobberAt never acted as a clobber"); 471 } 472 473 namespace { 474 475 /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up 476 /// in one class. 477 class ClobberWalker { 478 /// Save a few bytes by using unsigned instead of size_t. 479 using ListIndex = unsigned; 480 481 /// Represents a span of contiguous MemoryDefs, potentially ending in a 482 /// MemoryPhi. 
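/// As a hedged, illustrative example (hypothetical numbering): walking upward
/// from a query below  4 = MemoryDef(3)  through  3 = MemoryDef(2)  and into a
/// MemoryPhi, the span is recorded with First = the MemoryDef the walk entered
/// it at (4), Last = how far up we have walked so far (the MemoryPhi), and
/// Previous indexing the DefPath we came from, if any.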
483 struct DefPath { 484 MemoryLocation Loc; 485 // Note that, because we always walk in reverse, Last will always dominate 486 // First. Also note that First and Last are inclusive. 487 MemoryAccess *First; 488 MemoryAccess *Last; 489 Optional<ListIndex> Previous; 490 491 DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, 492 Optional<ListIndex> Previous) 493 : Loc(Loc), First(First), Last(Last), Previous(Previous) {} 494 495 DefPath(const MemoryLocation &Loc, MemoryAccess *Init, 496 Optional<ListIndex> Previous) 497 : DefPath(Loc, Init, Init, Previous) {} 498 }; 499 500 const MemorySSA &MSSA; 501 AliasAnalysis &AA; 502 DominatorTree &DT; 503 UpwardsMemoryQuery *Query; 504 505 // Phi optimization bookkeeping 506 SmallVector<DefPath, 32> Paths; 507 DenseSet<ConstMemoryAccessPair> VisitedPhis; 508 509 /// Find the nearest def or phi that `From` can legally be optimized to. 510 const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { 511 assert(From->getNumOperands() && "Phi with no operands?"); 512 513 BasicBlock *BB = From->getBlock(); 514 MemoryAccess *Result = MSSA.getLiveOnEntryDef(); 515 DomTreeNode *Node = DT.getNode(BB); 516 while ((Node = Node->getIDom())) { 517 auto *Defs = MSSA.getBlockDefs(Node->getBlock()); 518 if (Defs) 519 return &*Defs->rbegin(); 520 } 521 return Result; 522 } 523 524 /// Result of calling walkToPhiOrClobber. 525 struct UpwardsWalkResult { 526 /// The "Result" of the walk. Either a clobber, the last thing we walked, or 527 /// both. Include alias info when clobber found. 528 MemoryAccess *Result; 529 bool IsKnownClobber; 530 Optional<AliasResult> AR; 531 }; 532 533 /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. 534 /// This will update Desc.Last as it walks. It will (optionally) also stop at 535 /// StopAt. 536 /// 537 /// This does not test for whether StopAt is a clobber 538 UpwardsWalkResult 539 walkToPhiOrClobber(DefPath &Desc, 540 const MemoryAccess *StopAt = nullptr) const { 541 assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); 542 543 for (MemoryAccess *Current : def_chain(Desc.Last)) { 544 Desc.Last = Current; 545 if (Current == StopAt) 546 return {Current, false, MayAlias}; 547 548 if (auto *MD = dyn_cast<MemoryDef>(Current)) { 549 if (MSSA.isLiveOnEntryDef(MD)) 550 return {MD, true, MustAlias}; 551 ClobberAlias CA = 552 instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); 553 if (CA.IsClobber) 554 return {MD, true, CA.AR}; 555 } 556 } 557 558 assert(isa<MemoryPhi>(Desc.Last) && 559 "Ended at a non-clobber that's not a phi?"); 560 return {Desc.Last, false, MayAlias}; 561 } 562 563 void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, 564 ListIndex PriorNode) { 565 auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), 566 upward_defs_end()); 567 for (const MemoryAccessPair &P : UpwardDefs) { 568 PausedSearches.push_back(Paths.size()); 569 Paths.emplace_back(P.second, P.first, PriorNode); 570 } 571 } 572 573 /// Represents a search that terminated after finding a clobber. This clobber 574 /// may or may not be present in the path of defs from LastNode..SearchStart, 575 /// since it may have been retrieved from cache. 576 struct TerminatedPath { 577 MemoryAccess *Clobber; 578 ListIndex LastNode; 579 }; 580 581 /// Get an access that keeps us from optimizing to the given phi. 582 /// 583 /// PausedSearches is an array of indices into the Paths array. 
Its incoming 584 /// value is the indices of searches that stopped at the last phi optimization 585 /// target. It's left in an unspecified state. 586 /// 587 /// If this returns None, NewPaused is a vector of searches that terminated 588 /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. 589 Optional<TerminatedPath> 590 getBlockingAccess(const MemoryAccess *StopWhere, 591 SmallVectorImpl<ListIndex> &PausedSearches, 592 SmallVectorImpl<ListIndex> &NewPaused, 593 SmallVectorImpl<TerminatedPath> &Terminated) { 594 assert(!PausedSearches.empty() && "No searches to continue?"); 595 596 // BFS vs DFS really doesn't make a difference here, so just do a DFS with 597 // PausedSearches as our stack. 598 while (!PausedSearches.empty()) { 599 ListIndex PathIndex = PausedSearches.pop_back_val(); 600 DefPath &Node = Paths[PathIndex]; 601 602 // If we've already visited this path with this MemoryLocation, we don't 603 // need to do so again. 604 // 605 // NOTE: That we just drop these paths on the ground makes caching 606 // behavior sporadic. e.g. given a diamond: 607 // A 608 // B C 609 // D 610 // 611 // ...If we walk D, B, A, C, we'll only cache the result of phi 612 // optimization for A, B, and D; C will be skipped because it dies here. 613 // This arguably isn't the worst thing ever, since: 614 // - We generally query things in a top-down order, so if we got below D 615 // without needing cache entries for {C, MemLoc}, then chances are 616 // that those cache entries would end up ultimately unused. 617 // - We still cache things for A, so C only needs to walk up a bit. 618 // If this behavior becomes problematic, we can fix without a ton of extra 619 // work. 620 if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) 621 continue; 622 623 UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); 624 if (Res.IsKnownClobber) { 625 assert(Res.Result != StopWhere); 626 // If this wasn't a cache hit, we hit a clobber when walking. That's a 627 // failure. 628 TerminatedPath Term{Res.Result, PathIndex}; 629 if (!MSSA.dominates(Res.Result, StopWhere)) 630 return Term; 631 632 // Otherwise, it's a valid thing to potentially optimize to. 633 Terminated.push_back(Term); 634 continue; 635 } 636 637 if (Res.Result == StopWhere) { 638 // We've hit our target. Save this path off for if we want to continue 639 // walking. 
640 NewPaused.push_back(PathIndex); 641 continue; 642 } 643 644 assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); 645 addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); 646 } 647 648 return None; 649 } 650 651 template <typename T, typename Walker> 652 struct generic_def_path_iterator 653 : public iterator_facade_base<generic_def_path_iterator<T, Walker>, 654 std::forward_iterator_tag, T *> { 655 generic_def_path_iterator() = default; 656 generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} 657 658 T &operator*() const { return curNode(); } 659 660 generic_def_path_iterator &operator++() { 661 N = curNode().Previous; 662 return *this; 663 } 664 665 bool operator==(const generic_def_path_iterator &O) const { 666 if (N.hasValue() != O.N.hasValue()) 667 return false; 668 return !N.hasValue() || *N == *O.N; 669 } 670 671 private: 672 T &curNode() const { return W->Paths[*N]; } 673 674 Walker *W = nullptr; 675 Optional<ListIndex> N = None; 676 }; 677 678 using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; 679 using const_def_path_iterator = 680 generic_def_path_iterator<const DefPath, const ClobberWalker>; 681 682 iterator_range<def_path_iterator> def_path(ListIndex From) { 683 return make_range(def_path_iterator(this, From), def_path_iterator()); 684 } 685 686 iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { 687 return make_range(const_def_path_iterator(this, From), 688 const_def_path_iterator()); 689 } 690 691 struct OptznResult { 692 /// The path that contains our result. 693 TerminatedPath PrimaryClobber; 694 /// The paths that we can legally cache back from, but that aren't 695 /// necessarily the result of the Phi optimization. 696 SmallVector<TerminatedPath, 4> OtherClobbers; 697 }; 698 699 ListIndex defPathIndex(const DefPath &N) const { 700 // The assert looks nicer if we don't need to do &N 701 const DefPath *NP = &N; 702 assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && 703 "Out of bounds DefPath!"); 704 return NP - &Paths.front(); 705 } 706 707 /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths 708 /// that act as legal clobbers. Note that this won't return *all* clobbers. 709 /// 710 /// Phi optimization algorithm tl;dr: 711 /// - Find the earliest def/phi, A, we can optimize to 712 /// - Find if all paths from the starting memory access ultimately reach A 713 /// - If not, optimization isn't possible. 714 /// - Otherwise, walk from A to another clobber or phi, A'. 715 /// - If A' is a def, we're done. 716 /// - If A' is a phi, try to optimize it. 717 /// 718 /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path 719 /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. 720 OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, 721 const MemoryLocation &Loc) { 722 assert(Paths.empty() && VisitedPhis.empty() && 723 "Reset the optimization state."); 724 725 Paths.emplace_back(Loc, Start, Phi, None); 726 // Stores how many "valid" optimization nodes we had prior to calling 727 // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. 728 auto PriorPathsSize = Paths.size(); 729 730 SmallVector<ListIndex, 16> PausedSearches; 731 SmallVector<ListIndex, 8> NewPaused; 732 SmallVector<TerminatedPath, 4> TerminatedPaths; 733 734 addSearches(Phi, PausedSearches, 0); 735 736 // Moves the TerminatedPath with the "most dominated" Clobber to the end of 737 // Paths. 
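    // (Illustrative sketch, not tied to any particular test case: if the list
    //  holds clobbers in blocks Entry -> A -> B of a straight-line CFG, the
    //  clobber in B dominates neither of the others, so it is the "most
    //  dominated" one; the lambda swaps it to the back, where pop_back_val()
    //  at the call sites picks it up as the clobber nearest the query.)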
738 auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { 739 assert(!Paths.empty() && "Need a path to move"); 740 auto Dom = Paths.begin(); 741 for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) 742 if (!MSSA.dominates(I->Clobber, Dom->Clobber)) 743 Dom = I; 744 auto Last = Paths.end() - 1; 745 if (Last != Dom) 746 std::iter_swap(Last, Dom); 747 }; 748 749 MemoryPhi *Current = Phi; 750 while (true) { 751 assert(!MSSA.isLiveOnEntryDef(Current) && 752 "liveOnEntry wasn't treated as a clobber?"); 753 754 const auto *Target = getWalkTarget(Current); 755 // If a TerminatedPath doesn't dominate Target, then it wasn't a legal 756 // optimization for the prior phi. 757 assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { 758 return MSSA.dominates(P.Clobber, Target); 759 })); 760 761 // FIXME: This is broken, because the Blocker may be reported to be 762 // liveOnEntry, and we'll happily wait for that to disappear (read: never) 763 // For the moment, this is fine, since we do nothing with blocker info. 764 if (Optional<TerminatedPath> Blocker = getBlockingAccess( 765 Target, PausedSearches, NewPaused, TerminatedPaths)) { 766 767 // Find the node we started at. We can't search based on N->Last, since 768 // we may have gone around a loop with a different MemoryLocation. 769 auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { 770 return defPathIndex(N) < PriorPathsSize; 771 }); 772 assert(Iter != def_path_iterator()); 773 774 DefPath &CurNode = *Iter; 775 assert(CurNode.Last == Current); 776 777 // Two things: 778 // A. We can't reliably cache all of NewPaused back. Consider a case 779 // where we have two paths in NewPaused; one of which can't optimize 780 // above this phi, whereas the other can. If we cache the second path 781 // back, we'll end up with suboptimal cache entries. We can handle 782 // cases like this a bit better when we either try to find all 783 // clobbers that block phi optimization, or when our cache starts 784 // supporting unfinished searches. 785 // B. We can't reliably cache TerminatedPaths back here without doing 786 // extra checks; consider a case like: 787 // T 788 // / \ 789 // D C 790 // \ / 791 // S 792 // Where T is our target, C is a node with a clobber on it, D is a 793 // diamond (with a clobber *only* on the left or right node, N), and 794 // S is our start. Say we walk to D, through the node opposite N 795 // (read: ignoring the clobber), and see a cache entry in the top 796 // node of D. That cache entry gets put into TerminatedPaths. We then 797 // walk up to C (N is later in our worklist), find the clobber, and 798 // quit. If we append TerminatedPaths to OtherClobbers, we'll cache 799 // the bottom part of D to the cached clobber, ignoring the clobber 800 // in N. Again, this problem goes away if we start tracking all 801 // blockers for a given phi optimization. 802 TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; 803 return {Result, {}}; 804 } 805 806 // If there's nothing left to search, then all paths led to valid clobbers 807 // that we got from our cache; pick the nearest to the start, and allow 808 // the rest to be cached back. 
809 if (NewPaused.empty()) { 810 MoveDominatedPathToEnd(TerminatedPaths); 811 TerminatedPath Result = TerminatedPaths.pop_back_val(); 812 return {Result, std::move(TerminatedPaths)}; 813 } 814 815 MemoryAccess *DefChainEnd = nullptr; 816 SmallVector<TerminatedPath, 4> Clobbers; 817 for (ListIndex Paused : NewPaused) { 818 UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); 819 if (WR.IsKnownClobber) 820 Clobbers.push_back({WR.Result, Paused}); 821 else 822 // Micro-opt: If we hit the end of the chain, save it. 823 DefChainEnd = WR.Result; 824 } 825 826 if (!TerminatedPaths.empty()) { 827 // If we couldn't find the dominating phi/liveOnEntry in the above loop, 828 // do it now. 829 if (!DefChainEnd) 830 for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) 831 DefChainEnd = MA; 832 833 // If any of the terminated paths don't dominate the phi we'll try to 834 // optimize, we need to figure out what they are and quit. 835 const BasicBlock *ChainBB = DefChainEnd->getBlock(); 836 for (const TerminatedPath &TP : TerminatedPaths) { 837 // Because we know that DefChainEnd is as "high" as we can go, we 838 // don't need local dominance checks; BB dominance is sufficient. 839 if (DT.dominates(ChainBB, TP.Clobber->getBlock())) 840 Clobbers.push_back(TP); 841 } 842 } 843 844 // If we have clobbers in the def chain, find the one closest to Current 845 // and quit. 846 if (!Clobbers.empty()) { 847 MoveDominatedPathToEnd(Clobbers); 848 TerminatedPath Result = Clobbers.pop_back_val(); 849 return {Result, std::move(Clobbers)}; 850 } 851 852 assert(all_of(NewPaused, 853 [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); 854 855 // Because liveOnEntry is a clobber, this must be a phi. 856 auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); 857 858 PriorPathsSize = Paths.size(); 859 PausedSearches.clear(); 860 for (ListIndex I : NewPaused) 861 addSearches(DefChainPhi, PausedSearches, I); 862 NewPaused.clear(); 863 864 Current = DefChainPhi; 865 } 866 } 867 868 void verifyOptResult(const OptznResult &R) const { 869 assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { 870 return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); 871 })); 872 } 873 874 void resetPhiOptznState() { 875 Paths.clear(); 876 VisitedPhis.clear(); 877 } 878 879 public: 880 ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) 881 : MSSA(MSSA), AA(AA), DT(DT) {} 882 883 /// Finds the nearest clobber for the given query, optimizing phis if 884 /// possible. 885 MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { 886 Query = &Q; 887 888 MemoryAccess *Current = Start; 889 // This walker pretends uses don't exist. If we're handed one, silently grab 890 // its def. 
(This has the nice side-effect of ensuring we never cache uses) 891 if (auto *MU = dyn_cast<MemoryUse>(Start)) 892 Current = MU->getDefiningAccess(); 893 894 DefPath FirstDesc(Q.StartingLoc, Current, Current, None); 895 // Fast path for the overly-common case (no crazy phi optimization 896 // necessary) 897 UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); 898 MemoryAccess *Result; 899 if (WalkResult.IsKnownClobber) { 900 Result = WalkResult.Result; 901 Q.AR = WalkResult.AR; 902 } else { 903 OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), 904 Current, Q.StartingLoc); 905 verifyOptResult(OptRes); 906 resetPhiOptznState(); 907 Result = OptRes.PrimaryClobber.Clobber; 908 } 909 910 #ifdef EXPENSIVE_CHECKS 911 checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); 912 #endif 913 return Result; 914 } 915 916 void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } 917 }; 918 919 struct RenamePassData { 920 DomTreeNode *DTN; 921 DomTreeNode::const_iterator ChildIt; 922 MemoryAccess *IncomingVal; 923 924 RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, 925 MemoryAccess *M) 926 : DTN(D), ChildIt(It), IncomingVal(M) {} 927 928 void swap(RenamePassData &RHS) { 929 std::swap(DTN, RHS.DTN); 930 std::swap(ChildIt, RHS.ChildIt); 931 std::swap(IncomingVal, RHS.IncomingVal); 932 } 933 }; 934 935 } // end anonymous namespace 936 937 namespace llvm { 938 939 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no 940 /// longer does caching on its own, but the name has been retained for the 941 /// moment. 942 class MemorySSA::CachingWalker final : public MemorySSAWalker { 943 ClobberWalker Walker; 944 945 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); 946 947 public: 948 CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); 949 ~CachingWalker() override = default; 950 951 using MemorySSAWalker::getClobberingMemoryAccess; 952 953 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; 954 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, 955 const MemoryLocation &) override; 956 void invalidateInfo(MemoryAccess *) override; 957 958 void verify(const MemorySSA *MSSA) override { 959 MemorySSAWalker::verify(MSSA); 960 Walker.verify(MSSA); 961 } 962 }; 963 964 } // end namespace llvm 965 966 void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, 967 bool RenameAllUses) { 968 // Pass through values to our successors 969 for (const BasicBlock *S : successors(BB)) { 970 auto It = PerBlockAccesses.find(S); 971 // Rename the phi nodes in our successor block 972 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 973 continue; 974 AccessList *Accesses = It->second.get(); 975 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 976 if (RenameAllUses) { 977 int PhiIndex = Phi->getBasicBlockIndex(BB); 978 assert(PhiIndex != -1 && "Incomplete phi during partial rename"); 979 Phi->setIncomingValue(PhiIndex, IncomingVal); 980 } else 981 Phi->addIncoming(IncomingVal, BB); 982 } 983 } 984 985 /// Rename a single basic block into MemorySSA form. 986 /// Uses the standard SSA renaming algorithm. 987 /// \returns The new incoming value. 988 MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, 989 bool RenameAllUses) { 990 auto It = PerBlockAccesses.find(BB); 991 // Skip most processing if the list is empty. 
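  // (Illustrative sketch: with an access list of [ MemoryUse, MemoryDef,
  //  MemoryUse ] and IncomingVal = liveOnEntry, the first use is wired to
  //  liveOnEntry, the def becomes the new IncomingVal, the second use is wired
  //  to that def, and the def is what we return to the caller.)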
992 if (It != PerBlockAccesses.end()) { 993 AccessList *Accesses = It->second.get(); 994 for (MemoryAccess &L : *Accesses) { 995 if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { 996 if (MUD->getDefiningAccess() == nullptr || RenameAllUses) 997 MUD->setDefiningAccess(IncomingVal); 998 if (isa<MemoryDef>(&L)) 999 IncomingVal = &L; 1000 } else { 1001 IncomingVal = &L; 1002 } 1003 } 1004 } 1005 return IncomingVal; 1006 } 1007 1008 /// This is the standard SSA renaming algorithm. 1009 /// 1010 /// We walk the dominator tree in preorder, renaming accesses, and then filling 1011 /// in phi nodes in our successors. 1012 void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, 1013 SmallPtrSetImpl<BasicBlock *> &Visited, 1014 bool SkipVisited, bool RenameAllUses) { 1015 SmallVector<RenamePassData, 32> WorkStack; 1016 // Skip everything if we already renamed this block and we are skipping. 1017 // Note: You can't sink this into the if, because we need it to occur 1018 // regardless of whether we skip blocks or not. 1019 bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; 1020 if (SkipVisited && AlreadyVisited) 1021 return; 1022 1023 IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); 1024 renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); 1025 WorkStack.push_back({Root, Root->begin(), IncomingVal}); 1026 1027 while (!WorkStack.empty()) { 1028 DomTreeNode *Node = WorkStack.back().DTN; 1029 DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; 1030 IncomingVal = WorkStack.back().IncomingVal; 1031 1032 if (ChildIt == Node->end()) { 1033 WorkStack.pop_back(); 1034 } else { 1035 DomTreeNode *Child = *ChildIt; 1036 ++WorkStack.back().ChildIt; 1037 BasicBlock *BB = Child->getBlock(); 1038 // Note: You can't sink this into the if, because we need it to occur 1039 // regardless of whether we skip blocks or not. 1040 AlreadyVisited = !Visited.insert(BB).second; 1041 if (SkipVisited && AlreadyVisited) { 1042 // We already visited this during our renaming, which can happen when 1043 // being asked to rename multiple blocks. Figure out the incoming val, 1044 // which is the last def. 1045 // Incoming value can only change if there is a block def, and in that 1046 // case, it's the last block def in the list. 1047 if (auto *BlockDefs = getWritableBlockDefs(BB)) 1048 IncomingVal = &*BlockDefs->rbegin(); 1049 } else 1050 IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); 1051 renameSuccessorPhis(BB, IncomingVal, RenameAllUses); 1052 WorkStack.push_back({Child, Child->begin(), IncomingVal}); 1053 } 1054 } 1055 } 1056 1057 /// This handles unreachable block accesses by deleting phi nodes in 1058 /// unreachable blocks, and marking all other unreachable MemoryAccess's as 1059 /// being uses of the live on entry definition. 1060 void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { 1061 assert(!DT->isReachableFromEntry(BB) && 1062 "Reachable block found while handling unreachable blocks"); 1063 1064 // Make sure phi nodes in our reachable successors end up with a 1065 // LiveOnEntryDef for our incoming edge, even though our block is forward 1066 // unreachable. We could just disconnect these blocks from the CFG fully, 1067 // but we do not right now. 
1068 for (const BasicBlock *S : successors(BB)) { 1069 if (!DT->isReachableFromEntry(S)) 1070 continue; 1071 auto It = PerBlockAccesses.find(S); 1072 // Rename the phi nodes in our successor block 1073 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 1074 continue; 1075 AccessList *Accesses = It->second.get(); 1076 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 1077 Phi->addIncoming(LiveOnEntryDef.get(), BB); 1078 } 1079 1080 auto It = PerBlockAccesses.find(BB); 1081 if (It == PerBlockAccesses.end()) 1082 return; 1083 1084 auto &Accesses = It->second; 1085 for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { 1086 auto Next = std::next(AI); 1087 // If we have a phi, just remove it. We are going to replace all 1088 // users with live on entry. 1089 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) 1090 UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); 1091 else 1092 Accesses->erase(AI); 1093 AI = Next; 1094 } 1095 } 1096 1097 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) 1098 : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), 1099 NextID(0) { 1100 buildMemorySSA(); 1101 } 1102 1103 MemorySSA::~MemorySSA() { 1104 // Drop all our references 1105 for (const auto &Pair : PerBlockAccesses) 1106 for (MemoryAccess &MA : *Pair.second) 1107 MA.dropAllReferences(); 1108 } 1109 1110 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { 1111 auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); 1112 1113 if (Res.second) 1114 Res.first->second = llvm::make_unique<AccessList>(); 1115 return Res.first->second.get(); 1116 } 1117 1118 MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { 1119 auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); 1120 1121 if (Res.second) 1122 Res.first->second = llvm::make_unique<DefsList>(); 1123 return Res.first->second.get(); 1124 } 1125 1126 namespace llvm { 1127 1128 /// This class is a batch walker of all MemoryUse's in the program, and points 1129 /// their defining access at the thing that actually clobbers them. Because it 1130 /// is a batch walker that touches everything, it does not operate like the 1131 /// other walkers. This walker is basically performing a top-down SSA renaming 1132 /// pass, where the version stack is used as the cache. This enables it to be 1133 /// significantly more time and memory efficient than using the regular walker, 1134 /// which is walking bottom-up. 1135 class MemorySSA::OptimizeUses { 1136 public: 1137 OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, 1138 DominatorTree *DT) 1139 : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { 1140 Walker = MSSA->getWalker(); 1141 } 1142 1143 void optimizeUses(); 1144 1145 private: 1146 /// This represents where a given memorylocation is in the stack. 1147 struct MemlocStackInfo { 1148 // This essentially is keeping track of versions of the stack. Whenever 1149 // the stack changes due to pushes or pops, these versions increase. 1150 unsigned long StackEpoch; 1151 unsigned long PopEpoch; 1152 // This is the lower bound of places on the stack to check. It is equal to 1153 // the place the last stack walk ended. 1154 // Note: Correctness depends on this being initialized to 0, which densemap 1155 // does 1156 unsigned long LowerBound; 1157 const BasicBlock *LowerBoundBlock; 1158 // This is where the last walk for this memory location ended. 
    unsigned long LastKill;
    bool LastKillValid;
    Optional<AliasResult> AR;
  };

  void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
                           SmallVectorImpl<MemoryAccess *> &,
                           DenseMap<MemoryLocOrCall, MemlocStackInfo> &);

  MemorySSA *MSSA;
  MemorySSAWalker *Walker;
  AliasAnalysis *AA;
  DominatorTree *DT;
};

} // end namespace llvm

/// Optimize the uses in a given block. This is basically the SSA renaming
/// algorithm, with one caveat: We are able to use a single stack for all
/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
/// going to be some position in that stack of possible ones.
///
/// We track the stack positions that each MemoryLocation needs to check, and
/// where its last walk ended. This is because we only want to check the
/// things that changed since last time. The same MemoryLocation should get
/// clobbered by the same store (getModRefInfo does not use invariantness or
/// things like this, and if that changes, we can modify MemoryLocOrCall to
/// include the relevant data).
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
    SmallVectorImpl<MemoryAccess *> &VersionStack,
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {

  // If there are no accesses, there is nothing to do.
  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
  if (Accesses == nullptr)
    return;

  // Pop everything that doesn't dominate the current block off the stack,
  // increment the PopEpoch to account for this.
  while (true) {
    assert(
        !VersionStack.empty() &&
        "Version stack should have liveOnEntry sentinel dominating everything");
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
    if (DT->dominates(BackBlock, BB))
      break;
    while (VersionStack.back()->getBlock() == BackBlock)
      VersionStack.pop_back();
    ++PopEpoch;
  }

  for (MemoryAccess &MA : *Accesses) {
    auto *MU = dyn_cast<MemoryUse>(&MA);
    if (!MU) {
      VersionStack.push_back(&MA);
      ++StackEpoch;
      continue;
    }

    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
      continue;
    }

    MemoryLocOrCall UseMLOC(MU);
    auto &LocInfo = LocStackInfo[UseMLOC];
    // If the pop epoch changed, it means we've removed stuff from the top of
    // the stack due to changing blocks. We may have to reset the lower bound
    // or last kill info.
    if (LocInfo.PopEpoch != PopEpoch) {
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
      // If the lower bound was in something that no longer dominates us, we
      // have to reset it.
      // We can't simply track stack size, because the stack may have had
      // pushes/pops in the meantime.
      // XXX: This is non-optimal, but is only slower in cases with heavily
      // branching dominator trees. The way to get the optimal number of
      // queries would be to make lowerbound and lastkill a per-loc stack, and
      // pop it until the top of that stack dominates us. This does not seem
      // worth it ATM.
      // A much cheaper optimization would be to always explore the deepest
      // branch of the dominator tree first.
This will guarantee this resets on 1243 // the smallest set of blocks. 1244 if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && 1245 !DT->dominates(LocInfo.LowerBoundBlock, BB)) { 1246 // Reset the lower bound of things to check. 1247 // TODO: Some day we should be able to reset to last kill, rather than 1248 // 0. 1249 LocInfo.LowerBound = 0; 1250 LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); 1251 LocInfo.LastKillValid = false; 1252 } 1253 } else if (LocInfo.StackEpoch != StackEpoch) { 1254 // If all that has changed is the StackEpoch, we only have to check the 1255 // new things on the stack, because we've checked everything before. In 1256 // this case, the lower bound of things to check remains the same. 1257 LocInfo.PopEpoch = PopEpoch; 1258 LocInfo.StackEpoch = StackEpoch; 1259 } 1260 if (!LocInfo.LastKillValid) { 1261 LocInfo.LastKill = VersionStack.size() - 1; 1262 LocInfo.LastKillValid = true; 1263 LocInfo.AR = MayAlias; 1264 } 1265 1266 // At this point, we should have corrected last kill and LowerBound to be 1267 // in bounds. 1268 assert(LocInfo.LowerBound < VersionStack.size() && 1269 "Lower bound out of range"); 1270 assert(LocInfo.LastKill < VersionStack.size() && 1271 "Last kill info out of range"); 1272 // In any case, the new upper bound is the top of the stack. 1273 unsigned long UpperBound = VersionStack.size() - 1; 1274 1275 if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { 1276 LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" 1277 << *(MU->getMemoryInst()) << ")" 1278 << " because there are " 1279 << UpperBound - LocInfo.LowerBound 1280 << " stores to disambiguate\n"); 1281 // Because we did not walk, LastKill is no longer valid, as this may 1282 // have been a kill. 1283 LocInfo.LastKillValid = false; 1284 continue; 1285 } 1286 bool FoundClobberResult = false; 1287 while (UpperBound > LocInfo.LowerBound) { 1288 if (isa<MemoryPhi>(VersionStack[UpperBound])) { 1289 // For phis, use the walker, see where we ended up, go there 1290 Instruction *UseInst = MU->getMemoryInst(); 1291 MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); 1292 // We are guaranteed to find it or something is wrong 1293 while (VersionStack[UpperBound] != Result) { 1294 assert(UpperBound != 0); 1295 --UpperBound; 1296 } 1297 FoundClobberResult = true; 1298 break; 1299 } 1300 1301 MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); 1302 // If the lifetime of the pointer ends at this instruction, it's live on 1303 // entry. 1304 if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { 1305 // Reset UpperBound to liveOnEntryDef's place in the stack 1306 UpperBound = 0; 1307 FoundClobberResult = true; 1308 LocInfo.AR = MustAlias; 1309 break; 1310 } 1311 ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); 1312 if (CA.IsClobber) { 1313 FoundClobberResult = true; 1314 LocInfo.AR = CA.AR; 1315 break; 1316 } 1317 --UpperBound; 1318 } 1319 1320 // Note: Phis always have AliasResult AR set to MayAlias ATM. 1321 1322 // At the end of this loop, UpperBound is either a clobber, or lower bound 1323 // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. 
1324 if (FoundClobberResult || UpperBound < LocInfo.LastKill) { 1325 // We were last killed now by where we got to 1326 if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound])) 1327 LocInfo.AR = None; 1328 MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR); 1329 LocInfo.LastKill = UpperBound; 1330 } else { 1331 // Otherwise, we checked all the new ones, and now we know we can get to 1332 // LastKill. 1333 MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR); 1334 } 1335 LocInfo.LowerBound = VersionStack.size() - 1; 1336 LocInfo.LowerBoundBlock = BB; 1337 } 1338 } 1339 1340 /// Optimize uses to point to their actual clobbering definitions. 1341 void MemorySSA::OptimizeUses::optimizeUses() { 1342 SmallVector<MemoryAccess *, 16> VersionStack; 1343 DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; 1344 VersionStack.push_back(MSSA->getLiveOnEntryDef()); 1345 1346 unsigned long StackEpoch = 1; 1347 unsigned long PopEpoch = 1; 1348 // We perform a non-recursive top-down dominator tree walk. 1349 for (const auto *DomNode : depth_first(DT->getRootNode())) 1350 optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, 1351 LocStackInfo); 1352 } 1353 1354 void MemorySSA::placePHINodes( 1355 const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) { 1356 // Determine where our MemoryPhi's should go 1357 ForwardIDFCalculator IDFs(*DT); 1358 IDFs.setDefiningBlocks(DefiningBlocks); 1359 SmallVector<BasicBlock *, 32> IDFBlocks; 1360 IDFs.calculate(IDFBlocks); 1361 1362 // Now place MemoryPhi nodes. 1363 for (auto &BB : IDFBlocks) 1364 createMemoryPhi(BB); 1365 } 1366 1367 void MemorySSA::buildMemorySSA() { 1368 // We create an access to represent "live on entry", for things like 1369 // arguments or users of globals, where the memory they use is defined before 1370 // the beginning of the function. We do not actually insert it into the IR. 1371 // We do not define a live on exit for the immediate uses, and thus our 1372 // semantics do *not* imply that something with no immediate uses can simply 1373 // be removed. 1374 BasicBlock &StartingPoint = F.getEntryBlock(); 1375 LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr, 1376 &StartingPoint, NextID++)); 1377 1378 // We maintain lists of memory accesses per-block, trading memory for time. We 1379 // could just look up the memory access for every possible instruction in the 1380 // stream. 1381 SmallPtrSet<BasicBlock *, 32> DefiningBlocks; 1382 // Go through each block, figure out where defs occur, and chain together all 1383 // the accesses. 1384 for (BasicBlock &B : F) { 1385 bool InsertIntoDef = false; 1386 AccessList *Accesses = nullptr; 1387 DefsList *Defs = nullptr; 1388 for (Instruction &I : B) { 1389 MemoryUseOrDef *MUD = createNewAccess(&I); 1390 if (!MUD) 1391 continue; 1392 1393 if (!Accesses) 1394 Accesses = getOrCreateAccessList(&B); 1395 Accesses->push_back(MUD); 1396 if (isa<MemoryDef>(MUD)) { 1397 InsertIntoDef = true; 1398 if (!Defs) 1399 Defs = getOrCreateDefsList(&B); 1400 Defs->push_back(*MUD); 1401 } 1402 } 1403 if (InsertIntoDef) 1404 DefiningBlocks.insert(&B); 1405 } 1406 placePHINodes(DefiningBlocks); 1407 1408 // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get 1409 // filled in with all blocks. 
1410 SmallPtrSet<BasicBlock *, 16> Visited; 1411 renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); 1412 1413 CachingWalker *Walker = getWalkerImpl(); 1414 1415 OptimizeUses(this, Walker, AA, DT).optimizeUses(); 1416 1417 // Mark the uses in unreachable blocks as live on entry, so that they go 1418 // somewhere. 1419 for (auto &BB : F) 1420 if (!Visited.count(&BB)) 1421 markUnreachableAsLiveOnEntry(&BB); 1422 } 1423 1424 MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } 1425 1426 MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { 1427 if (Walker) 1428 return Walker.get(); 1429 1430 Walker = llvm::make_unique<CachingWalker>(this, AA, DT); 1431 return Walker.get(); 1432 } 1433 1434 // This is a helper function used by the creation routines. It places NewAccess 1435 // into the access and defs lists for a given basic block, at the given 1436 // insertion point. 1437 void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess, 1438 const BasicBlock *BB, 1439 InsertionPlace Point) { 1440 auto *Accesses = getOrCreateAccessList(BB); 1441 if (Point == Beginning) { 1442 // If it's a phi node, it goes first, otherwise, it goes after any phi 1443 // nodes. 1444 if (isa<MemoryPhi>(NewAccess)) { 1445 Accesses->push_front(NewAccess); 1446 auto *Defs = getOrCreateDefsList(BB); 1447 Defs->push_front(*NewAccess); 1448 } else { 1449 auto AI = find_if_not( 1450 *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); 1451 Accesses->insert(AI, NewAccess); 1452 if (!isa<MemoryUse>(NewAccess)) { 1453 auto *Defs = getOrCreateDefsList(BB); 1454 auto DI = find_if_not( 1455 *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); 1456 Defs->insert(DI, *NewAccess); 1457 } 1458 } 1459 } else { 1460 Accesses->push_back(NewAccess); 1461 if (!isa<MemoryUse>(NewAccess)) { 1462 auto *Defs = getOrCreateDefsList(BB); 1463 Defs->push_back(*NewAccess); 1464 } 1465 } 1466 BlockNumberingValid.erase(BB); 1467 } 1468 1469 void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB, 1470 AccessList::iterator InsertPt) { 1471 auto *Accesses = getWritableBlockAccesses(BB); 1472 bool WasEnd = InsertPt == Accesses->end(); 1473 Accesses->insert(AccessList::iterator(InsertPt), What); 1474 if (!isa<MemoryUse>(What)) { 1475 auto *Defs = getOrCreateDefsList(BB); 1476 // If we got asked to insert at the end, we have an easy job, just shove it 1477 // at the end. If we got asked to insert before an existing def, we also get 1478 // an iterator. If we got asked to insert before a use, we have to hunt for 1479 // the next def. 1480 if (WasEnd) { 1481 Defs->push_back(*What); 1482 } else if (isa<MemoryDef>(InsertPt)) { 1483 Defs->insert(InsertPt->getDefsIterator(), *What); 1484 } else { 1485 while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt)) 1486 ++InsertPt; 1487 // Either we found a def, or we are inserting at the end 1488 if (InsertPt == Accesses->end()) 1489 Defs->push_back(*What); 1490 else 1491 Defs->insert(InsertPt->getDefsIterator(), *What); 1492 } 1493 } 1494 BlockNumberingValid.erase(BB); 1495 } 1496 1497 void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) { 1498 // Keep it in the lookup tables, remove from the lists 1499 removeFromLists(What, false); 1500 1501 // Note that moving should implicitly invalidate the optimized state of a 1502 // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a 1503 // MemoryDef. 
1504 if (auto *MD = dyn_cast<MemoryDef>(What)) 1505 MD->resetOptimized(); 1506 What->setBlock(BB); 1507 } 1508 1509 // Move What before Where in the IR. The end result is that What will belong to 1510 // the right lists and have the right Block set, but will not otherwise be 1511 // correct. It will not have the right defining access, and if it is a def, 1512 // things below it will not properly be updated. 1513 void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, 1514 AccessList::iterator Where) { 1515 prepareForMoveTo(What, BB); 1516 insertIntoListsBefore(What, BB, Where); 1517 } 1518 1519 void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB, 1520 InsertionPlace Point) { 1521 if (isa<MemoryPhi>(What)) { 1522 assert(Point == Beginning && 1523 "Can only move a Phi at the beginning of the block"); 1524 // Update lookup table entry 1525 ValueToMemoryAccess.erase(What->getBlock()); 1526 bool Inserted = ValueToMemoryAccess.insert({BB, What}).second; 1527 (void)Inserted; 1528 assert(Inserted && "Cannot move a Phi to a block that already has one"); 1529 } 1530 1531 prepareForMoveTo(What, BB); 1532 insertIntoListsForBlock(What, BB, Point); 1533 } 1534 1535 MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { 1536 assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB"); 1537 MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); 1538 // Phi's always are placed at the front of the block. 1539 insertIntoListsForBlock(Phi, BB, Beginning); 1540 ValueToMemoryAccess[BB] = Phi; 1541 return Phi; 1542 } 1543 1544 MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, 1545 MemoryAccess *Definition, 1546 const MemoryUseOrDef *Template) { 1547 assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI"); 1548 MemoryUseOrDef *NewAccess = createNewAccess(I, Template); 1549 assert( 1550 NewAccess != nullptr && 1551 "Tried to create a memory access for a non-memory touching instruction"); 1552 NewAccess->setDefiningAccess(Definition); 1553 return NewAccess; 1554 } 1555 1556 // Return true if the instruction has ordering constraints. 1557 // Note specifically that this only considers stores and loads 1558 // because others are still considered ModRef by getModRefInfo. 1559 static inline bool isOrdered(const Instruction *I) { 1560 if (auto *SI = dyn_cast<StoreInst>(I)) { 1561 if (!SI->isUnordered()) 1562 return true; 1563 } else if (auto *LI = dyn_cast<LoadInst>(I)) { 1564 if (!LI->isUnordered()) 1565 return true; 1566 } 1567 return false; 1568 } 1569 1570 /// Helper function to create new memory accesses 1571 MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, 1572 const MemoryUseOrDef *Template) { 1573 // The assume intrinsic has a control dependency which we model by claiming 1574 // that it writes arbitrarily. Ignore that fake memory dependency here. 1575 // FIXME: Replace this special casing with a more accurate modelling of 1576 // assume's control dependency. 
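  // (Illustrative classification, assuming typical alias-analysis results; the
  //  authoritative logic is the Def/Use computation below:
  //    store i32 0, i32* %p    -> MemoryDef (Mod)
  //    %v = load i32, i32* %p  -> MemoryUse (Ref)
  //    volatile or atomic load -> MemoryDef (forced by isOrdered())
  //    call void @f()          -> usually MemoryDef (ModRef), unless AA proves
  //                               it only reads or touches no memory.)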
1577   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1578     if (II->getIntrinsicID() == Intrinsic::assume)
1579       return nullptr;
1580
1581   bool Def, Use;
1582   if (Template) {
1583     Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
1584     Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
1585 #if !defined(NDEBUG)
1586     ModRefInfo ModRef = AA->getModRefInfo(I, None);
1587     bool DefCheck, UseCheck;
1588     DefCheck = isModSet(ModRef) || isOrdered(I);
1589     UseCheck = isRefSet(ModRef);
1590     assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template");
1591 #endif
1592   } else {
1593     // Find out what effect this instruction has on memory.
1594     ModRefInfo ModRef = AA->getModRefInfo(I, None);
1595     // The isOrdered check is used to ensure that volatiles end up as defs
1596     // (atomics end up as ModRef right now anyway). Until we separate the
1597     // ordering chain from the memory chain, this enables people to see at least
1598     // some relative ordering among volatiles. Note that getClobberingMemoryAccess
1599     // will still give an answer that bypasses other volatile loads. TODO:
1600     // Separate memory aliasing and ordering into two different chains so that
1601     // we can precisely represent both "what memory will this read/write/is
1602     // clobbered by" and "what instructions can I move this past".
1603     Def = isModSet(ModRef) || isOrdered(I);
1604     Use = isRefSet(ModRef);
1605   }
1606
1607   // It's possible for an instruction to neither read nor modify memory at all.
1608   // During construction, we ignore such instructions.
1609   if (!Def && !Use)
1610     return nullptr;
1611
1612   MemoryUseOrDef *MUD;
1613   if (Def)
1614     MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
1615   else
1616     MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
1617   ValueToMemoryAccess[I] = MUD;
1618   return MUD;
1619 }
1620
1621 /// Returns true if \p Replacer dominates \p Replacee.
1622 bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
1623                              const MemoryAccess *Replacee) const {
1624   if (isa<MemoryUseOrDef>(Replacee))
1625     return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
1626   const auto *MP = cast<MemoryPhi>(Replacee);
1627   // For a phi node, the use occurs in the predecessor block of the phi node.
1628   // Since Replacee may occur multiple times in the phi node, we have to check
1629   // each operand to ensure Replacer dominates each operand where Replacee occurs.
1630   for (const Use &Arg : MP->operands()) {
1631     if (Arg.get() != Replacee &&
1632         !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
1633       return false;
1634   }
1635   return true;
1636 }
1637
1638 /// Properly remove \p MA from all of MemorySSA's lookup tables.
1639 void MemorySSA::removeFromLookups(MemoryAccess *MA) {
1640   assert(MA->use_empty() &&
1641          "Trying to remove memory access that still has uses");
1642   BlockNumbering.erase(MA);
1643   if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
1644     MUD->setDefiningAccess(nullptr);
1645   // Invalidate our walker's cache if necessary
1646   if (!isa<MemoryUse>(MA))
1647     Walker->invalidateInfo(MA);
1648
1649   Value *MemoryInst;
1650   if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
1651     MemoryInst = MUD->getMemoryInst();
1652   else
1653     MemoryInst = MA->getBlock();
1654
1655   auto VMA = ValueToMemoryAccess.find(MemoryInst);
1656   if (VMA->second == MA)
1657     ValueToMemoryAccess.erase(VMA);
1658 }
1659
1660 /// Properly remove \p MA from all of MemorySSA's lists.
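/// (The per-block access list holds every MemoryAccess in the block, while the
/// defs list holds only MemoryDefs and MemoryPhis.)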
1661 /// 1662 /// Because of the way the intrusive list and use lists work, it is important to 1663 /// do removal in the right order. 1664 /// ShouldDelete defaults to true, and will cause the memory access to also be 1665 /// deleted, not just removed. 1666 void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) { 1667 BasicBlock *BB = MA->getBlock(); 1668 // The access list owns the reference, so we erase it from the non-owning list 1669 // first. 1670 if (!isa<MemoryUse>(MA)) { 1671 auto DefsIt = PerBlockDefs.find(BB); 1672 std::unique_ptr<DefsList> &Defs = DefsIt->second; 1673 Defs->remove(*MA); 1674 if (Defs->empty()) 1675 PerBlockDefs.erase(DefsIt); 1676 } 1677 1678 // The erase call here will delete it. If we don't want it deleted, we call 1679 // remove instead. 1680 auto AccessIt = PerBlockAccesses.find(BB); 1681 std::unique_ptr<AccessList> &Accesses = AccessIt->second; 1682 if (ShouldDelete) 1683 Accesses->erase(MA); 1684 else 1685 Accesses->remove(MA); 1686 1687 if (Accesses->empty()) { 1688 PerBlockAccesses.erase(AccessIt); 1689 BlockNumberingValid.erase(BB); 1690 } 1691 } 1692 1693 void MemorySSA::print(raw_ostream &OS) const { 1694 MemorySSAAnnotatedWriter Writer(this); 1695 F.print(OS, &Writer); 1696 } 1697 1698 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1699 LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); } 1700 #endif 1701 1702 void MemorySSA::verifyMemorySSA() const { 1703 verifyDefUses(F); 1704 verifyDomination(F); 1705 verifyOrdering(F); 1706 verifyDominationNumbers(F); 1707 Walker->verify(this); 1708 verifyClobberSanity(F); 1709 } 1710 1711 /// Check sanity of the clobbering instruction for access MA. 1712 void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const { 1713 if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) { 1714 if (!MUD->isOptimized()) 1715 return; 1716 auto *I = MUD->getMemoryInst(); 1717 auto Loc = MemoryLocation::getOrNone(I); 1718 if (Loc == None) 1719 return; 1720 auto *Clobber = MUD->getOptimized(); 1721 UpwardsMemoryQuery Q(I, MUD); 1722 checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true); 1723 } 1724 } 1725 1726 void MemorySSA::verifyClobberSanity(const Function &F) const { 1727 #if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) 1728 for (const BasicBlock &BB : F) { 1729 const AccessList *Accesses = getBlockAccesses(&BB); 1730 if (!Accesses) 1731 continue; 1732 for (const MemoryAccess &MA : *Accesses) 1733 checkClobberSanityAccess(&MA); 1734 } 1735 #endif 1736 } 1737 1738 /// Verify that all of the blocks we believe to have valid domination numbers 1739 /// actually have valid domination numbers. 1740 void MemorySSA::verifyDominationNumbers(const Function &F) const { 1741 #ifndef NDEBUG 1742 if (BlockNumberingValid.empty()) 1743 return; 1744 1745 SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid; 1746 for (const BasicBlock &BB : F) { 1747 if (!ValidBlocks.count(&BB)) 1748 continue; 1749 1750 ValidBlocks.erase(&BB); 1751 1752 const AccessList *Accesses = getBlockAccesses(&BB); 1753 // It's correct to say an empty block has valid numbering. 1754 if (!Accesses) 1755 continue; 1756 1757 // Block numbering starts at 1. 
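    // A LastNumber of 0 is therefore strictly below any valid number.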
1758 unsigned long LastNumber = 0; 1759 for (const MemoryAccess &MA : *Accesses) { 1760 auto ThisNumberIter = BlockNumbering.find(&MA); 1761 assert(ThisNumberIter != BlockNumbering.end() && 1762 "MemoryAccess has no domination number in a valid block!"); 1763 1764 unsigned long ThisNumber = ThisNumberIter->second; 1765 assert(ThisNumber > LastNumber && 1766 "Domination numbers should be strictly increasing!"); 1767 LastNumber = ThisNumber; 1768 } 1769 } 1770 1771 assert(ValidBlocks.empty() && 1772 "All valid BasicBlocks should exist in F -- dangling pointers?"); 1773 #endif 1774 } 1775 1776 /// Verify that the order and existence of MemoryAccesses matches the 1777 /// order and existence of memory affecting instructions. 1778 void MemorySSA::verifyOrdering(Function &F) const { 1779 #ifndef NDEBUG 1780 // Walk all the blocks, comparing what the lookups think and what the access 1781 // lists think, as well as the order in the blocks vs the order in the access 1782 // lists. 1783 SmallVector<MemoryAccess *, 32> ActualAccesses; 1784 SmallVector<MemoryAccess *, 32> ActualDefs; 1785 for (BasicBlock &B : F) { 1786 const AccessList *AL = getBlockAccesses(&B); 1787 const auto *DL = getBlockDefs(&B); 1788 MemoryAccess *Phi = getMemoryAccess(&B); 1789 if (Phi) { 1790 ActualAccesses.push_back(Phi); 1791 ActualDefs.push_back(Phi); 1792 } 1793 1794 for (Instruction &I : B) { 1795 MemoryAccess *MA = getMemoryAccess(&I); 1796 assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) && 1797 "We have memory affecting instructions " 1798 "in this block but they are not in the " 1799 "access list or defs list"); 1800 if (MA) { 1801 ActualAccesses.push_back(MA); 1802 if (isa<MemoryDef>(MA)) 1803 ActualDefs.push_back(MA); 1804 } 1805 } 1806 // Either we hit the assert, really have no accesses, or we have both 1807 // accesses and an access list. 1808 // Same with defs. 1809 if (!AL && !DL) 1810 continue; 1811 assert(AL->size() == ActualAccesses.size() && 1812 "We don't have the same number of accesses in the block as on the " 1813 "access list"); 1814 assert((DL || ActualDefs.size() == 0) && 1815 "Either we should have a defs list, or we should have no defs"); 1816 assert((!DL || DL->size() == ActualDefs.size()) && 1817 "We don't have the same number of defs in the block as on the " 1818 "def list"); 1819 auto ALI = AL->begin(); 1820 auto AAI = ActualAccesses.begin(); 1821 while (ALI != AL->end() && AAI != ActualAccesses.end()) { 1822 assert(&*ALI == *AAI && "Not the same accesses in the same order"); 1823 ++ALI; 1824 ++AAI; 1825 } 1826 ActualAccesses.clear(); 1827 if (DL) { 1828 auto DLI = DL->begin(); 1829 auto ADI = ActualDefs.begin(); 1830 while (DLI != DL->end() && ADI != ActualDefs.end()) { 1831 assert(&*DLI == *ADI && "Not the same defs in the same order"); 1832 ++DLI; 1833 ++ADI; 1834 } 1835 } 1836 ActualDefs.clear(); 1837 } 1838 #endif 1839 } 1840 1841 /// Verify the domination properties of MemorySSA by checking that each 1842 /// definition dominates all of its uses. 
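///
/// For example, given IR annotated by the MemorySSA printer:
///   ; 1 = MemoryDef(liveOnEntry)
///   store i32 0, i32* %p
///   ; MemoryUse(1)
///   %v = load i32, i32* %p
/// the MemoryDef numbered 1 must dominate the MemoryUse that names it.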
1843 void MemorySSA::verifyDomination(Function &F) const {
1844 #ifndef NDEBUG
1845   for (BasicBlock &B : F) {
1846     // Phi nodes are attached to basic blocks
1847     if (MemoryPhi *MP = getMemoryAccess(&B))
1848       for (const Use &U : MP->uses())
1849         assert(dominates(MP, U) && "Memory PHI does not dominate its uses");
1850
1851     for (Instruction &I : B) {
1852       MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
1853       if (!MD)
1854         continue;
1855
1856       for (const Use &U : MD->uses())
1857         assert(dominates(MD, U) && "Memory Def does not dominate its uses");
1858     }
1859   }
1860 #endif
1861 }
1862
1863 /// Verify the def-use lists in MemorySSA, by verifying that \p Use
1864 /// appears in the use list of \p Def.
1865 void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
1866 #ifndef NDEBUG
1867   // The live on entry use may cause us to get a NULL def here.
1868   if (!Def)
1869     assert(isLiveOnEntryDef(Use) &&
1870            "Null def but use does not point to live on entry def");
1871   else
1872     assert(is_contained(Def->users(), Use) &&
1873            "Did not find use in def's use list");
1874 #endif
1875 }
1876
1877 /// Verify the immediate use information, by walking all the memory
1878 /// accesses and verifying that, for each use, it appears in the
1879 /// appropriate def's use list.
1880 void MemorySSA::verifyDefUses(Function &F) const {
1881 #ifndef NDEBUG
1882   for (BasicBlock &B : F) {
1883     // Phi nodes are attached to basic blocks
1884     if (MemoryPhi *Phi = getMemoryAccess(&B)) {
1885       assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
1886                                           pred_begin(&B), pred_end(&B))) &&
1887              "Incomplete MemoryPhi Node");
1888       for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
1889         verifyUseInDefs(Phi->getIncomingValue(I), Phi);
1890         assert(find(predecessors(&B), Phi->getIncomingBlock(I)) !=
1891                    pred_end(&B) &&
1892                "Incoming phi block not a block predecessor");
1893       }
1894     }
1895
1896     for (Instruction &I : B) {
1897       if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
1898         verifyUseInDefs(MA->getDefiningAccess(), MA);
1899       }
1900     }
1901   }
1902 #endif
1903 }
1904
1905 /// Perform a local numbering on blocks so that instruction ordering can be
1906 /// determined in constant time.
1907 /// TODO: We currently just number in order. If we numbered by N, we could
1908 /// allow at least N-1 sequences of insertBefore or insertAfter (and at least
1909 /// log2(N) sequences of mixed before and after) without needing to invalidate
1910 /// the numbering.
1911 void MemorySSA::renumberBlock(const BasicBlock *B) const {
1912   // The pre-increment ensures the numbers really start at 1.
1913   unsigned long CurrentNumber = 0;
1914   const AccessList *AL = getBlockAccesses(B);
1915   assert(AL != nullptr && "Asking to renumber an empty block");
1916   for (const auto &I : *AL)
1917     BlockNumbering[&I] = ++CurrentNumber;
1918   BlockNumberingValid.insert(B);
1919 }
1920
1921 /// Determine, for two memory accesses in the same block,
1922 /// whether \p Dominator dominates \p Dominatee.
1923 /// \returns True if \p Dominator dominates \p Dominatee.
1924 bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
1925                                  const MemoryAccess *Dominatee) const {
1926   const BasicBlock *DominatorBlock = Dominator->getBlock();
1927
1928   assert((DominatorBlock == Dominatee->getBlock()) &&
1929          "Asking for local domination when accesses are in different blocks!");
1930   // A node dominates itself.
1931 if (Dominatee == Dominator) 1932 return true; 1933 1934 // When Dominatee is defined on function entry, it is not dominated by another 1935 // memory access. 1936 if (isLiveOnEntryDef(Dominatee)) 1937 return false; 1938 1939 // When Dominator is defined on function entry, it dominates the other memory 1940 // access. 1941 if (isLiveOnEntryDef(Dominator)) 1942 return true; 1943 1944 if (!BlockNumberingValid.count(DominatorBlock)) 1945 renumberBlock(DominatorBlock); 1946 1947 unsigned long DominatorNum = BlockNumbering.lookup(Dominator); 1948 // All numbers start with 1 1949 assert(DominatorNum != 0 && "Block was not numbered properly"); 1950 unsigned long DominateeNum = BlockNumbering.lookup(Dominatee); 1951 assert(DominateeNum != 0 && "Block was not numbered properly"); 1952 return DominatorNum < DominateeNum; 1953 } 1954 1955 bool MemorySSA::dominates(const MemoryAccess *Dominator, 1956 const MemoryAccess *Dominatee) const { 1957 if (Dominator == Dominatee) 1958 return true; 1959 1960 if (isLiveOnEntryDef(Dominatee)) 1961 return false; 1962 1963 if (Dominator->getBlock() != Dominatee->getBlock()) 1964 return DT->dominates(Dominator->getBlock(), Dominatee->getBlock()); 1965 return locallyDominates(Dominator, Dominatee); 1966 } 1967 1968 bool MemorySSA::dominates(const MemoryAccess *Dominator, 1969 const Use &Dominatee) const { 1970 if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) { 1971 BasicBlock *UseBB = MP->getIncomingBlock(Dominatee); 1972 // The def must dominate the incoming block of the phi. 1973 if (UseBB != Dominator->getBlock()) 1974 return DT->dominates(Dominator->getBlock(), UseBB); 1975 // If the UseBB and the DefBB are the same, compare locally. 1976 return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee)); 1977 } 1978 // If it's not a PHI node use, the normal dominates can already handle it. 
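  // For any other user, the use is dominated exactly when the using access is.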
1979 return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser())); 1980 } 1981 1982 const static char LiveOnEntryStr[] = "liveOnEntry"; 1983 1984 void MemoryAccess::print(raw_ostream &OS) const { 1985 switch (getValueID()) { 1986 case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS); 1987 case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS); 1988 case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS); 1989 } 1990 llvm_unreachable("invalid value id"); 1991 } 1992 1993 void MemoryDef::print(raw_ostream &OS) const { 1994 MemoryAccess *UO = getDefiningAccess(); 1995 1996 auto printID = [&OS](MemoryAccess *A) { 1997 if (A && A->getID()) 1998 OS << A->getID(); 1999 else 2000 OS << LiveOnEntryStr; 2001 }; 2002 2003 OS << getID() << " = MemoryDef("; 2004 printID(UO); 2005 OS << ")"; 2006 2007 if (isOptimized()) { 2008 OS << "->"; 2009 printID(getOptimized()); 2010 2011 if (Optional<AliasResult> AR = getOptimizedAccessType()) 2012 OS << " " << *AR; 2013 } 2014 } 2015 2016 void MemoryPhi::print(raw_ostream &OS) const { 2017 bool First = true; 2018 OS << getID() << " = MemoryPhi("; 2019 for (const auto &Op : operands()) { 2020 BasicBlock *BB = getIncomingBlock(Op); 2021 MemoryAccess *MA = cast<MemoryAccess>(Op); 2022 if (!First) 2023 OS << ','; 2024 else 2025 First = false; 2026 2027 OS << '{'; 2028 if (BB->hasName()) 2029 OS << BB->getName(); 2030 else 2031 BB->printAsOperand(OS, false); 2032 OS << ','; 2033 if (unsigned ID = MA->getID()) 2034 OS << ID; 2035 else 2036 OS << LiveOnEntryStr; 2037 OS << '}'; 2038 } 2039 OS << ')'; 2040 } 2041 2042 void MemoryUse::print(raw_ostream &OS) const { 2043 MemoryAccess *UO = getDefiningAccess(); 2044 OS << "MemoryUse("; 2045 if (UO && UO->getID()) 2046 OS << UO->getID(); 2047 else 2048 OS << LiveOnEntryStr; 2049 OS << ')'; 2050 2051 if (Optional<AliasResult> AR = getOptimizedAccessType()) 2052 OS << " " << *AR; 2053 } 2054 2055 void MemoryAccess::dump() const { 2056 // Cannot completely remove virtual function even in release mode. 
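  // Only the body is compiled out when dumps are disabled.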
2057 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2058 print(dbgs()); 2059 dbgs() << "\n"; 2060 #endif 2061 } 2062 2063 char MemorySSAPrinterLegacyPass::ID = 0; 2064 2065 MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { 2066 initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); 2067 } 2068 2069 void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { 2070 AU.setPreservesAll(); 2071 AU.addRequired<MemorySSAWrapperPass>(); 2072 } 2073 2074 bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { 2075 auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 2076 MSSA.print(dbgs()); 2077 if (VerifyMemorySSA) 2078 MSSA.verifyMemorySSA(); 2079 return false; 2080 } 2081 2082 AnalysisKey MemorySSAAnalysis::Key; 2083 2084 MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, 2085 FunctionAnalysisManager &AM) { 2086 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 2087 auto &AA = AM.getResult<AAManager>(F); 2088 return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT)); 2089 } 2090 2091 PreservedAnalyses MemorySSAPrinterPass::run(Function &F, 2092 FunctionAnalysisManager &AM) { 2093 OS << "MemorySSA for function: " << F.getName() << "\n"; 2094 AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS); 2095 2096 return PreservedAnalyses::all(); 2097 } 2098 2099 PreservedAnalyses MemorySSAVerifierPass::run(Function &F, 2100 FunctionAnalysisManager &AM) { 2101 AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); 2102 2103 return PreservedAnalyses::all(); 2104 } 2105 2106 char MemorySSAWrapperPass::ID = 0; 2107 2108 MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { 2109 initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); 2110 } 2111 2112 void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } 2113 2114 void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 2115 AU.setPreservesAll(); 2116 AU.addRequiredTransitive<DominatorTreeWrapperPass>(); 2117 AU.addRequiredTransitive<AAResultsWrapperPass>(); 2118 } 2119 2120 bool MemorySSAWrapperPass::runOnFunction(Function &F) { 2121 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2122 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 2123 MSSA.reset(new MemorySSA(F, &AA, &DT)); 2124 return false; 2125 } 2126 2127 void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } 2128 2129 void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { 2130 MSSA->print(OS); 2131 } 2132 2133 MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} 2134 2135 MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A, 2136 DominatorTree *D) 2137 : MemorySSAWalker(M), Walker(*M, *A, *D) {} 2138 2139 void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { 2140 if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) 2141 MUD->resetOptimized(); 2142 } 2143 2144 /// Walk the use-def chains starting at \p MA and find 2145 /// the MemoryAccess that actually clobbers Loc. 
2146 /// 2147 /// \returns our clobbering memory access 2148 MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( 2149 MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) { 2150 return Walker.findClobber(StartingAccess, Q); 2151 } 2152 2153 MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( 2154 MemoryAccess *StartingAccess, const MemoryLocation &Loc) { 2155 if (isa<MemoryPhi>(StartingAccess)) 2156 return StartingAccess; 2157 2158 auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess); 2159 if (MSSA->isLiveOnEntryDef(StartingUseOrDef)) 2160 return StartingUseOrDef; 2161 2162 Instruction *I = StartingUseOrDef->getMemoryInst(); 2163 2164 // Conservatively, fences are always clobbers, so don't perform the walk if we 2165 // hit a fence. 2166 if (!ImmutableCallSite(I) && I->isFenceLike()) 2167 return StartingUseOrDef; 2168 2169 UpwardsMemoryQuery Q; 2170 Q.OriginalAccess = StartingUseOrDef; 2171 Q.StartingLoc = Loc; 2172 Q.Inst = I; 2173 Q.IsCall = false; 2174 2175 // Unlike the other function, do not walk to the def of a def, because we are 2176 // handed something we already believe is the clobbering access. 2177 MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef) 2178 ? StartingUseOrDef->getDefiningAccess() 2179 : StartingUseOrDef; 2180 2181 MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q); 2182 LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); 2183 LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n"); 2184 LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); 2185 LLVM_DEBUG(dbgs() << *Clobber << "\n"); 2186 return Clobber; 2187 } 2188 2189 MemoryAccess * 2190 MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { 2191 auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA); 2192 // If this is a MemoryPhi, we can't do anything. 2193 if (!StartingAccess) 2194 return MA; 2195 2196 // If this is an already optimized use or def, return the optimized result. 2197 // Note: Currently, we store the optimized def result in a separate field, 2198 // since we can't use the defining access. 2199 if (StartingAccess->isOptimized()) 2200 return StartingAccess->getOptimized(); 2201 2202 const Instruction *I = StartingAccess->getMemoryInst(); 2203 UpwardsMemoryQuery Q(I, StartingAccess); 2204 // We can't sanely do anything with a fence, since they conservatively clobber 2205 // all memory, and have no locations to get pointers from to try to 2206 // disambiguate. 2207 if (!Q.IsCall && I->isFenceLike()) 2208 return StartingAccess; 2209 2210 if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) { 2211 MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); 2212 StartingAccess->setOptimized(LiveOnEntry); 2213 StartingAccess->setOptimizedAccessType(None); 2214 return LiveOnEntry; 2215 } 2216 2217 // Start with the thing we already think clobbers this location 2218 MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess(); 2219 2220 // At this point, DefiningAccess may be the live on entry def. 2221 // If it is, we will not get a better result. 
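  // Cache it as the optimized access so later queries return immediately.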
2222 if (MSSA->isLiveOnEntryDef(DefiningAccess)) { 2223 StartingAccess->setOptimized(DefiningAccess); 2224 StartingAccess->setOptimizedAccessType(None); 2225 return DefiningAccess; 2226 } 2227 2228 MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); 2229 LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); 2230 LLVM_DEBUG(dbgs() << *DefiningAccess << "\n"); 2231 LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); 2232 LLVM_DEBUG(dbgs() << *Result << "\n"); 2233 2234 StartingAccess->setOptimized(Result); 2235 if (MSSA->isLiveOnEntryDef(Result)) 2236 StartingAccess->setOptimizedAccessType(None); 2237 else if (Q.AR == MustAlias) 2238 StartingAccess->setOptimizedAccessType(MustAlias); 2239 2240 return Result; 2241 } 2242 2243 MemoryAccess * 2244 DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { 2245 if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) 2246 return Use->getDefiningAccess(); 2247 return MA; 2248 } 2249 2250 MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( 2251 MemoryAccess *StartingAccess, const MemoryLocation &) { 2252 if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) 2253 return Use->getDefiningAccess(); 2254 return StartingAccess; 2255 } 2256 2257 void MemoryPhi::deleteMe(DerivedUser *Self) { 2258 delete static_cast<MemoryPhi *>(Self); 2259 } 2260 2261 void MemoryDef::deleteMe(DerivedUser *Self) { 2262 delete static_cast<MemoryDef *>(Self); 2263 } 2264 2265 void MemoryUse::deleteMe(DerivedUser *Self) { 2266 delete static_cast<MemoryUse *>(Self); 2267 } 2268
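// A minimal usage sketch (comment only): a legacy pass that requires
// MemorySSAWrapperPass could query the walker roughly like this. The
// instruction `I` and the surrounding pass are hypothetical.
//
//   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
//   if (MemoryUseOrDef *MA = MSSA.getMemoryAccess(&I)) {
//     MemoryAccess *Clobber =
//         MSSA.getWalker()->getClobberingMemoryAccess(MA);
//     if (MSSA.isLiveOnEntryDef(Clobber)) {
//       // Nothing earlier in this function clobbers the memory `I` touches.
//     }
//   }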