//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------===//
//
// This file implements the MemorySSA class.
//
//===----------------------------------------------------------------===//
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Scalar.h"
#include <algorithm>

#define DEBUG_TYPE "memoryssa"
using namespace llvm;
INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                      true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                    true)

INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
                      "Memory SSA Printer", false, false)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
                    "Memory SSA Printer", false, false)

static cl::opt<unsigned> MaxCheckLimit(
    "memssa-check-limit", cl::Hidden, cl::init(100),
    cl::desc("The maximum number of stores/phis MemorySSA "
             "will consider trying to walk past (default = 100)"));

static cl::opt<bool>
    VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
                    cl::desc("Verify MemorySSA in legacy printer pass."));

namespace llvm {
/// \brief An assembly annotator class to print Memory SSA information in
/// comments.
class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
  friend class MemorySSA;
  const MemorySSA *MSSA;

public:
  MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}

  virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
                                        formatted_raw_ostream &OS) {
    if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
      OS << "; " << *MA << "\n";
  }

  virtual void emitInstructionAnnot(const Instruction *I,
                                    formatted_raw_ostream &OS) {
    if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
      OS << "; " << *MA << "\n";
  }
};
}

namespace {
/// Our current alias analysis API differentiates heavily between calls and
/// non-calls, and functions called on one usually assert on the other.
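/// (For example, MemoryLocation::get only understands loads, stores, and
/// similar memory instructions, while call sites have to be compared through
/// the call-site overloads of getModRefInfo.)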
96 /// This class encapsulates the distinction to simplify other code that wants 97 /// "Memory affecting instructions and related data" to use as a key. 98 /// For example, this class is used as a densemap key in the use optimizer. 99 class MemoryLocOrCall { 100 public: 101 MemoryLocOrCall() : IsCall(false) {} 102 MemoryLocOrCall(MemoryUseOrDef *MUD) 103 : MemoryLocOrCall(MUD->getMemoryInst()) {} 104 MemoryLocOrCall(const MemoryUseOrDef *MUD) 105 : MemoryLocOrCall(MUD->getMemoryInst()) {} 106 107 MemoryLocOrCall(Instruction *Inst) { 108 if (ImmutableCallSite(Inst)) { 109 IsCall = true; 110 CS = ImmutableCallSite(Inst); 111 } else { 112 IsCall = false; 113 // There is no such thing as a memorylocation for a fence inst, and it is 114 // unique in that regard. 115 if (!isa<FenceInst>(Inst)) 116 Loc = MemoryLocation::get(Inst); 117 } 118 } 119 120 explicit MemoryLocOrCall(const MemoryLocation &Loc) 121 : IsCall(false), Loc(Loc) {} 122 123 bool IsCall; 124 ImmutableCallSite getCS() const { 125 assert(IsCall); 126 return CS; 127 } 128 MemoryLocation getLoc() const { 129 assert(!IsCall); 130 return Loc; 131 } 132 133 bool operator==(const MemoryLocOrCall &Other) const { 134 if (IsCall != Other.IsCall) 135 return false; 136 137 if (IsCall) 138 return CS.getCalledValue() == Other.CS.getCalledValue(); 139 return Loc == Other.Loc; 140 } 141 142 private: 143 union { 144 ImmutableCallSite CS; 145 MemoryLocation Loc; 146 }; 147 }; 148 } 149 150 namespace llvm { 151 template <> struct DenseMapInfo<MemoryLocOrCall> { 152 static inline MemoryLocOrCall getEmptyKey() { 153 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); 154 } 155 static inline MemoryLocOrCall getTombstoneKey() { 156 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); 157 } 158 static unsigned getHashValue(const MemoryLocOrCall &MLOC) { 159 if (MLOC.IsCall) 160 return hash_combine(MLOC.IsCall, 161 DenseMapInfo<const Value *>::getHashValue( 162 MLOC.getCS().getCalledValue())); 163 return hash_combine( 164 MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); 165 } 166 static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { 167 return LHS == RHS; 168 } 169 }; 170 171 enum class Reorderability { Always, IfNoAlias, Never }; 172 173 /// This does one-way checks to see if Use could theoretically be hoisted above 174 /// MayClobber. This will not check the other way around. 175 /// 176 /// This assumes that, for the purposes of MemorySSA, Use comes directly after 177 /// MayClobber, with no potentially clobbering operations in between them. 178 /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) 179 static Reorderability getLoadReorderability(const LoadInst *Use, 180 const LoadInst *MayClobber) { 181 bool VolatileUse = Use->isVolatile(); 182 bool VolatileClobber = MayClobber->isVolatile(); 183 // Volatile operations may never be reordered with other volatile operations. 184 if (VolatileUse && VolatileClobber) 185 return Reorderability::Never; 186 187 // The lang ref allows reordering of volatile and non-volatile operations. 188 // Whether an aliasing nonvolatile load and volatile load can be reordered, 189 // though, is ambiguous. Because it may not be best to exploit this ambiguity, 190 // we only allow volatile/non-volatile reordering if the volatile and 191 // non-volatile operations don't alias. 192 Reorderability Result = VolatileUse || VolatileClobber 193 ? 
Reorderability::IfNoAlias 194 : Reorderability::Always; 195 196 // If a load is seq_cst, it cannot be moved above other loads. If its ordering 197 // is weaker, it can be moved above other loads. We just need to be sure that 198 // MayClobber isn't an acquire load, because loads can't be moved above 199 // acquire loads. 200 // 201 // Note that this explicitly *does* allow the free reordering of monotonic (or 202 // weaker) loads of the same address. 203 bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; 204 bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), 205 AtomicOrdering::Acquire); 206 if (SeqCstUse || MayClobberIsAcquire) 207 return Reorderability::Never; 208 return Result; 209 } 210 211 static bool instructionClobbersQuery(MemoryDef *MD, 212 const MemoryLocation &UseLoc, 213 const Instruction *UseInst, 214 AliasAnalysis &AA) { 215 Instruction *DefInst = MD->getMemoryInst(); 216 assert(DefInst && "Defining instruction not actually an instruction"); 217 ImmutableCallSite UseCS(UseInst); 218 219 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { 220 // These intrinsics will show up as affecting memory, but they are just 221 // markers. 222 switch (II->getIntrinsicID()) { 223 case Intrinsic::lifetime_start: 224 if (UseCS) 225 return false; 226 return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc); 227 case Intrinsic::lifetime_end: 228 case Intrinsic::invariant_start: 229 case Intrinsic::invariant_end: 230 case Intrinsic::assume: 231 return false; 232 default: 233 break; 234 } 235 } 236 237 if (UseCS) { 238 ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); 239 return I != MRI_NoModRef; 240 } 241 242 if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) { 243 if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) { 244 switch (getLoadReorderability(UseLoad, DefLoad)) { 245 case Reorderability::Always: 246 return false; 247 case Reorderability::Never: 248 return true; 249 case Reorderability::IfNoAlias: 250 return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad)); 251 } 252 } 253 } 254 255 return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod; 256 } 257 258 static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, 259 const MemoryLocOrCall &UseMLOC, 260 AliasAnalysis &AA) { 261 // FIXME: This is a temporary hack to allow a single instructionClobbersQuery 262 // to exist while MemoryLocOrCall is pushed through places. 263 if (UseMLOC.IsCall) 264 return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), 265 AA); 266 return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), 267 AA); 268 } 269 270 // Return true when MD may alias MU, return false otherwise. 271 bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, 272 AliasAnalysis &AA) { 273 return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); 274 } 275 } 276 277 namespace { 278 struct UpwardsMemoryQuery { 279 // True if our original query started off as a call 280 bool IsCall; 281 // The pointer location we started the query with. This will be empty if 282 // IsCall is true. 283 MemoryLocation StartingLoc; 284 // This is the instruction we were querying about. 
  const Instruction *Inst;
  // The MemoryAccess we actually got called with, used to test local domination
  const MemoryAccess *OriginalAccess;

  UpwardsMemoryQuery()
      : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {}

  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
      : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
    if (!IsCall)
      StartingLoc = MemoryLocation::get(Inst);
  }
};

static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
                           AliasAnalysis &AA) {
  Instruction *Inst = MD->getMemoryInst();
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_end:
      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
    default:
      return false;
    }
  }
  return false;
}

static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
                                                   const Instruction *I) {
  // If the memory can't be changed, then loads of the memory can't be
  // clobbered.
  //
  // FIXME: We should handle invariant groups, as well. It's a bit harder,
  // because we need to pay close attention to invariant group barriers.
  return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
                              AA.pointsToConstantMemory(
                                  cast<LoadInst>(I)->getPointerOperand()));
}

/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
/// in between `Start` and `ClobberAt` clobbers `Start`.
///
/// This is meant to be as simple and self-contained as possible. Because it
/// uses no cache, etc., it can be relatively expensive.
///
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
static void LLVM_ATTRIBUTE_UNUSED
checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
                   const MemoryLocation &StartLoc, const MemorySSA &MSSA,
                   const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
  assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");

  if (MSSA.isLiveOnEntryDef(Start)) {
    assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
           "liveOnEntry must clobber itself");
    return;
  }

  bool FoundClobber = false;
  DenseSet<MemoryAccessPair> VisitedPhis;
  SmallVector<MemoryAccessPair, 8> Worklist;
  Worklist.emplace_back(Start, StartLoc);
  // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
  // is found, complain.
  while (!Worklist.empty()) {
    MemoryAccessPair MAP = Worklist.pop_back_val();
    // All we care about is that nothing from Start to ClobberAt clobbers Start.
    // We learn nothing from revisiting nodes.
    if (!VisitedPhis.insert(MAP).second)
      continue;

    for (MemoryAccess *MA : def_chain(MAP.first)) {
      if (MA == ClobberAt) {
        if (auto *MD = dyn_cast<MemoryDef>(MA)) {
          // instructionClobbersQuery isn't essentially free, so don't use `|=`,
          // since it won't let us short-circuit.
          //
          // Also, note that this can't be hoisted out of the `Worklist` loop,
          // since MD may only act as a clobber for 1 of N MemoryLocations.
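          // E.g. writing this as `FoundClobber |= instructionClobbersQuery(...)`
          // would keep evaluating the query even after a clobber has already
          // been found; the short-circuiting `||` below avoids that.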
370 FoundClobber = 371 FoundClobber || MSSA.isLiveOnEntryDef(MD) || 372 instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); 373 } 374 break; 375 } 376 377 // We should never hit liveOnEntry, unless it's the clobber. 378 assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); 379 380 if (auto *MD = dyn_cast<MemoryDef>(MA)) { 381 (void)MD; 382 assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) && 383 "Found clobber before reaching ClobberAt!"); 384 continue; 385 } 386 387 assert(isa<MemoryPhi>(MA)); 388 Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); 389 } 390 } 391 392 // If ClobberAt is a MemoryPhi, we can assume something above it acted as a 393 // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. 394 assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && 395 "ClobberAt never acted as a clobber"); 396 } 397 398 /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up 399 /// in one class. 400 class ClobberWalker { 401 /// Save a few bytes by using unsigned instead of size_t. 402 using ListIndex = unsigned; 403 404 /// Represents a span of contiguous MemoryDefs, potentially ending in a 405 /// MemoryPhi. 406 struct DefPath { 407 MemoryLocation Loc; 408 // Note that, because we always walk in reverse, Last will always dominate 409 // First. Also note that First and Last are inclusive. 410 MemoryAccess *First; 411 MemoryAccess *Last; 412 Optional<ListIndex> Previous; 413 414 DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, 415 Optional<ListIndex> Previous) 416 : Loc(Loc), First(First), Last(Last), Previous(Previous) {} 417 418 DefPath(const MemoryLocation &Loc, MemoryAccess *Init, 419 Optional<ListIndex> Previous) 420 : DefPath(Loc, Init, Init, Previous) {} 421 }; 422 423 const MemorySSA &MSSA; 424 AliasAnalysis &AA; 425 DominatorTree &DT; 426 UpwardsMemoryQuery *Query; 427 428 // Phi optimization bookkeeping 429 SmallVector<DefPath, 32> Paths; 430 DenseSet<ConstMemoryAccessPair> VisitedPhis; 431 432 /// Find the nearest def or phi that `From` can legally be optimized to. 433 const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { 434 assert(From->getNumOperands() && "Phi with no operands?"); 435 436 BasicBlock *BB = From->getBlock(); 437 MemoryAccess *Result = MSSA.getLiveOnEntryDef(); 438 DomTreeNode *Node = DT.getNode(BB); 439 while ((Node = Node->getIDom())) { 440 auto *Defs = MSSA.getBlockDefs(Node->getBlock()); 441 if (Defs) 442 return &*Defs->rbegin(); 443 } 444 return Result; 445 } 446 447 /// Result of calling walkToPhiOrClobber. 448 struct UpwardsWalkResult { 449 /// The "Result" of the walk. Either a clobber, the last thing we walked, or 450 /// both. 451 MemoryAccess *Result; 452 bool IsKnownClobber; 453 }; 454 455 /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. 456 /// This will update Desc.Last as it walks. It will (optionally) also stop at 457 /// StopAt. 
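  /// In other words, the walk ends at the first MemoryDef that is liveOnEntry
  /// or clobbers the location, at StopAt, or at a MemoryPhi, whichever is
  /// reached first along the def chain.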
458 /// 459 /// This does not test for whether StopAt is a clobber 460 UpwardsWalkResult 461 walkToPhiOrClobber(DefPath &Desc, 462 const MemoryAccess *StopAt = nullptr) const { 463 assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); 464 465 for (MemoryAccess *Current : def_chain(Desc.Last)) { 466 Desc.Last = Current; 467 if (Current == StopAt) 468 return {Current, false}; 469 470 if (auto *MD = dyn_cast<MemoryDef>(Current)) 471 if (MSSA.isLiveOnEntryDef(MD) || 472 instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA)) 473 return {MD, true}; 474 } 475 476 assert(isa<MemoryPhi>(Desc.Last) && 477 "Ended at a non-clobber that's not a phi?"); 478 return {Desc.Last, false}; 479 } 480 481 void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, 482 ListIndex PriorNode) { 483 auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), 484 upward_defs_end()); 485 for (const MemoryAccessPair &P : UpwardDefs) { 486 PausedSearches.push_back(Paths.size()); 487 Paths.emplace_back(P.second, P.first, PriorNode); 488 } 489 } 490 491 /// Represents a search that terminated after finding a clobber. This clobber 492 /// may or may not be present in the path of defs from LastNode..SearchStart, 493 /// since it may have been retrieved from cache. 494 struct TerminatedPath { 495 MemoryAccess *Clobber; 496 ListIndex LastNode; 497 }; 498 499 /// Get an access that keeps us from optimizing to the given phi. 500 /// 501 /// PausedSearches is an array of indices into the Paths array. Its incoming 502 /// value is the indices of searches that stopped at the last phi optimization 503 /// target. It's left in an unspecified state. 504 /// 505 /// If this returns None, NewPaused is a vector of searches that terminated 506 /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. 507 Optional<TerminatedPath> 508 getBlockingAccess(const MemoryAccess *StopWhere, 509 SmallVectorImpl<ListIndex> &PausedSearches, 510 SmallVectorImpl<ListIndex> &NewPaused, 511 SmallVectorImpl<TerminatedPath> &Terminated) { 512 assert(!PausedSearches.empty() && "No searches to continue?"); 513 514 // BFS vs DFS really doesn't make a difference here, so just do a DFS with 515 // PausedSearches as our stack. 516 while (!PausedSearches.empty()) { 517 ListIndex PathIndex = PausedSearches.pop_back_val(); 518 DefPath &Node = Paths[PathIndex]; 519 520 // If we've already visited this path with this MemoryLocation, we don't 521 // need to do so again. 522 // 523 // NOTE: That we just drop these paths on the ground makes caching 524 // behavior sporadic. e.g. given a diamond: 525 // A 526 // B C 527 // D 528 // 529 // ...If we walk D, B, A, C, we'll only cache the result of phi 530 // optimization for A, B, and D; C will be skipped because it dies here. 531 // This arguably isn't the worst thing ever, since: 532 // - We generally query things in a top-down order, so if we got below D 533 // without needing cache entries for {C, MemLoc}, then chances are 534 // that those cache entries would end up ultimately unused. 535 // - We still cache things for A, so C only needs to walk up a bit. 536 // If this behavior becomes problematic, we can fix without a ton of extra 537 // work. 538 if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) 539 continue; 540 541 UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); 542 if (Res.IsKnownClobber) { 543 assert(Res.Result != StopWhere); 544 // If this wasn't a cache hit, we hit a clobber when walking. That's a 545 // failure. 
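        // (If the clobber dominates StopWhere, it is still a legal place to
        // optimize to; that case is handled below by saving it in Terminated
        // instead of bailing out.)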
546 TerminatedPath Term{Res.Result, PathIndex}; 547 if (!MSSA.dominates(Res.Result, StopWhere)) 548 return Term; 549 550 // Otherwise, it's a valid thing to potentially optimize to. 551 Terminated.push_back(Term); 552 continue; 553 } 554 555 if (Res.Result == StopWhere) { 556 // We've hit our target. Save this path off for if we want to continue 557 // walking. 558 NewPaused.push_back(PathIndex); 559 continue; 560 } 561 562 assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); 563 addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); 564 } 565 566 return None; 567 } 568 569 template <typename T, typename Walker> 570 struct generic_def_path_iterator 571 : public iterator_facade_base<generic_def_path_iterator<T, Walker>, 572 std::forward_iterator_tag, T *> { 573 generic_def_path_iterator() : W(nullptr), N(None) {} 574 generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} 575 576 T &operator*() const { return curNode(); } 577 578 generic_def_path_iterator &operator++() { 579 N = curNode().Previous; 580 return *this; 581 } 582 583 bool operator==(const generic_def_path_iterator &O) const { 584 if (N.hasValue() != O.N.hasValue()) 585 return false; 586 return !N.hasValue() || *N == *O.N; 587 } 588 589 private: 590 T &curNode() const { return W->Paths[*N]; } 591 592 Walker *W; 593 Optional<ListIndex> N; 594 }; 595 596 using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; 597 using const_def_path_iterator = 598 generic_def_path_iterator<const DefPath, const ClobberWalker>; 599 600 iterator_range<def_path_iterator> def_path(ListIndex From) { 601 return make_range(def_path_iterator(this, From), def_path_iterator()); 602 } 603 604 iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { 605 return make_range(const_def_path_iterator(this, From), 606 const_def_path_iterator()); 607 } 608 609 struct OptznResult { 610 /// The path that contains our result. 611 TerminatedPath PrimaryClobber; 612 /// The paths that we can legally cache back from, but that aren't 613 /// necessarily the result of the Phi optimization. 614 SmallVector<TerminatedPath, 4> OtherClobbers; 615 }; 616 617 ListIndex defPathIndex(const DefPath &N) const { 618 // The assert looks nicer if we don't need to do &N 619 const DefPath *NP = &N; 620 assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && 621 "Out of bounds DefPath!"); 622 return NP - &Paths.front(); 623 } 624 625 /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths 626 /// that act as legal clobbers. Note that this won't return *all* clobbers. 627 /// 628 /// Phi optimization algorithm tl;dr: 629 /// - Find the earliest def/phi, A, we can optimize to 630 /// - Find if all paths from the starting memory access ultimately reach A 631 /// - If not, optimization isn't possible. 632 /// - Otherwise, walk from A to another clobber or phi, A'. 633 /// - If A' is a def, we're done. 634 /// - If A' is a phi, try to optimize it. 635 /// 636 /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path 637 /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. 638 OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, 639 const MemoryLocation &Loc) { 640 assert(Paths.empty() && VisitedPhis.empty() && 641 "Reset the optimization state."); 642 643 Paths.emplace_back(Loc, Start, Phi, None); 644 // Stores how many "valid" optimization nodes we had prior to calling 645 // addSearches/getBlockingAccess. 
Necessary for caching if we had a blocker. 646 auto PriorPathsSize = Paths.size(); 647 648 SmallVector<ListIndex, 16> PausedSearches; 649 SmallVector<ListIndex, 8> NewPaused; 650 SmallVector<TerminatedPath, 4> TerminatedPaths; 651 652 addSearches(Phi, PausedSearches, 0); 653 654 // Moves the TerminatedPath with the "most dominated" Clobber to the end of 655 // Paths. 656 auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { 657 assert(!Paths.empty() && "Need a path to move"); 658 auto Dom = Paths.begin(); 659 for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) 660 if (!MSSA.dominates(I->Clobber, Dom->Clobber)) 661 Dom = I; 662 auto Last = Paths.end() - 1; 663 if (Last != Dom) 664 std::iter_swap(Last, Dom); 665 }; 666 667 MemoryPhi *Current = Phi; 668 while (1) { 669 assert(!MSSA.isLiveOnEntryDef(Current) && 670 "liveOnEntry wasn't treated as a clobber?"); 671 672 const auto *Target = getWalkTarget(Current); 673 // If a TerminatedPath doesn't dominate Target, then it wasn't a legal 674 // optimization for the prior phi. 675 assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { 676 return MSSA.dominates(P.Clobber, Target); 677 })); 678 679 // FIXME: This is broken, because the Blocker may be reported to be 680 // liveOnEntry, and we'll happily wait for that to disappear (read: never) 681 // For the moment, this is fine, since we do nothing with blocker info. 682 if (Optional<TerminatedPath> Blocker = getBlockingAccess( 683 Target, PausedSearches, NewPaused, TerminatedPaths)) { 684 685 // Find the node we started at. We can't search based on N->Last, since 686 // we may have gone around a loop with a different MemoryLocation. 687 auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { 688 return defPathIndex(N) < PriorPathsSize; 689 }); 690 assert(Iter != def_path_iterator()); 691 692 DefPath &CurNode = *Iter; 693 assert(CurNode.Last == Current); 694 695 // Two things: 696 // A. We can't reliably cache all of NewPaused back. Consider a case 697 // where we have two paths in NewPaused; one of which can't optimize 698 // above this phi, whereas the other can. If we cache the second path 699 // back, we'll end up with suboptimal cache entries. We can handle 700 // cases like this a bit better when we either try to find all 701 // clobbers that block phi optimization, or when our cache starts 702 // supporting unfinished searches. 703 // B. We can't reliably cache TerminatedPaths back here without doing 704 // extra checks; consider a case like: 705 // T 706 // / \ 707 // D C 708 // \ / 709 // S 710 // Where T is our target, C is a node with a clobber on it, D is a 711 // diamond (with a clobber *only* on the left or right node, N), and 712 // S is our start. Say we walk to D, through the node opposite N 713 // (read: ignoring the clobber), and see a cache entry in the top 714 // node of D. That cache entry gets put into TerminatedPaths. We then 715 // walk up to C (N is later in our worklist), find the clobber, and 716 // quit. If we append TerminatedPaths to OtherClobbers, we'll cache 717 // the bottom part of D to the cached clobber, ignoring the clobber 718 // in N. Again, this problem goes away if we start tracking all 719 // blockers for a given phi optimization. 
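        // Given the above, pessimistically treat the phi we are currently
        // optimizing as the clobber for this query, and do not cache any of
        // the other paths.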
720 TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; 721 return {Result, {}}; 722 } 723 724 // If there's nothing left to search, then all paths led to valid clobbers 725 // that we got from our cache; pick the nearest to the start, and allow 726 // the rest to be cached back. 727 if (NewPaused.empty()) { 728 MoveDominatedPathToEnd(TerminatedPaths); 729 TerminatedPath Result = TerminatedPaths.pop_back_val(); 730 return {Result, std::move(TerminatedPaths)}; 731 } 732 733 MemoryAccess *DefChainEnd = nullptr; 734 SmallVector<TerminatedPath, 4> Clobbers; 735 for (ListIndex Paused : NewPaused) { 736 UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); 737 if (WR.IsKnownClobber) 738 Clobbers.push_back({WR.Result, Paused}); 739 else 740 // Micro-opt: If we hit the end of the chain, save it. 741 DefChainEnd = WR.Result; 742 } 743 744 if (!TerminatedPaths.empty()) { 745 // If we couldn't find the dominating phi/liveOnEntry in the above loop, 746 // do it now. 747 if (!DefChainEnd) 748 for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) 749 DefChainEnd = MA; 750 751 // If any of the terminated paths don't dominate the phi we'll try to 752 // optimize, we need to figure out what they are and quit. 753 const BasicBlock *ChainBB = DefChainEnd->getBlock(); 754 for (const TerminatedPath &TP : TerminatedPaths) { 755 // Because we know that DefChainEnd is as "high" as we can go, we 756 // don't need local dominance checks; BB dominance is sufficient. 757 if (DT.dominates(ChainBB, TP.Clobber->getBlock())) 758 Clobbers.push_back(TP); 759 } 760 } 761 762 // If we have clobbers in the def chain, find the one closest to Current 763 // and quit. 764 if (!Clobbers.empty()) { 765 MoveDominatedPathToEnd(Clobbers); 766 TerminatedPath Result = Clobbers.pop_back_val(); 767 return {Result, std::move(Clobbers)}; 768 } 769 770 assert(all_of(NewPaused, 771 [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); 772 773 // Because liveOnEntry is a clobber, this must be a phi. 774 auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); 775 776 PriorPathsSize = Paths.size(); 777 PausedSearches.clear(); 778 for (ListIndex I : NewPaused) 779 addSearches(DefChainPhi, PausedSearches, I); 780 NewPaused.clear(); 781 782 Current = DefChainPhi; 783 } 784 } 785 786 void verifyOptResult(const OptznResult &R) const { 787 assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { 788 return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); 789 })); 790 } 791 792 void resetPhiOptznState() { 793 Paths.clear(); 794 VisitedPhis.clear(); 795 } 796 797 public: 798 ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) 799 : MSSA(MSSA), AA(AA), DT(DT) {} 800 801 void reset() {} 802 803 /// Finds the nearest clobber for the given query, optimizing phis if 804 /// possible. 805 MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { 806 Query = &Q; 807 808 MemoryAccess *Current = Start; 809 // This walker pretends uses don't exist. If we're handed one, silently grab 810 // its def. 
(This has the nice side-effect of ensuring we never cache uses) 811 if (auto *MU = dyn_cast<MemoryUse>(Start)) 812 Current = MU->getDefiningAccess(); 813 814 DefPath FirstDesc(Q.StartingLoc, Current, Current, None); 815 // Fast path for the overly-common case (no crazy phi optimization 816 // necessary) 817 UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); 818 MemoryAccess *Result; 819 if (WalkResult.IsKnownClobber) { 820 Result = WalkResult.Result; 821 } else { 822 OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), 823 Current, Q.StartingLoc); 824 verifyOptResult(OptRes); 825 resetPhiOptznState(); 826 Result = OptRes.PrimaryClobber.Clobber; 827 } 828 829 #ifdef EXPENSIVE_CHECKS 830 checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); 831 #endif 832 return Result; 833 } 834 835 void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } 836 }; 837 838 struct RenamePassData { 839 DomTreeNode *DTN; 840 DomTreeNode::const_iterator ChildIt; 841 MemoryAccess *IncomingVal; 842 843 RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, 844 MemoryAccess *M) 845 : DTN(D), ChildIt(It), IncomingVal(M) {} 846 void swap(RenamePassData &RHS) { 847 std::swap(DTN, RHS.DTN); 848 std::swap(ChildIt, RHS.ChildIt); 849 std::swap(IncomingVal, RHS.IncomingVal); 850 } 851 }; 852 } // anonymous namespace 853 854 namespace llvm { 855 /// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no 856 /// longer does caching on its own, 857 /// but the name has been retained for the moment. 858 class MemorySSA::CachingWalker final : public MemorySSAWalker { 859 ClobberWalker Walker; 860 bool AutoResetWalker; 861 862 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); 863 void verifyRemoved(MemoryAccess *); 864 865 public: 866 CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); 867 ~CachingWalker() override; 868 869 using MemorySSAWalker::getClobberingMemoryAccess; 870 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; 871 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, 872 const MemoryLocation &) override; 873 void invalidateInfo(MemoryAccess *) override; 874 875 /// Whether we call resetClobberWalker() after each time we *actually* walk to 876 /// answer a clobber query. 877 void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; } 878 879 /// Drop the walker's persistent data structures. 880 void resetClobberWalker() { Walker.reset(); } 881 882 void verify(const MemorySSA *MSSA) override { 883 MemorySSAWalker::verify(MSSA); 884 Walker.verify(MSSA); 885 } 886 }; 887 888 void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, 889 bool RenameAllUses) { 890 // Pass through values to our successors 891 for (const BasicBlock *S : successors(BB)) { 892 auto It = PerBlockAccesses.find(S); 893 // Rename the phi nodes in our successor block 894 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 895 continue; 896 AccessList *Accesses = It->second.get(); 897 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 898 if (RenameAllUses) { 899 int PhiIndex = Phi->getBasicBlockIndex(BB); 900 assert(PhiIndex != -1 && "Incomplete phi during partial rename"); 901 Phi->setIncomingValue(PhiIndex, IncomingVal); 902 } else 903 Phi->addIncoming(IncomingVal, BB); 904 } 905 } 906 907 /// \brief Rename a single basic block into MemorySSA form. 908 /// Uses the standard SSA renaming algorithm. 909 /// \returns The new incoming value. 
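/// As an illustrative sketch (not part of the algorithm itself), after renaming
/// a block that contains a store immediately followed by a load of the same
/// pointer, the load's MemoryUse is defined by the store's MemoryDef. Assuming
/// a MemorySSA instance MSSA and hypothetical instructions TheStore and
/// TheLoad:
/// \code
///   auto *StoreAcc = cast<MemoryDef>(MSSA.getMemoryAccess(TheStore));
///   auto *LoadAcc = cast<MemoryUse>(MSSA.getMemoryAccess(TheLoad));
///   assert(LoadAcc->getDefiningAccess() == StoreAcc &&
///          "the store is the incoming value reaching the load");
/// \endcode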
910 MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, 911 bool RenameAllUses) { 912 auto It = PerBlockAccesses.find(BB); 913 // Skip most processing if the list is empty. 914 if (It != PerBlockAccesses.end()) { 915 AccessList *Accesses = It->second.get(); 916 for (MemoryAccess &L : *Accesses) { 917 if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { 918 if (MUD->getDefiningAccess() == nullptr || RenameAllUses) 919 MUD->setDefiningAccess(IncomingVal); 920 if (isa<MemoryDef>(&L)) 921 IncomingVal = &L; 922 } else { 923 IncomingVal = &L; 924 } 925 } 926 } 927 return IncomingVal; 928 } 929 930 /// \brief This is the standard SSA renaming algorithm. 931 /// 932 /// We walk the dominator tree in preorder, renaming accesses, and then filling 933 /// in phi nodes in our successors. 934 void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, 935 SmallPtrSetImpl<BasicBlock *> &Visited, 936 bool SkipVisited, bool RenameAllUses) { 937 SmallVector<RenamePassData, 32> WorkStack; 938 // Skip everything if we already renamed this block and we are skipping. 939 // Note: You can't sink this into the if, because we need it to occur 940 // regardless of whether we skip blocks or not. 941 bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; 942 if (SkipVisited && AlreadyVisited) 943 return; 944 945 IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); 946 renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); 947 WorkStack.push_back({Root, Root->begin(), IncomingVal}); 948 949 while (!WorkStack.empty()) { 950 DomTreeNode *Node = WorkStack.back().DTN; 951 DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; 952 IncomingVal = WorkStack.back().IncomingVal; 953 954 if (ChildIt == Node->end()) { 955 WorkStack.pop_back(); 956 } else { 957 DomTreeNode *Child = *ChildIt; 958 ++WorkStack.back().ChildIt; 959 BasicBlock *BB = Child->getBlock(); 960 // Note: You can't sink this into the if, because we need it to occur 961 // regardless of whether we skip blocks or not. 962 AlreadyVisited = !Visited.insert(BB).second; 963 if (SkipVisited && AlreadyVisited) { 964 // We already visited this during our renaming, which can happen when 965 // being asked to rename multiple blocks. Figure out the incoming val, 966 // which is the last def. 967 // Incoming value can only change if there is a block def, and in that 968 // case, it's the last block def in the list. 969 if (auto *BlockDefs = getWritableBlockDefs(BB)) 970 IncomingVal = &*BlockDefs->rbegin(); 971 } else 972 IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); 973 renameSuccessorPhis(BB, IncomingVal, RenameAllUses); 974 WorkStack.push_back({Child, Child->begin(), IncomingVal}); 975 } 976 } 977 } 978 979 /// \brief This handles unreachable block accesses by deleting phi nodes in 980 /// unreachable blocks, and marking all other unreachable MemoryAccess's as 981 /// being uses of the live on entry definition. 982 void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { 983 assert(!DT->isReachableFromEntry(BB) && 984 "Reachable block found while handling unreachable blocks"); 985 986 // Make sure phi nodes in our reachable successors end up with a 987 // LiveOnEntryDef for our incoming edge, even though our block is forward 988 // unreachable. We could just disconnect these blocks from the CFG fully, 989 // but we do not right now. 
990 for (const BasicBlock *S : successors(BB)) { 991 if (!DT->isReachableFromEntry(S)) 992 continue; 993 auto It = PerBlockAccesses.find(S); 994 // Rename the phi nodes in our successor block 995 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 996 continue; 997 AccessList *Accesses = It->second.get(); 998 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 999 Phi->addIncoming(LiveOnEntryDef.get(), BB); 1000 } 1001 1002 auto It = PerBlockAccesses.find(BB); 1003 if (It == PerBlockAccesses.end()) 1004 return; 1005 1006 auto &Accesses = It->second; 1007 for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { 1008 auto Next = std::next(AI); 1009 // If we have a phi, just remove it. We are going to replace all 1010 // users with live on entry. 1011 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) 1012 UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); 1013 else 1014 Accesses->erase(AI); 1015 AI = Next; 1016 } 1017 } 1018 1019 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) 1020 : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), 1021 NextID(INVALID_MEMORYACCESS_ID) { 1022 buildMemorySSA(); 1023 } 1024 1025 MemorySSA::~MemorySSA() { 1026 // Drop all our references 1027 for (const auto &Pair : PerBlockAccesses) 1028 for (MemoryAccess &MA : *Pair.second) 1029 MA.dropAllReferences(); 1030 } 1031 1032 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { 1033 auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); 1034 1035 if (Res.second) 1036 Res.first->second = make_unique<AccessList>(); 1037 return Res.first->second.get(); 1038 } 1039 MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { 1040 auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); 1041 1042 if (Res.second) 1043 Res.first->second = make_unique<DefsList>(); 1044 return Res.first->second.get(); 1045 } 1046 1047 /// This class is a batch walker of all MemoryUse's in the program, and points 1048 /// their defining access at the thing that actually clobbers them. Because it 1049 /// is a batch walker that touches everything, it does not operate like the 1050 /// other walkers. This walker is basically performing a top-down SSA renaming 1051 /// pass, where the version stack is used as the cache. This enables it to be 1052 /// significantly more time and memory efficient than using the regular walker, 1053 /// which is walking bottom-up. 1054 class MemorySSA::OptimizeUses { 1055 public: 1056 OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, 1057 DominatorTree *DT) 1058 : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { 1059 Walker = MSSA->getWalker(); 1060 } 1061 1062 void optimizeUses(); 1063 1064 private: 1065 /// This represents where a given memorylocation is in the stack. 1066 struct MemlocStackInfo { 1067 // This essentially is keeping track of versions of the stack. Whenever 1068 // the stack changes due to pushes or pops, these versions increase. 1069 unsigned long StackEpoch; 1070 unsigned long PopEpoch; 1071 // This is the lower bound of places on the stack to check. It is equal to 1072 // the place the last stack walk ended. 1073 // Note: Correctness depends on this being initialized to 0, which densemap 1074 // does 1075 unsigned long LowerBound; 1076 const BasicBlock *LowerBoundBlock; 1077 // This is where the last walk for this memory location ended. 
    unsigned long LastKill;
    bool LastKillValid;
  };
  void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
                           SmallVectorImpl<MemoryAccess *> &,
                           DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
  MemorySSA *MSSA;
  MemorySSAWalker *Walker;
  AliasAnalysis *AA;
  DominatorTree *DT;
};

/// Optimize the uses in a given block. This is basically the SSA renaming
/// algorithm, with one caveat: We are able to use a single stack for all
/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
/// going to be some position in that stack of possible ones.
///
/// We track the stack positions that each MemoryLocation needs
/// to check, and last ended at. This is because we only want to check the
/// things that changed since last time. The same MemoryLocation should
/// get clobbered by the same store (getModRefInfo does not use invariantness or
/// things like this, and if they start, we can modify MemoryLocOrCall to
/// include relevant data)
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
    SmallVectorImpl<MemoryAccess *> &VersionStack,
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {

  /// If no accesses, nothing to do.
  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
  if (Accesses == nullptr)
    return;

  // Pop everything that doesn't dominate the current block off the stack,
  // increment the PopEpoch to account for this.
  while (true) {
    assert(
        !VersionStack.empty() &&
        "Version stack should have liveOnEntry sentinel dominating everything");
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
    if (DT->dominates(BackBlock, BB))
      break;
    while (VersionStack.back()->getBlock() == BackBlock)
      VersionStack.pop_back();
    ++PopEpoch;
  }

  for (MemoryAccess &MA : *Accesses) {
    auto *MU = dyn_cast<MemoryUse>(&MA);
    if (!MU) {
      VersionStack.push_back(&MA);
      ++StackEpoch;
      continue;
    }

    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
      continue;
    }

    MemoryLocOrCall UseMLOC(MU);
    auto &LocInfo = LocStackInfo[UseMLOC];
    // If the pop epoch changed, it means we've removed stuff from top of
    // stack due to changing blocks. We may have to reset the lower bound or
    // last kill info.
    if (LocInfo.PopEpoch != PopEpoch) {
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
      // If the lower bound was in something that no longer dominates us, we
      // have to reset it.
      // We can't simply track stack size, because the stack may have had
      // pushes/pops in the meantime.
      // XXX: This is non-optimal, but is only slower in cases with heavily
      // branching dominator trees. To get the optimal number of queries would
      // be to make lowerbound and lastkill a per-loc stack, and pop it until
      // the top of that stack dominates us. This does not seem worth it ATM.
      // A much cheaper optimization would be to always explore the deepest
      // branch of the dominator tree first. This will guarantee this resets on
      // the smallest set of blocks.
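      // For instance, if the last walk for this location ended in a sibling
      // branch of the dominator tree, the stack entries its LowerBound pointed
      // into have been popped above, so we have to start checking from the
      // bottom of the stack again.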
1158 if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && 1159 !DT->dominates(LocInfo.LowerBoundBlock, BB)) { 1160 // Reset the lower bound of things to check. 1161 // TODO: Some day we should be able to reset to last kill, rather than 1162 // 0. 1163 LocInfo.LowerBound = 0; 1164 LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); 1165 LocInfo.LastKillValid = false; 1166 } 1167 } else if (LocInfo.StackEpoch != StackEpoch) { 1168 // If all that has changed is the StackEpoch, we only have to check the 1169 // new things on the stack, because we've checked everything before. In 1170 // this case, the lower bound of things to check remains the same. 1171 LocInfo.PopEpoch = PopEpoch; 1172 LocInfo.StackEpoch = StackEpoch; 1173 } 1174 if (!LocInfo.LastKillValid) { 1175 LocInfo.LastKill = VersionStack.size() - 1; 1176 LocInfo.LastKillValid = true; 1177 } 1178 1179 // At this point, we should have corrected last kill and LowerBound to be 1180 // in bounds. 1181 assert(LocInfo.LowerBound < VersionStack.size() && 1182 "Lower bound out of range"); 1183 assert(LocInfo.LastKill < VersionStack.size() && 1184 "Last kill info out of range"); 1185 // In any case, the new upper bound is the top of the stack. 1186 unsigned long UpperBound = VersionStack.size() - 1; 1187 1188 if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { 1189 DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" 1190 << *(MU->getMemoryInst()) << ")" 1191 << " because there are " << UpperBound - LocInfo.LowerBound 1192 << " stores to disambiguate\n"); 1193 // Because we did not walk, LastKill is no longer valid, as this may 1194 // have been a kill. 1195 LocInfo.LastKillValid = false; 1196 continue; 1197 } 1198 bool FoundClobberResult = false; 1199 while (UpperBound > LocInfo.LowerBound) { 1200 if (isa<MemoryPhi>(VersionStack[UpperBound])) { 1201 // For phis, use the walker, see where we ended up, go there 1202 Instruction *UseInst = MU->getMemoryInst(); 1203 MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); 1204 // We are guaranteed to find it or something is wrong 1205 while (VersionStack[UpperBound] != Result) { 1206 assert(UpperBound != 0); 1207 --UpperBound; 1208 } 1209 FoundClobberResult = true; 1210 break; 1211 } 1212 1213 MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); 1214 // If the lifetime of the pointer ends at this instruction, it's live on 1215 // entry. 1216 if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { 1217 // Reset UpperBound to liveOnEntryDef's place in the stack 1218 UpperBound = 0; 1219 FoundClobberResult = true; 1220 break; 1221 } 1222 if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) { 1223 FoundClobberResult = true; 1224 break; 1225 } 1226 --UpperBound; 1227 } 1228 // At the end of this loop, UpperBound is either a clobber, or lower bound 1229 // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. 1230 if (FoundClobberResult || UpperBound < LocInfo.LastKill) { 1231 MU->setDefiningAccess(VersionStack[UpperBound], true); 1232 // We were last killed now by where we got to 1233 LocInfo.LastKill = UpperBound; 1234 } else { 1235 // Otherwise, we checked all the new ones, and now we know we can get to 1236 // LastKill. 1237 MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true); 1238 } 1239 LocInfo.LowerBound = VersionStack.size() - 1; 1240 LocInfo.LowerBoundBlock = BB; 1241 } 1242 } 1243 1244 /// Optimize uses to point to their actual clobbering definitions. 
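/// For example, if a load's nearest dominating MemoryDef is a store that does
/// not alias the loaded address, the walk below will skip that store and point
/// the use at the real clobber (possibly liveOnEntry). Illustrative sketch,
/// assuming a MemorySSA instance MSSA and a hypothetical LoadInst *TheLoad:
/// \code
///   auto *MU = cast<MemoryUse>(MSSA.getMemoryAccess(TheLoad));
///   // After optimizeUses(), this is the closest access that actually
///   // clobbers the load, not merely the closest dominating MemoryDef.
///   MemoryAccess *Clobber = MU->getDefiningAccess();
/// \endcode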
1245 void MemorySSA::OptimizeUses::optimizeUses() { 1246 SmallVector<MemoryAccess *, 16> VersionStack; 1247 DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; 1248 VersionStack.push_back(MSSA->getLiveOnEntryDef()); 1249 1250 unsigned long StackEpoch = 1; 1251 unsigned long PopEpoch = 1; 1252 // We perform a non-recursive top-down dominator tree walk. 1253 for (const auto *DomNode : depth_first(DT->getRootNode())) 1254 optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, 1255 LocStackInfo); 1256 } 1257 1258 void MemorySSA::placePHINodes( 1259 const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks, 1260 const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) { 1261 // Determine where our MemoryPhi's should go 1262 ForwardIDFCalculator IDFs(*DT); 1263 IDFs.setDefiningBlocks(DefiningBlocks); 1264 SmallVector<BasicBlock *, 32> IDFBlocks; 1265 IDFs.calculate(IDFBlocks); 1266 1267 std::sort(IDFBlocks.begin(), IDFBlocks.end(), 1268 [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { 1269 return BBNumbers.lookup(A) < BBNumbers.lookup(B); 1270 }); 1271 1272 // Now place MemoryPhi nodes. 1273 for (auto &BB : IDFBlocks) 1274 createMemoryPhi(BB); 1275 } 1276 1277 void MemorySSA::buildMemorySSA() { 1278 // We create an access to represent "live on entry", for things like 1279 // arguments or users of globals, where the memory they use is defined before 1280 // the beginning of the function. We do not actually insert it into the IR. 1281 // We do not define a live on exit for the immediate uses, and thus our 1282 // semantics do *not* imply that something with no immediate uses can simply 1283 // be removed. 1284 BasicBlock &StartingPoint = F.getEntryBlock(); 1285 LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, 1286 &StartingPoint, NextID++); 1287 DenseMap<const BasicBlock *, unsigned int> BBNumbers; 1288 unsigned NextBBNum = 0; 1289 1290 // We maintain lists of memory accesses per-block, trading memory for time. We 1291 // could just look up the memory access for every possible instruction in the 1292 // stream. 1293 SmallPtrSet<BasicBlock *, 32> DefiningBlocks; 1294 // Go through each block, figure out where defs occur, and chain together all 1295 // the accesses. 1296 for (BasicBlock &B : F) { 1297 BBNumbers[&B] = NextBBNum++; 1298 bool InsertIntoDef = false; 1299 AccessList *Accesses = nullptr; 1300 DefsList *Defs = nullptr; 1301 for (Instruction &I : B) { 1302 MemoryUseOrDef *MUD = createNewAccess(&I); 1303 if (!MUD) 1304 continue; 1305 1306 if (!Accesses) 1307 Accesses = getOrCreateAccessList(&B); 1308 Accesses->push_back(MUD); 1309 if (isa<MemoryDef>(MUD)) { 1310 InsertIntoDef = true; 1311 if (!Defs) 1312 Defs = getOrCreateDefsList(&B); 1313 Defs->push_back(*MUD); 1314 } 1315 } 1316 if (InsertIntoDef) 1317 DefiningBlocks.insert(&B); 1318 } 1319 placePHINodes(DefiningBlocks, BBNumbers); 1320 1321 // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get 1322 // filled in with all blocks. 1323 SmallPtrSet<BasicBlock *, 16> Visited; 1324 renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); 1325 1326 CachingWalker *Walker = getWalkerImpl(); 1327 1328 // We're doing a batch of updates; don't drop useful caches between them. 1329 Walker->setAutoResetWalker(false); 1330 OptimizeUses(this, Walker, AA, DT).optimizeUses(); 1331 Walker->setAutoResetWalker(true); 1332 Walker->resetClobberWalker(); 1333 1334 // Mark the uses in unreachable blocks as live on entry, so that they go 1335 // somewhere. 
  for (auto &BB : F)
    if (!Visited.count(&BB))
      markUnreachableAsLiveOnEntry(&BB);
}

MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }

MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
  if (Walker)
    return Walker.get();

  Walker = make_unique<CachingWalker>(this, AA, DT);
  return Walker.get();
}

// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
                                        const BasicBlock *BB,
                                        InsertionPlace Point) {
  auto *Accesses = getOrCreateAccessList(BB);
  if (Point == Beginning) {
    // If it's a phi node, it goes first, otherwise, it goes after any phi
    // nodes.
    if (isa<MemoryPhi>(NewAccess)) {
      Accesses->push_front(NewAccess);
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_front(*NewAccess);
    } else {
      auto AI = find_if_not(
          *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
      Accesses->insert(AI, NewAccess);
      if (!isa<MemoryUse>(NewAccess)) {
        auto *Defs = getOrCreateDefsList(BB);
        auto DI = find_if_not(
            *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
        Defs->insert(DI, *NewAccess);
      }
    }
  } else {
    Accesses->push_back(NewAccess);
    if (!isa<MemoryUse>(NewAccess)) {
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_back(*NewAccess);
    }
  }
  BlockNumberingValid.erase(BB);
}

void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
                                      AccessList::iterator InsertPt) {
  auto *Accesses = getWritableBlockAccesses(BB);
  bool WasEnd = InsertPt == Accesses->end();
  Accesses->insert(AccessList::iterator(InsertPt), What);
  if (!isa<MemoryUse>(What)) {
    auto *Defs = getOrCreateDefsList(BB);
    // If we got asked to insert at the end, we have an easy job, just shove it
    // at the end. If we got asked to insert before an existing def, we also get
    // an iterator. If we got asked to insert before a use, we have to hunt for
    // the next def.
    if (WasEnd) {
      Defs->push_back(*What);
    } else if (isa<MemoryDef>(InsertPt)) {
      Defs->insert(InsertPt->getDefsIterator(), *What);
    } else {
      while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt))
        ++InsertPt;
      // Either we found a def, or we are inserting at the end
      if (InsertPt == Accesses->end())
        Defs->push_back(*What);
      else
        Defs->insert(InsertPt->getDefsIterator(), *What);
    }
  }
  BlockNumberingValid.erase(BB);
}

// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
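// In other words, this only performs the list surgery; recomputing the correct
// defining access (and rewriting the accesses below a moved def) is the
// caller's responsibility.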
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
                       AccessList::iterator Where) {
  // Keep it in the lookup tables, remove from the lists
  removeFromLists(What, false);
  What->setBlock(BB);
  insertIntoListsBefore(What, BB, Where);
}

void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
                       InsertionPlace Point) {
  removeFromLists(What, false);
  What->setBlock(BB);
  insertIntoListsForBlock(What, BB, Point);
}

MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
  assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
  MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
  // Phi's always are placed at the front of the block.
  insertIntoListsForBlock(Phi, BB, Beginning);
  ValueToMemoryAccess[BB] = Phi;
  return Phi;
}

MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
                                               MemoryAccess *Definition) {
  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
  MemoryUseOrDef *NewAccess = createNewAccess(I);
  assert(
      NewAccess != nullptr &&
      "Tried to create a memory access for a non-memory touching instruction");
  NewAccess->setDefiningAccess(Definition);
  return NewAccess;
}

// Return true if the instruction has ordering constraints.
// Note specifically that this only considers stores and loads
// because others are still considered ModRef by getModRefInfo.
static inline bool isOrdered(const Instruction *I) {
  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (!SI->isUnordered())
      return true;
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isUnordered())
      return true;
  }
  return false;
}
/// \brief Helper function to create new memory accesses
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
  // The assume intrinsic has a control dependency which we model by claiming
  // that it writes arbitrarily. Ignore that fake memory dependency here.
  // FIXME: Replace this special casing with a more accurate modelling of
  // assume's control dependency.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
    if (II->getIntrinsicID() == Intrinsic::assume)
      return nullptr;

  // Find out what effect this instruction has on memory.
  ModRefInfo ModRef = AA->getModRefInfo(I);
  // The isOrdered check is used to ensure that volatiles end up as defs
  // (atomics end up as ModRef right now anyway). Until we separate the
  // ordering chain from the memory chain, this enables people to see at least
  // some relative ordering to volatiles. Note that getClobberingMemoryAccess
  // will still give an answer that bypasses other volatile loads. TODO:
  // Separate memory aliasing and ordering into two different chains so that we
  // can precisely represent both "what memory will this read/write/is clobbered
  // by" and "what instructions can I move this past".
  bool Def = bool(ModRef & MRI_Mod) || isOrdered(I);
  bool Use = bool(ModRef & MRI_Ref);

  // It's possible for an instruction to not modify memory at all. During
  // construction, we ignore them.
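  // (For example, a call to a function known not to read or write memory has
  // neither MRI_Mod nor MRI_Ref set, so it gets no MemoryAccess at all.)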
  if (!Def && !Use)
    return nullptr;

  assert((Def || Use) &&
         "Trying to create a memory access with a non-memory instruction");

  MemoryUseOrDef *MUD;
  if (Def)
    MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
  else
    MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
  ValueToMemoryAccess[I] = MUD;
  return MUD;
}

/// \brief Returns true if \p Replacer dominates \p Replacee.
bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
                             const MemoryAccess *Replacee) const {
  if (isa<MemoryUseOrDef>(Replacee))
    return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
  const auto *MP = cast<MemoryPhi>(Replacee);
  // For a phi node, the use occurs in the predecessor block of the phi node.
  // Since we may occur multiple times in the phi node, we have to check each
  // operand to ensure Replacer dominates each operand where Replacee occurs.
  for (const Use &Arg : MP->operands()) {
    if (Arg.get() != Replacee &&
        !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
      return false;
  }
  return true;
}

/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
  assert(MA->use_empty() &&
         "Trying to remove memory access that still has uses");
  BlockNumbering.erase(MA);
  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MUD->setDefiningAccess(nullptr);
  // Invalidate our walker's cache if necessary.
  if (!isa<MemoryUse>(MA))
    Walker->invalidateInfo(MA);
  // The call below to erase will destroy MA, so we can't change the order in
  // which we are doing things here.
  Value *MemoryInst;
  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
    MemoryInst = MUD->getMemoryInst();
  } else {
    MemoryInst = MA->getBlock();
  }
  auto VMA = ValueToMemoryAccess.find(MemoryInst);
  if (VMA->second == MA)
    ValueToMemoryAccess.erase(VMA);
}

/// \brief Properly remove \p MA from all of MemorySSA's lists.
///
/// Because of the way the intrusive list and use lists work, it is important
/// to do removal in the right order.
/// ShouldDelete defaults to true, and will cause the memory access to also be
/// deleted, not just removed.
void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
  // The access list owns the reference, so we erase it from the non-owning
  // list first.
  if (!isa<MemoryUse>(MA)) {
    auto DefsIt = PerBlockDefs.find(MA->getBlock());
    std::unique_ptr<DefsList> &Defs = DefsIt->second;
    Defs->remove(*MA);
    if (Defs->empty())
      PerBlockDefs.erase(DefsIt);
  }

  // The erase call here will delete it. If we don't want it deleted, we call
  // remove instead.
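  // (AccessList is an intrusive list that owns its elements: erase() both
  // unlinks and destroys the node, while remove() only unlinks it and leaves
  // the node alive for the caller.)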
  auto AccessIt = PerBlockAccesses.find(MA->getBlock());
  std::unique_ptr<AccessList> &Accesses = AccessIt->second;
  if (ShouldDelete)
    Accesses->erase(MA);
  else
    Accesses->remove(MA);

  if (Accesses->empty())
    PerBlockAccesses.erase(AccessIt);
}

void MemorySSA::print(raw_ostream &OS) const {
  MemorySSAAnnotatedWriter Writer(this);
  F.print(OS, &Writer);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif

void MemorySSA::verifyMemorySSA() const {
  verifyDefUses(F);
  verifyDomination(F);
  verifyOrdering(F);
  Walker->verify(this);
}

/// \brief Verify that the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
void MemorySSA::verifyOrdering(Function &F) const {
  // Walk all the blocks, comparing what the lookups think and what the access
  // lists think, as well as the order in the blocks vs the order in the access
  // lists.
  SmallVector<MemoryAccess *, 32> ActualAccesses;
  SmallVector<MemoryAccess *, 32> ActualDefs;
  for (BasicBlock &B : F) {
    const AccessList *AL = getBlockAccesses(&B);
    const auto *DL = getBlockDefs(&B);
    MemoryAccess *Phi = getMemoryAccess(&B);
    if (Phi) {
      ActualAccesses.push_back(Phi);
      ActualDefs.push_back(Phi);
    }

    for (Instruction &I : B) {
      MemoryAccess *MA = getMemoryAccess(&I);
      assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
             "We have memory affecting instructions "
             "in this block but they are not in the "
             "access list or defs list");
      if (MA) {
        ActualAccesses.push_back(MA);
        if (isa<MemoryDef>(MA))
          ActualDefs.push_back(MA);
      }
    }
    // Either we hit the assert, really have no accesses, or we have both
    // accesses and an access list. Same with defs.
    if (!AL && !DL)
      continue;
    assert(AL->size() == ActualAccesses.size() &&
           "We don't have the same number of accesses in the block as on the "
           "access list");
    assert((DL || ActualDefs.size() == 0) &&
           "Either we should have a defs list, or we should have no defs");
    assert((!DL || DL->size() == ActualDefs.size()) &&
           "We don't have the same number of defs in the block as on the "
           "def list");
    auto ALI = AL->begin();
    auto AAI = ActualAccesses.begin();
    while (ALI != AL->end() && AAI != ActualAccesses.end()) {
      assert(&*ALI == *AAI && "Not the same accesses in the same order");
      ++ALI;
      ++AAI;
    }
    ActualAccesses.clear();
    if (DL) {
      auto DLI = DL->begin();
      auto ADI = ActualDefs.begin();
      while (DLI != DL->end() && ADI != ActualDefs.end()) {
        assert(&*DLI == *ADI && "Not the same defs in the same order");
        ++DLI;
        ++ADI;
      }
    }
    ActualDefs.clear();
  }
}

/// \brief Verify the domination properties of MemorySSA by checking that each
/// definition dominates all of its uses.
void MemorySSA::verifyDomination(Function &F) const {
#ifndef NDEBUG
  for (BasicBlock &B : F) {
    // Phi nodes are attached to basic blocks.
    if (MemoryPhi *MP = getMemoryAccess(&B))
      for (const Use &U : MP->uses())
        assert(dominates(MP, U) && "Memory PHI does not dominate its uses");

    for (Instruction &I : B) {
      MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
      if (!MD)
        continue;

      for (const Use &U : MD->uses())
        assert(dominates(MD, U) && "Memory Def does not dominate its uses");
    }
  }
#endif
}

/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#ifndef NDEBUG
  // The live on entry use may cause us to get a null def here.
  if (!Def)
    assert(isLiveOnEntryDef(Use) &&
           "Null def but use does not point to the live on entry def");
  else
    assert(is_contained(Def->users(), Use) &&
           "Did not find use in def's use list");
#endif
}

/// \brief Verify the immediate use information, by walking all the memory
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list.
void MemorySSA::verifyDefUses(Function &F) const {
  for (BasicBlock &B : F) {
    // Phi nodes are attached to basic blocks.
    if (MemoryPhi *Phi = getMemoryAccess(&B)) {
      assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
                                          pred_begin(&B), pred_end(&B))) &&
             "Incomplete MemoryPhi Node");
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
        verifyUseInDefs(Phi->getIncomingValue(I), Phi);
    }

    for (Instruction &I : B) {
      if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
        verifyUseInDefs(MA->getDefiningAccess(), MA);
      }
    }
  }
}

MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
  return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
}

MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
  return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
}

/// Perform a local numbering on blocks so that instruction ordering can be
/// determined in constant time.
/// TODO: We currently just number in order. If we numbered by N, we could
/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
/// log2(N) sequences of mixed before and after) without needing to invalidate
/// the numbering.
void MemorySSA::renumberBlock(const BasicBlock *B) const {
  // The pre-increment ensures the numbers really start at 1.
  unsigned long CurrentNumber = 0;
  const AccessList *AL = getBlockAccesses(B);
  assert(AL != nullptr && "Asking to renumber an empty block");
  for (const auto &I : *AL)
    BlockNumbering[&I] = ++CurrentNumber;
  BlockNumberingValid.insert(B);
}

/// \brief Determine, for two memory accesses in the same block,
/// whether \p Dominator dominates \p Dominatee.
/// \returns True if \p Dominator dominates \p Dominatee.
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
                                 const MemoryAccess *Dominatee) const {
  const BasicBlock *DominatorBlock = Dominator->getBlock();

  assert((DominatorBlock == Dominatee->getBlock()) &&
         "Asking for local domination when accesses are in different blocks!");
  // A node dominates itself.
  if (Dominatee == Dominator)
    return true;

  // When Dominatee is defined on function entry, it is not dominated by
  // another memory access.
  if (isLiveOnEntryDef(Dominatee))
    return false;

  // When Dominator is defined on function entry, it dominates the other
  // memory access.
  if (isLiveOnEntryDef(Dominator))
    return true;

  if (!BlockNumberingValid.count(DominatorBlock))
    renumberBlock(DominatorBlock);

  unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
  // All numbers start at 1.
  assert(DominatorNum != 0 && "Block was not numbered properly");
  unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
  assert(DominateeNum != 0 && "Block was not numbered properly");
  return DominatorNum < DominateeNum;
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const MemoryAccess *Dominatee) const {
  if (Dominator == Dominatee)
    return true;

  if (isLiveOnEntryDef(Dominatee))
    return false;

  if (Dominator->getBlock() != Dominatee->getBlock())
    return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
  return locallyDominates(Dominator, Dominatee);
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const Use &Dominatee) const {
  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) {
    BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
    // The def must dominate the incoming block of the phi.
    if (UseBB != Dominator->getBlock())
      return DT->dominates(Dominator->getBlock(), UseBB);
    // If the UseBB and the DefBB are the same, compare locally.
    return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
  }
  // If it's not a PHI node use, the normal dominates can already handle it.
  return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}

const static char LiveOnEntryStr[] = "liveOnEntry";

void MemoryAccess::print(raw_ostream &OS) const {
  switch (getValueID()) {
  case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
  case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
  case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
  }
  llvm_unreachable("invalid value id");
}

void MemoryDef::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();

  OS << getID() << " = MemoryDef(";
  if (UO && UO->getID())
    OS << UO->getID();
  else
    OS << LiveOnEntryStr;
  OS << ')';
}

void MemoryPhi::print(raw_ostream &OS) const {
  bool First = true;
  OS << getID() << " = MemoryPhi(";
  for (const auto &Op : operands()) {
    BasicBlock *BB = getIncomingBlock(Op);
    MemoryAccess *MA = cast<MemoryAccess>(Op);
    if (!First)
      OS << ',';
    else
      First = false;

    OS << '{';
    if (BB->hasName())
      OS << BB->getName();
    else
      BB->printAsOperand(OS, false);
    OS << ',';
    if (unsigned ID = MA->getID())
      OS << ID;
    else
      OS << LiveOnEntryStr;
    OS << '}';
  }
  OS << ')';
}

void MemoryUse::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();
  OS << "MemoryUse(";
  if (UO && UO->getID())
    OS << UO->getID();
  else
    OS << LiveOnEntryStr;
  OS << ')';
}

void MemoryAccess::dump() const {
// Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  print(dbgs());
  dbgs() << "\n";
#endif
}

char MemorySSAPrinterLegacyPass::ID = 0;

MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
  initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}

void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MemorySSAWrapperPass>();
  AU.addPreserved<MemorySSAWrapperPass>();
}

bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
  auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  MSSA.print(dbgs());
  if (VerifyMemorySSA)
    MSSA.verifyMemorySSA();
  return false;
}

AnalysisKey MemorySSAAnalysis::Key;

MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT));
}

PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  OS << "MemorySSA for function: " << F.getName() << "\n";
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);

  return PreservedAnalyses::all();
}

PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();

  return PreservedAnalyses::all();
}

char MemorySSAWrapperPass::ID = 0;

MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
  initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
}

void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }

void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<AAResultsWrapperPass>();
}

bool MemorySSAWrapperPass::runOnFunction(Function &F) {
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MSSA.reset(new MemorySSA(F, &AA, &DT));
  return false;
}

void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }

void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
  MSSA->print(OS);
}

MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}

MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
                                        DominatorTree *D)
    : MemorySSAWalker(M), Walker(*M, *A, *D), AutoResetWalker(true) {}

MemorySSA::CachingWalker::~CachingWalker() {}

void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MUD->resetOptimized();
}

/// \brief Walk the use-def chains starting at \p MA and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
  MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
#ifdef EXPENSIVE_CHECKS
  MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q);
  assert(NewNoCache == New && "Cache made us hand back a different result?");
#endif
  if (AutoResetWalker)
    resetClobberWalker();
  return New;
}

MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
  if (isa<MemoryPhi>(StartingAccess))
    return StartingAccess;

  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
    return StartingUseOrDef;

  Instruction *I = StartingUseOrDef->getMemoryInst();

  // Conservatively, fences are always clobbers, so don't perform the walk if
  // we hit a fence.
  if (!ImmutableCallSite(I) && I->isFenceLike())
    return StartingUseOrDef;

  UpwardsMemoryQuery Q;
  Q.OriginalAccess = StartingUseOrDef;
  Q.StartingLoc = Loc;
  Q.Inst = I;
  Q.IsCall = false;

  // Unlike the other function, do not walk to the def of a def, because we are
  // handed something we already believe is the clobbering access.
  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
                                     ? StartingUseOrDef->getDefiningAccess()
                                     : StartingUseOrDef;

  MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *StartingUseOrDef << "\n");
  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *Clobber << "\n");
  return Clobber;
}

MemoryAccess *
MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
  // If this is a MemoryPhi, we can't do anything.
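  // (A MemoryPhi has no underlying instruction or MemoryLocation to hand to
  // alias analysis, so there is nothing to disambiguate against; we return it
  // unchanged.)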
  if (!StartingAccess)
    return MA;

  // If this is an already optimized use or def, return the optimized result.
  // Note: Currently, we do not store the optimized def result because we'd
  // need a separate field, since we can't use it as the defining access.
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
    if (MUD->isOptimized())
      return MUD->getOptimized();

  const Instruction *I = StartingAccess->getMemoryInst();
  UpwardsMemoryQuery Q(I, StartingAccess);
  // We can't sanely do anything with a fence: fences conservatively clobber
  // all memory and have no locations to get pointers from to try to
  // disambiguate.
  if (!Q.IsCall && I->isFenceLike())
    return StartingAccess;

  if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
    if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
      MUD->setOptimized(LiveOnEntry);
    return LiveOnEntry;
  }

  // Start with the thing we already think clobbers this location.
  MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();

  // At this point, DefiningAccess may be the live on entry def.
  // If it is, we will not get a better result.
  if (MSSA->isLiveOnEntryDef(DefiningAccess))
    return DefiningAccess;

  MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *DefiningAccess << "\n");
  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *Result << "\n");
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
    MUD->setOptimized(Result);

  return Result;
}

MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
    return Use->getDefiningAccess();
  return MA;
}

MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, const MemoryLocation &) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
    return Use->getDefiningAccess();
  return StartingAccess;
}
} // namespace llvm

void MemoryPhi::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryPhi *>(Self);
}

void MemoryDef::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryDef *>(Self);
}

void MemoryUse::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryUse *>(Self);
}
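
// A minimal, illustrative usage sketch (deliberately kept as a comment, not
// compiled): how a client pass might query this analysis, assuming it already
// has a MemorySSA &MSSA for the current function and a load instruction *LI
// it is interested in. The names MSSA and LI are placeholders, not symbols
// defined in this file.
//
//   if (MemoryUseOrDef *MU = MSSA.getMemoryAccess(LI)) {
//     MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(MU);
//     if (MSSA.isLiveOnEntryDef(Clobber)) {
//       // No memory-writing instruction in this function clobbers LI.
//     }
//   }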