//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the MemorySSA class.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "memoryssa"

INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                      true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                    true)

INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
                      "Memory SSA Printer", false, false)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
                    "Memory SSA Printer", false, false)

static cl::opt<unsigned> MaxCheckLimit(
    "memssa-check-limit", cl::Hidden, cl::init(100),
    cl::desc("The maximum number of stores/phis MemorySSA "
             "will consider trying to walk past (default = 100)"));

static cl::opt<bool>
    VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
                    cl::desc("Verify MemorySSA in legacy printer pass."));

namespace llvm {

/// \brief An assembly annotator class to print Memory SSA information in
/// comments.
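/// For illustration only, the annotations this writer emits look roughly like
/// the following when a function is printed (the IDs and operands here are
/// examples, not taken from any particular input):
///
///   ; 1 = MemoryDef(liveOnEntry)
///   store i32 0, i32* %p
///   ; MemoryUse(1)
///   %v = load i32, i32* %p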
87 class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { 88 friend class MemorySSA; 89 90 const MemorySSA *MSSA; 91 92 public: 93 MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} 94 95 void emitBasicBlockStartAnnot(const BasicBlock *BB, 96 formatted_raw_ostream &OS) override { 97 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) 98 OS << "; " << *MA << "\n"; 99 } 100 101 void emitInstructionAnnot(const Instruction *I, 102 formatted_raw_ostream &OS) override { 103 if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) 104 OS << "; " << *MA << "\n"; 105 } 106 }; 107 108 } // end namespace llvm 109 110 namespace { 111 112 /// Our current alias analysis API differentiates heavily between calls and 113 /// non-calls, and functions called on one usually assert on the other. 114 /// This class encapsulates the distinction to simplify other code that wants 115 /// "Memory affecting instructions and related data" to use as a key. 116 /// For example, this class is used as a densemap key in the use optimizer. 117 class MemoryLocOrCall { 118 public: 119 bool IsCall = false; 120 121 MemoryLocOrCall() = default; 122 MemoryLocOrCall(MemoryUseOrDef *MUD) 123 : MemoryLocOrCall(MUD->getMemoryInst()) {} 124 MemoryLocOrCall(const MemoryUseOrDef *MUD) 125 : MemoryLocOrCall(MUD->getMemoryInst()) {} 126 127 MemoryLocOrCall(Instruction *Inst) { 128 if (ImmutableCallSite(Inst)) { 129 IsCall = true; 130 CS = ImmutableCallSite(Inst); 131 } else { 132 IsCall = false; 133 // There is no such thing as a memorylocation for a fence inst, and it is 134 // unique in that regard. 135 if (!isa<FenceInst>(Inst)) 136 Loc = MemoryLocation::get(Inst); 137 } 138 } 139 140 explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {} 141 142 ImmutableCallSite getCS() const { 143 assert(IsCall); 144 return CS; 145 } 146 147 MemoryLocation getLoc() const { 148 assert(!IsCall); 149 return Loc; 150 } 151 152 bool operator==(const MemoryLocOrCall &Other) const { 153 if (IsCall != Other.IsCall) 154 return false; 155 156 if (IsCall) 157 return CS.getCalledValue() == Other.CS.getCalledValue(); 158 return Loc == Other.Loc; 159 } 160 161 private: 162 union { 163 ImmutableCallSite CS; 164 MemoryLocation Loc; 165 }; 166 }; 167 168 } // end anonymous namespace 169 170 namespace llvm { 171 172 template <> struct DenseMapInfo<MemoryLocOrCall> { 173 static inline MemoryLocOrCall getEmptyKey() { 174 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); 175 } 176 177 static inline MemoryLocOrCall getTombstoneKey() { 178 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); 179 } 180 181 static unsigned getHashValue(const MemoryLocOrCall &MLOC) { 182 if (MLOC.IsCall) 183 return hash_combine(MLOC.IsCall, 184 DenseMapInfo<const Value *>::getHashValue( 185 MLOC.getCS().getCalledValue())); 186 return hash_combine( 187 MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); 188 } 189 190 static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { 191 return LHS == RHS; 192 } 193 }; 194 195 } // end namespace llvm 196 197 /// This does one-way checks to see if Use could theoretically be hoisted above 198 /// MayClobber. This will not check the other way around. 199 /// 200 /// This assumes that, for the purposes of MemorySSA, Use comes directly after 201 /// MayClobber, with no potentially clobbering operations in between them. 202 /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) 
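///
/// For example (illustrative): an unordered load may be hoisted above a
/// preceding monotonic load of the same address, but not above an acquire
/// load, and a seq_cst load is never hoisted above another load.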
203 static bool areLoadsReorderable(const LoadInst *Use, 204 const LoadInst *MayClobber) { 205 bool VolatileUse = Use->isVolatile(); 206 bool VolatileClobber = MayClobber->isVolatile(); 207 // Volatile operations may never be reordered with other volatile operations. 208 if (VolatileUse && VolatileClobber) 209 return false; 210 // Otherwise, volatile doesn't matter here. From the language reference: 211 // 'optimizers may change the order of volatile operations relative to 212 // non-volatile operations.'" 213 214 // If a load is seq_cst, it cannot be moved above other loads. If its ordering 215 // is weaker, it can be moved above other loads. We just need to be sure that 216 // MayClobber isn't an acquire load, because loads can't be moved above 217 // acquire loads. 218 // 219 // Note that this explicitly *does* allow the free reordering of monotonic (or 220 // weaker) loads of the same address. 221 bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; 222 bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), 223 AtomicOrdering::Acquire); 224 return !(SeqCstUse || MayClobberIsAcquire); 225 } 226 227 namespace { 228 229 struct ClobberAlias { 230 bool IsClobber; 231 Optional<AliasResult> AR; 232 }; 233 234 } // end anonymous namespace 235 236 // Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being 237 // ignored if IsClobber = false. 238 static ClobberAlias instructionClobbersQuery(MemoryDef *MD, 239 const MemoryLocation &UseLoc, 240 const Instruction *UseInst, 241 AliasAnalysis &AA) { 242 Instruction *DefInst = MD->getMemoryInst(); 243 assert(DefInst && "Defining instruction not actually an instruction"); 244 ImmutableCallSite UseCS(UseInst); 245 Optional<AliasResult> AR; 246 247 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { 248 // These intrinsics will show up as affecting memory, but they are just 249 // markers. 250 switch (II->getIntrinsicID()) { 251 case Intrinsic::lifetime_start: 252 if (UseCS) 253 return {false, NoAlias}; 254 AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc); 255 return {AR == MustAlias, AR}; 256 case Intrinsic::lifetime_end: 257 case Intrinsic::invariant_start: 258 case Intrinsic::invariant_end: 259 case Intrinsic::assume: 260 return {false, NoAlias}; 261 default: 262 break; 263 } 264 } 265 266 if (UseCS) { 267 ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); 268 AR = isMustSet(I) ? MustAlias : MayAlias; 269 return {isModOrRefSet(I), AR}; 270 } 271 272 if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) 273 if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) 274 return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias}; 275 276 ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc); 277 AR = isMustSet(I) ? MustAlias : MayAlias; 278 return {isModSet(I), AR}; 279 } 280 281 static ClobberAlias instructionClobbersQuery(MemoryDef *MD, 282 const MemoryUseOrDef *MU, 283 const MemoryLocOrCall &UseMLOC, 284 AliasAnalysis &AA) { 285 // FIXME: This is a temporary hack to allow a single instructionClobbersQuery 286 // to exist while MemoryLocOrCall is pushed through places. 287 if (UseMLOC.IsCall) 288 return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), 289 AA); 290 return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), 291 AA); 292 } 293 294 // Return true when MD may alias MU, return false otherwise. 
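// For example (illustrative): with MD = `store i32 0, i32* %p` and
// MU = `%v = load i32, i32* %p`, this returns true whenever the alias
// analysis reports that the store Mods the load's location.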
bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
                                        AliasAnalysis &AA) {
  return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber;
}

namespace {

struct UpwardsMemoryQuery {
  // True if our original query started off as a call
  bool IsCall = false;
  // The pointer location we started the query with. This will be empty if
  // IsCall is true.
  MemoryLocation StartingLoc;
  // This is the instruction we were querying about.
  const Instruction *Inst = nullptr;
  // The MemoryAccess we actually got called with, used to test local domination
  const MemoryAccess *OriginalAccess = nullptr;
  Optional<AliasResult> AR = MayAlias;

  UpwardsMemoryQuery() = default;

  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
      : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
    if (!IsCall)
      StartingLoc = MemoryLocation::get(Inst);
  }
};

} // end anonymous namespace

static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
                           AliasAnalysis &AA) {
  Instruction *Inst = MD->getMemoryInst();
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_end:
      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
    default:
      return false;
    }
  }
  return false;
}

static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
                                                   const Instruction *I) {
  // If the memory can't be changed, then loads of the memory can't be
  // clobbered.
  //
  // FIXME: We should handle invariant groups, as well. It's a bit harder,
  // because we need to pay close attention to invariant group barriers.
  return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
                              AA.pointsToConstantMemory(
                                  cast<LoadInst>(I)->getPointerOperand()));
}

/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
/// in between `Start` and `ClobberAt` can clobber `Start`.
///
/// This is meant to be as simple and self-contained as possible. Because it
/// uses no cache, etc., it can be relatively expensive.
///
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
static void LLVM_ATTRIBUTE_UNUSED
checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
                   const MemoryLocation &StartLoc, const MemorySSA &MSSA,
                   const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
  assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");

  if (MSSA.isLiveOnEntryDef(Start)) {
    assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
           "liveOnEntry must clobber itself");
    return;
  }

  bool FoundClobber = false;
  DenseSet<MemoryAccessPair> VisitedPhis;
  SmallVector<MemoryAccessPair, 8> Worklist;
  Worklist.emplace_back(Start, StartLoc);
  // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
  // is found, complain.
  while (!Worklist.empty()) {
    MemoryAccessPair MAP = Worklist.pop_back_val();
    // All we care about is that nothing from Start to ClobberAt clobbers Start.
384 // We learn nothing from revisiting nodes. 385 if (!VisitedPhis.insert(MAP).second) 386 continue; 387 388 for (MemoryAccess *MA : def_chain(MAP.first)) { 389 if (MA == ClobberAt) { 390 if (auto *MD = dyn_cast<MemoryDef>(MA)) { 391 // instructionClobbersQuery isn't essentially free, so don't use `|=`, 392 // since it won't let us short-circuit. 393 // 394 // Also, note that this can't be hoisted out of the `Worklist` loop, 395 // since MD may only act as a clobber for 1 of N MemoryLocations. 396 FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD); 397 if (!FoundClobber) { 398 ClobberAlias CA = 399 instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); 400 if (CA.IsClobber) { 401 FoundClobber = true; 402 // Not used: CA.AR; 403 } 404 } 405 } 406 break; 407 } 408 409 // We should never hit liveOnEntry, unless it's the clobber. 410 assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); 411 412 if (auto *MD = dyn_cast<MemoryDef>(MA)) { 413 (void)MD; 414 assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) 415 .IsClobber && 416 "Found clobber before reaching ClobberAt!"); 417 continue; 418 } 419 420 assert(isa<MemoryPhi>(MA)); 421 Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); 422 } 423 } 424 425 // If ClobberAt is a MemoryPhi, we can assume something above it acted as a 426 // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. 427 assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && 428 "ClobberAt never acted as a clobber"); 429 } 430 431 namespace { 432 433 /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up 434 /// in one class. 435 class ClobberWalker { 436 /// Save a few bytes by using unsigned instead of size_t. 437 using ListIndex = unsigned; 438 439 /// Represents a span of contiguous MemoryDefs, potentially ending in a 440 /// MemoryPhi. 441 struct DefPath { 442 MemoryLocation Loc; 443 // Note that, because we always walk in reverse, Last will always dominate 444 // First. Also note that First and Last are inclusive. 445 MemoryAccess *First; 446 MemoryAccess *Last; 447 Optional<ListIndex> Previous; 448 449 DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, 450 Optional<ListIndex> Previous) 451 : Loc(Loc), First(First), Last(Last), Previous(Previous) {} 452 453 DefPath(const MemoryLocation &Loc, MemoryAccess *Init, 454 Optional<ListIndex> Previous) 455 : DefPath(Loc, Init, Init, Previous) {} 456 }; 457 458 const MemorySSA &MSSA; 459 AliasAnalysis &AA; 460 DominatorTree &DT; 461 UpwardsMemoryQuery *Query; 462 463 // Phi optimization bookkeeping 464 SmallVector<DefPath, 32> Paths; 465 DenseSet<ConstMemoryAccessPair> VisitedPhis; 466 467 /// Find the nearest def or phi that `From` can legally be optimized to. 468 const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { 469 assert(From->getNumOperands() && "Phi with no operands?"); 470 471 BasicBlock *BB = From->getBlock(); 472 MemoryAccess *Result = MSSA.getLiveOnEntryDef(); 473 DomTreeNode *Node = DT.getNode(BB); 474 while ((Node = Node->getIDom())) { 475 auto *Defs = MSSA.getBlockDefs(Node->getBlock()); 476 if (Defs) 477 return &*Defs->rbegin(); 478 } 479 return Result; 480 } 481 482 /// Result of calling walkToPhiOrClobber. 483 struct UpwardsWalkResult { 484 /// The "Result" of the walk. Either a clobber, the last thing we walked, or 485 /// both. Include alias info when clobber found. 
486 MemoryAccess *Result; 487 bool IsKnownClobber; 488 Optional<AliasResult> AR; 489 }; 490 491 /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. 492 /// This will update Desc.Last as it walks. It will (optionally) also stop at 493 /// StopAt. 494 /// 495 /// This does not test for whether StopAt is a clobber 496 UpwardsWalkResult 497 walkToPhiOrClobber(DefPath &Desc, 498 const MemoryAccess *StopAt = nullptr) const { 499 assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); 500 501 for (MemoryAccess *Current : def_chain(Desc.Last)) { 502 Desc.Last = Current; 503 if (Current == StopAt) 504 return {Current, false, MayAlias}; 505 506 if (auto *MD = dyn_cast<MemoryDef>(Current)) { 507 if (MSSA.isLiveOnEntryDef(MD)) 508 return {MD, true, MustAlias}; 509 ClobberAlias CA = 510 instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); 511 if (CA.IsClobber) 512 return {MD, true, CA.AR}; 513 } 514 } 515 516 assert(isa<MemoryPhi>(Desc.Last) && 517 "Ended at a non-clobber that's not a phi?"); 518 return {Desc.Last, false, MayAlias}; 519 } 520 521 void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, 522 ListIndex PriorNode) { 523 auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), 524 upward_defs_end()); 525 for (const MemoryAccessPair &P : UpwardDefs) { 526 PausedSearches.push_back(Paths.size()); 527 Paths.emplace_back(P.second, P.first, PriorNode); 528 } 529 } 530 531 /// Represents a search that terminated after finding a clobber. This clobber 532 /// may or may not be present in the path of defs from LastNode..SearchStart, 533 /// since it may have been retrieved from cache. 534 struct TerminatedPath { 535 MemoryAccess *Clobber; 536 ListIndex LastNode; 537 }; 538 539 /// Get an access that keeps us from optimizing to the given phi. 540 /// 541 /// PausedSearches is an array of indices into the Paths array. Its incoming 542 /// value is the indices of searches that stopped at the last phi optimization 543 /// target. It's left in an unspecified state. 544 /// 545 /// If this returns None, NewPaused is a vector of searches that terminated 546 /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. 547 Optional<TerminatedPath> 548 getBlockingAccess(const MemoryAccess *StopWhere, 549 SmallVectorImpl<ListIndex> &PausedSearches, 550 SmallVectorImpl<ListIndex> &NewPaused, 551 SmallVectorImpl<TerminatedPath> &Terminated) { 552 assert(!PausedSearches.empty() && "No searches to continue?"); 553 554 // BFS vs DFS really doesn't make a difference here, so just do a DFS with 555 // PausedSearches as our stack. 556 while (!PausedSearches.empty()) { 557 ListIndex PathIndex = PausedSearches.pop_back_val(); 558 DefPath &Node = Paths[PathIndex]; 559 560 // If we've already visited this path with this MemoryLocation, we don't 561 // need to do so again. 562 // 563 // NOTE: That we just drop these paths on the ground makes caching 564 // behavior sporadic. e.g. given a diamond: 565 // A 566 // B C 567 // D 568 // 569 // ...If we walk D, B, A, C, we'll only cache the result of phi 570 // optimization for A, B, and D; C will be skipped because it dies here. 571 // This arguably isn't the worst thing ever, since: 572 // - We generally query things in a top-down order, so if we got below D 573 // without needing cache entries for {C, MemLoc}, then chances are 574 // that those cache entries would end up ultimately unused. 575 // - We still cache things for A, so C only needs to walk up a bit. 
576 // If this behavior becomes problematic, we can fix without a ton of extra 577 // work. 578 if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) 579 continue; 580 581 UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); 582 if (Res.IsKnownClobber) { 583 assert(Res.Result != StopWhere); 584 // If this wasn't a cache hit, we hit a clobber when walking. That's a 585 // failure. 586 TerminatedPath Term{Res.Result, PathIndex}; 587 if (!MSSA.dominates(Res.Result, StopWhere)) 588 return Term; 589 590 // Otherwise, it's a valid thing to potentially optimize to. 591 Terminated.push_back(Term); 592 continue; 593 } 594 595 if (Res.Result == StopWhere) { 596 // We've hit our target. Save this path off for if we want to continue 597 // walking. 598 NewPaused.push_back(PathIndex); 599 continue; 600 } 601 602 assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); 603 addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); 604 } 605 606 return None; 607 } 608 609 template <typename T, typename Walker> 610 struct generic_def_path_iterator 611 : public iterator_facade_base<generic_def_path_iterator<T, Walker>, 612 std::forward_iterator_tag, T *> { 613 generic_def_path_iterator() = default; 614 generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} 615 616 T &operator*() const { return curNode(); } 617 618 generic_def_path_iterator &operator++() { 619 N = curNode().Previous; 620 return *this; 621 } 622 623 bool operator==(const generic_def_path_iterator &O) const { 624 if (N.hasValue() != O.N.hasValue()) 625 return false; 626 return !N.hasValue() || *N == *O.N; 627 } 628 629 private: 630 T &curNode() const { return W->Paths[*N]; } 631 632 Walker *W = nullptr; 633 Optional<ListIndex> N = None; 634 }; 635 636 using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; 637 using const_def_path_iterator = 638 generic_def_path_iterator<const DefPath, const ClobberWalker>; 639 640 iterator_range<def_path_iterator> def_path(ListIndex From) { 641 return make_range(def_path_iterator(this, From), def_path_iterator()); 642 } 643 644 iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { 645 return make_range(const_def_path_iterator(this, From), 646 const_def_path_iterator()); 647 } 648 649 struct OptznResult { 650 /// The path that contains our result. 651 TerminatedPath PrimaryClobber; 652 /// The paths that we can legally cache back from, but that aren't 653 /// necessarily the result of the Phi optimization. 654 SmallVector<TerminatedPath, 4> OtherClobbers; 655 }; 656 657 ListIndex defPathIndex(const DefPath &N) const { 658 // The assert looks nicer if we don't need to do &N 659 const DefPath *NP = &N; 660 assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && 661 "Out of bounds DefPath!"); 662 return NP - &Paths.front(); 663 } 664 665 /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths 666 /// that act as legal clobbers. Note that this won't return *all* clobbers. 667 /// 668 /// Phi optimization algorithm tl;dr: 669 /// - Find the earliest def/phi, A, we can optimize to 670 /// - Find if all paths from the starting memory access ultimately reach A 671 /// - If not, optimization isn't possible. 672 /// - Otherwise, walk from A to another clobber or phi, A'. 673 /// - If A' is a def, we're done. 674 /// - If A' is a phi, try to optimize it. 675 /// 676 /// A path is a series of {MemoryAccess, MemoryLocation} pairs. 
A path 677 /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. 678 OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, 679 const MemoryLocation &Loc) { 680 assert(Paths.empty() && VisitedPhis.empty() && 681 "Reset the optimization state."); 682 683 Paths.emplace_back(Loc, Start, Phi, None); 684 // Stores how many "valid" optimization nodes we had prior to calling 685 // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. 686 auto PriorPathsSize = Paths.size(); 687 688 SmallVector<ListIndex, 16> PausedSearches; 689 SmallVector<ListIndex, 8> NewPaused; 690 SmallVector<TerminatedPath, 4> TerminatedPaths; 691 692 addSearches(Phi, PausedSearches, 0); 693 694 // Moves the TerminatedPath with the "most dominated" Clobber to the end of 695 // Paths. 696 auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { 697 assert(!Paths.empty() && "Need a path to move"); 698 auto Dom = Paths.begin(); 699 for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) 700 if (!MSSA.dominates(I->Clobber, Dom->Clobber)) 701 Dom = I; 702 auto Last = Paths.end() - 1; 703 if (Last != Dom) 704 std::iter_swap(Last, Dom); 705 }; 706 707 MemoryPhi *Current = Phi; 708 while (true) { 709 assert(!MSSA.isLiveOnEntryDef(Current) && 710 "liveOnEntry wasn't treated as a clobber?"); 711 712 const auto *Target = getWalkTarget(Current); 713 // If a TerminatedPath doesn't dominate Target, then it wasn't a legal 714 // optimization for the prior phi. 715 assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { 716 return MSSA.dominates(P.Clobber, Target); 717 })); 718 719 // FIXME: This is broken, because the Blocker may be reported to be 720 // liveOnEntry, and we'll happily wait for that to disappear (read: never) 721 // For the moment, this is fine, since we do nothing with blocker info. 722 if (Optional<TerminatedPath> Blocker = getBlockingAccess( 723 Target, PausedSearches, NewPaused, TerminatedPaths)) { 724 725 // Find the node we started at. We can't search based on N->Last, since 726 // we may have gone around a loop with a different MemoryLocation. 727 auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { 728 return defPathIndex(N) < PriorPathsSize; 729 }); 730 assert(Iter != def_path_iterator()); 731 732 DefPath &CurNode = *Iter; 733 assert(CurNode.Last == Current); 734 735 // Two things: 736 // A. We can't reliably cache all of NewPaused back. Consider a case 737 // where we have two paths in NewPaused; one of which can't optimize 738 // above this phi, whereas the other can. If we cache the second path 739 // back, we'll end up with suboptimal cache entries. We can handle 740 // cases like this a bit better when we either try to find all 741 // clobbers that block phi optimization, or when our cache starts 742 // supporting unfinished searches. 743 // B. We can't reliably cache TerminatedPaths back here without doing 744 // extra checks; consider a case like: 745 // T 746 // / \ 747 // D C 748 // \ / 749 // S 750 // Where T is our target, C is a node with a clobber on it, D is a 751 // diamond (with a clobber *only* on the left or right node, N), and 752 // S is our start. Say we walk to D, through the node opposite N 753 // (read: ignoring the clobber), and see a cache entry in the top 754 // node of D. That cache entry gets put into TerminatedPaths. We then 755 // walk up to C (N is later in our worklist), find the clobber, and 756 // quit. 
If we append TerminatedPaths to OtherClobbers, we'll cache 757 // the bottom part of D to the cached clobber, ignoring the clobber 758 // in N. Again, this problem goes away if we start tracking all 759 // blockers for a given phi optimization. 760 TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; 761 return {Result, {}}; 762 } 763 764 // If there's nothing left to search, then all paths led to valid clobbers 765 // that we got from our cache; pick the nearest to the start, and allow 766 // the rest to be cached back. 767 if (NewPaused.empty()) { 768 MoveDominatedPathToEnd(TerminatedPaths); 769 TerminatedPath Result = TerminatedPaths.pop_back_val(); 770 return {Result, std::move(TerminatedPaths)}; 771 } 772 773 MemoryAccess *DefChainEnd = nullptr; 774 SmallVector<TerminatedPath, 4> Clobbers; 775 for (ListIndex Paused : NewPaused) { 776 UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); 777 if (WR.IsKnownClobber) 778 Clobbers.push_back({WR.Result, Paused}); 779 else 780 // Micro-opt: If we hit the end of the chain, save it. 781 DefChainEnd = WR.Result; 782 } 783 784 if (!TerminatedPaths.empty()) { 785 // If we couldn't find the dominating phi/liveOnEntry in the above loop, 786 // do it now. 787 if (!DefChainEnd) 788 for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) 789 DefChainEnd = MA; 790 791 // If any of the terminated paths don't dominate the phi we'll try to 792 // optimize, we need to figure out what they are and quit. 793 const BasicBlock *ChainBB = DefChainEnd->getBlock(); 794 for (const TerminatedPath &TP : TerminatedPaths) { 795 // Because we know that DefChainEnd is as "high" as we can go, we 796 // don't need local dominance checks; BB dominance is sufficient. 797 if (DT.dominates(ChainBB, TP.Clobber->getBlock())) 798 Clobbers.push_back(TP); 799 } 800 } 801 802 // If we have clobbers in the def chain, find the one closest to Current 803 // and quit. 804 if (!Clobbers.empty()) { 805 MoveDominatedPathToEnd(Clobbers); 806 TerminatedPath Result = Clobbers.pop_back_val(); 807 return {Result, std::move(Clobbers)}; 808 } 809 810 assert(all_of(NewPaused, 811 [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); 812 813 // Because liveOnEntry is a clobber, this must be a phi. 814 auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); 815 816 PriorPathsSize = Paths.size(); 817 PausedSearches.clear(); 818 for (ListIndex I : NewPaused) 819 addSearches(DefChainPhi, PausedSearches, I); 820 NewPaused.clear(); 821 822 Current = DefChainPhi; 823 } 824 } 825 826 void verifyOptResult(const OptznResult &R) const { 827 assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { 828 return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); 829 })); 830 } 831 832 void resetPhiOptznState() { 833 Paths.clear(); 834 VisitedPhis.clear(); 835 } 836 837 public: 838 ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) 839 : MSSA(MSSA), AA(AA), DT(DT) {} 840 841 void reset() {} 842 843 /// Finds the nearest clobber for the given query, optimizing phis if 844 /// possible. 845 MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { 846 Query = &Q; 847 848 MemoryAccess *Current = Start; 849 // This walker pretends uses don't exist. If we're handed one, silently grab 850 // its def. 
(This has the nice side-effect of ensuring we never cache uses) 851 if (auto *MU = dyn_cast<MemoryUse>(Start)) 852 Current = MU->getDefiningAccess(); 853 854 DefPath FirstDesc(Q.StartingLoc, Current, Current, None); 855 // Fast path for the overly-common case (no crazy phi optimization 856 // necessary) 857 UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); 858 MemoryAccess *Result; 859 if (WalkResult.IsKnownClobber) { 860 Result = WalkResult.Result; 861 Q.AR = WalkResult.AR; 862 } else { 863 OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), 864 Current, Q.StartingLoc); 865 verifyOptResult(OptRes); 866 resetPhiOptznState(); 867 Result = OptRes.PrimaryClobber.Clobber; 868 } 869 870 #ifdef EXPENSIVE_CHECKS 871 checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); 872 #endif 873 return Result; 874 } 875 876 void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } 877 }; 878 879 struct RenamePassData { 880 DomTreeNode *DTN; 881 DomTreeNode::const_iterator ChildIt; 882 MemoryAccess *IncomingVal; 883 884 RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, 885 MemoryAccess *M) 886 : DTN(D), ChildIt(It), IncomingVal(M) {} 887 888 void swap(RenamePassData &RHS) { 889 std::swap(DTN, RHS.DTN); 890 std::swap(ChildIt, RHS.ChildIt); 891 std::swap(IncomingVal, RHS.IncomingVal); 892 } 893 }; 894 895 } // end anonymous namespace 896 897 namespace llvm { 898 899 /// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no 900 /// longer does caching on its own, 901 /// but the name has been retained for the moment. 902 class MemorySSA::CachingWalker final : public MemorySSAWalker { 903 ClobberWalker Walker; 904 bool AutoResetWalker = true; 905 906 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); 907 908 public: 909 CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); 910 ~CachingWalker() override = default; 911 912 using MemorySSAWalker::getClobberingMemoryAccess; 913 914 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; 915 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, 916 const MemoryLocation &) override; 917 void invalidateInfo(MemoryAccess *) override; 918 919 /// Whether we call resetClobberWalker() after each time we *actually* walk to 920 /// answer a clobber query. 921 void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; } 922 923 /// Drop the walker's persistent data structures. 924 void resetClobberWalker() { Walker.reset(); } 925 926 void verify(const MemorySSA *MSSA) override { 927 MemorySSAWalker::verify(MSSA); 928 Walker.verify(MSSA); 929 } 930 }; 931 932 } // end namespace llvm 933 934 void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, 935 bool RenameAllUses) { 936 // Pass through values to our successors 937 for (const BasicBlock *S : successors(BB)) { 938 auto It = PerBlockAccesses.find(S); 939 // Rename the phi nodes in our successor block 940 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 941 continue; 942 AccessList *Accesses = It->second.get(); 943 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 944 if (RenameAllUses) { 945 int PhiIndex = Phi->getBasicBlockIndex(BB); 946 assert(PhiIndex != -1 && "Incomplete phi during partial rename"); 947 Phi->setIncomingValue(PhiIndex, IncomingVal); 948 } else 949 Phi->addIncoming(IncomingVal, BB); 950 } 951 } 952 953 /// \brief Rename a single basic block into MemorySSA form. 954 /// Uses the standard SSA renaming algorithm. 
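/// For example (illustrative): if the incoming value is the MemoryDef `1` and
/// the block holds a MemoryUse followed by a MemoryDef `2`, the use gets `1`
/// as its defining access and `2` becomes the incoming value for everything
/// renamed after it.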
955 /// \returns The new incoming value. 956 MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, 957 bool RenameAllUses) { 958 auto It = PerBlockAccesses.find(BB); 959 // Skip most processing if the list is empty. 960 if (It != PerBlockAccesses.end()) { 961 AccessList *Accesses = It->second.get(); 962 for (MemoryAccess &L : *Accesses) { 963 if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { 964 if (MUD->getDefiningAccess() == nullptr || RenameAllUses) 965 MUD->setDefiningAccess(IncomingVal); 966 if (isa<MemoryDef>(&L)) 967 IncomingVal = &L; 968 } else { 969 IncomingVal = &L; 970 } 971 } 972 } 973 return IncomingVal; 974 } 975 976 /// \brief This is the standard SSA renaming algorithm. 977 /// 978 /// We walk the dominator tree in preorder, renaming accesses, and then filling 979 /// in phi nodes in our successors. 980 void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, 981 SmallPtrSetImpl<BasicBlock *> &Visited, 982 bool SkipVisited, bool RenameAllUses) { 983 SmallVector<RenamePassData, 32> WorkStack; 984 // Skip everything if we already renamed this block and we are skipping. 985 // Note: You can't sink this into the if, because we need it to occur 986 // regardless of whether we skip blocks or not. 987 bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; 988 if (SkipVisited && AlreadyVisited) 989 return; 990 991 IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); 992 renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); 993 WorkStack.push_back({Root, Root->begin(), IncomingVal}); 994 995 while (!WorkStack.empty()) { 996 DomTreeNode *Node = WorkStack.back().DTN; 997 DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; 998 IncomingVal = WorkStack.back().IncomingVal; 999 1000 if (ChildIt == Node->end()) { 1001 WorkStack.pop_back(); 1002 } else { 1003 DomTreeNode *Child = *ChildIt; 1004 ++WorkStack.back().ChildIt; 1005 BasicBlock *BB = Child->getBlock(); 1006 // Note: You can't sink this into the if, because we need it to occur 1007 // regardless of whether we skip blocks or not. 1008 AlreadyVisited = !Visited.insert(BB).second; 1009 if (SkipVisited && AlreadyVisited) { 1010 // We already visited this during our renaming, which can happen when 1011 // being asked to rename multiple blocks. Figure out the incoming val, 1012 // which is the last def. 1013 // Incoming value can only change if there is a block def, and in that 1014 // case, it's the last block def in the list. 1015 if (auto *BlockDefs = getWritableBlockDefs(BB)) 1016 IncomingVal = &*BlockDefs->rbegin(); 1017 } else 1018 IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); 1019 renameSuccessorPhis(BB, IncomingVal, RenameAllUses); 1020 WorkStack.push_back({Child, Child->begin(), IncomingVal}); 1021 } 1022 } 1023 } 1024 1025 /// \brief This handles unreachable block accesses by deleting phi nodes in 1026 /// unreachable blocks, and marking all other unreachable MemoryAccess's as 1027 /// being uses of the live on entry definition. 1028 void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { 1029 assert(!DT->isReachableFromEntry(BB) && 1030 "Reachable block found while handling unreachable blocks"); 1031 1032 // Make sure phi nodes in our reachable successors end up with a 1033 // LiveOnEntryDef for our incoming edge, even though our block is forward 1034 // unreachable. We could just disconnect these blocks from the CFG fully, 1035 // but we do not right now. 
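  // For example (illustrative): if this unreachable block branches to a
  // reachable successor that already has a MemoryPhi, that phi still needs an
  // incoming value for this edge, so it receives liveOnEntry below.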
1036 for (const BasicBlock *S : successors(BB)) { 1037 if (!DT->isReachableFromEntry(S)) 1038 continue; 1039 auto It = PerBlockAccesses.find(S); 1040 // Rename the phi nodes in our successor block 1041 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 1042 continue; 1043 AccessList *Accesses = It->second.get(); 1044 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 1045 Phi->addIncoming(LiveOnEntryDef.get(), BB); 1046 } 1047 1048 auto It = PerBlockAccesses.find(BB); 1049 if (It == PerBlockAccesses.end()) 1050 return; 1051 1052 auto &Accesses = It->second; 1053 for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { 1054 auto Next = std::next(AI); 1055 // If we have a phi, just remove it. We are going to replace all 1056 // users with live on entry. 1057 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) 1058 UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); 1059 else 1060 Accesses->erase(AI); 1061 AI = Next; 1062 } 1063 } 1064 1065 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) 1066 : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), 1067 NextID(0) { 1068 buildMemorySSA(); 1069 } 1070 1071 MemorySSA::~MemorySSA() { 1072 // Drop all our references 1073 for (const auto &Pair : PerBlockAccesses) 1074 for (MemoryAccess &MA : *Pair.second) 1075 MA.dropAllReferences(); 1076 } 1077 1078 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { 1079 auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); 1080 1081 if (Res.second) 1082 Res.first->second = llvm::make_unique<AccessList>(); 1083 return Res.first->second.get(); 1084 } 1085 1086 MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { 1087 auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); 1088 1089 if (Res.second) 1090 Res.first->second = llvm::make_unique<DefsList>(); 1091 return Res.first->second.get(); 1092 } 1093 1094 namespace llvm { 1095 1096 /// This class is a batch walker of all MemoryUse's in the program, and points 1097 /// their defining access at the thing that actually clobbers them. Because it 1098 /// is a batch walker that touches everything, it does not operate like the 1099 /// other walkers. This walker is basically performing a top-down SSA renaming 1100 /// pass, where the version stack is used as the cache. This enables it to be 1101 /// significantly more time and memory efficient than using the regular walker, 1102 /// which is walking bottom-up. 1103 class MemorySSA::OptimizeUses { 1104 public: 1105 OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, 1106 DominatorTree *DT) 1107 : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { 1108 Walker = MSSA->getWalker(); 1109 } 1110 1111 void optimizeUses(); 1112 1113 private: 1114 /// This represents where a given memorylocation is in the stack. 1115 struct MemlocStackInfo { 1116 // This essentially is keeping track of versions of the stack. Whenever 1117 // the stack changes due to pushes or pops, these versions increase. 1118 unsigned long StackEpoch; 1119 unsigned long PopEpoch; 1120 // This is the lower bound of places on the stack to check. It is equal to 1121 // the place the last stack walk ended. 1122 // Note: Correctness depends on this being initialized to 0, which densemap 1123 // does 1124 unsigned long LowerBound; 1125 const BasicBlock *LowerBoundBlock; 1126 // This is where the last walk for this memory location ended. 
    unsigned long LastKill;
    bool LastKillValid;
    Optional<AliasResult> AR;
  };

  void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
                           SmallVectorImpl<MemoryAccess *> &,
                           DenseMap<MemoryLocOrCall, MemlocStackInfo> &);

  MemorySSA *MSSA;
  MemorySSAWalker *Walker;
  AliasAnalysis *AA;
  DominatorTree *DT;
};

} // end namespace llvm

/// Optimize the uses in a given block. This is basically the SSA renaming
/// algorithm, with one caveat: We are able to use a single stack for all
/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
/// going to be some position in that stack of possible ones.
///
/// We track the stack positions that each MemoryLocation needs
/// to check, and last ended at. This is because we only want to check the
/// things that changed since last time. The same MemoryLocation should
/// get clobbered by the same store (getModRefInfo does not use invariantness or
/// things like this, and if they start, we can modify MemoryLocOrCall to
/// include relevant data)
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
    SmallVectorImpl<MemoryAccess *> &VersionStack,
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {

  /// If no accesses, nothing to do.
  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
  if (Accesses == nullptr)
    return;

  // Pop everything that doesn't dominate the current block off the stack,
  // increment the PopEpoch to account for this.
  while (true) {
    assert(
        !VersionStack.empty() &&
        "Version stack should have liveOnEntry sentinel dominating everything");
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
    if (DT->dominates(BackBlock, BB))
      break;
    while (VersionStack.back()->getBlock() == BackBlock)
      VersionStack.pop_back();
    ++PopEpoch;
  }

  for (MemoryAccess &MA : *Accesses) {
    auto *MU = dyn_cast<MemoryUse>(&MA);
    if (!MU) {
      VersionStack.push_back(&MA);
      ++StackEpoch;
      continue;
    }

    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
      continue;
    }

    MemoryLocOrCall UseMLOC(MU);
    auto &LocInfo = LocStackInfo[UseMLOC];
    // If the pop epoch changed, it means we've removed stuff from top of
    // stack due to changing blocks. We may have to reset the lower bound or
    // last kill info.
    if (LocInfo.PopEpoch != PopEpoch) {
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
      // If the lower bound was in something that no longer dominates us, we
      // have to reset it.
      // We can't simply track stack size, because the stack may have had
      // pushes/pops in the meantime.
      // XXX: This is non-optimal, but is only slower in cases with heavily
      // branching dominator trees. To get the optimal number of queries, we
      // would have to make lowerbound and lastkill a per-loc stack, and pop it
      // until the top of that stack dominates us. This does not seem worth it
      // ATM. A much cheaper optimization would be to always explore the
      // deepest branch of the dominator tree first.
This will guarantee this resets on 1211 // the smallest set of blocks. 1212 if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && 1213 !DT->dominates(LocInfo.LowerBoundBlock, BB)) { 1214 // Reset the lower bound of things to check. 1215 // TODO: Some day we should be able to reset to last kill, rather than 1216 // 0. 1217 LocInfo.LowerBound = 0; 1218 LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); 1219 LocInfo.LastKillValid = false; 1220 } 1221 } else if (LocInfo.StackEpoch != StackEpoch) { 1222 // If all that has changed is the StackEpoch, we only have to check the 1223 // new things on the stack, because we've checked everything before. In 1224 // this case, the lower bound of things to check remains the same. 1225 LocInfo.PopEpoch = PopEpoch; 1226 LocInfo.StackEpoch = StackEpoch; 1227 } 1228 if (!LocInfo.LastKillValid) { 1229 LocInfo.LastKill = VersionStack.size() - 1; 1230 LocInfo.LastKillValid = true; 1231 LocInfo.AR = MayAlias; 1232 } 1233 1234 // At this point, we should have corrected last kill and LowerBound to be 1235 // in bounds. 1236 assert(LocInfo.LowerBound < VersionStack.size() && 1237 "Lower bound out of range"); 1238 assert(LocInfo.LastKill < VersionStack.size() && 1239 "Last kill info out of range"); 1240 // In any case, the new upper bound is the top of the stack. 1241 unsigned long UpperBound = VersionStack.size() - 1; 1242 1243 if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { 1244 DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" 1245 << *(MU->getMemoryInst()) << ")" 1246 << " because there are " << UpperBound - LocInfo.LowerBound 1247 << " stores to disambiguate\n"); 1248 // Because we did not walk, LastKill is no longer valid, as this may 1249 // have been a kill. 1250 LocInfo.LastKillValid = false; 1251 continue; 1252 } 1253 bool FoundClobberResult = false; 1254 while (UpperBound > LocInfo.LowerBound) { 1255 if (isa<MemoryPhi>(VersionStack[UpperBound])) { 1256 // For phis, use the walker, see where we ended up, go there 1257 Instruction *UseInst = MU->getMemoryInst(); 1258 MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); 1259 // We are guaranteed to find it or something is wrong 1260 while (VersionStack[UpperBound] != Result) { 1261 assert(UpperBound != 0); 1262 --UpperBound; 1263 } 1264 FoundClobberResult = true; 1265 break; 1266 } 1267 1268 MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); 1269 // If the lifetime of the pointer ends at this instruction, it's live on 1270 // entry. 1271 if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { 1272 // Reset UpperBound to liveOnEntryDef's place in the stack 1273 UpperBound = 0; 1274 FoundClobberResult = true; 1275 LocInfo.AR = MustAlias; 1276 break; 1277 } 1278 ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); 1279 if (CA.IsClobber) { 1280 FoundClobberResult = true; 1281 LocInfo.AR = CA.AR; 1282 break; 1283 } 1284 --UpperBound; 1285 } 1286 1287 // Note: Phis always have AliasResult AR set to MayAlias ATM. 1288 1289 // At the end of this loop, UpperBound is either a clobber, or lower bound 1290 // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. 
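    // Illustrative summary of the update below: if we found a clobber (or
    // walked below the previous LastKill), the use is optimized to
    // VersionStack[UpperBound]; otherwise everything new that we checked was
    // a non-clobber and the previous LastKill still stands.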
1291 if (FoundClobberResult || UpperBound < LocInfo.LastKill) { 1292 // We were last killed now by where we got to 1293 if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound])) 1294 LocInfo.AR = None; 1295 MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR); 1296 LocInfo.LastKill = UpperBound; 1297 } else { 1298 // Otherwise, we checked all the new ones, and now we know we can get to 1299 // LastKill. 1300 MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR); 1301 } 1302 LocInfo.LowerBound = VersionStack.size() - 1; 1303 LocInfo.LowerBoundBlock = BB; 1304 } 1305 } 1306 1307 /// Optimize uses to point to their actual clobbering definitions. 1308 void MemorySSA::OptimizeUses::optimizeUses() { 1309 SmallVector<MemoryAccess *, 16> VersionStack; 1310 DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; 1311 VersionStack.push_back(MSSA->getLiveOnEntryDef()); 1312 1313 unsigned long StackEpoch = 1; 1314 unsigned long PopEpoch = 1; 1315 // We perform a non-recursive top-down dominator tree walk. 1316 for (const auto *DomNode : depth_first(DT->getRootNode())) 1317 optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, 1318 LocStackInfo); 1319 } 1320 1321 void MemorySSA::placePHINodes( 1322 const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks, 1323 const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) { 1324 // Determine where our MemoryPhi's should go 1325 ForwardIDFCalculator IDFs(*DT); 1326 IDFs.setDefiningBlocks(DefiningBlocks); 1327 SmallVector<BasicBlock *, 32> IDFBlocks; 1328 IDFs.calculate(IDFBlocks); 1329 1330 std::sort(IDFBlocks.begin(), IDFBlocks.end(), 1331 [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { 1332 return BBNumbers.lookup(A) < BBNumbers.lookup(B); 1333 }); 1334 1335 // Now place MemoryPhi nodes. 1336 for (auto &BB : IDFBlocks) 1337 createMemoryPhi(BB); 1338 } 1339 1340 void MemorySSA::buildMemorySSA() { 1341 // We create an access to represent "live on entry", for things like 1342 // arguments or users of globals, where the memory they use is defined before 1343 // the beginning of the function. We do not actually insert it into the IR. 1344 // We do not define a live on exit for the immediate uses, and thus our 1345 // semantics do *not* imply that something with no immediate uses can simply 1346 // be removed. 1347 BasicBlock &StartingPoint = F.getEntryBlock(); 1348 LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr, 1349 &StartingPoint, NextID++)); 1350 DenseMap<const BasicBlock *, unsigned int> BBNumbers; 1351 unsigned NextBBNum = 0; 1352 1353 // We maintain lists of memory accesses per-block, trading memory for time. We 1354 // could just look up the memory access for every possible instruction in the 1355 // stream. 1356 SmallPtrSet<BasicBlock *, 32> DefiningBlocks; 1357 // Go through each block, figure out where defs occur, and chain together all 1358 // the accesses. 
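  // For example (illustrative): in a block containing `store i32 0, i32* %p`
  // followed by `%v = load i32, i32* %p`, the store becomes a MemoryDef, the
  // load becomes a MemoryUse, and only the store marks the block as a
  // defining block.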
  for (BasicBlock &B : F) {
    BBNumbers[&B] = NextBBNum++;
    bool InsertIntoDef = false;
    AccessList *Accesses = nullptr;
    DefsList *Defs = nullptr;
    for (Instruction &I : B) {
      MemoryUseOrDef *MUD = createNewAccess(&I);
      if (!MUD)
        continue;

      if (!Accesses)
        Accesses = getOrCreateAccessList(&B);
      Accesses->push_back(MUD);
      if (isa<MemoryDef>(MUD)) {
        InsertIntoDef = true;
        if (!Defs)
          Defs = getOrCreateDefsList(&B);
        Defs->push_back(*MUD);
      }
    }
    if (InsertIntoDef)
      DefiningBlocks.insert(&B);
  }
  placePHINodes(DefiningBlocks, BBNumbers);

  // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
  // filled in with all blocks.
  SmallPtrSet<BasicBlock *, 16> Visited;
  renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);

  CachingWalker *Walker = getWalkerImpl();

  // We're doing a batch of updates; don't drop useful caches between them.
  Walker->setAutoResetWalker(false);
  OptimizeUses(this, Walker, AA, DT).optimizeUses();
  Walker->setAutoResetWalker(true);
  Walker->resetClobberWalker();

  // Mark the uses in unreachable blocks as live on entry, so that they go
  // somewhere.
  for (auto &BB : F)
    if (!Visited.count(&BB))
      markUnreachableAsLiveOnEntry(&BB);
}

MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }

MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
  if (Walker)
    return Walker.get();

  Walker = llvm::make_unique<CachingWalker>(this, AA, DT);
  return Walker.get();
}

// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
                                        const BasicBlock *BB,
                                        InsertionPlace Point) {
  auto *Accesses = getOrCreateAccessList(BB);
  if (Point == Beginning) {
    // If it's a phi node, it goes first, otherwise, it goes after any phi
    // nodes.
    if (isa<MemoryPhi>(NewAccess)) {
      Accesses->push_front(NewAccess);
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_front(*NewAccess);
    } else {
      auto AI = find_if_not(
          *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
      Accesses->insert(AI, NewAccess);
      if (!isa<MemoryUse>(NewAccess)) {
        auto *Defs = getOrCreateDefsList(BB);
        auto DI = find_if_not(
            *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
        Defs->insert(DI, *NewAccess);
      }
    }
  } else {
    Accesses->push_back(NewAccess);
    if (!isa<MemoryUse>(NewAccess)) {
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_back(*NewAccess);
    }
  }
  BlockNumberingValid.erase(BB);
}

void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
                                      AccessList::iterator InsertPt) {
  auto *Accesses = getWritableBlockAccesses(BB);
  bool WasEnd = InsertPt == Accesses->end();
  Accesses->insert(AccessList::iterator(InsertPt), What);
  if (!isa<MemoryUse>(What)) {
    auto *Defs = getOrCreateDefsList(BB);
    // If we got asked to insert at the end, we have an easy job, just shove it
    // at the end. If we got asked to insert before an existing def, we also get
    // an iterator. If we got asked to insert before a use, we have to hunt for
    // the next def.
    if (WasEnd) {
      Defs->push_back(*What);
    } else if (isa<MemoryDef>(InsertPt)) {
      Defs->insert(InsertPt->getDefsIterator(), *What);
    } else {
      while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt))
        ++InsertPt;
      // Either we found a def, or we are inserting at the end
      if (InsertPt == Accesses->end())
        Defs->push_back(*What);
      else
        Defs->insert(InsertPt->getDefsIterator(), *What);
    }
  }
  BlockNumberingValid.erase(BB);
}

// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
                       AccessList::iterator Where) {
  // Keep it in the lookup tables, remove from the lists
  removeFromLists(What, false);
  What->setBlock(BB);
  insertIntoListsBefore(What, BB, Where);
}

void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
                       InsertionPlace Point) {
  removeFromLists(What, false);
  What->setBlock(BB);
  insertIntoListsForBlock(What, BB, Point);
}

MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
  assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
  MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
  // Phi's always are placed at the front of the block.
  insertIntoListsForBlock(Phi, BB, Beginning);
  ValueToMemoryAccess[BB] = Phi;
  return Phi;
}

MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
                                               MemoryAccess *Definition) {
  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
  MemoryUseOrDef *NewAccess = createNewAccess(I);
  assert(
      NewAccess != nullptr &&
      "Tried to create a memory access for a non-memory touching instruction");
  NewAccess->setDefiningAccess(Definition);
  return NewAccess;
}

// Return true if the instruction has ordering constraints.
// Note specifically that this only considers stores and loads
// because others are still considered ModRef by getModRefInfo.
static inline bool isOrdered(const Instruction *I) {
  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (!SI->isUnordered())
      return true;
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isUnordered())
      return true;
  }
  return false;
}

/// \brief Helper function to create new memory accesses
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
  // The assume intrinsic has a control dependency which we model by claiming
  // that it writes arbitrarily. Ignore that fake memory dependency here.
  // FIXME: Replace this special casing with a more accurate modelling of
  // assume's control dependency.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
    if (II->getIntrinsicID() == Intrinsic::assume)
      return nullptr;

  // Find out what effect this instruction has on memory.
  ModRefInfo ModRef = AA->getModRefInfo(I, None);
  // The isOrdered check is used to ensure that volatiles end up as defs
  // (atomics end up as ModRef right now anyway). Until we separate the
  // ordering chain from the memory chain, this enables people to see at least
  // some relative ordering to volatiles.
  // Note that getClobberingMemoryAccess will still give an answer that
  // bypasses other volatile loads. TODO: Separate memory aliasing and ordering
  // into two different chains so that we can precisely represent both "what
  // memory will this read/write/is clobbered by" and "what instructions can I
  // move this past".
  bool Def = isModSet(ModRef) || isOrdered(I);
  bool Use = isRefSet(ModRef);

  // It's possible for an instruction to not modify memory at all. During
  // construction, we ignore them.
  if (!Def && !Use)
    return nullptr;

  assert((Def || Use) &&
         "Trying to create a memory access with a non-memory instruction");

  MemoryUseOrDef *MUD;
  if (Def)
    MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
  else
    MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
  ValueToMemoryAccess[I] = MUD;
  return MUD;
}

/// \brief Returns true if \p Replacer dominates \p Replacee.
bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
                             const MemoryAccess *Replacee) const {
  if (isa<MemoryUseOrDef>(Replacee))
    return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
  const auto *MP = cast<MemoryPhi>(Replacee);
  // For a phi node, the use occurs in the predecessor block of the phi node.
  // Since we may occur multiple times in the phi node, we have to check each
  // operand to ensure Replacer dominates each operand where Replacee occurs.
  for (const Use &Arg : MP->operands()) {
    if (Arg.get() != Replacee &&
        !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
      return false;
  }
  return true;
}

/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
  assert(MA->use_empty() &&
         "Trying to remove memory access that still has uses");
  BlockNumbering.erase(MA);
  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MUD->setDefiningAccess(nullptr);
  // Invalidate our walker's cache if necessary
  if (!isa<MemoryUse>(MA))
    Walker->invalidateInfo(MA);
  // The call below to erase will destroy MA, so we can't change the order we
  // are doing things here
  Value *MemoryInst;
  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
    MemoryInst = MUD->getMemoryInst();
  } else {
    MemoryInst = MA->getBlock();
  }
  auto VMA = ValueToMemoryAccess.find(MemoryInst);
  if (VMA->second == MA)
    ValueToMemoryAccess.erase(VMA);
}

/// \brief Properly remove \p MA from all of MemorySSA's lists.
///
/// Because of the way the intrusive list and use lists work, it is important to
/// do removal in the right order.
/// ShouldDelete defaults to true, and will cause the memory access to also be
/// deleted, not just removed.
void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
  // The access list owns the reference, so we erase it from the non-owning list
  // first.
  if (!isa<MemoryUse>(MA)) {
    auto DefsIt = PerBlockDefs.find(MA->getBlock());
    std::unique_ptr<DefsList> &Defs = DefsIt->second;
    Defs->remove(*MA);
    if (Defs->empty())
      PerBlockDefs.erase(DefsIt);
  }

  // The erase call here will delete it. If we don't want it deleted, we call
  // remove instead.
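  // (On the owning access list, erase() unlinks and destroys the node, while
  // remove() only unlinks it and leaves the access alive for the caller.)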
  auto AccessIt = PerBlockAccesses.find(MA->getBlock());
  std::unique_ptr<AccessList> &Accesses = AccessIt->second;
  if (ShouldDelete)
    Accesses->erase(MA);
  else
    Accesses->remove(MA);

  if (Accesses->empty())
    PerBlockAccesses.erase(AccessIt);
}

void MemorySSA::print(raw_ostream &OS) const {
  MemorySSAAnnotatedWriter Writer(this);
  F.print(OS, &Writer);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif

void MemorySSA::verifyMemorySSA() const {
  verifyDefUses(F);
  verifyDomination(F);
  verifyOrdering(F);
  Walker->verify(this);
}

/// \brief Verify that the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
void MemorySSA::verifyOrdering(Function &F) const {
  // Walk all the blocks, comparing what the lookups think and what the access
  // lists think, as well as the order in the blocks vs the order in the access
  // lists.
  SmallVector<MemoryAccess *, 32> ActualAccesses;
  SmallVector<MemoryAccess *, 32> ActualDefs;
  for (BasicBlock &B : F) {
    const AccessList *AL = getBlockAccesses(&B);
    const auto *DL = getBlockDefs(&B);
    MemoryAccess *Phi = getMemoryAccess(&B);
    if (Phi) {
      ActualAccesses.push_back(Phi);
      ActualDefs.push_back(Phi);
    }

    for (Instruction &I : B) {
      MemoryAccess *MA = getMemoryAccess(&I);
      assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
             "We have memory affecting instructions "
             "in this block but they are not in the "
             "access list or defs list");
      if (MA) {
        ActualAccesses.push_back(MA);
        if (isa<MemoryDef>(MA))
          ActualDefs.push_back(MA);
      }
    }
    // Either we hit the assert, really have no accesses, or we have both
    // accesses and an access list.
    // Same with defs.
    if (!AL && !DL)
      continue;
    assert(AL->size() == ActualAccesses.size() &&
           "We don't have the same number of accesses in the block as on the "
           "access list");
    assert((DL || ActualDefs.size() == 0) &&
           "Either we should have a defs list, or we should have no defs");
    assert((!DL || DL->size() == ActualDefs.size()) &&
           "We don't have the same number of defs in the block as on the "
           "def list");
    auto ALI = AL->begin();
    auto AAI = ActualAccesses.begin();
    while (ALI != AL->end() && AAI != ActualAccesses.end()) {
      assert(&*ALI == *AAI && "Not the same accesses in the same order");
      ++ALI;
      ++AAI;
    }
    ActualAccesses.clear();
    if (DL) {
      auto DLI = DL->begin();
      auto ADI = ActualDefs.begin();
      while (DLI != DL->end() && ADI != ActualDefs.end()) {
        assert(&*DLI == *ADI && "Not the same defs in the same order");
        ++DLI;
        ++ADI;
      }
    }
    ActualDefs.clear();
  }
}

/// \brief Verify the domination properties of MemorySSA by checking that each
/// definition dominates all of its uses.
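/// (This check only runs in builds with assertions enabled; its body is
/// compiled out under NDEBUG.)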
void MemorySSA::verifyDomination(Function &F) const {
#ifndef NDEBUG
  for (BasicBlock &B : F) {
    // Phi nodes are attached to basic blocks
    if (MemoryPhi *MP = getMemoryAccess(&B))
      for (const Use &U : MP->uses())
        assert(dominates(MP, U) && "Memory PHI does not dominate its uses");

    for (Instruction &I : B) {
      MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
      if (!MD)
        continue;

      for (const Use &U : MD->uses())
        assert(dominates(MD, U) && "Memory Def does not dominate its uses");
    }
  }
#endif
}

/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#ifndef NDEBUG
  // The live on entry use may cause us to get a NULL def here
  if (!Def)
    assert(isLiveOnEntryDef(Use) &&
           "Null def but use does not point to live on entry def");
  else
    assert(is_contained(Def->users(), Use) &&
           "Did not find use in def's use list");
#endif
}

/// \brief Verify the immediate use information, by walking all the memory
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list
void MemorySSA::verifyDefUses(Function &F) const {
  for (BasicBlock &B : F) {
    // Phi nodes are attached to basic blocks
    if (MemoryPhi *Phi = getMemoryAccess(&B)) {
      assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
                                          pred_begin(&B), pred_end(&B))) &&
             "Incomplete MemoryPhi Node");
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
        verifyUseInDefs(Phi->getIncomingValue(I), Phi);
    }

    for (Instruction &I : B) {
      if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
        verifyUseInDefs(MA->getDefiningAccess(), MA);
      }
    }
  }
}

MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
  return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
}

MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
  return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
}

/// Perform a local numbering on blocks so that instruction ordering can be
/// determined in constant time.
/// TODO: We currently just number in order. If we numbered by N, we could
/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
/// log2(N) sequences of mixed before and after) without needing to invalidate
/// the numbering.
void MemorySSA::renumberBlock(const BasicBlock *B) const {
  // The pre-increment ensures the numbers really start at 1.
  unsigned long CurrentNumber = 0;
  const AccessList *AL = getBlockAccesses(B);
  assert(AL != nullptr && "Asking to renumber an empty block");
  for (const auto &I : *AL)
    BlockNumbering[&I] = ++CurrentNumber;
  BlockNumberingValid.insert(B);
}

/// \brief Determine, for two memory accesses in the same block,
/// whether \p Dominator dominates \p Dominatee.
/// \returns True if \p Dominator dominates \p Dominatee.
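/// The result relies on the per-block numbering of the access list, which is
/// rebuilt lazily via renumberBlock whenever the cached numbering for the
/// block has been invalidated.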
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
                                 const MemoryAccess *Dominatee) const {
  const BasicBlock *DominatorBlock = Dominator->getBlock();

  assert((DominatorBlock == Dominatee->getBlock()) &&
         "Asking for local domination when accesses are in different blocks!");
  // A node dominates itself.
  if (Dominatee == Dominator)
    return true;

  // When Dominatee is defined on function entry, it is not dominated by another
  // memory access.
  if (isLiveOnEntryDef(Dominatee))
    return false;

  // When Dominator is defined on function entry, it dominates the other memory
  // access.
  if (isLiveOnEntryDef(Dominator))
    return true;

  if (!BlockNumberingValid.count(DominatorBlock))
    renumberBlock(DominatorBlock);

  unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
  // All numbers start with 1
  assert(DominatorNum != 0 && "Block was not numbered properly");
  unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
  assert(DominateeNum != 0 && "Block was not numbered properly");
  return DominatorNum < DominateeNum;
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const MemoryAccess *Dominatee) const {
  if (Dominator == Dominatee)
    return true;

  if (isLiveOnEntryDef(Dominatee))
    return false;

  if (Dominator->getBlock() != Dominatee->getBlock())
    return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
  return locallyDominates(Dominator, Dominatee);
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const Use &Dominatee) const {
  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) {
    BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
    // The def must dominate the incoming block of the phi.
    if (UseBB != Dominator->getBlock())
      return DT->dominates(Dominator->getBlock(), UseBB);
    // If the UseBB and the DefBB are the same, compare locally.
    return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
  }
  // If it's not a PHI node use, the normal dominates can already handle it.
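  // (The user is itself a MemoryAccess in that case, so ordinary
  // access-to-access domination applies.)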
  return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}

const static char LiveOnEntryStr[] = "liveOnEntry";

void MemoryAccess::print(raw_ostream &OS) const {
  switch (getValueID()) {
  case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
  case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
  case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
  }
  llvm_unreachable("invalid value id");
}

void MemoryDef::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();

  OS << getID() << " = MemoryDef(";
  if (UO && UO->getID())
    OS << UO->getID();
  else
    OS << LiveOnEntryStr;
  OS << ')';
}

void MemoryPhi::print(raw_ostream &OS) const {
  bool First = true;
  OS << getID() << " = MemoryPhi(";
  for (const auto &Op : operands()) {
    BasicBlock *BB = getIncomingBlock(Op);
    MemoryAccess *MA = cast<MemoryAccess>(Op);
    if (!First)
      OS << ',';
    else
      First = false;

    OS << '{';
    if (BB->hasName())
      OS << BB->getName();
    else
      BB->printAsOperand(OS, false);
    OS << ',';
    if (unsigned ID = MA->getID())
      OS << ID;
    else
      OS << LiveOnEntryStr;
    OS << '}';
  }
  OS << ')';
}

void MemoryUse::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();
  OS << "MemoryUse(";
  if (UO && UO->getID())
    OS << UO->getID();
  else
    OS << LiveOnEntryStr;
  OS << ')';
}

void MemoryAccess::dump() const {
// Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  print(dbgs());
  dbgs() << "\n";
#endif
}

char MemorySSAPrinterLegacyPass::ID = 0;

MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
  initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}

void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MemorySSAWrapperPass>();
}

bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
  auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  MSSA.print(dbgs());
  if (VerifyMemorySSA)
    MSSA.verifyMemorySSA();
  return false;
}

AnalysisKey MemorySSAAnalysis::Key;

MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
}

PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  OS << "MemorySSA for function: " << F.getName() << "\n";
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);

  return PreservedAnalyses::all();
}

PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();

  return PreservedAnalyses::all();
}

char MemorySSAWrapperPass::ID = 0;

MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
  initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
}
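
// Illustrative sketch (not part of this file's implementation): how a new
// pass manager pass might obtain MemorySSA through MemorySSAAnalysis,
// mirroring MemorySSAPrinterPass::run above. The pass name
// MyMSSAConsumerPass is hypothetical; the analysis-manager calls are the ones
// already used in this file.
#if 0
struct MyMSSAConsumerPass : PassInfoMixin<MyMSSAConsumerPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
    MSSA.verifyMemorySSA();
    // This pass only inspects MemorySSA, so everything is preserved.
    return PreservedAnalyses::all();
  }
};
#endif
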
void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }

void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<AAResultsWrapperPass>();
}

bool MemorySSAWrapperPass::runOnFunction(Function &F) {
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MSSA.reset(new MemorySSA(F, &AA, &DT));
  return false;
}

void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }

void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
  MSSA->print(OS);
}

MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}

MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
                                        DominatorTree *D)
    : MemorySSAWalker(M), Walker(*M, *A, *D) {}

void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MUD->resetOptimized();
}

/// \brief Walk the use-def chains starting at \p MA and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
  MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
#ifdef EXPENSIVE_CHECKS
  MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q);
  assert(NewNoCache == New && "Cache made us hand back a different result?");
  (void)NewNoCache;
#endif
  if (AutoResetWalker)
    resetClobberWalker();
  return New;
}

MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
  if (isa<MemoryPhi>(StartingAccess))
    return StartingAccess;

  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
    return StartingUseOrDef;

  Instruction *I = StartingUseOrDef->getMemoryInst();

  // Conservatively, fences are always clobbers, so don't perform the walk if we
  // hit a fence.
  if (!ImmutableCallSite(I) && I->isFenceLike())
    return StartingUseOrDef;

  UpwardsMemoryQuery Q;
  Q.OriginalAccess = StartingUseOrDef;
  Q.StartingLoc = Loc;
  Q.Inst = I;
  Q.IsCall = false;

  // Unlike the other function, do not walk to the def of a def, because we are
  // handed something we already believe is the clobbering access.
  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
                                     ? StartingUseOrDef->getDefiningAccess()
                                     : StartingUseOrDef;

  MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *StartingUseOrDef << "\n");
  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *Clobber << "\n");
  return Clobber;
}

MemoryAccess *
MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
  // If this is a MemoryPhi, we can't do anything.
  if (!StartingAccess)
    return MA;

  // If this is an already optimized use or def, return the optimized result.
  // Note: Currently, we store the optimized def result in a separate field,
  // since we can't use the defining access.
  if (StartingAccess->isOptimized())
    return StartingAccess->getOptimized();

  const Instruction *I = StartingAccess->getMemoryInst();
  UpwardsMemoryQuery Q(I, StartingAccess);
  // We can't sanely do anything with fences; they conservatively clobber all
  // memory, and have no locations to get pointers from to try to disambiguate.
  if (!Q.IsCall && I->isFenceLike())
    return StartingAccess;

  if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
    if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) {
      MUD->setOptimized(LiveOnEntry);
      MUD->setOptimizedAccessType(None);
    }
    return LiveOnEntry;
  }

  // Start with the thing we already think clobbers this location
  MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();

  // At this point, DefiningAccess may be the live on entry def.
  // If it is, we will not get a better result.
  if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
    if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) {
      MUD->setOptimized(DefiningAccess);
      MUD->setOptimizedAccessType(None);
    }
    return DefiningAccess;
  }

  MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *DefiningAccess << "\n");
  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
  DEBUG(dbgs() << *Result << "\n");

  if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) {
    MUD->setOptimized(Result);
    if (MSSA->isLiveOnEntryDef(Result))
      MUD->setOptimizedAccessType(None);
    else if (Q.AR == MustAlias)
      MUD->setOptimizedAccessType(MustAlias);
  }

  return Result;
}

MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
    return Use->getDefiningAccess();
  return MA;
}

MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, const MemoryLocation &) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
    return Use->getDefiningAccess();
  return StartingAccess;
}

void MemoryPhi::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryPhi *>(Self);
}

void MemoryDef::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryDef *>(Self);
}

void MemoryUse::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryUse *>(Self);
}
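
// Illustrative sketch (not part of MemorySSA itself): how client code might
// walk a function and ask the caching walker for each access's clobbering
// access, using only APIs defined above. The helper name
// printClobberingAccesses is hypothetical.
#if 0
static void printClobberingAccesses(Function &F, MemorySSA &MSSA) {
  MemorySSAWalker *Walker = MSSA.getWalker();
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (MemoryUseOrDef *MUD = MSSA.getMemoryAccess(&I)) {
        // Walks up the use-def chain for this use or def; the result is
        // cached on the access so repeated queries are cheap.
        MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MUD);
        dbgs() << I << "\n  clobbered by " << *Clobber << "\n";
      }
}
#endif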