1 //===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the MemorySSA class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/MemorySSA.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/DepthFirstIterator.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/None.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/iterator.h"
26 #include "llvm/ADT/iterator_range.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/Analysis/IteratedDominanceFrontier.h"
29 #include "llvm/Analysis/MemoryLocation.h"
30 #include "llvm/Config/llvm-config.h"
31 #include "llvm/IR/AssemblyAnnotationWriter.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Intrinsics.h"
40 #include "llvm/IR/LLVMContext.h"
41 #include "llvm/IR/PassManager.h"
42 #include "llvm/IR/Use.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/AtomicOrdering.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/ErrorHandling.h"
50 #include "llvm/Support/FormattedStream.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <iterator>
55 #include <memory>
56 #include <utility>
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "memoryssa"
61 
62 INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
63 true)
64 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
65 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
66 INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
67 true)
68 
69 INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
70 "Memory SSA Printer", false, false)
71 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
72 INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
73 "Memory SSA Printer", false, false)
74 
75 static cl::opt<unsigned> MaxCheckLimit(
76 "memssa-check-limit", cl::Hidden, cl::init(100),
77 cl::desc("The maximum number of stores/phis MemorySSA "
78 "will consider trying to walk past (default = 100)"));
79 
80 static cl::opt<bool>
81 VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
82 cl::desc("Verify MemorySSA in legacy printer pass."));
83 
84 namespace llvm {
85 
86 /// An assembly annotator class to print Memory SSA information in
87 /// comments.
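/// A minimal usage sketch (illustrative only; it mirrors what MemorySSA::print
/// does later in this file, and the choice of output stream is an assumption):
///   MemorySSAAnnotatedWriter Writer(&MSSA);
///   F.print(llvm::errs(), &Writer);
/// Each basic block and memory-affecting instruction is then printed with its
/// MemoryPhi/MemoryDef/MemoryUse as a "; ..." comment line.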
88 class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { 89 friend class MemorySSA; 90 91 const MemorySSA *MSSA; 92 93 public: 94 MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} 95 96 void emitBasicBlockStartAnnot(const BasicBlock *BB, 97 formatted_raw_ostream &OS) override { 98 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) 99 OS << "; " << *MA << "\n"; 100 } 101 102 void emitInstructionAnnot(const Instruction *I, 103 formatted_raw_ostream &OS) override { 104 if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) 105 OS << "; " << *MA << "\n"; 106 } 107 }; 108 109 } // end namespace llvm 110 111 namespace { 112 113 /// Our current alias analysis API differentiates heavily between calls and 114 /// non-calls, and functions called on one usually assert on the other. 115 /// This class encapsulates the distinction to simplify other code that wants 116 /// "Memory affecting instructions and related data" to use as a key. 117 /// For example, this class is used as a densemap key in the use optimizer. 118 class MemoryLocOrCall { 119 public: 120 bool IsCall = false; 121 122 MemoryLocOrCall() = default; 123 MemoryLocOrCall(MemoryUseOrDef *MUD) 124 : MemoryLocOrCall(MUD->getMemoryInst()) {} 125 MemoryLocOrCall(const MemoryUseOrDef *MUD) 126 : MemoryLocOrCall(MUD->getMemoryInst()) {} 127 128 MemoryLocOrCall(Instruction *Inst) { 129 if (ImmutableCallSite(Inst)) { 130 IsCall = true; 131 CS = ImmutableCallSite(Inst); 132 } else { 133 IsCall = false; 134 // There is no such thing as a memorylocation for a fence inst, and it is 135 // unique in that regard. 136 if (!isa<FenceInst>(Inst)) 137 Loc = MemoryLocation::get(Inst); 138 } 139 } 140 141 explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {} 142 143 ImmutableCallSite getCS() const { 144 assert(IsCall); 145 return CS; 146 } 147 148 MemoryLocation getLoc() const { 149 assert(!IsCall); 150 return Loc; 151 } 152 153 bool operator==(const MemoryLocOrCall &Other) const { 154 if (IsCall != Other.IsCall) 155 return false; 156 157 if (!IsCall) 158 return Loc == Other.Loc; 159 160 if (CS.getCalledValue() != Other.CS.getCalledValue()) 161 return false; 162 163 return CS.arg_size() == Other.CS.arg_size() && 164 std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin()); 165 } 166 167 private: 168 union { 169 ImmutableCallSite CS; 170 MemoryLocation Loc; 171 }; 172 }; 173 174 } // end anonymous namespace 175 176 namespace llvm { 177 178 template <> struct DenseMapInfo<MemoryLocOrCall> { 179 static inline MemoryLocOrCall getEmptyKey() { 180 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); 181 } 182 183 static inline MemoryLocOrCall getTombstoneKey() { 184 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); 185 } 186 187 static unsigned getHashValue(const MemoryLocOrCall &MLOC) { 188 if (!MLOC.IsCall) 189 return hash_combine( 190 MLOC.IsCall, 191 DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); 192 193 hash_code hash = 194 hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue( 195 MLOC.getCS().getCalledValue())); 196 197 for (const Value *Arg : MLOC.getCS().args()) 198 hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg)); 199 return hash; 200 } 201 202 static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { 203 return LHS == RHS; 204 } 205 }; 206 207 } // end namespace llvm 208 209 /// This does one-way checks to see if Use could theoretically be hoisted above 210 /// MayClobber. This will not check the other way around. 
211 /// 212 /// This assumes that, for the purposes of MemorySSA, Use comes directly after 213 /// MayClobber, with no potentially clobbering operations in between them. 214 /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) 215 static bool areLoadsReorderable(const LoadInst *Use, 216 const LoadInst *MayClobber) { 217 bool VolatileUse = Use->isVolatile(); 218 bool VolatileClobber = MayClobber->isVolatile(); 219 // Volatile operations may never be reordered with other volatile operations. 220 if (VolatileUse && VolatileClobber) 221 return false; 222 // Otherwise, volatile doesn't matter here. From the language reference: 223 // 'optimizers may change the order of volatile operations relative to 224 // non-volatile operations.'" 225 226 // If a load is seq_cst, it cannot be moved above other loads. If its ordering 227 // is weaker, it can be moved above other loads. We just need to be sure that 228 // MayClobber isn't an acquire load, because loads can't be moved above 229 // acquire loads. 230 // 231 // Note that this explicitly *does* allow the free reordering of monotonic (or 232 // weaker) loads of the same address. 233 bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; 234 bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), 235 AtomicOrdering::Acquire); 236 return !(SeqCstUse || MayClobberIsAcquire); 237 } 238 239 namespace { 240 241 struct ClobberAlias { 242 bool IsClobber; 243 Optional<AliasResult> AR; 244 }; 245 246 } // end anonymous namespace 247 248 // Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being 249 // ignored if IsClobber = false. 250 static ClobberAlias instructionClobbersQuery(MemoryDef *MD, 251 const MemoryLocation &UseLoc, 252 const Instruction *UseInst, 253 AliasAnalysis &AA) { 254 Instruction *DefInst = MD->getMemoryInst(); 255 assert(DefInst && "Defining instruction not actually an instruction"); 256 ImmutableCallSite UseCS(UseInst); 257 Optional<AliasResult> AR; 258 259 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { 260 // These intrinsics will show up as affecting memory, but they are just 261 // markers. 262 switch (II->getIntrinsicID()) { 263 case Intrinsic::lifetime_start: 264 if (UseCS) 265 return {false, NoAlias}; 266 AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc); 267 return {AR == MustAlias, AR}; 268 case Intrinsic::lifetime_end: 269 case Intrinsic::invariant_start: 270 case Intrinsic::invariant_end: 271 case Intrinsic::assume: 272 return {false, NoAlias}; 273 default: 274 break; 275 } 276 } 277 278 if (UseCS) { 279 ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); 280 AR = isMustSet(I) ? MustAlias : MayAlias; 281 return {isModOrRefSet(I), AR}; 282 } 283 284 if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) 285 if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) 286 return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias}; 287 288 ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc); 289 AR = isMustSet(I) ? MustAlias : MayAlias; 290 return {isModSet(I), AR}; 291 } 292 293 static ClobberAlias instructionClobbersQuery(MemoryDef *MD, 294 const MemoryUseOrDef *MU, 295 const MemoryLocOrCall &UseMLOC, 296 AliasAnalysis &AA) { 297 // FIXME: This is a temporary hack to allow a single instructionClobbersQuery 298 // to exist while MemoryLocOrCall is pushed through places. 
299 if (UseMLOC.IsCall)
300 return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
301 AA);
302 return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
303 AA);
304 }
305 
306 // Return true when MD may alias MU, return false otherwise.
307 bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
308 AliasAnalysis &AA) {
309 return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber;
310 }
311 
312 namespace {
313 
314 struct UpwardsMemoryQuery {
315 // True if our original query started off as a call
316 bool IsCall = false;
317 // The pointer location we started the query with. This will be empty if
318 // IsCall is true.
319 MemoryLocation StartingLoc;
320 // This is the instruction we were querying about.
321 const Instruction *Inst = nullptr;
322 // The MemoryAccess we actually got called with, used to test local domination
323 const MemoryAccess *OriginalAccess = nullptr;
324 Optional<AliasResult> AR = MayAlias;
325 
326 UpwardsMemoryQuery() = default;
327 
328 UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
329 : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
330 if (!IsCall)
331 StartingLoc = MemoryLocation::get(Inst);
332 }
333 };
334 
335 } // end anonymous namespace
336 
337 static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
338 AliasAnalysis &AA) {
339 Instruction *Inst = MD->getMemoryInst();
340 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
341 switch (II->getIntrinsicID()) {
342 case Intrinsic::lifetime_end:
343 return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
344 default:
345 return false;
346 }
347 }
348 return false;
349 }
350 
351 static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
352 const Instruction *I) {
353 // If the memory can't be changed, then loads of the memory can't be
354 // clobbered.
355 return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
356 AA.pointsToConstantMemory(cast<LoadInst>(I)->
357 getPointerOperand()));
358 }
359 
360 /// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
361 /// in between `Start` and `ClobberAt` can clobber `Start`.
362 ///
363 /// This is meant to be as simple and self-contained as possible. Because it
364 /// uses no cache, etc., it can be relatively expensive.
365 ///
366 /// \param Start The MemoryAccess that we want to walk from.
367 /// \param ClobberAt A clobber for Start.
368 /// \param StartLoc The MemoryLocation for Start.
369 /// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
370 /// \param Query The UpwardsMemoryQuery we used for our search.
371 /// \param AA The AliasAnalysis we used for our search.
372 static void LLVM_ATTRIBUTE_UNUSED
373 checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
374 const MemoryLocation &StartLoc, const MemorySSA &MSSA,
375 const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
376 assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
377 
378 if (MSSA.isLiveOnEntryDef(Start)) {
379 assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
380 "liveOnEntry must clobber itself");
381 return;
382 }
383 
384 bool FoundClobber = false;
385 DenseSet<MemoryAccessPair> VisitedPhis;
386 SmallVector<MemoryAccessPair, 8> Worklist;
387 Worklist.emplace_back(Start, StartLoc);
388 // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
389 // is found, complain.
390 while (!Worklist.empty()) { 391 MemoryAccessPair MAP = Worklist.pop_back_val(); 392 // All we care about is that nothing from Start to ClobberAt clobbers Start. 393 // We learn nothing from revisiting nodes. 394 if (!VisitedPhis.insert(MAP).second) 395 continue; 396 397 for (MemoryAccess *MA : def_chain(MAP.first)) { 398 if (MA == ClobberAt) { 399 if (auto *MD = dyn_cast<MemoryDef>(MA)) { 400 // instructionClobbersQuery isn't essentially free, so don't use `|=`, 401 // since it won't let us short-circuit. 402 // 403 // Also, note that this can't be hoisted out of the `Worklist` loop, 404 // since MD may only act as a clobber for 1 of N MemoryLocations. 405 FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD); 406 if (!FoundClobber) { 407 ClobberAlias CA = 408 instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); 409 if (CA.IsClobber) { 410 FoundClobber = true; 411 // Not used: CA.AR; 412 } 413 } 414 } 415 break; 416 } 417 418 // We should never hit liveOnEntry, unless it's the clobber. 419 assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); 420 421 if (auto *MD = dyn_cast<MemoryDef>(MA)) { 422 (void)MD; 423 assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) 424 .IsClobber && 425 "Found clobber before reaching ClobberAt!"); 426 continue; 427 } 428 429 assert(isa<MemoryPhi>(MA)); 430 Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); 431 } 432 } 433 434 // If ClobberAt is a MemoryPhi, we can assume something above it acted as a 435 // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. 436 assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && 437 "ClobberAt never acted as a clobber"); 438 } 439 440 namespace { 441 442 /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up 443 /// in one class. 444 class ClobberWalker { 445 /// Save a few bytes by using unsigned instead of size_t. 446 using ListIndex = unsigned; 447 448 /// Represents a span of contiguous MemoryDefs, potentially ending in a 449 /// MemoryPhi. 450 struct DefPath { 451 MemoryLocation Loc; 452 // Note that, because we always walk in reverse, Last will always dominate 453 // First. Also note that First and Last are inclusive. 454 MemoryAccess *First; 455 MemoryAccess *Last; 456 Optional<ListIndex> Previous; 457 458 DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, 459 Optional<ListIndex> Previous) 460 : Loc(Loc), First(First), Last(Last), Previous(Previous) {} 461 462 DefPath(const MemoryLocation &Loc, MemoryAccess *Init, 463 Optional<ListIndex> Previous) 464 : DefPath(Loc, Init, Init, Previous) {} 465 }; 466 467 const MemorySSA &MSSA; 468 AliasAnalysis &AA; 469 DominatorTree &DT; 470 UpwardsMemoryQuery *Query; 471 472 // Phi optimization bookkeeping 473 SmallVector<DefPath, 32> Paths; 474 DenseSet<ConstMemoryAccessPair> VisitedPhis; 475 476 /// Find the nearest def or phi that `From` can legally be optimized to. 477 const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { 478 assert(From->getNumOperands() && "Phi with no operands?"); 479 480 BasicBlock *BB = From->getBlock(); 481 MemoryAccess *Result = MSSA.getLiveOnEntryDef(); 482 DomTreeNode *Node = DT.getNode(BB); 483 while ((Node = Node->getIDom())) { 484 auto *Defs = MSSA.getBlockDefs(Node->getBlock()); 485 if (Defs) 486 return &*Defs->rbegin(); 487 } 488 return Result; 489 } 490 491 /// Result of calling walkToPhiOrClobber. 492 struct UpwardsWalkResult { 493 /// The "Result" of the walk. 
Either a clobber, the last thing we walked, or 494 /// both. Include alias info when clobber found. 495 MemoryAccess *Result; 496 bool IsKnownClobber; 497 Optional<AliasResult> AR; 498 }; 499 500 /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. 501 /// This will update Desc.Last as it walks. It will (optionally) also stop at 502 /// StopAt. 503 /// 504 /// This does not test for whether StopAt is a clobber 505 UpwardsWalkResult 506 walkToPhiOrClobber(DefPath &Desc, 507 const MemoryAccess *StopAt = nullptr) const { 508 assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); 509 510 for (MemoryAccess *Current : def_chain(Desc.Last)) { 511 Desc.Last = Current; 512 if (Current == StopAt) 513 return {Current, false, MayAlias}; 514 515 if (auto *MD = dyn_cast<MemoryDef>(Current)) { 516 if (MSSA.isLiveOnEntryDef(MD)) 517 return {MD, true, MustAlias}; 518 ClobberAlias CA = 519 instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); 520 if (CA.IsClobber) 521 return {MD, true, CA.AR}; 522 } 523 } 524 525 assert(isa<MemoryPhi>(Desc.Last) && 526 "Ended at a non-clobber that's not a phi?"); 527 return {Desc.Last, false, MayAlias}; 528 } 529 530 void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, 531 ListIndex PriorNode) { 532 auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), 533 upward_defs_end()); 534 for (const MemoryAccessPair &P : UpwardDefs) { 535 PausedSearches.push_back(Paths.size()); 536 Paths.emplace_back(P.second, P.first, PriorNode); 537 } 538 } 539 540 /// Represents a search that terminated after finding a clobber. This clobber 541 /// may or may not be present in the path of defs from LastNode..SearchStart, 542 /// since it may have been retrieved from cache. 543 struct TerminatedPath { 544 MemoryAccess *Clobber; 545 ListIndex LastNode; 546 }; 547 548 /// Get an access that keeps us from optimizing to the given phi. 549 /// 550 /// PausedSearches is an array of indices into the Paths array. Its incoming 551 /// value is the indices of searches that stopped at the last phi optimization 552 /// target. It's left in an unspecified state. 553 /// 554 /// If this returns None, NewPaused is a vector of searches that terminated 555 /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. 556 Optional<TerminatedPath> 557 getBlockingAccess(const MemoryAccess *StopWhere, 558 SmallVectorImpl<ListIndex> &PausedSearches, 559 SmallVectorImpl<ListIndex> &NewPaused, 560 SmallVectorImpl<TerminatedPath> &Terminated) { 561 assert(!PausedSearches.empty() && "No searches to continue?"); 562 563 // BFS vs DFS really doesn't make a difference here, so just do a DFS with 564 // PausedSearches as our stack. 565 while (!PausedSearches.empty()) { 566 ListIndex PathIndex = PausedSearches.pop_back_val(); 567 DefPath &Node = Paths[PathIndex]; 568 569 // If we've already visited this path with this MemoryLocation, we don't 570 // need to do so again. 571 // 572 // NOTE: That we just drop these paths on the ground makes caching 573 // behavior sporadic. e.g. given a diamond: 574 // A 575 // B C 576 // D 577 // 578 // ...If we walk D, B, A, C, we'll only cache the result of phi 579 // optimization for A, B, and D; C will be skipped because it dies here. 
580 // This arguably isn't the worst thing ever, since: 581 // - We generally query things in a top-down order, so if we got below D 582 // without needing cache entries for {C, MemLoc}, then chances are 583 // that those cache entries would end up ultimately unused. 584 // - We still cache things for A, so C only needs to walk up a bit. 585 // If this behavior becomes problematic, we can fix without a ton of extra 586 // work. 587 if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) 588 continue; 589 590 UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); 591 if (Res.IsKnownClobber) { 592 assert(Res.Result != StopWhere); 593 // If this wasn't a cache hit, we hit a clobber when walking. That's a 594 // failure. 595 TerminatedPath Term{Res.Result, PathIndex}; 596 if (!MSSA.dominates(Res.Result, StopWhere)) 597 return Term; 598 599 // Otherwise, it's a valid thing to potentially optimize to. 600 Terminated.push_back(Term); 601 continue; 602 } 603 604 if (Res.Result == StopWhere) { 605 // We've hit our target. Save this path off for if we want to continue 606 // walking. 607 NewPaused.push_back(PathIndex); 608 continue; 609 } 610 611 assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); 612 addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); 613 } 614 615 return None; 616 } 617 618 template <typename T, typename Walker> 619 struct generic_def_path_iterator 620 : public iterator_facade_base<generic_def_path_iterator<T, Walker>, 621 std::forward_iterator_tag, T *> { 622 generic_def_path_iterator() = default; 623 generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} 624 625 T &operator*() const { return curNode(); } 626 627 generic_def_path_iterator &operator++() { 628 N = curNode().Previous; 629 return *this; 630 } 631 632 bool operator==(const generic_def_path_iterator &O) const { 633 if (N.hasValue() != O.N.hasValue()) 634 return false; 635 return !N.hasValue() || *N == *O.N; 636 } 637 638 private: 639 T &curNode() const { return W->Paths[*N]; } 640 641 Walker *W = nullptr; 642 Optional<ListIndex> N = None; 643 }; 644 645 using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; 646 using const_def_path_iterator = 647 generic_def_path_iterator<const DefPath, const ClobberWalker>; 648 649 iterator_range<def_path_iterator> def_path(ListIndex From) { 650 return make_range(def_path_iterator(this, From), def_path_iterator()); 651 } 652 653 iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { 654 return make_range(const_def_path_iterator(this, From), 655 const_def_path_iterator()); 656 } 657 658 struct OptznResult { 659 /// The path that contains our result. 660 TerminatedPath PrimaryClobber; 661 /// The paths that we can legally cache back from, but that aren't 662 /// necessarily the result of the Phi optimization. 663 SmallVector<TerminatedPath, 4> OtherClobbers; 664 }; 665 666 ListIndex defPathIndex(const DefPath &N) const { 667 // The assert looks nicer if we don't need to do &N 668 const DefPath *NP = &N; 669 assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && 670 "Out of bounds DefPath!"); 671 return NP - &Paths.front(); 672 } 673 674 /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths 675 /// that act as legal clobbers. Note that this won't return *all* clobbers. 
676 /// 677 /// Phi optimization algorithm tl;dr: 678 /// - Find the earliest def/phi, A, we can optimize to 679 /// - Find if all paths from the starting memory access ultimately reach A 680 /// - If not, optimization isn't possible. 681 /// - Otherwise, walk from A to another clobber or phi, A'. 682 /// - If A' is a def, we're done. 683 /// - If A' is a phi, try to optimize it. 684 /// 685 /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path 686 /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. 687 OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, 688 const MemoryLocation &Loc) { 689 assert(Paths.empty() && VisitedPhis.empty() && 690 "Reset the optimization state."); 691 692 Paths.emplace_back(Loc, Start, Phi, None); 693 // Stores how many "valid" optimization nodes we had prior to calling 694 // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. 695 auto PriorPathsSize = Paths.size(); 696 697 SmallVector<ListIndex, 16> PausedSearches; 698 SmallVector<ListIndex, 8> NewPaused; 699 SmallVector<TerminatedPath, 4> TerminatedPaths; 700 701 addSearches(Phi, PausedSearches, 0); 702 703 // Moves the TerminatedPath with the "most dominated" Clobber to the end of 704 // Paths. 705 auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { 706 assert(!Paths.empty() && "Need a path to move"); 707 auto Dom = Paths.begin(); 708 for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) 709 if (!MSSA.dominates(I->Clobber, Dom->Clobber)) 710 Dom = I; 711 auto Last = Paths.end() - 1; 712 if (Last != Dom) 713 std::iter_swap(Last, Dom); 714 }; 715 716 MemoryPhi *Current = Phi; 717 while (true) { 718 assert(!MSSA.isLiveOnEntryDef(Current) && 719 "liveOnEntry wasn't treated as a clobber?"); 720 721 const auto *Target = getWalkTarget(Current); 722 // If a TerminatedPath doesn't dominate Target, then it wasn't a legal 723 // optimization for the prior phi. 724 assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { 725 return MSSA.dominates(P.Clobber, Target); 726 })); 727 728 // FIXME: This is broken, because the Blocker may be reported to be 729 // liveOnEntry, and we'll happily wait for that to disappear (read: never) 730 // For the moment, this is fine, since we do nothing with blocker info. 731 if (Optional<TerminatedPath> Blocker = getBlockingAccess( 732 Target, PausedSearches, NewPaused, TerminatedPaths)) { 733 734 // Find the node we started at. We can't search based on N->Last, since 735 // we may have gone around a loop with a different MemoryLocation. 736 auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { 737 return defPathIndex(N) < PriorPathsSize; 738 }); 739 assert(Iter != def_path_iterator()); 740 741 DefPath &CurNode = *Iter; 742 assert(CurNode.Last == Current); 743 744 // Two things: 745 // A. We can't reliably cache all of NewPaused back. Consider a case 746 // where we have two paths in NewPaused; one of which can't optimize 747 // above this phi, whereas the other can. If we cache the second path 748 // back, we'll end up with suboptimal cache entries. We can handle 749 // cases like this a bit better when we either try to find all 750 // clobbers that block phi optimization, or when our cache starts 751 // supporting unfinished searches. 752 // B. 
We can't reliably cache TerminatedPaths back here without doing 753 // extra checks; consider a case like: 754 // T 755 // / \ 756 // D C 757 // \ / 758 // S 759 // Where T is our target, C is a node with a clobber on it, D is a 760 // diamond (with a clobber *only* on the left or right node, N), and 761 // S is our start. Say we walk to D, through the node opposite N 762 // (read: ignoring the clobber), and see a cache entry in the top 763 // node of D. That cache entry gets put into TerminatedPaths. We then 764 // walk up to C (N is later in our worklist), find the clobber, and 765 // quit. If we append TerminatedPaths to OtherClobbers, we'll cache 766 // the bottom part of D to the cached clobber, ignoring the clobber 767 // in N. Again, this problem goes away if we start tracking all 768 // blockers for a given phi optimization. 769 TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; 770 return {Result, {}}; 771 } 772 773 // If there's nothing left to search, then all paths led to valid clobbers 774 // that we got from our cache; pick the nearest to the start, and allow 775 // the rest to be cached back. 776 if (NewPaused.empty()) { 777 MoveDominatedPathToEnd(TerminatedPaths); 778 TerminatedPath Result = TerminatedPaths.pop_back_val(); 779 return {Result, std::move(TerminatedPaths)}; 780 } 781 782 MemoryAccess *DefChainEnd = nullptr; 783 SmallVector<TerminatedPath, 4> Clobbers; 784 for (ListIndex Paused : NewPaused) { 785 UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); 786 if (WR.IsKnownClobber) 787 Clobbers.push_back({WR.Result, Paused}); 788 else 789 // Micro-opt: If we hit the end of the chain, save it. 790 DefChainEnd = WR.Result; 791 } 792 793 if (!TerminatedPaths.empty()) { 794 // If we couldn't find the dominating phi/liveOnEntry in the above loop, 795 // do it now. 796 if (!DefChainEnd) 797 for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) 798 DefChainEnd = MA; 799 800 // If any of the terminated paths don't dominate the phi we'll try to 801 // optimize, we need to figure out what they are and quit. 802 const BasicBlock *ChainBB = DefChainEnd->getBlock(); 803 for (const TerminatedPath &TP : TerminatedPaths) { 804 // Because we know that DefChainEnd is as "high" as we can go, we 805 // don't need local dominance checks; BB dominance is sufficient. 806 if (DT.dominates(ChainBB, TP.Clobber->getBlock())) 807 Clobbers.push_back(TP); 808 } 809 } 810 811 // If we have clobbers in the def chain, find the one closest to Current 812 // and quit. 813 if (!Clobbers.empty()) { 814 MoveDominatedPathToEnd(Clobbers); 815 TerminatedPath Result = Clobbers.pop_back_val(); 816 return {Result, std::move(Clobbers)}; 817 } 818 819 assert(all_of(NewPaused, 820 [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); 821 822 // Because liveOnEntry is a clobber, this must be a phi. 
823 auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); 824 825 PriorPathsSize = Paths.size(); 826 PausedSearches.clear(); 827 for (ListIndex I : NewPaused) 828 addSearches(DefChainPhi, PausedSearches, I); 829 NewPaused.clear(); 830 831 Current = DefChainPhi; 832 } 833 } 834 835 void verifyOptResult(const OptznResult &R) const { 836 assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { 837 return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); 838 })); 839 } 840 841 void resetPhiOptznState() { 842 Paths.clear(); 843 VisitedPhis.clear(); 844 } 845 846 public: 847 ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) 848 : MSSA(MSSA), AA(AA), DT(DT) {} 849 850 /// Finds the nearest clobber for the given query, optimizing phis if 851 /// possible. 852 MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { 853 Query = &Q; 854 855 MemoryAccess *Current = Start; 856 // This walker pretends uses don't exist. If we're handed one, silently grab 857 // its def. (This has the nice side-effect of ensuring we never cache uses) 858 if (auto *MU = dyn_cast<MemoryUse>(Start)) 859 Current = MU->getDefiningAccess(); 860 861 DefPath FirstDesc(Q.StartingLoc, Current, Current, None); 862 // Fast path for the overly-common case (no crazy phi optimization 863 // necessary) 864 UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); 865 MemoryAccess *Result; 866 if (WalkResult.IsKnownClobber) { 867 Result = WalkResult.Result; 868 Q.AR = WalkResult.AR; 869 } else { 870 OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), 871 Current, Q.StartingLoc); 872 verifyOptResult(OptRes); 873 resetPhiOptznState(); 874 Result = OptRes.PrimaryClobber.Clobber; 875 } 876 877 #ifdef EXPENSIVE_CHECKS 878 checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); 879 #endif 880 return Result; 881 } 882 883 void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } 884 }; 885 886 struct RenamePassData { 887 DomTreeNode *DTN; 888 DomTreeNode::const_iterator ChildIt; 889 MemoryAccess *IncomingVal; 890 891 RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, 892 MemoryAccess *M) 893 : DTN(D), ChildIt(It), IncomingVal(M) {} 894 895 void swap(RenamePassData &RHS) { 896 std::swap(DTN, RHS.DTN); 897 std::swap(ChildIt, RHS.ChildIt); 898 std::swap(IncomingVal, RHS.IncomingVal); 899 } 900 }; 901 902 } // end anonymous namespace 903 904 namespace llvm { 905 906 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no 907 /// longer does caching on its own, but the name has been retained for the 908 /// moment. 
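/// A hedged usage sketch (the value names below are assumptions made for
/// illustration; MemorySSA::OptimizeUses further down drives the walker the
/// same way):
///   MemorySSAWalker *W = MSSA.getWalker();
///   MemoryAccess *Clobber = W->getClobberingMemoryAccess(SomeLoadOrStore);
/// Clobber is the nearest MemoryDef/MemoryPhi (or liveOnEntry) that the walker
/// could not rule out as clobbering the queried instruction.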
909 class MemorySSA::CachingWalker final : public MemorySSAWalker { 910 ClobberWalker Walker; 911 912 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); 913 914 public: 915 CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); 916 ~CachingWalker() override = default; 917 918 using MemorySSAWalker::getClobberingMemoryAccess; 919 920 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; 921 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, 922 const MemoryLocation &) override; 923 void invalidateInfo(MemoryAccess *) override; 924 925 void verify(const MemorySSA *MSSA) override { 926 MemorySSAWalker::verify(MSSA); 927 Walker.verify(MSSA); 928 } 929 }; 930 931 } // end namespace llvm 932 933 void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, 934 bool RenameAllUses) { 935 // Pass through values to our successors 936 for (const BasicBlock *S : successors(BB)) { 937 auto It = PerBlockAccesses.find(S); 938 // Rename the phi nodes in our successor block 939 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 940 continue; 941 AccessList *Accesses = It->second.get(); 942 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 943 if (RenameAllUses) { 944 int PhiIndex = Phi->getBasicBlockIndex(BB); 945 assert(PhiIndex != -1 && "Incomplete phi during partial rename"); 946 Phi->setIncomingValue(PhiIndex, IncomingVal); 947 } else 948 Phi->addIncoming(IncomingVal, BB); 949 } 950 } 951 952 /// Rename a single basic block into MemorySSA form. 953 /// Uses the standard SSA renaming algorithm. 954 /// \returns The new incoming value. 955 MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, 956 bool RenameAllUses) { 957 auto It = PerBlockAccesses.find(BB); 958 // Skip most processing if the list is empty. 959 if (It != PerBlockAccesses.end()) { 960 AccessList *Accesses = It->second.get(); 961 for (MemoryAccess &L : *Accesses) { 962 if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { 963 if (MUD->getDefiningAccess() == nullptr || RenameAllUses) 964 MUD->setDefiningAccess(IncomingVal); 965 if (isa<MemoryDef>(&L)) 966 IncomingVal = &L; 967 } else { 968 IncomingVal = &L; 969 } 970 } 971 } 972 return IncomingVal; 973 } 974 975 /// This is the standard SSA renaming algorithm. 976 /// 977 /// We walk the dominator tree in preorder, renaming accesses, and then filling 978 /// in phi nodes in our successors. 979 void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, 980 SmallPtrSetImpl<BasicBlock *> &Visited, 981 bool SkipVisited, bool RenameAllUses) { 982 SmallVector<RenamePassData, 32> WorkStack; 983 // Skip everything if we already renamed this block and we are skipping. 984 // Note: You can't sink this into the if, because we need it to occur 985 // regardless of whether we skip blocks or not. 
986 bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; 987 if (SkipVisited && AlreadyVisited) 988 return; 989 990 IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); 991 renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); 992 WorkStack.push_back({Root, Root->begin(), IncomingVal}); 993 994 while (!WorkStack.empty()) { 995 DomTreeNode *Node = WorkStack.back().DTN; 996 DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; 997 IncomingVal = WorkStack.back().IncomingVal; 998 999 if (ChildIt == Node->end()) { 1000 WorkStack.pop_back(); 1001 } else { 1002 DomTreeNode *Child = *ChildIt; 1003 ++WorkStack.back().ChildIt; 1004 BasicBlock *BB = Child->getBlock(); 1005 // Note: You can't sink this into the if, because we need it to occur 1006 // regardless of whether we skip blocks or not. 1007 AlreadyVisited = !Visited.insert(BB).second; 1008 if (SkipVisited && AlreadyVisited) { 1009 // We already visited this during our renaming, which can happen when 1010 // being asked to rename multiple blocks. Figure out the incoming val, 1011 // which is the last def. 1012 // Incoming value can only change if there is a block def, and in that 1013 // case, it's the last block def in the list. 1014 if (auto *BlockDefs = getWritableBlockDefs(BB)) 1015 IncomingVal = &*BlockDefs->rbegin(); 1016 } else 1017 IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); 1018 renameSuccessorPhis(BB, IncomingVal, RenameAllUses); 1019 WorkStack.push_back({Child, Child->begin(), IncomingVal}); 1020 } 1021 } 1022 } 1023 1024 /// This handles unreachable block accesses by deleting phi nodes in 1025 /// unreachable blocks, and marking all other unreachable MemoryAccess's as 1026 /// being uses of the live on entry definition. 1027 void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { 1028 assert(!DT->isReachableFromEntry(BB) && 1029 "Reachable block found while handling unreachable blocks"); 1030 1031 // Make sure phi nodes in our reachable successors end up with a 1032 // LiveOnEntryDef for our incoming edge, even though our block is forward 1033 // unreachable. We could just disconnect these blocks from the CFG fully, 1034 // but we do not right now. 1035 for (const BasicBlock *S : successors(BB)) { 1036 if (!DT->isReachableFromEntry(S)) 1037 continue; 1038 auto It = PerBlockAccesses.find(S); 1039 // Rename the phi nodes in our successor block 1040 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 1041 continue; 1042 AccessList *Accesses = It->second.get(); 1043 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 1044 Phi->addIncoming(LiveOnEntryDef.get(), BB); 1045 } 1046 1047 auto It = PerBlockAccesses.find(BB); 1048 if (It == PerBlockAccesses.end()) 1049 return; 1050 1051 auto &Accesses = It->second; 1052 for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { 1053 auto Next = std::next(AI); 1054 // If we have a phi, just remove it. We are going to replace all 1055 // users with live on entry. 
1056 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
1057 UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
1058 else
1059 Accesses->erase(AI);
1060 AI = Next;
1061 }
1062 }
1063 
1064 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
1065 : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
1066 NextID(0) {
1067 buildMemorySSA();
1068 }
1069 
1070 MemorySSA::~MemorySSA() {
1071 // Drop all our references
1072 for (const auto &Pair : PerBlockAccesses)
1073 for (MemoryAccess &MA : *Pair.second)
1074 MA.dropAllReferences();
1075 }
1076 
1077 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
1078 auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
1079 
1080 if (Res.second)
1081 Res.first->second = llvm::make_unique<AccessList>();
1082 return Res.first->second.get();
1083 }
1084 
1085 MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
1086 auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));
1087 
1088 if (Res.second)
1089 Res.first->second = llvm::make_unique<DefsList>();
1090 return Res.first->second.get();
1091 }
1092 
1093 namespace llvm {
1094 
1095 /// This class is a batch walker of all MemoryUses in the program, and points
1096 /// their defining access at the thing that actually clobbers them. Because it
1097 /// is a batch walker that touches everything, it does not operate like the
1098 /// other walkers. This walker is basically performing a top-down SSA renaming
1099 /// pass, where the version stack is used as the cache. This enables it to be
1100 /// significantly more time and memory efficient than using the regular walker,
1101 /// which is walking bottom-up.
1102 class MemorySSA::OptimizeUses {
1103 public:
1104 OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
1105 DominatorTree *DT)
1106 : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
1107 Walker = MSSA->getWalker();
1108 }
1109 
1110 void optimizeUses();
1111 
1112 private:
1113 /// This represents where a given MemoryLocation is in the stack.
1114 struct MemlocStackInfo {
1115 // This essentially is keeping track of versions of the stack. Whenever
1116 // the stack changes due to pushes or pops, these versions increase.
1117 unsigned long StackEpoch;
1118 unsigned long PopEpoch;
1119 // This is the lower bound of places on the stack to check. It is equal to
1120 // the place the last stack walk ended.
1121 // Note: Correctness depends on this being initialized to 0, which DenseMap
1122 // does.
1123 unsigned long LowerBound;
1124 const BasicBlock *LowerBoundBlock;
1125 // This is where the last walk for this memory location ended.
1126 unsigned long LastKill;
1127 bool LastKillValid;
1128 Optional<AliasResult> AR;
1129 };
1130 
1131 void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
1132 SmallVectorImpl<MemoryAccess *> &,
1133 DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
1134 
1135 MemorySSA *MSSA;
1136 MemorySSAWalker *Walker;
1137 AliasAnalysis *AA;
1138 DominatorTree *DT;
1139 };
1140 
1141 } // end namespace llvm
1142 
1143 /// Optimize the uses in a given block. This is basically the SSA renaming
1144 /// algorithm, with one caveat: We are able to use a single stack for all
1145 /// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
1146 /// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
1147 /// going to be some position in that stack of possible ones.
1148 ///
1149 /// We track the stack positions that each MemoryLocation needs
1150 /// to check, and where its last walk ended, because we only want to check the
1151 /// things that changed since last time. The same MemoryLocation should
1152 /// get clobbered by the same store (getModRefInfo does not use invariantness or
1153 /// similar properties; if it ever does, we can modify MemoryLocOrCall to
1154 /// include the relevant data).
1155 void MemorySSA::OptimizeUses::optimizeUsesInBlock(
1156 const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
1157 SmallVectorImpl<MemoryAccess *> &VersionStack,
1158 DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {
1159 
1160 // If there are no accesses, there is nothing to do.
1161 MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
1162 if (Accesses == nullptr)
1163 return;
1164 
1165 // Pop everything that doesn't dominate the current block off the stack,
1166 // increment the PopEpoch to account for this.
1167 while (true) {
1168 assert(
1169 !VersionStack.empty() &&
1170 "Version stack should have liveOnEntry sentinel dominating everything");
1171 BasicBlock *BackBlock = VersionStack.back()->getBlock();
1172 if (DT->dominates(BackBlock, BB))
1173 break;
1174 while (VersionStack.back()->getBlock() == BackBlock)
1175 VersionStack.pop_back();
1176 ++PopEpoch;
1177 }
1178 
1179 for (MemoryAccess &MA : *Accesses) {
1180 auto *MU = dyn_cast<MemoryUse>(&MA);
1181 if (!MU) {
1182 VersionStack.push_back(&MA);
1183 ++StackEpoch;
1184 continue;
1185 }
1186 
1187 if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
1188 MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
1189 continue;
1190 }
1191 
1192 MemoryLocOrCall UseMLOC(MU);
1193 auto &LocInfo = LocStackInfo[UseMLOC];
1194 // If the pop epoch changed, it means we've removed stuff from top of
1195 // stack due to changing blocks. We may have to reset the lower bound or
1196 // last kill info.
1197 if (LocInfo.PopEpoch != PopEpoch) {
1198 LocInfo.PopEpoch = PopEpoch;
1199 LocInfo.StackEpoch = StackEpoch;
1200 // If the lower bound was in something that no longer dominates us, we
1201 // have to reset it.
1202 // We can't simply track stack size, because the stack may have had
1203 // pushes/pops in the meantime.
1204 // XXX: This is non-optimal, but is only slower in cases with heavily
1205 // branching dominator trees. To get the optimal number of queries we would
1206 // have to make LowerBound and LastKill a per-loc stack, and pop it until
1207 // the top of that stack dominates us. This does not seem worth it ATM.
1208 // A much cheaper optimization would be to always explore the deepest
1209 // branch of the dominator tree first. This will guarantee this resets on
1210 // the smallest set of blocks.
1211 if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
1212 !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
1213 // Reset the lower bound of things to check.
1214 // TODO: Some day we should be able to reset to last kill, rather than
1215 // 0.
1216 LocInfo.LowerBound = 0;
1217 LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
1218 LocInfo.LastKillValid = false;
1219 }
1220 } else if (LocInfo.StackEpoch != StackEpoch) {
1221 // If all that has changed is the StackEpoch, we only have to check the
1222 // new things on the stack, because we've checked everything before. In
1223 // this case, the lower bound of things to check remains the same.
1224 LocInfo.PopEpoch = PopEpoch; 1225 LocInfo.StackEpoch = StackEpoch; 1226 } 1227 if (!LocInfo.LastKillValid) { 1228 LocInfo.LastKill = VersionStack.size() - 1; 1229 LocInfo.LastKillValid = true; 1230 LocInfo.AR = MayAlias; 1231 } 1232 1233 // At this point, we should have corrected last kill and LowerBound to be 1234 // in bounds. 1235 assert(LocInfo.LowerBound < VersionStack.size() && 1236 "Lower bound out of range"); 1237 assert(LocInfo.LastKill < VersionStack.size() && 1238 "Last kill info out of range"); 1239 // In any case, the new upper bound is the top of the stack. 1240 unsigned long UpperBound = VersionStack.size() - 1; 1241 1242 if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { 1243 LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" 1244 << *(MU->getMemoryInst()) << ")" 1245 << " because there are " 1246 << UpperBound - LocInfo.LowerBound 1247 << " stores to disambiguate\n"); 1248 // Because we did not walk, LastKill is no longer valid, as this may 1249 // have been a kill. 1250 LocInfo.LastKillValid = false; 1251 continue; 1252 } 1253 bool FoundClobberResult = false; 1254 while (UpperBound > LocInfo.LowerBound) { 1255 if (isa<MemoryPhi>(VersionStack[UpperBound])) { 1256 // For phis, use the walker, see where we ended up, go there 1257 Instruction *UseInst = MU->getMemoryInst(); 1258 MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); 1259 // We are guaranteed to find it or something is wrong 1260 while (VersionStack[UpperBound] != Result) { 1261 assert(UpperBound != 0); 1262 --UpperBound; 1263 } 1264 FoundClobberResult = true; 1265 break; 1266 } 1267 1268 MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); 1269 // If the lifetime of the pointer ends at this instruction, it's live on 1270 // entry. 1271 if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { 1272 // Reset UpperBound to liveOnEntryDef's place in the stack 1273 UpperBound = 0; 1274 FoundClobberResult = true; 1275 LocInfo.AR = MustAlias; 1276 break; 1277 } 1278 ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); 1279 if (CA.IsClobber) { 1280 FoundClobberResult = true; 1281 LocInfo.AR = CA.AR; 1282 break; 1283 } 1284 --UpperBound; 1285 } 1286 1287 // Note: Phis always have AliasResult AR set to MayAlias ATM. 1288 1289 // At the end of this loop, UpperBound is either a clobber, or lower bound 1290 // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. 1291 if (FoundClobberResult || UpperBound < LocInfo.LastKill) { 1292 // We were last killed now by where we got to 1293 if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound])) 1294 LocInfo.AR = None; 1295 MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR); 1296 LocInfo.LastKill = UpperBound; 1297 } else { 1298 // Otherwise, we checked all the new ones, and now we know we can get to 1299 // LastKill. 1300 MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR); 1301 } 1302 LocInfo.LowerBound = VersionStack.size() - 1; 1303 LocInfo.LowerBoundBlock = BB; 1304 } 1305 } 1306 1307 /// Optimize uses to point to their actual clobbering definitions. 1308 void MemorySSA::OptimizeUses::optimizeUses() { 1309 SmallVector<MemoryAccess *, 16> VersionStack; 1310 DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; 1311 VersionStack.push_back(MSSA->getLiveOnEntryDef()); 1312 1313 unsigned long StackEpoch = 1; 1314 unsigned long PopEpoch = 1; 1315 // We perform a non-recursive top-down dominator tree walk. 
1316 for (const auto *DomNode : depth_first(DT->getRootNode())) 1317 optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, 1318 LocStackInfo); 1319 } 1320 1321 void MemorySSA::placePHINodes( 1322 const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) { 1323 // Determine where our MemoryPhi's should go 1324 ForwardIDFCalculator IDFs(*DT); 1325 IDFs.setDefiningBlocks(DefiningBlocks); 1326 SmallVector<BasicBlock *, 32> IDFBlocks; 1327 IDFs.calculate(IDFBlocks); 1328 1329 // Now place MemoryPhi nodes. 1330 for (auto &BB : IDFBlocks) 1331 createMemoryPhi(BB); 1332 } 1333 1334 void MemorySSA::buildMemorySSA() { 1335 // We create an access to represent "live on entry", for things like 1336 // arguments or users of globals, where the memory they use is defined before 1337 // the beginning of the function. We do not actually insert it into the IR. 1338 // We do not define a live on exit for the immediate uses, and thus our 1339 // semantics do *not* imply that something with no immediate uses can simply 1340 // be removed. 1341 BasicBlock &StartingPoint = F.getEntryBlock(); 1342 LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr, 1343 &StartingPoint, NextID++)); 1344 1345 // We maintain lists of memory accesses per-block, trading memory for time. We 1346 // could just look up the memory access for every possible instruction in the 1347 // stream. 1348 SmallPtrSet<BasicBlock *, 32> DefiningBlocks; 1349 // Go through each block, figure out where defs occur, and chain together all 1350 // the accesses. 1351 for (BasicBlock &B : F) { 1352 bool InsertIntoDef = false; 1353 AccessList *Accesses = nullptr; 1354 DefsList *Defs = nullptr; 1355 for (Instruction &I : B) { 1356 MemoryUseOrDef *MUD = createNewAccess(&I); 1357 if (!MUD) 1358 continue; 1359 1360 if (!Accesses) 1361 Accesses = getOrCreateAccessList(&B); 1362 Accesses->push_back(MUD); 1363 if (isa<MemoryDef>(MUD)) { 1364 InsertIntoDef = true; 1365 if (!Defs) 1366 Defs = getOrCreateDefsList(&B); 1367 Defs->push_back(*MUD); 1368 } 1369 } 1370 if (InsertIntoDef) 1371 DefiningBlocks.insert(&B); 1372 } 1373 placePHINodes(DefiningBlocks); 1374 1375 // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get 1376 // filled in with all blocks. 1377 SmallPtrSet<BasicBlock *, 16> Visited; 1378 renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); 1379 1380 CachingWalker *Walker = getWalkerImpl(); 1381 1382 OptimizeUses(this, Walker, AA, DT).optimizeUses(); 1383 1384 // Mark the uses in unreachable blocks as live on entry, so that they go 1385 // somewhere. 1386 for (auto &BB : F) 1387 if (!Visited.count(&BB)) 1388 markUnreachableAsLiveOnEntry(&BB); 1389 } 1390 1391 MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } 1392 1393 MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { 1394 if (Walker) 1395 return Walker.get(); 1396 1397 Walker = llvm::make_unique<CachingWalker>(this, AA, DT); 1398 return Walker.get(); 1399 } 1400 1401 // This is a helper function used by the creation routines. It places NewAccess 1402 // into the access and defs lists for a given basic block, at the given 1403 // insertion point. 1404 void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess, 1405 const BasicBlock *BB, 1406 InsertionPlace Point) { 1407 auto *Accesses = getOrCreateAccessList(BB); 1408 if (Point == Beginning) { 1409 // If it's a phi node, it goes first, otherwise, it goes after any phi 1410 // nodes. 
1411 if (isa<MemoryPhi>(NewAccess)) { 1412 Accesses->push_front(NewAccess); 1413 auto *Defs = getOrCreateDefsList(BB); 1414 Defs->push_front(*NewAccess); 1415 } else { 1416 auto AI = find_if_not( 1417 *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); 1418 Accesses->insert(AI, NewAccess); 1419 if (!isa<MemoryUse>(NewAccess)) { 1420 auto *Defs = getOrCreateDefsList(BB); 1421 auto DI = find_if_not( 1422 *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); 1423 Defs->insert(DI, *NewAccess); 1424 } 1425 } 1426 } else { 1427 Accesses->push_back(NewAccess); 1428 if (!isa<MemoryUse>(NewAccess)) { 1429 auto *Defs = getOrCreateDefsList(BB); 1430 Defs->push_back(*NewAccess); 1431 } 1432 } 1433 BlockNumberingValid.erase(BB); 1434 } 1435 1436 void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB, 1437 AccessList::iterator InsertPt) { 1438 auto *Accesses = getWritableBlockAccesses(BB); 1439 bool WasEnd = InsertPt == Accesses->end(); 1440 Accesses->insert(AccessList::iterator(InsertPt), What); 1441 if (!isa<MemoryUse>(What)) { 1442 auto *Defs = getOrCreateDefsList(BB); 1443 // If we got asked to insert at the end, we have an easy job, just shove it 1444 // at the end. If we got asked to insert before an existing def, we also get 1445 // an iterator. If we got asked to insert before a use, we have to hunt for 1446 // the next def. 1447 if (WasEnd) { 1448 Defs->push_back(*What); 1449 } else if (isa<MemoryDef>(InsertPt)) { 1450 Defs->insert(InsertPt->getDefsIterator(), *What); 1451 } else { 1452 while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt)) 1453 ++InsertPt; 1454 // Either we found a def, or we are inserting at the end 1455 if (InsertPt == Accesses->end()) 1456 Defs->push_back(*What); 1457 else 1458 Defs->insert(InsertPt->getDefsIterator(), *What); 1459 } 1460 } 1461 BlockNumberingValid.erase(BB); 1462 } 1463 1464 // Move What before Where in the IR. The end result is that What will belong to 1465 // the right lists and have the right Block set, but will not otherwise be 1466 // correct. It will not have the right defining access, and if it is a def, 1467 // things below it will not properly be updated. 1468 void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, 1469 AccessList::iterator Where) { 1470 // Keep it in the lookup tables, remove from the lists 1471 removeFromLists(What, false); 1472 What->setBlock(BB); 1473 insertIntoListsBefore(What, BB, Where); 1474 } 1475 1476 void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, 1477 InsertionPlace Point) { 1478 removeFromLists(What, false); 1479 What->setBlock(BB); 1480 insertIntoListsForBlock(What, BB, Point); 1481 } 1482 1483 MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { 1484 assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB"); 1485 MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); 1486 // Phi's always are placed at the front of the block. 
1487 insertIntoListsForBlock(Phi, BB, Beginning);
1488 ValueToMemoryAccess[BB] = Phi;
1489 return Phi;
1490 }
1491 
1492 MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
1493 MemoryAccess *Definition) {
1494 assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
1495 MemoryUseOrDef *NewAccess = createNewAccess(I);
1496 assert(
1497 NewAccess != nullptr &&
1498 "Tried to create a memory access for a non-memory touching instruction");
1499 NewAccess->setDefiningAccess(Definition);
1500 return NewAccess;
1501 }
1502 
1503 // Return true if the instruction has ordering constraints.
1504 // Note specifically that this only considers stores and loads
1505 // because others are still considered ModRef by getModRefInfo.
1506 static inline bool isOrdered(const Instruction *I) {
1507 if (auto *SI = dyn_cast<StoreInst>(I)) {
1508 if (!SI->isUnordered())
1509 return true;
1510 } else if (auto *LI = dyn_cast<LoadInst>(I)) {
1511 if (!LI->isUnordered())
1512 return true;
1513 }
1514 return false;
1515 }
1516 
1517 /// Helper function to create new memory accesses
1518 MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
1519 // The assume intrinsic has a control dependency which we model by claiming
1520 // that it writes arbitrarily. Ignore that fake memory dependency here.
1521 // FIXME: Replace this special casing with a more accurate modelling of
1522 // assume's control dependency.
1523 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1524 if (II->getIntrinsicID() == Intrinsic::assume)
1525 return nullptr;
1526 
1527 // Find out what effect this instruction has on memory.
1528 ModRefInfo ModRef = AA->getModRefInfo(I, None);
1529 // The isOrdered check is used to ensure that volatiles end up as defs
1530 // (atomics end up as ModRef right now anyway). Until we separate the
1531 // ordering chain from the memory chain, this enables people to see at least
1532 // some relative ordering to volatiles. Note that getClobberingMemoryAccess
1533 // will still give an answer that bypasses other volatile loads. TODO:
1534 // Separate memory aliasing and ordering into two different chains so that we
1535 // can precisely represent both "what memory will this read/write/is clobbered
1536 // by" and "what instructions can I move this past".
1537 bool Def = isModSet(ModRef) || isOrdered(I);
1538 bool Use = isRefSet(ModRef);
1539 
1540 // It's possible for an instruction to not modify memory at all. During
1541 // construction, we ignore such instructions.
1542 if (!Def && !Use)
1543 return nullptr;
1544 
1545 MemoryUseOrDef *MUD;
1546 if (Def)
1547 MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
1548 else
1549 MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
1550 ValueToMemoryAccess[I] = MUD;
1551 return MUD;
1552 }
1553 
1554 /// Returns true if \p Replacer dominates \p Replacee.
1555 bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
1556 const MemoryAccess *Replacee) const {
1557 if (isa<MemoryUseOrDef>(Replacee))
1558 return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
1559 const auto *MP = cast<MemoryPhi>(Replacee);
1560 // For a phi node, the use occurs in the predecessor block of the phi node.
1561 // Since Replacee may occur multiple times in the phi node, we have to check each
1562 // operand to ensure Replacer dominates each operand where Replacee occurs.
  for (const Use &Arg : MP->operands()) {
    if (Arg.get() != Replacee &&
        !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
      return false;
  }
  return true;
}

/// Properly remove \p MA from all of MemorySSA's lookup tables.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
  assert(MA->use_empty() &&
         "Trying to remove memory access that still has uses");
  BlockNumbering.erase(MA);
  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MUD->setDefiningAccess(nullptr);
  // Invalidate our walker's cache if necessary.
  if (!isa<MemoryUse>(MA))
    Walker->invalidateInfo(MA);
  // The call below to erase will destroy MA, so we can't change the order in
  // which we do things here.
  Value *MemoryInst;
  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
    MemoryInst = MUD->getMemoryInst();
  } else {
    MemoryInst = MA->getBlock();
  }
  auto VMA = ValueToMemoryAccess.find(MemoryInst);
  if (VMA->second == MA)
    ValueToMemoryAccess.erase(VMA);
}

/// Properly remove \p MA from all of MemorySSA's lists.
///
/// Because of the way the intrusive list and use lists work, it is important to
/// do removal in the right order.
/// ShouldDelete defaults to true, and will cause the memory access to also be
/// deleted, not just removed.
void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
  // The access list owns the reference, so we erase it from the non-owning list
  // first.
  if (!isa<MemoryUse>(MA)) {
    auto DefsIt = PerBlockDefs.find(MA->getBlock());
    std::unique_ptr<DefsList> &Defs = DefsIt->second;
    Defs->remove(*MA);
    if (Defs->empty())
      PerBlockDefs.erase(DefsIt);
  }

  // The erase call here will delete it. If we don't want it deleted, we call
  // remove instead.
  auto AccessIt = PerBlockAccesses.find(MA->getBlock());
  std::unique_ptr<AccessList> &Accesses = AccessIt->second;
  if (ShouldDelete)
    Accesses->erase(MA);
  else
    Accesses->remove(MA);

  if (Accesses->empty())
    PerBlockAccesses.erase(AccessIt);
}

void MemorySSA::print(raw_ostream &OS) const {
  MemorySSAAnnotatedWriter Writer(this);
  F.print(OS, &Writer);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif

void MemorySSA::verifyMemorySSA() const {
  verifyDefUses(F);
  verifyDomination(F);
  verifyOrdering(F);
  Walker->verify(this);
}

/// Verify that the order and existence of MemoryAccesses matches the
/// order and existence of memory-affecting instructions.
void MemorySSA::verifyOrdering(Function &F) const {
  // Walk all the blocks, comparing what the lookups think and what the access
  // lists think, as well as the order in the blocks vs. the order in the
  // access lists.
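  // ActualAccesses and ActualDefs below are rebuilt per block straight from
  // the IR and then compared element by element against the stored lists.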
  SmallVector<MemoryAccess *, 32> ActualAccesses;
  SmallVector<MemoryAccess *, 32> ActualDefs;
  for (BasicBlock &B : F) {
    const AccessList *AL = getBlockAccesses(&B);
    const auto *DL = getBlockDefs(&B);
    MemoryAccess *Phi = getMemoryAccess(&B);
    if (Phi) {
      ActualAccesses.push_back(Phi);
      ActualDefs.push_back(Phi);
    }

    for (Instruction &I : B) {
      MemoryAccess *MA = getMemoryAccess(&I);
      assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
             "We have memory affecting instructions "
             "in this block but they are not in the "
             "access list or defs list");
      if (MA) {
        ActualAccesses.push_back(MA);
        if (isa<MemoryDef>(MA))
          ActualDefs.push_back(MA);
      }
    }
    // At this point, either we hit the assert above, the block really has no
    // accesses, or we have both accesses and an access list. The same holds
    // for defs.
    if (!AL && !DL)
      continue;
    assert(AL->size() == ActualAccesses.size() &&
           "We don't have the same number of accesses in the block as on the "
           "access list");
    assert((DL || ActualDefs.size() == 0) &&
           "Either we should have a defs list, or we should have no defs");
    assert((!DL || DL->size() == ActualDefs.size()) &&
           "We don't have the same number of defs in the block as on the "
           "def list");
    auto ALI = AL->begin();
    auto AAI = ActualAccesses.begin();
    while (ALI != AL->end() && AAI != ActualAccesses.end()) {
      assert(&*ALI == *AAI && "Not the same accesses in the same order");
      ++ALI;
      ++AAI;
    }
    ActualAccesses.clear();
    if (DL) {
      auto DLI = DL->begin();
      auto ADI = ActualDefs.begin();
      while (DLI != DL->end() && ADI != ActualDefs.end()) {
        assert(&*DLI == *ADI && "Not the same defs in the same order");
        ++DLI;
        ++ADI;
      }
    }
    ActualDefs.clear();
  }
}

/// Verify the domination properties of MemorySSA by checking that each
/// definition dominates all of its uses.
void MemorySSA::verifyDomination(Function &F) const {
#ifndef NDEBUG
  for (BasicBlock &B : F) {
    // Phi nodes are attached to basic blocks.
    if (MemoryPhi *MP = getMemoryAccess(&B))
      for (const Use &U : MP->uses())
        assert(dominates(MP, U) && "Memory PHI does not dominate its uses");

    for (Instruction &I : B) {
      MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
      if (!MD)
        continue;

      for (const Use &U : MD->uses())
        assert(dominates(MD, U) && "Memory Def does not dominate its uses");
    }
  }
#endif
}

/// Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
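/// A null \p Def is accepted only when \p Use is the live-on-entry def.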
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#ifndef NDEBUG
  // The live on entry use may cause us to get a NULL def here.
  if (!Def)
    assert(isLiveOnEntryDef(Use) &&
           "Null def but use does not point to live on entry def");
  else
    assert(is_contained(Def->users(), Use) &&
           "Did not find use in def's use list");
#endif
}

/// Verify the immediate use information, by walking all the memory
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list.
void MemorySSA::verifyDefUses(Function &F) const {
  for (BasicBlock &B : F) {
    // Phi nodes are attached to basic blocks.
    if (MemoryPhi *Phi = getMemoryAccess(&B)) {
      assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
                                          pred_begin(&B), pred_end(&B))) &&
             "Incomplete MemoryPhi Node");
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
        verifyUseInDefs(Phi->getIncomingValue(I), Phi);
    }

    for (Instruction &I : B) {
      if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
        verifyUseInDefs(MA->getDefiningAccess(), MA);
      }
    }
  }
}

MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
  return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
}

MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
  return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
}

/// Perform a local numbering on blocks so that instruction ordering can be
/// determined in constant time.
/// TODO: We currently just number in order. If we numbered by N, we could
/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
/// log2(N) sequences of mixed before and after) without needing to invalidate
/// the numbering.
void MemorySSA::renumberBlock(const BasicBlock *B) const {
  // The pre-increment ensures the numbers really start at 1.
  unsigned long CurrentNumber = 0;
  const AccessList *AL = getBlockAccesses(B);
  assert(AL != nullptr && "Asking to renumber an empty block");
  for (const auto &I : *AL)
    BlockNumbering[&I] = ++CurrentNumber;
  BlockNumberingValid.insert(B);
}

/// Determine, for two memory accesses in the same block,
/// whether \p Dominator dominates \p Dominatee.
/// \returns True if \p Dominator dominates \p Dominatee.
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
                                 const MemoryAccess *Dominatee) const {
  const BasicBlock *DominatorBlock = Dominator->getBlock();

  assert((DominatorBlock == Dominatee->getBlock()) &&
         "Asking for local domination when accesses are in different blocks!");
  // A node dominates itself.
  if (Dominatee == Dominator)
    return true;

  // When Dominatee is defined on function entry, it is not dominated by
  // another memory access.
  if (isLiveOnEntryDef(Dominatee))
    return false;

  // When Dominator is defined on function entry, it dominates the other
  // memory access.
  if (isLiveOnEntryDef(Dominator))
    return true;

  if (!BlockNumberingValid.count(DominatorBlock))
    renumberBlock(DominatorBlock);

  unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
  // All numbers start with 1
  assert(DominatorNum != 0 && "Block was not numbered properly");
  unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
  assert(DominateeNum != 0 && "Block was not numbered properly");
  return DominatorNum < DominateeNum;
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const MemoryAccess *Dominatee) const {
  if (Dominator == Dominatee)
    return true;

  if (isLiveOnEntryDef(Dominatee))
    return false;

  if (Dominator->getBlock() != Dominatee->getBlock())
    return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
  return locallyDominates(Dominator, Dominatee);
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const Use &Dominatee) const {
  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) {
    BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
    // The def must dominate the incoming block of the phi.
    if (UseBB != Dominator->getBlock())
      return DT->dominates(Dominator->getBlock(), UseBB);
    // If the UseBB and the DefBB are the same, compare locally.
    return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
  }
  // If it's not a PHI node use, the normal dominates can already handle it.
  return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}

const static char LiveOnEntryStr[] = "liveOnEntry";

void MemoryAccess::print(raw_ostream &OS) const {
  switch (getValueID()) {
  case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
  case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
  case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
  }
  llvm_unreachable("invalid value id");
}

void MemoryDef::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();

  OS << getID() << " = MemoryDef(";
  if (UO && UO->getID())
    OS << UO->getID();
  else
    OS << LiveOnEntryStr;
  OS << ')';
}

void MemoryPhi::print(raw_ostream &OS) const {
  bool First = true;
  OS << getID() << " = MemoryPhi(";
  for (const auto &Op : operands()) {
    BasicBlock *BB = getIncomingBlock(Op);
    MemoryAccess *MA = cast<MemoryAccess>(Op);
    if (!First)
      OS << ',';
    else
      First = false;

    OS << '{';
    if (BB->hasName())
      OS << BB->getName();
    else
      BB->printAsOperand(OS, false);
    OS << ',';
    if (unsigned ID = MA->getID())
      OS << ID;
    else
      OS << LiveOnEntryStr;
    OS << '}';
  }
  OS << ')';
}

void MemoryUse::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();
  OS << "MemoryUse(";
  if (UO && UO->getID())
    OS << UO->getID();
  else
    OS << LiveOnEntryStr;
  OS << ')';
}

void MemoryAccess::dump() const {
// Cannot completely remove virtual function even in release mode.
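// Only the body is guarded below, so a (possibly empty) definition is always
// emitted.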
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  print(dbgs());
  dbgs() << "\n";
#endif
}

char MemorySSAPrinterLegacyPass::ID = 0;

MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
  initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}

void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MemorySSAWrapperPass>();
}

bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
  auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  MSSA.print(dbgs());
  if (VerifyMemorySSA)
    MSSA.verifyMemorySSA();
  return false;
}

AnalysisKey MemorySSAAnalysis::Key;

MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
}

PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  OS << "MemorySSA for function: " << F.getName() << "\n";
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);

  return PreservedAnalyses::all();
}

PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();

  return PreservedAnalyses::all();
}

char MemorySSAWrapperPass::ID = 0;

MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
  initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
}

void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }

void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<AAResultsWrapperPass>();
}

bool MemorySSAWrapperPass::runOnFunction(Function &F) {
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MSSA.reset(new MemorySSA(F, &AA, &DT));
  return false;
}

void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }

void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
  MSSA->print(OS);
}

MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}

MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
                                        DominatorTree *D)
    : MemorySSAWalker(M), Walker(*M, *A, *D) {}

void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MUD->resetOptimized();
}

/// Walk the use-def chains starting at \p MA and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
  return Walker.findClobber(StartingAccess, Q);
}

MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
  if (isa<MemoryPhi>(StartingAccess))
    return StartingAccess;

  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
    return StartingUseOrDef;

  Instruction *I = StartingUseOrDef->getMemoryInst();

  // Conservatively, fences are always clobbers, so don't perform the walk if
  // we hit a fence.
  if (!ImmutableCallSite(I) && I->isFenceLike())
    return StartingUseOrDef;

  UpwardsMemoryQuery Q;
  Q.OriginalAccess = StartingUseOrDef;
  Q.StartingLoc = Loc;
  Q.Inst = I;
  Q.IsCall = false;

  // Unlike the other function, do not walk to the def of a def, because we are
  // handed something we already believe is the clobbering access.
  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
                                     ? StartingUseOrDef->getDefiningAccess()
                                     : StartingUseOrDef;

  MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
  LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
  LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *Clobber << "\n");
  return Clobber;
}

MemoryAccess *
MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
  // If this is a MemoryPhi, we can't do anything.
  if (!StartingAccess)
    return MA;

  // If this is an already optimized use or def, return the optimized result.
  // Note: Currently, we store the optimized def result in a separate field,
  // since we can't use the defining access.
  if (StartingAccess->isOptimized())
    return StartingAccess->getOptimized();

  const Instruction *I = StartingAccess->getMemoryInst();
  UpwardsMemoryQuery Q(I, StartingAccess);
  // We can't sanely do anything with fences, since they conservatively clobber
  // all memory and have no memory locations to get pointers from to try to
  // disambiguate.
  if (!Q.IsCall && I->isFenceLike())
    return StartingAccess;

  if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
    StartingAccess->setOptimized(LiveOnEntry);
    StartingAccess->setOptimizedAccessType(None);
    return LiveOnEntry;
  }

  // Start with the thing we already think clobbers this location.
  MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();

  // At this point, DefiningAccess may be the live on entry def.
  // If it is, we will not get a better result.
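  // Cache that answer via setOptimized so later queries on this access return
  // immediately.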
  if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
    StartingAccess->setOptimized(DefiningAccess);
    StartingAccess->setOptimizedAccessType(None);
    return DefiningAccess;
  }

  MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
  LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *DefiningAccess << "\n");
  LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *Result << "\n");

  StartingAccess->setOptimized(Result);
  if (MSSA->isLiveOnEntryDef(Result))
    StartingAccess->setOptimizedAccessType(None);
  else if (Q.AR == MustAlias)
    StartingAccess->setOptimizedAccessType(MustAlias);

  return Result;
}

MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
    return Use->getDefiningAccess();
  return MA;
}

MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, const MemoryLocation &) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
    return Use->getDefiningAccess();
  return StartingAccess;
}

void MemoryPhi::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryPhi *>(Self);
}

void MemoryDef::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryDef *>(Self);
}

void MemoryUse::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryUse *>(Self);
}
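
// Illustrative usage sketch (not part of MemorySSA itself): a legacy-PM client
// that required MemorySSAWrapperPass could ask whether anything in the current
// function clobbers a load roughly as follows. `LI` is a hypothetical LoadInst
// the client already has in hand.
//
//   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
//   if (MemoryUseOrDef *MU = MSSA.getMemoryAccess(LI)) {
//     MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(MU);
//     // liveOnEntry means no access in this function clobbers the location.
//     bool ClobberedLocally = !MSSA.isLiveOnEntryDef(Clobber);
//   }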