//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the MemorySSA class.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "memoryssa"

INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                      true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                    true)

INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
                      "Memory SSA Printer", false, false)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
                    "Memory SSA Printer", false, false)

static cl::opt<unsigned> MaxCheckLimit(
    "memssa-check-limit", cl::Hidden, cl::init(100),
    cl::desc("The maximum number of stores/phis MemorySSA "
             "will consider trying to walk past (default = 100)"));

static cl::opt<bool>
    VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
                    cl::desc("Verify MemorySSA in legacy printer pass."));

namespace llvm {

/// An assembly annotator class to print Memory SSA information in
/// comments.
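///
/// For example, in the printed IR a store is typically preceded by a line
/// such as "; 1 = MemoryDef(liveOnEntry)", and a load that reads from it by
/// "; MemoryUse(1)".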
88 class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { 89 friend class MemorySSA; 90 91 const MemorySSA *MSSA; 92 93 public: 94 MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} 95 96 void emitBasicBlockStartAnnot(const BasicBlock *BB, 97 formatted_raw_ostream &OS) override { 98 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) 99 OS << "; " << *MA << "\n"; 100 } 101 102 void emitInstructionAnnot(const Instruction *I, 103 formatted_raw_ostream &OS) override { 104 if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) 105 OS << "; " << *MA << "\n"; 106 } 107 }; 108 109 } // end namespace llvm 110 111 namespace { 112 113 /// Our current alias analysis API differentiates heavily between calls and 114 /// non-calls, and functions called on one usually assert on the other. 115 /// This class encapsulates the distinction to simplify other code that wants 116 /// "Memory affecting instructions and related data" to use as a key. 117 /// For example, this class is used as a densemap key in the use optimizer. 118 class MemoryLocOrCall { 119 public: 120 bool IsCall = false; 121 122 MemoryLocOrCall() = default; 123 MemoryLocOrCall(MemoryUseOrDef *MUD) 124 : MemoryLocOrCall(MUD->getMemoryInst()) {} 125 MemoryLocOrCall(const MemoryUseOrDef *MUD) 126 : MemoryLocOrCall(MUD->getMemoryInst()) {} 127 128 MemoryLocOrCall(Instruction *Inst) { 129 if (ImmutableCallSite(Inst)) { 130 IsCall = true; 131 CS = ImmutableCallSite(Inst); 132 } else { 133 IsCall = false; 134 // There is no such thing as a memorylocation for a fence inst, and it is 135 // unique in that regard. 136 if (!isa<FenceInst>(Inst)) 137 Loc = MemoryLocation::get(Inst); 138 } 139 } 140 141 explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {} 142 143 ImmutableCallSite getCS() const { 144 assert(IsCall); 145 return CS; 146 } 147 148 MemoryLocation getLoc() const { 149 assert(!IsCall); 150 return Loc; 151 } 152 153 bool operator==(const MemoryLocOrCall &Other) const { 154 if (IsCall != Other.IsCall) 155 return false; 156 157 if (!IsCall) 158 return Loc == Other.Loc; 159 160 if (CS.getCalledValue() != Other.CS.getCalledValue()) 161 return false; 162 163 return CS.arg_size() == Other.CS.arg_size() && 164 std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin()); 165 } 166 167 private: 168 union { 169 ImmutableCallSite CS; 170 MemoryLocation Loc; 171 }; 172 }; 173 174 } // end anonymous namespace 175 176 namespace llvm { 177 178 template <> struct DenseMapInfo<MemoryLocOrCall> { 179 static inline MemoryLocOrCall getEmptyKey() { 180 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); 181 } 182 183 static inline MemoryLocOrCall getTombstoneKey() { 184 return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); 185 } 186 187 static unsigned getHashValue(const MemoryLocOrCall &MLOC) { 188 if (!MLOC.IsCall) 189 return hash_combine( 190 MLOC.IsCall, 191 DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); 192 193 hash_code hash = 194 hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue( 195 MLOC.getCS().getCalledValue())); 196 197 for (const Value *Arg : MLOC.getCS().args()) 198 hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg)); 199 return hash; 200 } 201 202 static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { 203 return LHS == RHS; 204 } 205 }; 206 207 } // end namespace llvm 208 209 /// This does one-way checks to see if Use could theoretically be hoisted above 210 /// MayClobber. This will not check the other way around. 
211 /// 212 /// This assumes that, for the purposes of MemorySSA, Use comes directly after 213 /// MayClobber, with no potentially clobbering operations in between them. 214 /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) 215 static bool areLoadsReorderable(const LoadInst *Use, 216 const LoadInst *MayClobber) { 217 bool VolatileUse = Use->isVolatile(); 218 bool VolatileClobber = MayClobber->isVolatile(); 219 // Volatile operations may never be reordered with other volatile operations. 220 if (VolatileUse && VolatileClobber) 221 return false; 222 // Otherwise, volatile doesn't matter here. From the language reference: 223 // 'optimizers may change the order of volatile operations relative to 224 // non-volatile operations.'" 225 226 // If a load is seq_cst, it cannot be moved above other loads. If its ordering 227 // is weaker, it can be moved above other loads. We just need to be sure that 228 // MayClobber isn't an acquire load, because loads can't be moved above 229 // acquire loads. 230 // 231 // Note that this explicitly *does* allow the free reordering of monotonic (or 232 // weaker) loads of the same address. 233 bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; 234 bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), 235 AtomicOrdering::Acquire); 236 return !(SeqCstUse || MayClobberIsAcquire); 237 } 238 239 namespace { 240 241 struct ClobberAlias { 242 bool IsClobber; 243 Optional<AliasResult> AR; 244 }; 245 246 } // end anonymous namespace 247 248 // Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being 249 // ignored if IsClobber = false. 250 static ClobberAlias instructionClobbersQuery(MemoryDef *MD, 251 const MemoryLocation &UseLoc, 252 const Instruction *UseInst, 253 AliasAnalysis &AA) { 254 Instruction *DefInst = MD->getMemoryInst(); 255 assert(DefInst && "Defining instruction not actually an instruction"); 256 ImmutableCallSite UseCS(UseInst); 257 Optional<AliasResult> AR; 258 259 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { 260 // These intrinsics will show up as affecting memory, but they are just 261 // markers. 262 switch (II->getIntrinsicID()) { 263 case Intrinsic::lifetime_start: 264 if (UseCS) 265 return {false, NoAlias}; 266 AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc); 267 return {AR == MustAlias, AR}; 268 case Intrinsic::lifetime_end: 269 case Intrinsic::invariant_start: 270 case Intrinsic::invariant_end: 271 case Intrinsic::assume: 272 return {false, NoAlias}; 273 default: 274 break; 275 } 276 } 277 278 if (UseCS) { 279 ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); 280 AR = isMustSet(I) ? MustAlias : MayAlias; 281 return {isModOrRefSet(I), AR}; 282 } 283 284 if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) 285 if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) 286 return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias}; 287 288 ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc); 289 AR = isMustSet(I) ? MustAlias : MayAlias; 290 return {isModSet(I), AR}; 291 } 292 293 static ClobberAlias instructionClobbersQuery(MemoryDef *MD, 294 const MemoryUseOrDef *MU, 295 const MemoryLocOrCall &UseMLOC, 296 AliasAnalysis &AA) { 297 // FIXME: This is a temporary hack to allow a single instructionClobbersQuery 298 // to exist while MemoryLocOrCall is pushed through places. 
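  // Calls carry no single MemoryLocation, so hand the query an empty location
  // and let the call-aware path in instructionClobbersQuery handle them.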
  if (UseMLOC.IsCall)
    return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
                                    AA);
  return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
                                  AA);
}

// Return true when MD may alias MU, return false otherwise.
bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
                                        AliasAnalysis &AA) {
  return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber;
}

namespace {

struct UpwardsMemoryQuery {
  // True if our original query started off as a call
  bool IsCall = false;
  // The pointer location we started the query with. This will be empty if
  // IsCall is true.
  MemoryLocation StartingLoc;
  // This is the instruction we were querying about.
  const Instruction *Inst = nullptr;
  // The MemoryAccess we actually got called with, used to test local domination
  const MemoryAccess *OriginalAccess = nullptr;
  Optional<AliasResult> AR = MayAlias;

  UpwardsMemoryQuery() = default;

  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
      : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
    if (!IsCall)
      StartingLoc = MemoryLocation::get(Inst);
  }
};

} // end anonymous namespace

static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
                           AliasAnalysis &AA) {
  Instruction *Inst = MD->getMemoryInst();
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_end:
      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
    default:
      return false;
    }
  }
  return false;
}

static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
                                                   const Instruction *I) {
  // If the memory can't be changed, then loads of the memory can't be
  // clobbered.
  return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
                              AA.pointsToConstantMemory(
                                  cast<LoadInst>(I)->getPointerOperand()));
}

/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
/// in between `Start` and `ClobberAt` can clobber `Start`.
///
/// This is meant to be as simple and self-contained as possible. Because it
/// uses no cache, etc., it can be relatively expensive.
///
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
static void LLVM_ATTRIBUTE_UNUSED
checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
                   const MemoryLocation &StartLoc, const MemorySSA &MSSA,
                   const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
  assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");

  if (MSSA.isLiveOnEntryDef(Start)) {
    assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
           "liveOnEntry must clobber itself");
    return;
  }

  bool FoundClobber = false;
  DenseSet<MemoryAccessPair> VisitedPhis;
  SmallVector<MemoryAccessPair, 8> Worklist;
  Worklist.emplace_back(Start, StartLoc);
  // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
  // is found, complain.
390 while (!Worklist.empty()) { 391 MemoryAccessPair MAP = Worklist.pop_back_val(); 392 // All we care about is that nothing from Start to ClobberAt clobbers Start. 393 // We learn nothing from revisiting nodes. 394 if (!VisitedPhis.insert(MAP).second) 395 continue; 396 397 for (MemoryAccess *MA : def_chain(MAP.first)) { 398 if (MA == ClobberAt) { 399 if (auto *MD = dyn_cast<MemoryDef>(MA)) { 400 // instructionClobbersQuery isn't essentially free, so don't use `|=`, 401 // since it won't let us short-circuit. 402 // 403 // Also, note that this can't be hoisted out of the `Worklist` loop, 404 // since MD may only act as a clobber for 1 of N MemoryLocations. 405 FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD); 406 if (!FoundClobber) { 407 ClobberAlias CA = 408 instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); 409 if (CA.IsClobber) { 410 FoundClobber = true; 411 // Not used: CA.AR; 412 } 413 } 414 } 415 break; 416 } 417 418 // We should never hit liveOnEntry, unless it's the clobber. 419 assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); 420 421 if (auto *MD = dyn_cast<MemoryDef>(MA)) { 422 (void)MD; 423 assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) 424 .IsClobber && 425 "Found clobber before reaching ClobberAt!"); 426 continue; 427 } 428 429 assert(isa<MemoryPhi>(MA)); 430 Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); 431 } 432 } 433 434 // If ClobberAt is a MemoryPhi, we can assume something above it acted as a 435 // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. 436 assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && 437 "ClobberAt never acted as a clobber"); 438 } 439 440 namespace { 441 442 /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up 443 /// in one class. 444 class ClobberWalker { 445 /// Save a few bytes by using unsigned instead of size_t. 446 using ListIndex = unsigned; 447 448 /// Represents a span of contiguous MemoryDefs, potentially ending in a 449 /// MemoryPhi. 450 struct DefPath { 451 MemoryLocation Loc; 452 // Note that, because we always walk in reverse, Last will always dominate 453 // First. Also note that First and Last are inclusive. 454 MemoryAccess *First; 455 MemoryAccess *Last; 456 Optional<ListIndex> Previous; 457 458 DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, 459 Optional<ListIndex> Previous) 460 : Loc(Loc), First(First), Last(Last), Previous(Previous) {} 461 462 DefPath(const MemoryLocation &Loc, MemoryAccess *Init, 463 Optional<ListIndex> Previous) 464 : DefPath(Loc, Init, Init, Previous) {} 465 }; 466 467 const MemorySSA &MSSA; 468 AliasAnalysis &AA; 469 DominatorTree &DT; 470 UpwardsMemoryQuery *Query; 471 472 // Phi optimization bookkeeping 473 SmallVector<DefPath, 32> Paths; 474 DenseSet<ConstMemoryAccessPair> VisitedPhis; 475 476 /// Find the nearest def or phi that `From` can legally be optimized to. 477 const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { 478 assert(From->getNumOperands() && "Phi with no operands?"); 479 480 BasicBlock *BB = From->getBlock(); 481 MemoryAccess *Result = MSSA.getLiveOnEntryDef(); 482 DomTreeNode *Node = DT.getNode(BB); 483 while ((Node = Node->getIDom())) { 484 auto *Defs = MSSA.getBlockDefs(Node->getBlock()); 485 if (Defs) 486 return &*Defs->rbegin(); 487 } 488 return Result; 489 } 490 491 /// Result of calling walkToPhiOrClobber. 492 struct UpwardsWalkResult { 493 /// The "Result" of the walk. 
Either a clobber, the last thing we walked, or 494 /// both. Include alias info when clobber found. 495 MemoryAccess *Result; 496 bool IsKnownClobber; 497 Optional<AliasResult> AR; 498 }; 499 500 /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. 501 /// This will update Desc.Last as it walks. It will (optionally) also stop at 502 /// StopAt. 503 /// 504 /// This does not test for whether StopAt is a clobber 505 UpwardsWalkResult 506 walkToPhiOrClobber(DefPath &Desc, 507 const MemoryAccess *StopAt = nullptr) const { 508 assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); 509 510 for (MemoryAccess *Current : def_chain(Desc.Last)) { 511 Desc.Last = Current; 512 if (Current == StopAt) 513 return {Current, false, MayAlias}; 514 515 if (auto *MD = dyn_cast<MemoryDef>(Current)) { 516 if (MSSA.isLiveOnEntryDef(MD)) 517 return {MD, true, MustAlias}; 518 ClobberAlias CA = 519 instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); 520 if (CA.IsClobber) 521 return {MD, true, CA.AR}; 522 } 523 } 524 525 assert(isa<MemoryPhi>(Desc.Last) && 526 "Ended at a non-clobber that's not a phi?"); 527 return {Desc.Last, false, MayAlias}; 528 } 529 530 void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, 531 ListIndex PriorNode) { 532 auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), 533 upward_defs_end()); 534 for (const MemoryAccessPair &P : UpwardDefs) { 535 PausedSearches.push_back(Paths.size()); 536 Paths.emplace_back(P.second, P.first, PriorNode); 537 } 538 } 539 540 /// Represents a search that terminated after finding a clobber. This clobber 541 /// may or may not be present in the path of defs from LastNode..SearchStart, 542 /// since it may have been retrieved from cache. 543 struct TerminatedPath { 544 MemoryAccess *Clobber; 545 ListIndex LastNode; 546 }; 547 548 /// Get an access that keeps us from optimizing to the given phi. 549 /// 550 /// PausedSearches is an array of indices into the Paths array. Its incoming 551 /// value is the indices of searches that stopped at the last phi optimization 552 /// target. It's left in an unspecified state. 553 /// 554 /// If this returns None, NewPaused is a vector of searches that terminated 555 /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. 556 Optional<TerminatedPath> 557 getBlockingAccess(const MemoryAccess *StopWhere, 558 SmallVectorImpl<ListIndex> &PausedSearches, 559 SmallVectorImpl<ListIndex> &NewPaused, 560 SmallVectorImpl<TerminatedPath> &Terminated) { 561 assert(!PausedSearches.empty() && "No searches to continue?"); 562 563 // BFS vs DFS really doesn't make a difference here, so just do a DFS with 564 // PausedSearches as our stack. 565 while (!PausedSearches.empty()) { 566 ListIndex PathIndex = PausedSearches.pop_back_val(); 567 DefPath &Node = Paths[PathIndex]; 568 569 // If we've already visited this path with this MemoryLocation, we don't 570 // need to do so again. 571 // 572 // NOTE: That we just drop these paths on the ground makes caching 573 // behavior sporadic. e.g. given a diamond: 574 // A 575 // B C 576 // D 577 // 578 // ...If we walk D, B, A, C, we'll only cache the result of phi 579 // optimization for A, B, and D; C will be skipped because it dies here. 
580 // This arguably isn't the worst thing ever, since: 581 // - We generally query things in a top-down order, so if we got below D 582 // without needing cache entries for {C, MemLoc}, then chances are 583 // that those cache entries would end up ultimately unused. 584 // - We still cache things for A, so C only needs to walk up a bit. 585 // If this behavior becomes problematic, we can fix without a ton of extra 586 // work. 587 if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) 588 continue; 589 590 UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); 591 if (Res.IsKnownClobber) { 592 assert(Res.Result != StopWhere); 593 // If this wasn't a cache hit, we hit a clobber when walking. That's a 594 // failure. 595 TerminatedPath Term{Res.Result, PathIndex}; 596 if (!MSSA.dominates(Res.Result, StopWhere)) 597 return Term; 598 599 // Otherwise, it's a valid thing to potentially optimize to. 600 Terminated.push_back(Term); 601 continue; 602 } 603 604 if (Res.Result == StopWhere) { 605 // We've hit our target. Save this path off for if we want to continue 606 // walking. 607 NewPaused.push_back(PathIndex); 608 continue; 609 } 610 611 assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); 612 addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); 613 } 614 615 return None; 616 } 617 618 template <typename T, typename Walker> 619 struct generic_def_path_iterator 620 : public iterator_facade_base<generic_def_path_iterator<T, Walker>, 621 std::forward_iterator_tag, T *> { 622 generic_def_path_iterator() = default; 623 generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} 624 625 T &operator*() const { return curNode(); } 626 627 generic_def_path_iterator &operator++() { 628 N = curNode().Previous; 629 return *this; 630 } 631 632 bool operator==(const generic_def_path_iterator &O) const { 633 if (N.hasValue() != O.N.hasValue()) 634 return false; 635 return !N.hasValue() || *N == *O.N; 636 } 637 638 private: 639 T &curNode() const { return W->Paths[*N]; } 640 641 Walker *W = nullptr; 642 Optional<ListIndex> N = None; 643 }; 644 645 using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; 646 using const_def_path_iterator = 647 generic_def_path_iterator<const DefPath, const ClobberWalker>; 648 649 iterator_range<def_path_iterator> def_path(ListIndex From) { 650 return make_range(def_path_iterator(this, From), def_path_iterator()); 651 } 652 653 iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { 654 return make_range(const_def_path_iterator(this, From), 655 const_def_path_iterator()); 656 } 657 658 struct OptznResult { 659 /// The path that contains our result. 660 TerminatedPath PrimaryClobber; 661 /// The paths that we can legally cache back from, but that aren't 662 /// necessarily the result of the Phi optimization. 663 SmallVector<TerminatedPath, 4> OtherClobbers; 664 }; 665 666 ListIndex defPathIndex(const DefPath &N) const { 667 // The assert looks nicer if we don't need to do &N 668 const DefPath *NP = &N; 669 assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && 670 "Out of bounds DefPath!"); 671 return NP - &Paths.front(); 672 } 673 674 /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths 675 /// that act as legal clobbers. Note that this won't return *all* clobbers. 
676 /// 677 /// Phi optimization algorithm tl;dr: 678 /// - Find the earliest def/phi, A, we can optimize to 679 /// - Find if all paths from the starting memory access ultimately reach A 680 /// - If not, optimization isn't possible. 681 /// - Otherwise, walk from A to another clobber or phi, A'. 682 /// - If A' is a def, we're done. 683 /// - If A' is a phi, try to optimize it. 684 /// 685 /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path 686 /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. 687 OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, 688 const MemoryLocation &Loc) { 689 assert(Paths.empty() && VisitedPhis.empty() && 690 "Reset the optimization state."); 691 692 Paths.emplace_back(Loc, Start, Phi, None); 693 // Stores how many "valid" optimization nodes we had prior to calling 694 // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. 695 auto PriorPathsSize = Paths.size(); 696 697 SmallVector<ListIndex, 16> PausedSearches; 698 SmallVector<ListIndex, 8> NewPaused; 699 SmallVector<TerminatedPath, 4> TerminatedPaths; 700 701 addSearches(Phi, PausedSearches, 0); 702 703 // Moves the TerminatedPath with the "most dominated" Clobber to the end of 704 // Paths. 705 auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { 706 assert(!Paths.empty() && "Need a path to move"); 707 auto Dom = Paths.begin(); 708 for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) 709 if (!MSSA.dominates(I->Clobber, Dom->Clobber)) 710 Dom = I; 711 auto Last = Paths.end() - 1; 712 if (Last != Dom) 713 std::iter_swap(Last, Dom); 714 }; 715 716 MemoryPhi *Current = Phi; 717 while (true) { 718 assert(!MSSA.isLiveOnEntryDef(Current) && 719 "liveOnEntry wasn't treated as a clobber?"); 720 721 const auto *Target = getWalkTarget(Current); 722 // If a TerminatedPath doesn't dominate Target, then it wasn't a legal 723 // optimization for the prior phi. 724 assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { 725 return MSSA.dominates(P.Clobber, Target); 726 })); 727 728 // FIXME: This is broken, because the Blocker may be reported to be 729 // liveOnEntry, and we'll happily wait for that to disappear (read: never) 730 // For the moment, this is fine, since we do nothing with blocker info. 731 if (Optional<TerminatedPath> Blocker = getBlockingAccess( 732 Target, PausedSearches, NewPaused, TerminatedPaths)) { 733 734 // Find the node we started at. We can't search based on N->Last, since 735 // we may have gone around a loop with a different MemoryLocation. 736 auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { 737 return defPathIndex(N) < PriorPathsSize; 738 }); 739 assert(Iter != def_path_iterator()); 740 741 DefPath &CurNode = *Iter; 742 assert(CurNode.Last == Current); 743 744 // Two things: 745 // A. We can't reliably cache all of NewPaused back. Consider a case 746 // where we have two paths in NewPaused; one of which can't optimize 747 // above this phi, whereas the other can. If we cache the second path 748 // back, we'll end up with suboptimal cache entries. We can handle 749 // cases like this a bit better when we either try to find all 750 // clobbers that block phi optimization, or when our cache starts 751 // supporting unfinished searches. 752 // B. 
We can't reliably cache TerminatedPaths back here without doing 753 // extra checks; consider a case like: 754 // T 755 // / \ 756 // D C 757 // \ / 758 // S 759 // Where T is our target, C is a node with a clobber on it, D is a 760 // diamond (with a clobber *only* on the left or right node, N), and 761 // S is our start. Say we walk to D, through the node opposite N 762 // (read: ignoring the clobber), and see a cache entry in the top 763 // node of D. That cache entry gets put into TerminatedPaths. We then 764 // walk up to C (N is later in our worklist), find the clobber, and 765 // quit. If we append TerminatedPaths to OtherClobbers, we'll cache 766 // the bottom part of D to the cached clobber, ignoring the clobber 767 // in N. Again, this problem goes away if we start tracking all 768 // blockers for a given phi optimization. 769 TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; 770 return {Result, {}}; 771 } 772 773 // If there's nothing left to search, then all paths led to valid clobbers 774 // that we got from our cache; pick the nearest to the start, and allow 775 // the rest to be cached back. 776 if (NewPaused.empty()) { 777 MoveDominatedPathToEnd(TerminatedPaths); 778 TerminatedPath Result = TerminatedPaths.pop_back_val(); 779 return {Result, std::move(TerminatedPaths)}; 780 } 781 782 MemoryAccess *DefChainEnd = nullptr; 783 SmallVector<TerminatedPath, 4> Clobbers; 784 for (ListIndex Paused : NewPaused) { 785 UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); 786 if (WR.IsKnownClobber) 787 Clobbers.push_back({WR.Result, Paused}); 788 else 789 // Micro-opt: If we hit the end of the chain, save it. 790 DefChainEnd = WR.Result; 791 } 792 793 if (!TerminatedPaths.empty()) { 794 // If we couldn't find the dominating phi/liveOnEntry in the above loop, 795 // do it now. 796 if (!DefChainEnd) 797 for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) 798 DefChainEnd = MA; 799 800 // If any of the terminated paths don't dominate the phi we'll try to 801 // optimize, we need to figure out what they are and quit. 802 const BasicBlock *ChainBB = DefChainEnd->getBlock(); 803 for (const TerminatedPath &TP : TerminatedPaths) { 804 // Because we know that DefChainEnd is as "high" as we can go, we 805 // don't need local dominance checks; BB dominance is sufficient. 806 if (DT.dominates(ChainBB, TP.Clobber->getBlock())) 807 Clobbers.push_back(TP); 808 } 809 } 810 811 // If we have clobbers in the def chain, find the one closest to Current 812 // and quit. 813 if (!Clobbers.empty()) { 814 MoveDominatedPathToEnd(Clobbers); 815 TerminatedPath Result = Clobbers.pop_back_val(); 816 return {Result, std::move(Clobbers)}; 817 } 818 819 assert(all_of(NewPaused, 820 [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); 821 822 // Because liveOnEntry is a clobber, this must be a phi. 
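      // Set up the next round of the walk: remember how many paths were valid
      // before this iteration, then re-seed the paused searches from every
      // path that reached the end of the def chain.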
823 auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); 824 825 PriorPathsSize = Paths.size(); 826 PausedSearches.clear(); 827 for (ListIndex I : NewPaused) 828 addSearches(DefChainPhi, PausedSearches, I); 829 NewPaused.clear(); 830 831 Current = DefChainPhi; 832 } 833 } 834 835 void verifyOptResult(const OptznResult &R) const { 836 assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { 837 return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); 838 })); 839 } 840 841 void resetPhiOptznState() { 842 Paths.clear(); 843 VisitedPhis.clear(); 844 } 845 846 public: 847 ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) 848 : MSSA(MSSA), AA(AA), DT(DT) {} 849 850 /// Finds the nearest clobber for the given query, optimizing phis if 851 /// possible. 852 MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { 853 Query = &Q; 854 855 MemoryAccess *Current = Start; 856 // This walker pretends uses don't exist. If we're handed one, silently grab 857 // its def. (This has the nice side-effect of ensuring we never cache uses) 858 if (auto *MU = dyn_cast<MemoryUse>(Start)) 859 Current = MU->getDefiningAccess(); 860 861 DefPath FirstDesc(Q.StartingLoc, Current, Current, None); 862 // Fast path for the overly-common case (no crazy phi optimization 863 // necessary) 864 UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); 865 MemoryAccess *Result; 866 if (WalkResult.IsKnownClobber) { 867 Result = WalkResult.Result; 868 Q.AR = WalkResult.AR; 869 } else { 870 OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), 871 Current, Q.StartingLoc); 872 verifyOptResult(OptRes); 873 resetPhiOptznState(); 874 Result = OptRes.PrimaryClobber.Clobber; 875 } 876 877 #ifdef EXPENSIVE_CHECKS 878 checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); 879 #endif 880 return Result; 881 } 882 883 void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } 884 }; 885 886 struct RenamePassData { 887 DomTreeNode *DTN; 888 DomTreeNode::const_iterator ChildIt; 889 MemoryAccess *IncomingVal; 890 891 RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, 892 MemoryAccess *M) 893 : DTN(D), ChildIt(It), IncomingVal(M) {} 894 895 void swap(RenamePassData &RHS) { 896 std::swap(DTN, RHS.DTN); 897 std::swap(ChildIt, RHS.ChildIt); 898 std::swap(IncomingVal, RHS.IncomingVal); 899 } 900 }; 901 902 } // end anonymous namespace 903 904 namespace llvm { 905 906 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no 907 /// longer does caching on its own, but the name has been retained for the 908 /// moment. 
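///
/// Typical usage (illustrative): a client asks the walker, via
/// MSSA->getWalker()->getClobberingMemoryAccess(I), for the nearest access
/// that actually clobbers the memory read or written by instruction I.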
909 class MemorySSA::CachingWalker final : public MemorySSAWalker { 910 ClobberWalker Walker; 911 912 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); 913 914 public: 915 CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); 916 ~CachingWalker() override = default; 917 918 using MemorySSAWalker::getClobberingMemoryAccess; 919 920 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; 921 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, 922 const MemoryLocation &) override; 923 void invalidateInfo(MemoryAccess *) override; 924 925 void verify(const MemorySSA *MSSA) override { 926 MemorySSAWalker::verify(MSSA); 927 Walker.verify(MSSA); 928 } 929 }; 930 931 } // end namespace llvm 932 933 void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, 934 bool RenameAllUses) { 935 // Pass through values to our successors 936 for (const BasicBlock *S : successors(BB)) { 937 auto It = PerBlockAccesses.find(S); 938 // Rename the phi nodes in our successor block 939 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 940 continue; 941 AccessList *Accesses = It->second.get(); 942 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 943 if (RenameAllUses) { 944 int PhiIndex = Phi->getBasicBlockIndex(BB); 945 assert(PhiIndex != -1 && "Incomplete phi during partial rename"); 946 Phi->setIncomingValue(PhiIndex, IncomingVal); 947 } else 948 Phi->addIncoming(IncomingVal, BB); 949 } 950 } 951 952 /// Rename a single basic block into MemorySSA form. 953 /// Uses the standard SSA renaming algorithm. 954 /// \returns The new incoming value. 955 MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, 956 bool RenameAllUses) { 957 auto It = PerBlockAccesses.find(BB); 958 // Skip most processing if the list is empty. 959 if (It != PerBlockAccesses.end()) { 960 AccessList *Accesses = It->second.get(); 961 for (MemoryAccess &L : *Accesses) { 962 if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { 963 if (MUD->getDefiningAccess() == nullptr || RenameAllUses) 964 MUD->setDefiningAccess(IncomingVal); 965 if (isa<MemoryDef>(&L)) 966 IncomingVal = &L; 967 } else { 968 IncomingVal = &L; 969 } 970 } 971 } 972 return IncomingVal; 973 } 974 975 /// This is the standard SSA renaming algorithm. 976 /// 977 /// We walk the dominator tree in preorder, renaming accesses, and then filling 978 /// in phi nodes in our successors. 979 void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, 980 SmallPtrSetImpl<BasicBlock *> &Visited, 981 bool SkipVisited, bool RenameAllUses) { 982 SmallVector<RenamePassData, 32> WorkStack; 983 // Skip everything if we already renamed this block and we are skipping. 984 // Note: You can't sink this into the if, because we need it to occur 985 // regardless of whether we skip blocks or not. 
986 bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; 987 if (SkipVisited && AlreadyVisited) 988 return; 989 990 IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); 991 renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); 992 WorkStack.push_back({Root, Root->begin(), IncomingVal}); 993 994 while (!WorkStack.empty()) { 995 DomTreeNode *Node = WorkStack.back().DTN; 996 DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; 997 IncomingVal = WorkStack.back().IncomingVal; 998 999 if (ChildIt == Node->end()) { 1000 WorkStack.pop_back(); 1001 } else { 1002 DomTreeNode *Child = *ChildIt; 1003 ++WorkStack.back().ChildIt; 1004 BasicBlock *BB = Child->getBlock(); 1005 // Note: You can't sink this into the if, because we need it to occur 1006 // regardless of whether we skip blocks or not. 1007 AlreadyVisited = !Visited.insert(BB).second; 1008 if (SkipVisited && AlreadyVisited) { 1009 // We already visited this during our renaming, which can happen when 1010 // being asked to rename multiple blocks. Figure out the incoming val, 1011 // which is the last def. 1012 // Incoming value can only change if there is a block def, and in that 1013 // case, it's the last block def in the list. 1014 if (auto *BlockDefs = getWritableBlockDefs(BB)) 1015 IncomingVal = &*BlockDefs->rbegin(); 1016 } else 1017 IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); 1018 renameSuccessorPhis(BB, IncomingVal, RenameAllUses); 1019 WorkStack.push_back({Child, Child->begin(), IncomingVal}); 1020 } 1021 } 1022 } 1023 1024 /// This handles unreachable block accesses by deleting phi nodes in 1025 /// unreachable blocks, and marking all other unreachable MemoryAccess's as 1026 /// being uses of the live on entry definition. 1027 void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { 1028 assert(!DT->isReachableFromEntry(BB) && 1029 "Reachable block found while handling unreachable blocks"); 1030 1031 // Make sure phi nodes in our reachable successors end up with a 1032 // LiveOnEntryDef for our incoming edge, even though our block is forward 1033 // unreachable. We could just disconnect these blocks from the CFG fully, 1034 // but we do not right now. 1035 for (const BasicBlock *S : successors(BB)) { 1036 if (!DT->isReachableFromEntry(S)) 1037 continue; 1038 auto It = PerBlockAccesses.find(S); 1039 // Rename the phi nodes in our successor block 1040 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) 1041 continue; 1042 AccessList *Accesses = It->second.get(); 1043 auto *Phi = cast<MemoryPhi>(&Accesses->front()); 1044 Phi->addIncoming(LiveOnEntryDef.get(), BB); 1045 } 1046 1047 auto It = PerBlockAccesses.find(BB); 1048 if (It == PerBlockAccesses.end()) 1049 return; 1050 1051 auto &Accesses = It->second; 1052 for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { 1053 auto Next = std::next(AI); 1054 // If we have a phi, just remove it. We are going to replace all 1055 // users with live on entry. 
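    // Uses and defs, by contrast, are kept and simply retargeted to point at
    // liveOnEntry.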
1056 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) 1057 UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); 1058 else 1059 Accesses->erase(AI); 1060 AI = Next; 1061 } 1062 } 1063 1064 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) 1065 : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), 1066 NextID(0) { 1067 buildMemorySSA(); 1068 } 1069 1070 MemorySSA::~MemorySSA() { 1071 // Drop all our references 1072 for (const auto &Pair : PerBlockAccesses) 1073 for (MemoryAccess &MA : *Pair.second) 1074 MA.dropAllReferences(); 1075 } 1076 1077 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { 1078 auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); 1079 1080 if (Res.second) 1081 Res.first->second = llvm::make_unique<AccessList>(); 1082 return Res.first->second.get(); 1083 } 1084 1085 MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { 1086 auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); 1087 1088 if (Res.second) 1089 Res.first->second = llvm::make_unique<DefsList>(); 1090 return Res.first->second.get(); 1091 } 1092 1093 namespace llvm { 1094 1095 /// This class is a batch walker of all MemoryUse's in the program, and points 1096 /// their defining access at the thing that actually clobbers them. Because it 1097 /// is a batch walker that touches everything, it does not operate like the 1098 /// other walkers. This walker is basically performing a top-down SSA renaming 1099 /// pass, where the version stack is used as the cache. This enables it to be 1100 /// significantly more time and memory efficient than using the regular walker, 1101 /// which is walking bottom-up. 1102 class MemorySSA::OptimizeUses { 1103 public: 1104 OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, 1105 DominatorTree *DT) 1106 : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { 1107 Walker = MSSA->getWalker(); 1108 } 1109 1110 void optimizeUses(); 1111 1112 private: 1113 /// This represents where a given memorylocation is in the stack. 1114 struct MemlocStackInfo { 1115 // This essentially is keeping track of versions of the stack. Whenever 1116 // the stack changes due to pushes or pops, these versions increase. 1117 unsigned long StackEpoch; 1118 unsigned long PopEpoch; 1119 // This is the lower bound of places on the stack to check. It is equal to 1120 // the place the last stack walk ended. 1121 // Note: Correctness depends on this being initialized to 0, which densemap 1122 // does 1123 unsigned long LowerBound; 1124 const BasicBlock *LowerBoundBlock; 1125 // This is where the last walk for this memory location ended. 1126 unsigned long LastKill; 1127 bool LastKillValid; 1128 Optional<AliasResult> AR; 1129 }; 1130 1131 void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &, 1132 SmallVectorImpl<MemoryAccess *> &, 1133 DenseMap<MemoryLocOrCall, MemlocStackInfo> &); 1134 1135 MemorySSA *MSSA; 1136 MemorySSAWalker *Walker; 1137 AliasAnalysis *AA; 1138 DominatorTree *DT; 1139 }; 1140 1141 } // end namespace llvm 1142 1143 /// Optimize the uses in a given block This is basically the SSA renaming 1144 /// algorithm, with one caveat: We are able to use a single stack for all 1145 /// MemoryUses. This is because the set of *possible* reaching MemoryDefs is 1146 /// the same for every MemoryUse. The *actual* clobbering MemoryDef is just 1147 /// going to be some position in that stack of possible ones. 
///
/// We track the stack positions that each MemoryLocation needs to check, and
/// where its last walk ended. This is because we only want to check the
/// things that changed since last time. The same MemoryLocation should
/// get clobbered by the same store (getModRefInfo does not use invariantness
/// or things like this, and if it starts to, we can modify MemoryLocOrCall to
/// include the relevant data).
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
    SmallVectorImpl<MemoryAccess *> &VersionStack,
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {

  // If no accesses, nothing to do.
  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
  if (Accesses == nullptr)
    return;

  // Pop everything that doesn't dominate the current block off the stack,
  // increment the PopEpoch to account for this.
  while (true) {
    assert(
        !VersionStack.empty() &&
        "Version stack should have liveOnEntry sentinel dominating everything");
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
    if (DT->dominates(BackBlock, BB))
      break;
    while (VersionStack.back()->getBlock() == BackBlock)
      VersionStack.pop_back();
    ++PopEpoch;
  }

  for (MemoryAccess &MA : *Accesses) {
    auto *MU = dyn_cast<MemoryUse>(&MA);
    if (!MU) {
      VersionStack.push_back(&MA);
      ++StackEpoch;
      continue;
    }

    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
      continue;
    }

    MemoryLocOrCall UseMLOC(MU);
    auto &LocInfo = LocStackInfo[UseMLOC];
    // If the pop epoch changed, it means we've removed stuff from the top of
    // the stack due to changing blocks. We may have to reset the lower bound
    // or last kill info.
    if (LocInfo.PopEpoch != PopEpoch) {
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
      // If the lower bound was in something that no longer dominates us, we
      // have to reset it.
      // We can't simply track stack size, because the stack may have had
      // pushes/pops in the meantime.
      // XXX: This is non-optimal, but only in slower cases with heavily
      // branching dominator trees. To get the optimal number of queries we
      // would have to make lowerbound and lastkill a per-loc stack, and pop
      // it until the top of that stack dominates us. This does not seem worth
      // it ATM.
      // A much cheaper optimization would be to always explore the deepest
      // branch of the dominator tree first. This will guarantee this resets on
      // the smallest set of blocks.
      if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
          !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
        // Reset the lower bound of things to check.
        // TODO: Some day we should be able to reset to last kill, rather than
        // 0.
        LocInfo.LowerBound = 0;
        LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
        LocInfo.LastKillValid = false;
      }
    } else if (LocInfo.StackEpoch != StackEpoch) {
      // If all that has changed is the StackEpoch, we only have to check the
      // new things on the stack, because we've checked everything before. In
      // this case, the lower bound of things to check remains the same.
1224 LocInfo.PopEpoch = PopEpoch; 1225 LocInfo.StackEpoch = StackEpoch; 1226 } 1227 if (!LocInfo.LastKillValid) { 1228 LocInfo.LastKill = VersionStack.size() - 1; 1229 LocInfo.LastKillValid = true; 1230 LocInfo.AR = MayAlias; 1231 } 1232 1233 // At this point, we should have corrected last kill and LowerBound to be 1234 // in bounds. 1235 assert(LocInfo.LowerBound < VersionStack.size() && 1236 "Lower bound out of range"); 1237 assert(LocInfo.LastKill < VersionStack.size() && 1238 "Last kill info out of range"); 1239 // In any case, the new upper bound is the top of the stack. 1240 unsigned long UpperBound = VersionStack.size() - 1; 1241 1242 if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { 1243 LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" 1244 << *(MU->getMemoryInst()) << ")" 1245 << " because there are " 1246 << UpperBound - LocInfo.LowerBound 1247 << " stores to disambiguate\n"); 1248 // Because we did not walk, LastKill is no longer valid, as this may 1249 // have been a kill. 1250 LocInfo.LastKillValid = false; 1251 continue; 1252 } 1253 bool FoundClobberResult = false; 1254 while (UpperBound > LocInfo.LowerBound) { 1255 if (isa<MemoryPhi>(VersionStack[UpperBound])) { 1256 // For phis, use the walker, see where we ended up, go there 1257 Instruction *UseInst = MU->getMemoryInst(); 1258 MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); 1259 // We are guaranteed to find it or something is wrong 1260 while (VersionStack[UpperBound] != Result) { 1261 assert(UpperBound != 0); 1262 --UpperBound; 1263 } 1264 FoundClobberResult = true; 1265 break; 1266 } 1267 1268 MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); 1269 // If the lifetime of the pointer ends at this instruction, it's live on 1270 // entry. 1271 if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { 1272 // Reset UpperBound to liveOnEntryDef's place in the stack 1273 UpperBound = 0; 1274 FoundClobberResult = true; 1275 LocInfo.AR = MustAlias; 1276 break; 1277 } 1278 ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); 1279 if (CA.IsClobber) { 1280 FoundClobberResult = true; 1281 LocInfo.AR = CA.AR; 1282 break; 1283 } 1284 --UpperBound; 1285 } 1286 1287 // Note: Phis always have AliasResult AR set to MayAlias ATM. 1288 1289 // At the end of this loop, UpperBound is either a clobber, or lower bound 1290 // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. 1291 if (FoundClobberResult || UpperBound < LocInfo.LastKill) { 1292 // We were last killed now by where we got to 1293 if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound])) 1294 LocInfo.AR = None; 1295 MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR); 1296 LocInfo.LastKill = UpperBound; 1297 } else { 1298 // Otherwise, we checked all the new ones, and now we know we can get to 1299 // LastKill. 1300 MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR); 1301 } 1302 LocInfo.LowerBound = VersionStack.size() - 1; 1303 LocInfo.LowerBoundBlock = BB; 1304 } 1305 } 1306 1307 /// Optimize uses to point to their actual clobbering definitions. 1308 void MemorySSA::OptimizeUses::optimizeUses() { 1309 SmallVector<MemoryAccess *, 16> VersionStack; 1310 DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; 1311 VersionStack.push_back(MSSA->getLiveOnEntryDef()); 1312 1313 unsigned long StackEpoch = 1; 1314 unsigned long PopEpoch = 1; 1315 // We perform a non-recursive top-down dominator tree walk. 
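  // depth_first visits each dominator tree node before any of its children, so
  // every block is processed with a VersionStack that already reflects all of
  // its dominators.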
1316 for (const auto *DomNode : depth_first(DT->getRootNode())) 1317 optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, 1318 LocStackInfo); 1319 } 1320 1321 void MemorySSA::placePHINodes( 1322 const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) { 1323 // Determine where our MemoryPhi's should go 1324 ForwardIDFCalculator IDFs(*DT); 1325 IDFs.setDefiningBlocks(DefiningBlocks); 1326 SmallVector<BasicBlock *, 32> IDFBlocks; 1327 IDFs.calculate(IDFBlocks); 1328 1329 // Now place MemoryPhi nodes. 1330 for (auto &BB : IDFBlocks) 1331 createMemoryPhi(BB); 1332 } 1333 1334 void MemorySSA::buildMemorySSA() { 1335 // We create an access to represent "live on entry", for things like 1336 // arguments or users of globals, where the memory they use is defined before 1337 // the beginning of the function. We do not actually insert it into the IR. 1338 // We do not define a live on exit for the immediate uses, and thus our 1339 // semantics do *not* imply that something with no immediate uses can simply 1340 // be removed. 1341 BasicBlock &StartingPoint = F.getEntryBlock(); 1342 LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr, 1343 &StartingPoint, NextID++)); 1344 1345 // We maintain lists of memory accesses per-block, trading memory for time. We 1346 // could just look up the memory access for every possible instruction in the 1347 // stream. 1348 SmallPtrSet<BasicBlock *, 32> DefiningBlocks; 1349 // Go through each block, figure out where defs occur, and chain together all 1350 // the accesses. 1351 for (BasicBlock &B : F) { 1352 bool InsertIntoDef = false; 1353 AccessList *Accesses = nullptr; 1354 DefsList *Defs = nullptr; 1355 for (Instruction &I : B) { 1356 MemoryUseOrDef *MUD = createNewAccess(&I); 1357 if (!MUD) 1358 continue; 1359 1360 if (!Accesses) 1361 Accesses = getOrCreateAccessList(&B); 1362 Accesses->push_back(MUD); 1363 if (isa<MemoryDef>(MUD)) { 1364 InsertIntoDef = true; 1365 if (!Defs) 1366 Defs = getOrCreateDefsList(&B); 1367 Defs->push_back(*MUD); 1368 } 1369 } 1370 if (InsertIntoDef) 1371 DefiningBlocks.insert(&B); 1372 } 1373 placePHINodes(DefiningBlocks); 1374 1375 // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get 1376 // filled in with all blocks. 1377 SmallPtrSet<BasicBlock *, 16> Visited; 1378 renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); 1379 1380 CachingWalker *Walker = getWalkerImpl(); 1381 1382 OptimizeUses(this, Walker, AA, DT).optimizeUses(); 1383 1384 // Mark the uses in unreachable blocks as live on entry, so that they go 1385 // somewhere. 1386 for (auto &BB : F) 1387 if (!Visited.count(&BB)) 1388 markUnreachableAsLiveOnEntry(&BB); 1389 } 1390 1391 MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } 1392 1393 MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { 1394 if (Walker) 1395 return Walker.get(); 1396 1397 Walker = llvm::make_unique<CachingWalker>(this, AA, DT); 1398 return Walker.get(); 1399 } 1400 1401 // This is a helper function used by the creation routines. It places NewAccess 1402 // into the access and defs lists for a given basic block, at the given 1403 // insertion point. 1404 void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess, 1405 const BasicBlock *BB, 1406 InsertionPlace Point) { 1407 auto *Accesses = getOrCreateAccessList(BB); 1408 if (Point == Beginning) { 1409 // If it's a phi node, it goes first, otherwise, it goes after any phi 1410 // nodes. 
1411 if (isa<MemoryPhi>(NewAccess)) { 1412 Accesses->push_front(NewAccess); 1413 auto *Defs = getOrCreateDefsList(BB); 1414 Defs->push_front(*NewAccess); 1415 } else { 1416 auto AI = find_if_not( 1417 *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); 1418 Accesses->insert(AI, NewAccess); 1419 if (!isa<MemoryUse>(NewAccess)) { 1420 auto *Defs = getOrCreateDefsList(BB); 1421 auto DI = find_if_not( 1422 *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); 1423 Defs->insert(DI, *NewAccess); 1424 } 1425 } 1426 } else { 1427 Accesses->push_back(NewAccess); 1428 if (!isa<MemoryUse>(NewAccess)) { 1429 auto *Defs = getOrCreateDefsList(BB); 1430 Defs->push_back(*NewAccess); 1431 } 1432 } 1433 BlockNumberingValid.erase(BB); 1434 } 1435 1436 void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB, 1437 AccessList::iterator InsertPt) { 1438 auto *Accesses = getWritableBlockAccesses(BB); 1439 bool WasEnd = InsertPt == Accesses->end(); 1440 Accesses->insert(AccessList::iterator(InsertPt), What); 1441 if (!isa<MemoryUse>(What)) { 1442 auto *Defs = getOrCreateDefsList(BB); 1443 // If we got asked to insert at the end, we have an easy job, just shove it 1444 // at the end. If we got asked to insert before an existing def, we also get 1445 // an iterator. If we got asked to insert before a use, we have to hunt for 1446 // the next def. 1447 if (WasEnd) { 1448 Defs->push_back(*What); 1449 } else if (isa<MemoryDef>(InsertPt)) { 1450 Defs->insert(InsertPt->getDefsIterator(), *What); 1451 } else { 1452 while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt)) 1453 ++InsertPt; 1454 // Either we found a def, or we are inserting at the end 1455 if (InsertPt == Accesses->end()) 1456 Defs->push_back(*What); 1457 else 1458 Defs->insert(InsertPt->getDefsIterator(), *What); 1459 } 1460 } 1461 BlockNumberingValid.erase(BB); 1462 } 1463 1464 // Move What before Where in the IR. The end result is that What will belong to 1465 // the right lists and have the right Block set, but will not otherwise be 1466 // correct. It will not have the right defining access, and if it is a def, 1467 // things below it will not properly be updated. 1468 void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, 1469 AccessList::iterator Where) { 1470 // Keep it in the lookup tables, remove from the lists 1471 removeFromLists(What, false); 1472 What->setBlock(BB); 1473 insertIntoListsBefore(What, BB, Where); 1474 } 1475 1476 void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB, 1477 InsertionPlace Point) { 1478 if (isa<MemoryPhi>(What)) { 1479 assert(Point == Beginning && 1480 "Can only move a Phi at the beginning of the block"); 1481 // Update lookup table entry 1482 ValueToMemoryAccess.erase(What->getBlock()); 1483 bool Inserted = ValueToMemoryAccess.insert({BB, What}).second; 1484 (void)Inserted; 1485 assert(Inserted && "Cannot move a Phi to a block that already has one"); 1486 } 1487 1488 removeFromLists(What, false); 1489 What->setBlock(BB); 1490 insertIntoListsForBlock(What, BB, Point); 1491 } 1492 1493 MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { 1494 assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB"); 1495 MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); 1496 // Phi's always are placed at the front of the block. 
  insertIntoListsForBlock(Phi, BB, Beginning);
  ValueToMemoryAccess[BB] = Phi;
  return Phi;
}

MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
                                               MemoryAccess *Definition) {
  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
  MemoryUseOrDef *NewAccess = createNewAccess(I);
  assert(
      NewAccess != nullptr &&
      "Tried to create a memory access for a non-memory touching instruction");
  NewAccess->setDefiningAccess(Definition);
  return NewAccess;
}

// Return true if the instruction has ordering constraints.
// Note specifically that this only considers stores and loads
// because others are still considered ModRef by getModRefInfo.
static inline bool isOrdered(const Instruction *I) {
  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (!SI->isUnordered())
      return true;
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isUnordered())
      return true;
  }
  return false;
}

/// Helper function to create new memory accesses.
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
  // The assume intrinsic has a control dependency which we model by claiming
  // that it writes arbitrarily. Ignore that fake memory dependency here.
  // FIXME: Replace this special casing with a more accurate modelling of
  // assume's control dependency.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
    if (II->getIntrinsicID() == Intrinsic::assume)
      return nullptr;

  // Find out what effect this instruction has on memory.
  ModRefInfo ModRef = AA->getModRefInfo(I, None);
  // The isOrdered check is used to ensure that volatiles end up as defs
  // (atomics end up as ModRef right now anyway). Until we separate the
  // ordering chain from the memory chain, this enables people to see at least
  // some relative ordering to volatiles. Note that getClobberingMemoryAccess
  // will still give an answer that bypasses other volatile loads. TODO:
  // Separate memory aliasing and ordering into two different chains so that we
  // can precisely represent both "what memory will this read/write/is
  // clobbered by" and "what instructions can I move this past".
  bool Def = isModSet(ModRef) || isOrdered(I);
  bool Use = isRefSet(ModRef);

  // It's possible for an instruction to not modify memory at all. During
  // construction, we ignore such instructions.
  if (!Def && !Use)
    return nullptr;

  MemoryUseOrDef *MUD;
  if (Def)
    MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
  else
    MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
  ValueToMemoryAccess[I] = MUD;
  return MUD;
}

/// Returns true if \p Replacer dominates \p Replacee.
bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
                             const MemoryAccess *Replacee) const {
  if (isa<MemoryUseOrDef>(Replacee))
    return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
  const auto *MP = cast<MemoryPhi>(Replacee);
  // For a phi node, the use occurs in the predecessor block of the phi node.
  // Since we may occur multiple times in the phi node, we have to check each
  // operand to ensure Replacer dominates each operand where Replacee occurs.
1573 for (const Use &Arg : MP->operands()) { 1574 if (Arg.get() != Replacee && 1575 !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg))) 1576 return false; 1577 } 1578 return true; 1579 } 1580 1581 /// Properly remove \p MA from all of MemorySSA's lookup tables. 1582 void MemorySSA::removeFromLookups(MemoryAccess *MA) { 1583 assert(MA->use_empty() && 1584 "Trying to remove memory access that still has uses"); 1585 BlockNumbering.erase(MA); 1586 if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) 1587 MUD->setDefiningAccess(nullptr); 1588 // Invalidate our walker's cache if necessary 1589 if (!isa<MemoryUse>(MA)) 1590 Walker->invalidateInfo(MA); 1591 1592 Value *MemoryInst; 1593 if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) 1594 MemoryInst = MUD->getMemoryInst(); 1595 else 1596 MemoryInst = MA->getBlock(); 1597 1598 auto VMA = ValueToMemoryAccess.find(MemoryInst); 1599 if (VMA->second == MA) 1600 ValueToMemoryAccess.erase(VMA); 1601 } 1602 1603 /// Properly remove \p MA from all of MemorySSA's lists. 1604 /// 1605 /// Because of the way the intrusive list and use lists work, it is important to 1606 /// do removal in the right order. 1607 /// ShouldDelete defaults to true, and will cause the memory access to also be 1608 /// deleted, not just removed. 1609 void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) { 1610 BasicBlock *BB = MA->getBlock(); 1611 // The access list owns the reference, so we erase it from the non-owning list 1612 // first. 1613 if (!isa<MemoryUse>(MA)) { 1614 auto DefsIt = PerBlockDefs.find(BB); 1615 std::unique_ptr<DefsList> &Defs = DefsIt->second; 1616 Defs->remove(*MA); 1617 if (Defs->empty()) 1618 PerBlockDefs.erase(DefsIt); 1619 } 1620 1621 // The erase call here will delete it. If we don't want it deleted, we call 1622 // remove instead. 1623 auto AccessIt = PerBlockAccesses.find(BB); 1624 std::unique_ptr<AccessList> &Accesses = AccessIt->second; 1625 if (ShouldDelete) 1626 Accesses->erase(MA); 1627 else 1628 Accesses->remove(MA); 1629 1630 if (Accesses->empty()) { 1631 PerBlockAccesses.erase(AccessIt); 1632 BlockNumberingValid.erase(BB); 1633 } 1634 } 1635 1636 void MemorySSA::print(raw_ostream &OS) const { 1637 MemorySSAAnnotatedWriter Writer(this); 1638 F.print(OS, &Writer); 1639 } 1640 1641 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1642 LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); } 1643 #endif 1644 1645 void MemorySSA::verifyMemorySSA() const { 1646 verifyDefUses(F); 1647 verifyDomination(F); 1648 verifyOrdering(F); 1649 verifyDominationNumbers(F); 1650 Walker->verify(this); 1651 } 1652 1653 /// Verify that all of the blocks we believe to have valid domination numbers 1654 /// actually have valid domination numbers. 1655 void MemorySSA::verifyDominationNumbers(const Function &F) const { 1656 #ifndef NDEBUG 1657 if (BlockNumberingValid.empty()) 1658 return; 1659 1660 SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid; 1661 for (const BasicBlock &BB : F) { 1662 if (!ValidBlocks.count(&BB)) 1663 continue; 1664 1665 ValidBlocks.erase(&BB); 1666 1667 const AccessList *Accesses = getBlockAccesses(&BB); 1668 // It's correct to say an empty block has valid numbering. 1669 if (!Accesses) 1670 continue; 1671 1672 // Block numbering starts at 1. 
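// Starting LastNumber at 0 therefore lets the first access (numbered >= 1)
// trivially satisfy the strictly-increasing check below.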
1673 unsigned long LastNumber = 0; 1674 for (const MemoryAccess &MA : *Accesses) { 1675 auto ThisNumberIter = BlockNumbering.find(&MA); 1676 assert(ThisNumberIter != BlockNumbering.end() && 1677 "MemoryAccess has no domination number in a valid block!"); 1678 1679 unsigned long ThisNumber = ThisNumberIter->second; 1680 assert(ThisNumber > LastNumber && 1681 "Domination numbers should be strictly increasing!"); 1682 LastNumber = ThisNumber; 1683 } 1684 } 1685 1686 assert(ValidBlocks.empty() && 1687 "All valid BasicBlocks should exist in F -- dangling pointers?"); 1688 #endif 1689 } 1690 1691 /// Verify that the order and existence of MemoryAccesses matches the 1692 /// order and existence of memory affecting instructions. 1693 void MemorySSA::verifyOrdering(Function &F) const { 1694 // Walk all the blocks, comparing what the lookups think and what the access 1695 // lists think, as well as the order in the blocks vs the order in the access 1696 // lists. 1697 SmallVector<MemoryAccess *, 32> ActualAccesses; 1698 SmallVector<MemoryAccess *, 32> ActualDefs; 1699 for (BasicBlock &B : F) { 1700 const AccessList *AL = getBlockAccesses(&B); 1701 const auto *DL = getBlockDefs(&B); 1702 MemoryAccess *Phi = getMemoryAccess(&B); 1703 if (Phi) { 1704 ActualAccesses.push_back(Phi); 1705 ActualDefs.push_back(Phi); 1706 } 1707 1708 for (Instruction &I : B) { 1709 MemoryAccess *MA = getMemoryAccess(&I); 1710 assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) && 1711 "We have memory affecting instructions " 1712 "in this block but they are not in the " 1713 "access list or defs list"); 1714 if (MA) { 1715 ActualAccesses.push_back(MA); 1716 if (isa<MemoryDef>(MA)) 1717 ActualDefs.push_back(MA); 1718 } 1719 } 1720 // Either we hit the assert, really have no accesses, or we have both 1721 // accesses and an access list. 1722 // Same with defs. 1723 if (!AL && !DL) 1724 continue; 1725 assert(AL->size() == ActualAccesses.size() && 1726 "We don't have the same number of accesses in the block as on the " 1727 "access list"); 1728 assert((DL || ActualDefs.size() == 0) && 1729 "Either we should have a defs list, or we should have no defs"); 1730 assert((!DL || DL->size() == ActualDefs.size()) && 1731 "We don't have the same number of defs in the block as on the " 1732 "def list"); 1733 auto ALI = AL->begin(); 1734 auto AAI = ActualAccesses.begin(); 1735 while (ALI != AL->end() && AAI != ActualAccesses.end()) { 1736 assert(&*ALI == *AAI && "Not the same accesses in the same order"); 1737 ++ALI; 1738 ++AAI; 1739 } 1740 ActualAccesses.clear(); 1741 if (DL) { 1742 auto DLI = DL->begin(); 1743 auto ADI = ActualDefs.begin(); 1744 while (DLI != DL->end() && ADI != ActualDefs.end()) { 1745 assert(&*DLI == *ADI && "Not the same defs in the same order"); 1746 ++DLI; 1747 ++ADI; 1748 } 1749 } 1750 ActualDefs.clear(); 1751 } 1752 } 1753 1754 /// Verify the domination properties of MemorySSA by checking that each 1755 /// definition dominates all of its uses. 
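/// The checks below are compiled out when NDEBUG is defined.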
1756 void MemorySSA::verifyDomination(Function &F) const {
1757 #ifndef NDEBUG
1758 for (BasicBlock &B : F) {
1759 // Phi nodes are attached to basic blocks.
1760 if (MemoryPhi *MP = getMemoryAccess(&B))
1761 for (const Use &U : MP->uses())
1762 assert(dominates(MP, U) && "Memory PHI does not dominate its uses");
1763
1764 for (Instruction &I : B) {
1765 MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
1766 if (!MD)
1767 continue;
1768
1769 for (const Use &U : MD->uses())
1770 assert(dominates(MD, U) && "Memory Def does not dominate its uses");
1771 }
1772 }
1773 #endif
1774 }
1775
1776 /// Verify the def-use lists in MemorySSA, by verifying that \p Use
1777 /// appears in the use list of \p Def.
1778 void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
1779 #ifndef NDEBUG
1780 // The live-on-entry use may cause us to get a null def here.
1781 if (!Def)
1782 assert(isLiveOnEntryDef(Use) &&
1783 "Null def but use does not point to the live on entry def");
1784 else
1785 assert(is_contained(Def->users(), Use) &&
1786 "Did not find use in def's use list");
1787 #endif
1788 }
1789
1790 /// Verify the immediate use information, by walking all the memory
1791 /// accesses and verifying that, for each use, it appears in the
1792 /// appropriate def's use list.
1793 void MemorySSA::verifyDefUses(Function &F) const {
1794 for (BasicBlock &B : F) {
1795 // Phi nodes are attached to basic blocks.
1796 if (MemoryPhi *Phi = getMemoryAccess(&B)) {
1797 assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
1798 pred_begin(&B), pred_end(&B))) &&
1799 "Incomplete MemoryPhi Node");
1800 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
1801 verifyUseInDefs(Phi->getIncomingValue(I), Phi);
1802 assert(find(predecessors(&B), Phi->getIncomingBlock(I)) !=
1803 pred_end(&B) &&
1804 "Incoming phi block not a block predecessor");
1805 }
1806 }
1807
1808 for (Instruction &I : B) {
1809 if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
1810 verifyUseInDefs(MA->getDefiningAccess(), MA);
1811 }
1812 }
1813 }
1814 }
1815
1816 MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
1817 return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
1818 }
1819
1820 MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
1821 return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
1822 }
1823
1824 /// Perform a local numbering on blocks so that instruction ordering can be
1825 /// determined in constant time.
1826 /// TODO: We currently just number in order. If we numbered by N, we could
1827 /// allow at least N-1 sequences of insertBefore or insertAfter (and at least
1828 /// log2(N) sequences of mixed before and after) without needing to invalidate
1829 /// the numbering.
1830 void MemorySSA::renumberBlock(const BasicBlock *B) const {
1831 // The pre-increment ensures the numbers really start at 1.
1832 unsigned long CurrentNumber = 0;
1833 const AccessList *AL = getBlockAccesses(B);
1834 assert(AL != nullptr && "Asking to renumber an empty block");
1835 for (const auto &I : *AL)
1836 BlockNumbering[&I] = ++CurrentNumber;
1837 BlockNumberingValid.insert(B);
1838 }
1839
1840 /// Determine, for two memory accesses in the same block,
1841 /// whether \p Dominator dominates \p Dominatee.
1842 /// \returns True if \p Dominator dominates \p Dominatee.
1843 bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, 1844 const MemoryAccess *Dominatee) const { 1845 const BasicBlock *DominatorBlock = Dominator->getBlock(); 1846 1847 assert((DominatorBlock == Dominatee->getBlock()) && 1848 "Asking for local domination when accesses are in different blocks!"); 1849 // A node dominates itself. 1850 if (Dominatee == Dominator) 1851 return true; 1852 1853 // When Dominatee is defined on function entry, it is not dominated by another 1854 // memory access. 1855 if (isLiveOnEntryDef(Dominatee)) 1856 return false; 1857 1858 // When Dominator is defined on function entry, it dominates the other memory 1859 // access. 1860 if (isLiveOnEntryDef(Dominator)) 1861 return true; 1862 1863 if (!BlockNumberingValid.count(DominatorBlock)) 1864 renumberBlock(DominatorBlock); 1865 1866 unsigned long DominatorNum = BlockNumbering.lookup(Dominator); 1867 // All numbers start with 1 1868 assert(DominatorNum != 0 && "Block was not numbered properly"); 1869 unsigned long DominateeNum = BlockNumbering.lookup(Dominatee); 1870 assert(DominateeNum != 0 && "Block was not numbered properly"); 1871 return DominatorNum < DominateeNum; 1872 } 1873 1874 bool MemorySSA::dominates(const MemoryAccess *Dominator, 1875 const MemoryAccess *Dominatee) const { 1876 if (Dominator == Dominatee) 1877 return true; 1878 1879 if (isLiveOnEntryDef(Dominatee)) 1880 return false; 1881 1882 if (Dominator->getBlock() != Dominatee->getBlock()) 1883 return DT->dominates(Dominator->getBlock(), Dominatee->getBlock()); 1884 return locallyDominates(Dominator, Dominatee); 1885 } 1886 1887 bool MemorySSA::dominates(const MemoryAccess *Dominator, 1888 const Use &Dominatee) const { 1889 if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) { 1890 BasicBlock *UseBB = MP->getIncomingBlock(Dominatee); 1891 // The def must dominate the incoming block of the phi. 1892 if (UseBB != Dominator->getBlock()) 1893 return DT->dominates(Dominator->getBlock(), UseBB); 1894 // If the UseBB and the DefBB are the same, compare locally. 1895 return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee)); 1896 } 1897 // If it's not a PHI node use, the normal dominates can already handle it. 
1898 return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser())); 1899 } 1900 1901 const static char LiveOnEntryStr[] = "liveOnEntry"; 1902 1903 void MemoryAccess::print(raw_ostream &OS) const { 1904 switch (getValueID()) { 1905 case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS); 1906 case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS); 1907 case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS); 1908 } 1909 llvm_unreachable("invalid value id"); 1910 } 1911 1912 void MemoryDef::print(raw_ostream &OS) const { 1913 MemoryAccess *UO = getDefiningAccess(); 1914 1915 auto printID = [&OS](MemoryAccess *A) { 1916 if (A && A->getID()) 1917 OS << A->getID(); 1918 else 1919 OS << LiveOnEntryStr; 1920 }; 1921 1922 OS << getID() << " = MemoryDef("; 1923 printID(UO); 1924 OS << ")"; 1925 1926 if (isOptimized()) { 1927 OS << "->"; 1928 printID(getOptimized()); 1929 1930 if (Optional<AliasResult> AR = getOptimizedAccessType()) 1931 OS << " " << *AR; 1932 } 1933 } 1934 1935 void MemoryPhi::print(raw_ostream &OS) const { 1936 bool First = true; 1937 OS << getID() << " = MemoryPhi("; 1938 for (const auto &Op : operands()) { 1939 BasicBlock *BB = getIncomingBlock(Op); 1940 MemoryAccess *MA = cast<MemoryAccess>(Op); 1941 if (!First) 1942 OS << ','; 1943 else 1944 First = false; 1945 1946 OS << '{'; 1947 if (BB->hasName()) 1948 OS << BB->getName(); 1949 else 1950 BB->printAsOperand(OS, false); 1951 OS << ','; 1952 if (unsigned ID = MA->getID()) 1953 OS << ID; 1954 else 1955 OS << LiveOnEntryStr; 1956 OS << '}'; 1957 } 1958 OS << ')'; 1959 } 1960 1961 void MemoryUse::print(raw_ostream &OS) const { 1962 MemoryAccess *UO = getDefiningAccess(); 1963 OS << "MemoryUse("; 1964 if (UO && UO->getID()) 1965 OS << UO->getID(); 1966 else 1967 OS << LiveOnEntryStr; 1968 OS << ')'; 1969 1970 if (Optional<AliasResult> AR = getOptimizedAccessType()) 1971 OS << " " << *AR; 1972 } 1973 1974 void MemoryAccess::dump() const { 1975 // Cannot completely remove virtual function even in release mode. 
1976 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1977 print(dbgs()); 1978 dbgs() << "\n"; 1979 #endif 1980 } 1981 1982 char MemorySSAPrinterLegacyPass::ID = 0; 1983 1984 MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { 1985 initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); 1986 } 1987 1988 void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { 1989 AU.setPreservesAll(); 1990 AU.addRequired<MemorySSAWrapperPass>(); 1991 } 1992 1993 bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { 1994 auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 1995 MSSA.print(dbgs()); 1996 if (VerifyMemorySSA) 1997 MSSA.verifyMemorySSA(); 1998 return false; 1999 } 2000 2001 AnalysisKey MemorySSAAnalysis::Key; 2002 2003 MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, 2004 FunctionAnalysisManager &AM) { 2005 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 2006 auto &AA = AM.getResult<AAManager>(F); 2007 return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT)); 2008 } 2009 2010 PreservedAnalyses MemorySSAPrinterPass::run(Function &F, 2011 FunctionAnalysisManager &AM) { 2012 OS << "MemorySSA for function: " << F.getName() << "\n"; 2013 AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS); 2014 2015 return PreservedAnalyses::all(); 2016 } 2017 2018 PreservedAnalyses MemorySSAVerifierPass::run(Function &F, 2019 FunctionAnalysisManager &AM) { 2020 AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); 2021 2022 return PreservedAnalyses::all(); 2023 } 2024 2025 char MemorySSAWrapperPass::ID = 0; 2026 2027 MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { 2028 initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); 2029 } 2030 2031 void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } 2032 2033 void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 2034 AU.setPreservesAll(); 2035 AU.addRequiredTransitive<DominatorTreeWrapperPass>(); 2036 AU.addRequiredTransitive<AAResultsWrapperPass>(); 2037 } 2038 2039 bool MemorySSAWrapperPass::runOnFunction(Function &F) { 2040 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2041 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 2042 MSSA.reset(new MemorySSA(F, &AA, &DT)); 2043 return false; 2044 } 2045 2046 void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } 2047 2048 void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { 2049 MSSA->print(OS); 2050 } 2051 2052 MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} 2053 2054 MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A, 2055 DominatorTree *D) 2056 : MemorySSAWalker(M), Walker(*M, *A, *D) {} 2057 2058 void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { 2059 if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) 2060 MUD->resetOptimized(); 2061 } 2062 2063 /// Walk the use-def chains starting at \p MA and find 2064 /// the MemoryAccess that actually clobbers Loc. 
2065 /// 2066 /// \returns our clobbering memory access 2067 MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( 2068 MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) { 2069 return Walker.findClobber(StartingAccess, Q); 2070 } 2071 2072 MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( 2073 MemoryAccess *StartingAccess, const MemoryLocation &Loc) { 2074 if (isa<MemoryPhi>(StartingAccess)) 2075 return StartingAccess; 2076 2077 auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess); 2078 if (MSSA->isLiveOnEntryDef(StartingUseOrDef)) 2079 return StartingUseOrDef; 2080 2081 Instruction *I = StartingUseOrDef->getMemoryInst(); 2082 2083 // Conservatively, fences are always clobbers, so don't perform the walk if we 2084 // hit a fence. 2085 if (!ImmutableCallSite(I) && I->isFenceLike()) 2086 return StartingUseOrDef; 2087 2088 UpwardsMemoryQuery Q; 2089 Q.OriginalAccess = StartingUseOrDef; 2090 Q.StartingLoc = Loc; 2091 Q.Inst = I; 2092 Q.IsCall = false; 2093 2094 // Unlike the other function, do not walk to the def of a def, because we are 2095 // handed something we already believe is the clobbering access. 2096 MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef) 2097 ? StartingUseOrDef->getDefiningAccess() 2098 : StartingUseOrDef; 2099 2100 MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q); 2101 LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); 2102 LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n"); 2103 LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); 2104 LLVM_DEBUG(dbgs() << *Clobber << "\n"); 2105 return Clobber; 2106 } 2107 2108 MemoryAccess * 2109 MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { 2110 auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA); 2111 // If this is a MemoryPhi, we can't do anything. 2112 if (!StartingAccess) 2113 return MA; 2114 2115 // If this is an already optimized use or def, return the optimized result. 2116 // Note: Currently, we store the optimized def result in a separate field, 2117 // since we can't use the defining access. 2118 if (StartingAccess->isOptimized()) 2119 return StartingAccess->getOptimized(); 2120 2121 const Instruction *I = StartingAccess->getMemoryInst(); 2122 UpwardsMemoryQuery Q(I, StartingAccess); 2123 // We can't sanely do anything with a fence, since they conservatively clobber 2124 // all memory, and have no locations to get pointers from to try to 2125 // disambiguate. 2126 if (!Q.IsCall && I->isFenceLike()) 2127 return StartingAccess; 2128 2129 if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) { 2130 MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); 2131 StartingAccess->setOptimized(LiveOnEntry); 2132 StartingAccess->setOptimizedAccessType(None); 2133 return LiveOnEntry; 2134 } 2135 2136 // Start with the thing we already think clobbers this location 2137 MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess(); 2138 2139 // At this point, DefiningAccess may be the live on entry def. 2140 // If it is, we will not get a better result. 
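// Whatever we find is cached on StartingAccess via setOptimized(), so
// repeated queries for this instruction take the isOptimized() fast path
// above instead of walking the use-def chain again.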
2141 if (MSSA->isLiveOnEntryDef(DefiningAccess)) { 2142 StartingAccess->setOptimized(DefiningAccess); 2143 StartingAccess->setOptimizedAccessType(None); 2144 return DefiningAccess; 2145 } 2146 2147 MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); 2148 LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); 2149 LLVM_DEBUG(dbgs() << *DefiningAccess << "\n"); 2150 LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); 2151 LLVM_DEBUG(dbgs() << *Result << "\n"); 2152 2153 StartingAccess->setOptimized(Result); 2154 if (MSSA->isLiveOnEntryDef(Result)) 2155 StartingAccess->setOptimizedAccessType(None); 2156 else if (Q.AR == MustAlias) 2157 StartingAccess->setOptimizedAccessType(MustAlias); 2158 2159 return Result; 2160 } 2161 2162 MemoryAccess * 2163 DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { 2164 if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) 2165 return Use->getDefiningAccess(); 2166 return MA; 2167 } 2168 2169 MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( 2170 MemoryAccess *StartingAccess, const MemoryLocation &) { 2171 if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) 2172 return Use->getDefiningAccess(); 2173 return StartingAccess; 2174 } 2175 2176 void MemoryPhi::deleteMe(DerivedUser *Self) { 2177 delete static_cast<MemoryPhi *>(Self); 2178 } 2179 2180 void MemoryDef::deleteMe(DerivedUser *Self) { 2181 delete static_cast<MemoryDef *>(Self); 2182 } 2183 2184 void MemoryUse::deleteMe(DerivedUser *Self) { 2185 delete static_cast<MemoryUse *>(Self); 2186 } 2187