//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references.  It promotes
// alloca instructions which only have loads and stores as uses.  An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
// The algorithm used here is based on:
//
//   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
//   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
//   Programming Languages
//   POPL '95. ACM, New York, NY, 62-73.
//
// It has been modified to not explicitly use the DJ graph data structure and to
// directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InstVisitor.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
using namespace llvm;

STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");

namespace {

/// \brief Gathers, for a single alloca, the information the rest of the pass
/// needs to decide whether and how it can be promoted to an SSA register.
///
/// Each visit* method returns true when the visited use is compatible with
/// promotion and false when it blocks promotion; failure propagates out of
/// analyzeAlloca().
struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
  /// Blocks containing a store of (definition for) the alloca.
  SmallVector<BasicBlock *, 32> DefiningBlocks;
  /// Blocks containing a load of (use of) the alloca.
  SmallVector<BasicBlock *, 32> UsingBlocks;

  /// The allocated type; loads and stores must match it exactly.
  Type *AllocaTy;
  /// The most recently visited store; only meaningful when the alloca has
  /// exactly one store (DefiningBlocks.size() == 1).
  StoreInst *OnlyStore;
  /// The single block containing all uses, valid while OnlyUsedInOneBlock.
  BasicBlock *OnlyBlock;
  bool OnlyUsedInOneBlock;

  /// Some value loaded from or stored to the alloca; used when updating an
  /// AliasSetTracker for pointer-typed allocas.
  Value *AllocaPointerVal;
  /// The dbg.declare intrinsic describing this alloca, if any.
  DbgDeclareInst *DbgDeclare;

  /// Reset all state so this object can be reused for another alloca.
  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    AllocaTy = 0;
    OnlyStore = 0;
    OnlyBlock = 0;
    OnlyUsedInOneBlock = true;
    AllocaPointerVal = 0;
    DbgDeclare = 0;
  }

  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
  /// by the rest of the pass to reason about the uses of this alloca.
  /// Returns false if some use makes the alloca non-promotable.
  bool analyzeAlloca(AllocaInst &AI) {
    clear();

    AllocaTy = AI.getAllocatedType();
    enqueueUsers(AI);

    // Walk queued up uses in the worklist to handle nested uses.
    while (!UseWorklist.empty()) {
      U = UseWorklist.pop_back_val();
      Instruction &I = *cast<Instruction>(U->getUser());
      if (!visit(I))
        return false; // Propagate failure to promote up.

      // Track whether every (transitive) user lives in one basic block.
      if (OnlyUsedInOneBlock) {
        if (OnlyBlock == 0)
          OnlyBlock = I.getParent();
        else if (OnlyBlock != I.getParent())
          OnlyUsedInOneBlock = false;
      }
    }

    DbgDeclare = FindAllocaDbgDeclare(&AI);
    return true;
  }

private:
  // Befriend the base class so it can call through private visitor methods.
  friend class InstVisitor<AllocaInfo, bool>;

  /// \brief A use pointer that is non-null when visiting uses.
  Use *U;

  /// \brief A worklist for recursively visiting all uses of an alloca.
  SmallVector<Use *, 8> UseWorklist;

  /// \brief A set for preventing cyclic visitation.
  SmallPtrSet<Use *, 8> VisitedUses;

  /// Push every not-yet-visited use of \p I onto the worklist.
  void enqueueUsers(Instruction &I) {
    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
         ++UI)
      if (VisitedUses.insert(&UI.getUse()))
        UseWorklist.push_back(&UI.getUse());
  }

  /// A load is promotable only if it is non-volatile and loads exactly the
  /// allocated type.
  bool visitLoadInst(LoadInst &LI) {
    if (LI.isVolatile() || LI.getType() != AllocaTy)
      return false;

    // Keep track of variable reads.
    UsingBlocks.push_back(LI.getParent());
    AllocaPointerVal = &LI;
    return true;
  }

  /// A store is promotable only if it is non-volatile, stores exactly the
  /// allocated type, and does not store the visited pointer itself as the
  /// value operand (which would let the address escape).
  bool visitStoreInst(StoreInst &SI) {
    if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
        SI.getValueOperand()->getType() != AllocaTy)
      return false;

    // Remember the basic blocks which define new values for the alloca.
    DefiningBlocks.push_back(SI.getParent());
    AllocaPointerVal = SI.getOperand(0);
    OnlyStore = &SI;
    return true;
  }

  /// Bitcasts are address-transparent; recurse into their users.
  bool visitBitCastInst(BitCastInst &BC) {
    enqueueUsers(BC);
    return true;
  }

  /// A GEP is promotable only when all of its indices are zero, i.e. it
  /// yields the same address it was given; its users are still scanned.
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (GEPI.use_empty())
      return true;

    enqueueUsers(GEPI);

    return GEPI.hasAllZeroIndices();
  }

  // We can promote through debug info intrinsics as they don't alter the
  // value stored in memory.
  bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { return true; }

  bool visitIntrinsicInst(IntrinsicInst &II) {
    switch (II.getIntrinsicID()) {
    default:
      return false;

    // Lifetime intrinsics don't preclude promoting the memory to a register.
    // FIXME: We should use these to promote to undef when outside of a valid
    // lifetime.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      return true;
    }
  }

  // The fallback is that the alloca cannot be promoted.
  bool visitInstruction(Instruction &I) { return false; }
};

// Data package used by RenamePass(): one pending block to rename along with
// the predecessor we arrived from and the current value of each alloca.
class RenamePassData {
public:
  typedef std::vector<Value *> ValVector;

  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
  RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
      : BB(B), Pred(P), Values(V) {}
  /// The block to rename.
  BasicBlock *BB;
  /// The CFG predecessor we reached BB from (used to fill in PHI entries).
  BasicBlock *Pred;
  /// Incoming value of each alloca, indexed like the Allocas array.
  ValVector Values;

  /// Exchange contents with \p RHS cheaply (swaps the Values vector rather
  /// than copying it).
  void swap(RenamePassData &RHS) {
    std::swap(BB, RHS.BB);
    std::swap(Pred, RHS.Pred);
    Values.swap(RHS.Values);
  }
};

/// \brief This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// \brief For each instruction that we track, keep the index of the
  /// instruction.
  ///
  /// The index starts out as the number of the instruction from the start of
  /// the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;

public:

  /// This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// Get or calculate the index of the specified instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Scan the whole block to get the instruction. This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
         ++BBI)
      if (isInterestingInstruction(BBI))
        InstNumbers[BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  /// Forget a (deleted) instruction so a stale index is never returned.
  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }

  void clear() { InstNumbers.clear(); }
};

/// \brief The main mem2reg driver: promotes a list of allocas within one
/// function, sharing analysis state across them.
struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;
  DominatorTree &DT;
  DIBuilder DIB;

  /// An AliasSetTracker object to update. If null, don't update it.
  AliasSetTracker *AST;

  /// Reverse mapping of Allocas.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// \brief The PhiNodes we're adding.
  ///
  /// That map is used to simplify some Phi nodes as we iterate over it, so
  /// it should have deterministic iterators. We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// If we are updating an AliasSetTracker, then for each alloca that is of
  /// pointer type, we keep track of what to copyValue to the inserted PHI
  /// nodes here.
  std::vector<Value *> PointerAllocaValues;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;

  /// The set of basic blocks the renamer has already visited.
  ///
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-deterministic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Maps DomTreeNodes to their level in the dominator tree.
  DenseMap<DomTreeNode *, unsigned> DomLevels;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 AliasSetTracker *AST)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}

  /// Run the whole promotion algorithm over the Allocas list.
  void run();

private:
  /// Remove the alloca at \p AllocaIdx by swapping the last element into its
  /// slot; decrements the index so the caller's loop revisits that slot.
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  /// Return (and cache) the number of CFG predecessors of \p BB. The cached
  /// count is biased by one so that a value of zero means "not yet computed".
  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
    return NP - 1;
  }

  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                               AllocaInfo &Info);
  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
                           SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

} // end of anonymous namespace

/// Delete every transitive user of \p AI that is not a load or store (in
/// practice, lifetime intrinsics and the casts/GEPs feeding them).
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
  // Knowing that this alloca is promotable, we know that it's safe to kill all
  // instructions except for load and store.
  // FIXME: This function isn't actually specific to lifetime intrinsics. It
  // also is redundant with the actual analysis of the loads and stores and
  // should be folded into that.

  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  SmallVector<Instruction *, 8> Dead;
  Worklist.push_back(AI);

  do {
    Instruction *I = Worklist.pop_back_val();

    // A queued instruction with no uses of its own can be deleted outright.
    if (I->use_empty()) {
      Dead.push_back(I);
      continue;
    }

    // Otherwise recurse into its non-load/store users.
    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
         UI != UE; ++UI)
      if (!isa<LoadInst>(*UI) && !isa<StoreInst>(*UI) &&
          Visited.insert(cast<Instruction>(*UI)))
        Worklist.push_back(cast<Instruction>(*UI));
  } while (!Worklist.empty());

  while (!Dead.empty()) {
    Instruction *I = Dead.pop_back_val();

    // Don't delete the alloca itself.
    if (I == AI)
      continue;

    // Note that we open code the deletion algorithm here because we know
    // a priori that all of the instructions using an alloca that reaches here
    // are trivially dead when their use list becomes empty (The only risk are
    // lifetime markers which we specifically want to nuke). By coding it here
    // we can skip the triviality test and be more efficient.
    //
    // Null out all of the instruction's operands to see if any operand becomes
    // dead as we go.
    for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
         ++OI) {
      Instruction *Op = dyn_cast<Instruction>(*OI);
      if (!Op)
        continue;

      OI->set(0);
      if (!Op->use_empty())
        continue;

      Dead.push_back(Op);
    }
    I->eraseFromParent();
  }
}

/// \brief Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely.
/// If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     DominatorTree &DT,
                                     AliasSetTracker *AST) {
  StoreInst *OnlyStore = Info.OnlyStore;
  // A stored value that is not an Instruction (constant, argument, global)
  // trivially dominates every load.
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1; // Lazily computed intra-block index of the store.

  // Clear out UsingBlocks. We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca. We can do this if the value is
    // dominated by the store. If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first. If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships. If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DIBuilder DIB(*AI->getParent()->getParent()->getParent());
    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
    DDI->eraseFromParent();
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}

namespace {
/// This is a helper predicate used to search by the first element of a pair.
struct StoreIndexSearchPredicate {
  bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
                  const std::pair<unsigned, StoreInst *> &RHS) {
    return LHS.first < RHS.first;
  }
};
}

/// Many allocas are only used within a single basic block.
/// If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return true. This is necessary in cases where, due to control flow, the
/// alloca is potentially undefined on some control flow paths. e.g. code like
/// this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     AliasSetTracker *AST) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen. This does not
  // significantly pessimize the small block case. This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
       ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end(),
            StoreIndexSearchPredicate());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
                         StoreIndexSearchPredicate());

    if (I == StoresByIndex.begin())
      // If there is no store before this load, the load takes the undef value.
      LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
    else
      // Otherwise, there was a store before this load, the load takes its
      // value.
      LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));

    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->use_back());
    // Record debuginfo for the store before removing it.
    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
      DIBuilder DIB(*AI->getParent()->getParent()->getParent());
      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  if (DbgDeclareInst *DDI = Info.DbgDeclare)
    DDI->eraseFromParent();

  ++NumLocalPromoted;
}

/// Promote every alloca on the Allocas list: try the cheap single-store and
/// single-block paths first, then fall back to full SSA construction (PHI
/// placement via iterated dominance frontiers plus a renaming walk).
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST)
    PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    //assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST)
        AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed.
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca. This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    bool Good = Info.analyzeAlloca(*AI);
    (void)Good;
    assert(Good && "Cannot promote non-promotable alloca!");

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      promoteSingleBlockAlloca(AI, Info, LBI, AST);

      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed dominator tree levels, do so now.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode *, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      // Worklist traversal of the dominator tree, assigning each node a
      // level equal to its depth below the root.
      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare)
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm. Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's. We do this in case there is a load of a value that has not
  // been stored yet. In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary.
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops. Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST)
      AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values. This can
  // happen due to undef values coming into the PHI nodes. This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go. If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code. Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed. If this is the case, the PHI nodes may not
  // have incoming values for all predecessors. Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block. As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values. We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks. Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry.
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block. Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}

/// \brief Determine which blocks the value is live in.
///
/// These are blocks which lead to uses. Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
    AllocaInst *AI, AllocaInfo &Info,
    const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
    SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live. Blocks are added to the worklist if we need to
  // check their predecessors. Start with all the using blocks.
  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                    Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use. If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB))
      continue;

    // Okay, this is a block that both uses and defines the value. If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin();; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI)
          continue;

        // We found a store to the alloca before a load. The alloca is not
        // actually live-in here. Remove it with swap-and-pop so the scan can
        // continue over the shrunken worklist.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI)
          continue;

        // Okay, we found a load before a store to the alloca. It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set. If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too. Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

namespace {
/// A dominator tree node paired with its level, as queued for IDF computation.
typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;

/// Orders DomTreeNodePairs by level so the deepest node is popped first.
struct DomTreeNodeCompare {
  bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
    return LHS.second < RHS.second;
  }
};
} // end anonymous namespace

/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm. Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
/// nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock *, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in. These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
938 typedef std::priority_queue<DomTreeNodePair, 939 SmallVector<DomTreeNodePair, 32>, 940 DomTreeNodeCompare> IDFPriorityQueue; 941 IDFPriorityQueue PQ; 942 943 for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(), 944 E = DefBlocks.end(); 945 I != E; ++I) { 946 if (DomTreeNode *Node = DT.getNode(*I)) 947 PQ.push(std::make_pair(Node, DomLevels[Node])); 948 } 949 950 SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks; 951 SmallPtrSet<DomTreeNode *, 32> Visited; 952 SmallVector<DomTreeNode *, 32> Worklist; 953 while (!PQ.empty()) { 954 DomTreeNodePair RootPair = PQ.top(); 955 PQ.pop(); 956 DomTreeNode *Root = RootPair.first; 957 unsigned RootLevel = RootPair.second; 958 959 // Walk all dominator tree children of Root, inspecting their CFG edges with 960 // targets elsewhere on the dominator tree. Only targets whose level is at 961 // most Root's level are added to the iterated dominance frontier of the 962 // definition set. 963 964 Worklist.clear(); 965 Worklist.push_back(Root); 966 967 while (!Worklist.empty()) { 968 DomTreeNode *Node = Worklist.pop_back_val(); 969 BasicBlock *BB = Node->getBlock(); 970 971 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; 972 ++SI) { 973 DomTreeNode *SuccNode = DT.getNode(*SI); 974 975 // Quickly skip all CFG edges that are also dominator tree edges instead 976 // of catching them below. 
977 if (SuccNode->getIDom() == Node) 978 continue; 979 980 unsigned SuccLevel = DomLevels[SuccNode]; 981 if (SuccLevel > RootLevel) 982 continue; 983 984 if (!Visited.insert(SuccNode)) 985 continue; 986 987 BasicBlock *SuccBB = SuccNode->getBlock(); 988 if (!LiveInBlocks.count(SuccBB)) 989 continue; 990 991 DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); 992 if (!DefBlocks.count(SuccBB)) 993 PQ.push(std::make_pair(SuccNode, SuccLevel)); 994 } 995 996 for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; 997 ++CI) { 998 if (!Visited.count(*CI)) 999 Worklist.push_back(*CI); 1000 } 1001 } 1002 } 1003 1004 if (DFBlocks.size() > 1) 1005 std::sort(DFBlocks.begin(), DFBlocks.end()); 1006 1007 unsigned CurrentVersion = 0; 1008 for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) 1009 QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); 1010 } 1011 1012 /// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. 1013 /// 1014 /// Returns true if there wasn't already a phi-node for that variable 1015 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 1016 unsigned &Version) { 1017 // Look up the basic-block in question. 1018 PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; 1019 1020 // If the BB already has a phi node added for the i'th alloca then we're done! 1021 if (PN) 1022 return false; 1023 1024 // Create a PhiNode using the dereferenced type... and add the phi-node to the 1025 // BasicBlock. 1026 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), 1027 Allocas[AllocaNo]->getName() + "." 
+ Twine(Version++), 1028 BB->begin()); 1029 ++NumPHIInsert; 1030 PhiToAllocaMap[PN] = AllocaNo; 1031 1032 if (AST && PN->getType()->isPointerTy()) 1033 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 1034 1035 return true; 1036 } 1037 1038 /// \brief Recursively traverse the CFG of the function, renaming loads and 1039 /// stores to the allocas which we are promoting. 1040 /// 1041 /// IncomingVals indicates what value each Alloca contains on exit from the 1042 /// predecessor block Pred. 1043 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 1044 RenamePassData::ValVector &IncomingVals, 1045 std::vector<RenamePassData> &Worklist) { 1046 NextIteration: 1047 // If we are inserting any phi nodes into this BB, they will already be in the 1048 // block. 1049 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 1050 // If we have PHI nodes to update, compute the number of edges from Pred to 1051 // BB. 1052 if (PhiToAllocaMap.count(APN)) { 1053 // We want to be able to distinguish between PHI nodes being inserted by 1054 // this invocation of mem2reg from those phi nodes that already existed in 1055 // the IR before mem2reg was run. We determine that APN is being inserted 1056 // because it is missing incoming edges. All other PHI nodes being 1057 // inserted by this pass of mem2reg will have the same number of incoming 1058 // operands so far. Remember this count. 1059 unsigned NewPHINumOperands = APN->getNumOperands(); 1060 1061 unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); 1062 assert(NumEdges && "Must be at least one edge from Pred to BB!"); 1063 1064 // Add entries for all the phis. 1065 BasicBlock::iterator PNI = BB->begin(); 1066 do { 1067 unsigned AllocaNo = PhiToAllocaMap[APN]; 1068 1069 // Add N incoming values to the PHI node. 1070 for (unsigned i = 0; i != NumEdges; ++i) 1071 APN->addIncoming(IncomingVals[AllocaNo], Pred); 1072 1073 // The currently active variable for this block is now the PHI. 
1074 IncomingVals[AllocaNo] = APN; 1075 1076 // Get the next phi node. 1077 ++PNI; 1078 APN = dyn_cast<PHINode>(PNI); 1079 if (APN == 0) 1080 break; 1081 1082 // Verify that it is missing entries. If not, it is not being inserted 1083 // by this mem2reg invocation so we want to ignore it. 1084 } while (APN->getNumOperands() == NewPHINumOperands); 1085 } 1086 } 1087 1088 // Don't revisit blocks. 1089 if (!Visited.insert(BB)) 1090 return; 1091 1092 for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { 1093 Instruction *I = II++; // get the instruction, increment iterator 1094 1095 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 1096 AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); 1097 if (!Src) 1098 continue; 1099 1100 DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); 1101 if (AI == AllocaLookup.end()) 1102 continue; 1103 1104 Value *V = IncomingVals[AI->second]; 1105 1106 // Anything using the load now uses the current value. 1107 LI->replaceAllUsesWith(V); 1108 if (AST && LI->getType()->isPointerTy()) 1109 AST->deleteValue(LI); 1110 BB->getInstList().erase(LI); 1111 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 1112 // Delete this instruction and mark the name as the current holder of the 1113 // value 1114 AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); 1115 if (!Dest) 1116 continue; 1117 1118 DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); 1119 if (ai == AllocaLookup.end()) 1120 continue; 1121 1122 // what value were we writing? 1123 IncomingVals[ai->second] = SI->getOperand(0); 1124 // Record debuginfo for the store before removing it. 1125 if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) 1126 ConvertDebugDeclareToDebugValue(DDI, SI, DIB); 1127 BB->getInstList().erase(SI); 1128 } 1129 } 1130 1131 // 'Recurse' to our successors. 
1132 succ_iterator I = succ_begin(BB), E = succ_end(BB); 1133 if (I == E) 1134 return; 1135 1136 // Keep track of the successors so we don't visit the same successor twice 1137 SmallPtrSet<BasicBlock *, 8> VisitedSuccs; 1138 1139 // Handle the first successor without using the worklist. 1140 VisitedSuccs.insert(*I); 1141 Pred = BB; 1142 BB = *I; 1143 ++I; 1144 1145 for (; I != E; ++I) 1146 if (VisitedSuccs.insert(*I)) 1147 Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); 1148 1149 goto NextIteration; 1150 } 1151 1152 bool llvm::isAllocaPromotable(const AllocaInst *AI) { 1153 // We cast away constness because we re-use the non-const analysis that the 1154 // actual promotion routine uses. While it is non-const, it doesn't actually 1155 // mutate anything at this phase, and we discard the non-const results that 1156 // promotion uses to mutate the alloca. 1157 return AllocaInfo().analyzeAlloca(*const_cast<AllocaInst *>(AI)); 1158 } 1159 1160 void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, 1161 DominatorTree &DT, AliasSetTracker *AST) { 1162 // If there is nothing to do, bail out... 1163 if (Allocas.empty()) 1164 return; 1165 1166 PromoteMem2Reg(Allocas, DT, AST).run(); 1167 } 1168