1 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file promotes memory references to be register references. It promotes 11 // alloca instructions which only have loads and stores as uses. An alloca is 12 // transformed by using iterated dominator frontiers to place PHI nodes, then 13 // traversing the function in depth-first order to rewrite loads and stores as 14 // appropriate. 15 // 16 // The algorithm used here is based on: 17 // 18 // Sreedhar and Gao. A linear time algorithm for placing phi-nodes. 19 // In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of 20 // Programming Languages 21 // POPL '95. ACM, New York, NY, 62-73. 22 // 23 // It has been modified to not explicitly use the DJ graph data structure and to 24 // directly compute pruned SSA using per-variable liveness information. 25 // 26 //===----------------------------------------------------------------------===// 27 28 #define DEBUG_TYPE "mem2reg" 29 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 30 #include "llvm/Constants.h" 31 #include "llvm/DerivedTypes.h" 32 #include "llvm/Function.h" 33 #include "llvm/Instructions.h" 34 #include "llvm/IntrinsicInst.h" 35 #include "llvm/Metadata.h" 36 #include "llvm/Analysis/AliasSetTracker.h" 37 #include "llvm/Analysis/DebugInfo.h" 38 #include "llvm/Analysis/DIBuilder.h" 39 #include "llvm/Analysis/Dominators.h" 40 #include "llvm/Analysis/InstructionSimplify.h" 41 #include "llvm/Transforms/Utils/Local.h" 42 #include "llvm/ADT/DenseMap.h" 43 #include "llvm/ADT/SmallPtrSet.h" 44 #include "llvm/ADT/SmallVector.h" 45 #include "llvm/ADT/Statistic.h" 46 #include "llvm/ADT/STLExtras.h" 47 #include "llvm/Support/CFG.h" 48 #include <algorithm> 49 #include <map> 50 #include <queue> 51 using namespace llvm; 52 53 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); 54 STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); 55 STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); 56 STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); 57 58 namespace llvm { 59 template<> 60 struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { 61 typedef std::pair<BasicBlock*, unsigned> EltTy; 62 static inline EltTy getEmptyKey() { 63 return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); 64 } 65 static inline EltTy getTombstoneKey() { 66 return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); 67 } 68 static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { 69 return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2; 70 } 71 static bool isEqual(const EltTy &LHS, const EltTy &RHS) { 72 return LHS == RHS; 73 } 74 }; 75 } 76 77 /// isAllocaPromotable - Return true if this alloca is legal for promotion. 78 /// This is true if there are only loads and stores to the alloca. 79 /// 80 bool llvm::isAllocaPromotable(const AllocaInst *AI) { 81 // FIXME: If the memory unit is of pointer or integer type, we can permit 82 // assignments to subsections of the memory unit. 83 84 // Only allow direct and non-volatile loads and stores... 85 for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); 86 UI != UE; ++UI) { // Loop over all of the uses of the alloca 87 const User *U = *UI; 88 if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { 89 if (LI->isVolatile()) 90 return false; 91 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 92 if (SI->getOperand(0) == AI) 93 return false; // Don't allow a store OF the AI, only INTO the AI. 94 if (SI->isVolatile()) 95 return false; 96 } else { 97 return false; 98 } 99 } 100 101 return true; 102 } 103 104 /// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the 105 /// alloca 'V', if any. 106 static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { 107 if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) 108 for (Value::use_iterator UI = DebugNode->use_begin(), 109 E = DebugNode->use_end(); UI != E; ++UI) 110 if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) 111 return DDI; 112 113 return 0; 114 } 115 116 namespace { 117 struct AllocaInfo; 118 119 // Data package used by RenamePass() 120 class RenamePassData { 121 public: 122 typedef std::vector<Value *> ValVector; 123 124 RenamePassData() : BB(NULL), Pred(NULL), Values() {} 125 RenamePassData(BasicBlock *B, BasicBlock *P, 126 const ValVector &V) : BB(B), Pred(P), Values(V) {} 127 BasicBlock *BB; 128 BasicBlock *Pred; 129 ValVector Values; 130 131 void swap(RenamePassData &RHS) { 132 std::swap(BB, RHS.BB); 133 std::swap(Pred, RHS.Pred); 134 Values.swap(RHS.Values); 135 } 136 }; 137 138 /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of 139 /// load/store instructions in the block that directly load or store an alloca. 140 /// 141 /// This functionality is important because it avoids scanning large basic 142 /// blocks multiple times when promoting many allocas in the same block. 143 class LargeBlockInfo { 144 /// InstNumbers - For each instruction that we track, keep the index of the 145 /// instruction. The index starts out as the number of the instruction from 146 /// the start of the block. 147 DenseMap<const Instruction *, unsigned> InstNumbers; 148 public: 149 150 /// isInterestingInstruction - This code only looks at accesses to allocas. 151 static bool isInterestingInstruction(const Instruction *I) { 152 return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || 153 (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); 154 } 155 156 /// getInstructionIndex - Get or calculate the index of the specified 157 /// instruction. 158 unsigned getInstructionIndex(const Instruction *I) { 159 assert(isInterestingInstruction(I) && 160 "Not a load/store to/from an alloca?"); 161 162 // If we already have this instruction number, return it. 163 DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); 164 if (It != InstNumbers.end()) return It->second; 165 166 // Scan the whole block to get the instruction. This accumulates 167 // information for every interesting instruction in the block, in order to 168 // avoid gratuitus rescans. 169 const BasicBlock *BB = I->getParent(); 170 unsigned InstNo = 0; 171 for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); 172 BBI != E; ++BBI) 173 if (isInterestingInstruction(BBI)) 174 InstNumbers[BBI] = InstNo++; 175 It = InstNumbers.find(I); 176 177 assert(It != InstNumbers.end() && "Didn't insert instruction?"); 178 return It->second; 179 } 180 181 void deleteValue(const Instruction *I) { 182 InstNumbers.erase(I); 183 } 184 185 void clear() { 186 InstNumbers.clear(); 187 } 188 }; 189 190 struct PromoteMem2Reg { 191 /// Allocas - The alloca instructions being promoted. 192 /// 193 std::vector<AllocaInst*> Allocas; 194 DominatorTree &DT; 195 DIBuilder *DIB; 196 197 /// AST - An AliasSetTracker object to update. If null, don't update it. 198 /// 199 AliasSetTracker *AST; 200 201 /// AllocaLookup - Reverse mapping of Allocas. 202 /// 203 DenseMap<AllocaInst*, unsigned> AllocaLookup; 204 205 /// NewPhiNodes - The PhiNodes we're adding. 206 /// 207 DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; 208 209 /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas 210 /// it corresponds to. 211 DenseMap<PHINode*, unsigned> PhiToAllocaMap; 212 213 /// PointerAllocaValues - If we are updating an AliasSetTracker, then for 214 /// each alloca that is of pointer type, we keep track of what to copyValue 215 /// to the inserted PHI nodes here. 216 /// 217 std::vector<Value*> PointerAllocaValues; 218 219 /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare 220 /// intrinsic that describes it, if any, so that we can convert it to a 221 /// dbg.value intrinsic if the alloca gets promoted. 222 SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; 223 224 /// Visited - The set of basic blocks the renamer has already visited. 225 /// 226 SmallPtrSet<BasicBlock*, 16> Visited; 227 228 /// BBNumbers - Contains a stable numbering of basic blocks to avoid 229 /// non-determinstic behavior. 230 DenseMap<BasicBlock*, unsigned> BBNumbers; 231 232 /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. 233 DenseMap<DomTreeNode*, unsigned> DomLevels; 234 235 /// BBNumPreds - Lazily compute the number of predecessors a block has. 236 DenseMap<const BasicBlock*, unsigned> BBNumPreds; 237 public: 238 PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, 239 AliasSetTracker *ast) 240 : Allocas(A), DT(dt), DIB(0), AST(ast) {} 241 ~PromoteMem2Reg() { 242 delete DIB; 243 } 244 245 void run(); 246 247 /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. 248 /// 249 bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { 250 return DT.dominates(BB1, BB2); 251 } 252 253 private: 254 void RemoveFromAllocasList(unsigned &AllocaIdx) { 255 Allocas[AllocaIdx] = Allocas.back(); 256 Allocas.pop_back(); 257 --AllocaIdx; 258 } 259 260 unsigned getNumPreds(const BasicBlock *BB) { 261 unsigned &NP = BBNumPreds[BB]; 262 if (NP == 0) 263 NP = std::distance(pred_begin(BB), pred_end(BB))+1; 264 return NP-1; 265 } 266 267 void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 268 AllocaInfo &Info); 269 void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 270 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 271 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); 272 273 void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, 274 LargeBlockInfo &LBI); 275 void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 276 LargeBlockInfo &LBI); 277 278 void RenamePass(BasicBlock *BB, BasicBlock *Pred, 279 RenamePassData::ValVector &IncVals, 280 std::vector<RenamePassData> &Worklist); 281 bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); 282 }; 283 284 struct AllocaInfo { 285 SmallVector<BasicBlock*, 32> DefiningBlocks; 286 SmallVector<BasicBlock*, 32> UsingBlocks; 287 288 StoreInst *OnlyStore; 289 BasicBlock *OnlyBlock; 290 bool OnlyUsedInOneBlock; 291 292 Value *AllocaPointerVal; 293 DbgDeclareInst *DbgDeclare; 294 295 void clear() { 296 DefiningBlocks.clear(); 297 UsingBlocks.clear(); 298 OnlyStore = 0; 299 OnlyBlock = 0; 300 OnlyUsedInOneBlock = true; 301 AllocaPointerVal = 0; 302 DbgDeclare = 0; 303 } 304 305 /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our 306 /// ivars. 307 void AnalyzeAlloca(AllocaInst *AI) { 308 clear(); 309 310 // As we scan the uses of the alloca instruction, keep track of stores, 311 // and decide whether all of the loads and stores to the alloca are within 312 // the same basic block. 313 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 314 UI != E;) { 315 Instruction *User = cast<Instruction>(*UI++); 316 317 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 318 // Remember the basic blocks which define new values for the alloca 319 DefiningBlocks.push_back(SI->getParent()); 320 AllocaPointerVal = SI->getOperand(0); 321 OnlyStore = SI; 322 } else { 323 LoadInst *LI = cast<LoadInst>(User); 324 // Otherwise it must be a load instruction, keep track of variable 325 // reads. 326 UsingBlocks.push_back(LI->getParent()); 327 AllocaPointerVal = LI; 328 } 329 330 if (OnlyUsedInOneBlock) { 331 if (OnlyBlock == 0) 332 OnlyBlock = User->getParent(); 333 else if (OnlyBlock != User->getParent()) 334 OnlyUsedInOneBlock = false; 335 } 336 } 337 338 DbgDeclare = FindAllocaDbgDeclare(AI); 339 } 340 }; 341 342 typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair; 343 344 struct DomTreeNodeCompare { 345 bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { 346 return LHS.second < RHS.second; 347 } 348 }; 349 } // end of anonymous namespace 350 351 352 void PromoteMem2Reg::run() { 353 Function &F = *DT.getRoot()->getParent(); 354 355 if (AST) PointerAllocaValues.resize(Allocas.size()); 356 AllocaDbgDeclares.resize(Allocas.size()); 357 358 AllocaInfo Info; 359 LargeBlockInfo LBI; 360 361 for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { 362 AllocaInst *AI = Allocas[AllocaNum]; 363 364 assert(isAllocaPromotable(AI) && 365 "Cannot promote non-promotable alloca!"); 366 assert(AI->getParent()->getParent() == &F && 367 "All allocas should be in the same function, which is same as DF!"); 368 369 if (AI->use_empty()) { 370 // If there are no uses of the alloca, just delete it now. 371 if (AST) AST->deleteValue(AI); 372 AI->eraseFromParent(); 373 374 // Remove the alloca from the Allocas list, since it has been processed 375 RemoveFromAllocasList(AllocaNum); 376 ++NumDeadAlloca; 377 continue; 378 } 379 380 // Calculate the set of read and write-locations for each alloca. This is 381 // analogous to finding the 'uses' and 'definitions' of each variable. 382 Info.AnalyzeAlloca(AI); 383 384 // If there is only a single store to this value, replace any loads of 385 // it that are directly dominated by the definition with the value stored. 386 if (Info.DefiningBlocks.size() == 1) { 387 RewriteSingleStoreAlloca(AI, Info, LBI); 388 389 // Finally, after the scan, check to see if the store is all that is left. 390 if (Info.UsingBlocks.empty()) { 391 // Record debuginfo for the store and remove the declaration's debuginfo. 392 if (DbgDeclareInst *DDI = Info.DbgDeclare) { 393 if (!DIB) 394 DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); 395 ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); 396 DDI->eraseFromParent(); 397 } 398 // Remove the (now dead) store and alloca. 399 Info.OnlyStore->eraseFromParent(); 400 LBI.deleteValue(Info.OnlyStore); 401 402 if (AST) AST->deleteValue(AI); 403 AI->eraseFromParent(); 404 LBI.deleteValue(AI); 405 406 // The alloca has been processed, move on. 407 RemoveFromAllocasList(AllocaNum); 408 409 ++NumSingleStore; 410 continue; 411 } 412 } 413 414 // If the alloca is only read and written in one basic block, just perform a 415 // linear sweep over the block to eliminate it. 416 if (Info.OnlyUsedInOneBlock) { 417 PromoteSingleBlockAlloca(AI, Info, LBI); 418 419 // Finally, after the scan, check to see if the stores are all that is 420 // left. 421 if (Info.UsingBlocks.empty()) { 422 423 // Remove the (now dead) stores and alloca. 424 while (!AI->use_empty()) { 425 StoreInst *SI = cast<StoreInst>(AI->use_back()); 426 // Record debuginfo for the store before removing it. 427 if (DbgDeclareInst *DDI = Info.DbgDeclare) { 428 if (!DIB) 429 DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); 430 ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); 431 } 432 SI->eraseFromParent(); 433 LBI.deleteValue(SI); 434 } 435 436 if (AST) AST->deleteValue(AI); 437 AI->eraseFromParent(); 438 LBI.deleteValue(AI); 439 440 // The alloca has been processed, move on. 441 RemoveFromAllocasList(AllocaNum); 442 443 // The alloca's debuginfo can be removed as well. 444 if (DbgDeclareInst *DDI = Info.DbgDeclare) 445 DDI->eraseFromParent(); 446 447 ++NumLocalPromoted; 448 continue; 449 } 450 } 451 452 // If we haven't computed dominator tree levels, do so now. 453 if (DomLevels.empty()) { 454 SmallVector<DomTreeNode*, 32> Worklist; 455 456 DomTreeNode *Root = DT.getRootNode(); 457 DomLevels[Root] = 0; 458 Worklist.push_back(Root); 459 460 while (!Worklist.empty()) { 461 DomTreeNode *Node = Worklist.pop_back_val(); 462 unsigned ChildLevel = DomLevels[Node] + 1; 463 for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); 464 CI != CE; ++CI) { 465 DomLevels[*CI] = ChildLevel; 466 Worklist.push_back(*CI); 467 } 468 } 469 } 470 471 // If we haven't computed a numbering for the BB's in the function, do so 472 // now. 473 if (BBNumbers.empty()) { 474 unsigned ID = 0; 475 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) 476 BBNumbers[I] = ID++; 477 } 478 479 // If we have an AST to keep updated, remember some pointer value that is 480 // stored into the alloca. 481 if (AST) 482 PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; 483 484 // Remember the dbg.declare intrinsic describing this alloca, if any. 485 if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; 486 487 // Keep the reverse mapping of the 'Allocas' array for the rename pass. 488 AllocaLookup[Allocas[AllocaNum]] = AllocaNum; 489 490 // At this point, we're committed to promoting the alloca using IDF's, and 491 // the standard SSA construction algorithm. Determine which blocks need PHI 492 // nodes and see if we can optimize out some work by avoiding insertion of 493 // dead phi nodes. 494 DetermineInsertionPoint(AI, AllocaNum, Info); 495 } 496 497 if (Allocas.empty()) 498 return; // All of the allocas must have been trivial! 499 500 LBI.clear(); 501 502 503 // Set the incoming values for the basic block to be null values for all of 504 // the alloca's. We do this in case there is a load of a value that has not 505 // been stored yet. In this case, it will get this null value. 506 // 507 RenamePassData::ValVector Values(Allocas.size()); 508 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) 509 Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); 510 511 // Walks all basic blocks in the function performing the SSA rename algorithm 512 // and inserting the phi nodes we marked as necessary 513 // 514 std::vector<RenamePassData> RenamePassWorkList; 515 RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); 516 do { 517 RenamePassData RPD; 518 RPD.swap(RenamePassWorkList.back()); 519 RenamePassWorkList.pop_back(); 520 // RenamePass may add new worklist entries. 521 RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); 522 } while (!RenamePassWorkList.empty()); 523 524 // The renamer uses the Visited set to avoid infinite loops. Clear it now. 525 Visited.clear(); 526 527 // Remove the allocas themselves from the function. 528 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 529 Instruction *A = Allocas[i]; 530 531 // If there are any uses of the alloca instructions left, they must be in 532 // unreachable basic blocks that were not processed by walking the dominator 533 // tree. Just delete the users now. 534 if (!A->use_empty()) 535 A->replaceAllUsesWith(UndefValue::get(A->getType())); 536 if (AST) AST->deleteValue(A); 537 A->eraseFromParent(); 538 } 539 540 // Remove alloca's dbg.declare instrinsics from the function. 541 for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) 542 if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) 543 DDI->eraseFromParent(); 544 545 // Loop over all of the PHI nodes and see if there are any that we can get 546 // rid of because they merge all of the same incoming values. This can 547 // happen due to undef values coming into the PHI nodes. This process is 548 // iterative, because eliminating one PHI node can cause others to be removed. 549 bool EliminatedAPHI = true; 550 while (EliminatedAPHI) { 551 EliminatedAPHI = false; 552 553 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 554 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { 555 PHINode *PN = I->second; 556 557 // If this PHI node merges one value and/or undefs, get the value. 558 if (Value *V = SimplifyInstruction(PN, 0, &DT)) { 559 if (AST && PN->getType()->isPointerTy()) 560 AST->deleteValue(PN); 561 PN->replaceAllUsesWith(V); 562 PN->eraseFromParent(); 563 NewPhiNodes.erase(I++); 564 EliminatedAPHI = true; 565 continue; 566 } 567 ++I; 568 } 569 } 570 571 // At this point, the renamer has added entries to PHI nodes for all reachable 572 // code. Unfortunately, there may be unreachable blocks which the renamer 573 // hasn't traversed. If this is the case, the PHI nodes may not 574 // have incoming values for all predecessors. Loop over all PHI nodes we have 575 // created, inserting undef values if they are missing any incoming values. 576 // 577 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 578 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { 579 // We want to do this once per basic block. As such, only process a block 580 // when we find the PHI that is the first entry in the block. 581 PHINode *SomePHI = I->second; 582 BasicBlock *BB = SomePHI->getParent(); 583 if (&BB->front() != SomePHI) 584 continue; 585 586 // Only do work here if there the PHI nodes are missing incoming values. We 587 // know that all PHI nodes that were inserted in a block will have the same 588 // number of incoming values, so we can just check any of them. 589 if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) 590 continue; 591 592 // Get the preds for BB. 593 SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); 594 595 // Ok, now we know that all of the PHI nodes are missing entries for some 596 // basic blocks. Start by sorting the incoming predecessors for efficient 597 // access. 598 std::sort(Preds.begin(), Preds.end()); 599 600 // Now we loop through all BB's which have entries in SomePHI and remove 601 // them from the Preds list. 602 for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { 603 // Do a log(n) search of the Preds list for the entry we want. 604 SmallVector<BasicBlock*, 16>::iterator EntIt = 605 std::lower_bound(Preds.begin(), Preds.end(), 606 SomePHI->getIncomingBlock(i)); 607 assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& 608 "PHI node has entry for a block which is not a predecessor!"); 609 610 // Remove the entry 611 Preds.erase(EntIt); 612 } 613 614 // At this point, the blocks left in the preds list must have dummy 615 // entries inserted into every PHI nodes for the block. Update all the phi 616 // nodes in this block that we are inserting (there could be phis before 617 // mem2reg runs). 618 unsigned NumBadPreds = SomePHI->getNumIncomingValues(); 619 BasicBlock::iterator BBI = BB->begin(); 620 while ((SomePHI = dyn_cast<PHINode>(BBI++)) && 621 SomePHI->getNumIncomingValues() == NumBadPreds) { 622 Value *UndefVal = UndefValue::get(SomePHI->getType()); 623 for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) 624 SomePHI->addIncoming(UndefVal, Preds[pred]); 625 } 626 } 627 628 NewPhiNodes.clear(); 629 } 630 631 632 /// ComputeLiveInBlocks - Determine which blocks the value is live in. These 633 /// are blocks which lead to uses. Knowing this allows us to avoid inserting 634 /// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes 635 /// would be dead). 636 void PromoteMem2Reg:: 637 ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 638 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 639 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { 640 641 // To determine liveness, we must iterate through the predecessors of blocks 642 // where the def is live. Blocks are added to the worklist if we need to 643 // check their predecessors. Start with all the using blocks. 644 SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), 645 Info.UsingBlocks.end()); 646 647 // If any of the using blocks is also a definition block, check to see if the 648 // definition occurs before or after the use. If it happens before the use, 649 // the value isn't really live-in. 650 for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { 651 BasicBlock *BB = LiveInBlockWorklist[i]; 652 if (!DefBlocks.count(BB)) continue; 653 654 // Okay, this is a block that both uses and defines the value. If the first 655 // reference to the alloca is a def (store), then we know it isn't live-in. 656 for (BasicBlock::iterator I = BB->begin(); ; ++I) { 657 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 658 if (SI->getOperand(1) != AI) continue; 659 660 // We found a store to the alloca before a load. The alloca is not 661 // actually live-in here. 662 LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); 663 LiveInBlockWorklist.pop_back(); 664 --i, --e; 665 break; 666 } 667 668 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 669 if (LI->getOperand(0) != AI) continue; 670 671 // Okay, we found a load before a store to the alloca. It is actually 672 // live into this block. 673 break; 674 } 675 } 676 } 677 678 // Now that we have a set of blocks where the phi is live-in, recursively add 679 // their predecessors until we find the full region the value is live. 680 while (!LiveInBlockWorklist.empty()) { 681 BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); 682 683 // The block really is live in here, insert it into the set. If already in 684 // the set, then it has already been processed. 685 if (!LiveInBlocks.insert(BB)) 686 continue; 687 688 // Since the value is live into BB, it is either defined in a predecessor or 689 // live into it to. Add the preds to the worklist unless they are a 690 // defining block. 691 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 692 BasicBlock *P = *PI; 693 694 // The value is not live into a predecessor if it defines the value. 695 if (DefBlocks.count(P)) 696 continue; 697 698 // Otherwise it is, add to the worklist. 699 LiveInBlockWorklist.push_back(P); 700 } 701 } 702 } 703 704 /// DetermineInsertionPoint - At this point, we're committed to promoting the 705 /// alloca using IDF's, and the standard SSA construction algorithm. Determine 706 /// which blocks need phi nodes and see if we can optimize out some work by 707 /// avoiding insertion of dead phi nodes. 708 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 709 AllocaInfo &Info) { 710 // Unique the set of defining blocks for efficient lookup. 711 SmallPtrSet<BasicBlock*, 32> DefBlocks; 712 DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); 713 714 // Determine which blocks the value is live in. These are blocks which lead 715 // to uses. 716 SmallPtrSet<BasicBlock*, 32> LiveInBlocks; 717 ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); 718 719 // Use a priority queue keyed on dominator tree level so that inserted nodes 720 // are handled from the bottom of the dominator tree upwards. 721 typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, 722 DomTreeNodeCompare> IDFPriorityQueue; 723 IDFPriorityQueue PQ; 724 725 for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(), 726 E = DefBlocks.end(); I != E; ++I) { 727 if (DomTreeNode *Node = DT.getNode(*I)) 728 PQ.push(std::make_pair(Node, DomLevels[Node])); 729 } 730 731 SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks; 732 SmallPtrSet<DomTreeNode*, 32> Visited; 733 SmallVector<DomTreeNode*, 32> Worklist; 734 while (!PQ.empty()) { 735 DomTreeNodePair RootPair = PQ.top(); 736 PQ.pop(); 737 DomTreeNode *Root = RootPair.first; 738 unsigned RootLevel = RootPair.second; 739 740 // Walk all dominator tree children of Root, inspecting their CFG edges with 741 // targets elsewhere on the dominator tree. Only targets whose level is at 742 // most Root's level are added to the iterated dominance frontier of the 743 // definition set. 744 745 Worklist.clear(); 746 Worklist.push_back(Root); 747 748 while (!Worklist.empty()) { 749 DomTreeNode *Node = Worklist.pop_back_val(); 750 BasicBlock *BB = Node->getBlock(); 751 752 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; 753 ++SI) { 754 DomTreeNode *SuccNode = DT.getNode(*SI); 755 756 // Quickly skip all CFG edges that are also dominator tree edges instead 757 // of catching them below. 758 if (SuccNode->getIDom() == Node) 759 continue; 760 761 unsigned SuccLevel = DomLevels[SuccNode]; 762 if (SuccLevel > RootLevel) 763 continue; 764 765 if (!Visited.insert(SuccNode)) 766 continue; 767 768 BasicBlock *SuccBB = SuccNode->getBlock(); 769 if (!LiveInBlocks.count(SuccBB)) 770 continue; 771 772 DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); 773 if (!DefBlocks.count(SuccBB)) 774 PQ.push(std::make_pair(SuccNode, SuccLevel)); 775 } 776 777 for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; 778 ++CI) { 779 if (!Visited.count(*CI)) 780 Worklist.push_back(*CI); 781 } 782 } 783 } 784 785 if (DFBlocks.size() > 1) 786 std::sort(DFBlocks.begin(), DFBlocks.end()); 787 788 unsigned CurrentVersion = 0; 789 for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) 790 QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); 791 } 792 793 /// RewriteSingleStoreAlloca - If there is only a single store to this value, 794 /// replace any loads of it that are directly dominated by the definition with 795 /// the value stored. 796 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 797 AllocaInfo &Info, 798 LargeBlockInfo &LBI) { 799 StoreInst *OnlyStore = Info.OnlyStore; 800 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 801 BasicBlock *StoreBB = OnlyStore->getParent(); 802 int StoreIndex = -1; 803 804 // Clear out UsingBlocks. We will reconstruct it here if needed. 805 Info.UsingBlocks.clear(); 806 807 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 808 Instruction *UserInst = cast<Instruction>(*UI++); 809 if (!isa<LoadInst>(UserInst)) { 810 assert(UserInst == OnlyStore && "Should only have load/stores"); 811 continue; 812 } 813 LoadInst *LI = cast<LoadInst>(UserInst); 814 815 // Okay, if we have a load from the alloca, we want to replace it with the 816 // only value stored to the alloca. We can do this if the value is 817 // dominated by the store. If not, we use the rest of the mem2reg machinery 818 // to insert the phi nodes as needed. 819 if (!StoringGlobalVal) { // Non-instructions are always dominated. 820 if (LI->getParent() == StoreBB) { 821 // If we have a use that is in the same block as the store, compare the 822 // indices of the two instructions to see which one came first. If the 823 // load came before the store, we can't handle it. 824 if (StoreIndex == -1) 825 StoreIndex = LBI.getInstructionIndex(OnlyStore); 826 827 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 828 // Can't handle this load, bail out. 829 Info.UsingBlocks.push_back(StoreBB); 830 continue; 831 } 832 833 } else if (LI->getParent() != StoreBB && 834 !dominates(StoreBB, LI->getParent())) { 835 // If the load and store are in different blocks, use BB dominance to 836 // check their relationships. If the store doesn't dom the use, bail 837 // out. 838 Info.UsingBlocks.push_back(LI->getParent()); 839 continue; 840 } 841 } 842 843 // Otherwise, we *can* safely rewrite this load. 844 Value *ReplVal = OnlyStore->getOperand(0); 845 // If the replacement value is the load, this must occur in unreachable 846 // code. 847 if (ReplVal == LI) 848 ReplVal = UndefValue::get(LI->getType()); 849 LI->replaceAllUsesWith(ReplVal); 850 if (AST && LI->getType()->isPointerTy()) 851 AST->deleteValue(LI); 852 LI->eraseFromParent(); 853 LBI.deleteValue(LI); 854 } 855 } 856 857 namespace { 858 859 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the 860 /// first element of a pair. 861 struct StoreIndexSearchPredicate { 862 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 863 const std::pair<unsigned, StoreInst*> &RHS) { 864 return LHS.first < RHS.first; 865 } 866 }; 867 868 } 869 870 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 871 /// block. If this is the case, avoid traversing the CFG and inserting a lot of 872 /// potentially useless PHI nodes by just performing a single linear pass over 873 /// the basic block using the Alloca. 874 /// 875 /// If we cannot promote this alloca (because it is read before it is written), 876 /// return true. This is necessary in cases where, due to control flow, the 877 /// alloca is potentially undefined on some control flow paths. e.g. code like 878 /// this is potentially correct: 879 /// 880 /// for (...) { if (c) { A = undef; undef = B; } } 881 /// 882 /// ... so long as A is not used before undef is set. 883 /// 884 void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 885 LargeBlockInfo &LBI) { 886 // The trickiest case to handle is when we have large blocks. Because of this, 887 // this code is optimized assuming that large blocks happen. This does not 888 // significantly pessimize the small block case. This uses LargeBlockInfo to 889 // make it efficient to get the index of various operations in the block. 890 891 // Clear out UsingBlocks. We will reconstruct it here if needed. 892 Info.UsingBlocks.clear(); 893 894 // Walk the use-def list of the alloca, getting the locations of all stores. 895 typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; 896 StoresByIndexTy StoresByIndex; 897 898 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 899 UI != E; ++UI) 900 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) 901 StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); 902 903 // If there are no stores to the alloca, just replace any loads with undef. 904 if (StoresByIndex.empty()) { 905 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 906 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 907 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 908 if (AST && LI->getType()->isPointerTy()) 909 AST->deleteValue(LI); 910 LBI.deleteValue(LI); 911 LI->eraseFromParent(); 912 } 913 return; 914 } 915 916 // Sort the stores by their index, making it efficient to do a lookup with a 917 // binary search. 918 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 919 920 // Walk all of the loads from this alloca, replacing them with the nearest 921 // store above them, if any. 922 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 923 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 924 if (!LI) continue; 925 926 unsigned LoadIdx = LBI.getInstructionIndex(LI); 927 928 // Find the nearest store that has a lower than this load. 929 StoresByIndexTy::iterator I = 930 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 931 std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), 932 StoreIndexSearchPredicate()); 933 934 // If there is no store before this load, then we can't promote this load. 935 if (I == StoresByIndex.begin()) { 936 // Can't handle this load, bail out. 937 Info.UsingBlocks.push_back(LI->getParent()); 938 continue; 939 } 940 941 // Otherwise, there was a store before this load, the load takes its value. 942 --I; 943 LI->replaceAllUsesWith(I->second->getOperand(0)); 944 if (AST && LI->getType()->isPointerTy()) 945 AST->deleteValue(LI); 946 LI->eraseFromParent(); 947 LBI.deleteValue(LI); 948 } 949 } 950 951 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 952 // Alloca returns true if there wasn't already a phi-node for that variable 953 // 954 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 955 unsigned &Version) { 956 // Look up the basic-block in question. 957 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 958 959 // If the BB already has a phi node added for the i'th alloca then we're done! 960 if (PN) return false; 961 962 // Create a PhiNode using the dereferenced type... and add the phi-node to the 963 // BasicBlock. 964 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), 965 Allocas[AllocaNo]->getName() + "." + Twine(Version++), 966 BB->begin()); 967 ++NumPHIInsert; 968 PhiToAllocaMap[PN] = AllocaNo; 969 970 if (AST && PN->getType()->isPointerTy()) 971 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 972 973 return true; 974 } 975 976 // RenamePass - Recursively traverse the CFG of the function, renaming loads and 977 // stores to the allocas which we are promoting. IncomingVals indicates what 978 // value each Alloca contains on exit from the predecessor block Pred. 979 // 980 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 981 RenamePassData::ValVector &IncomingVals, 982 std::vector<RenamePassData> &Worklist) { 983 NextIteration: 984 // If we are inserting any phi nodes into this BB, they will already be in the 985 // block. 986 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 987 // If we have PHI nodes to update, compute the number of edges from Pred to 988 // BB. 989 if (PhiToAllocaMap.count(APN)) { 990 // We want to be able to distinguish between PHI nodes being inserted by 991 // this invocation of mem2reg from those phi nodes that already existed in 992 // the IR before mem2reg was run. We determine that APN is being inserted 993 // because it is missing incoming edges. All other PHI nodes being 994 // inserted by this pass of mem2reg will have the same number of incoming 995 // operands so far. Remember this count. 996 unsigned NewPHINumOperands = APN->getNumOperands(); 997 998 unsigned NumEdges = 0; 999 for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) 1000 if (*I == BB) 1001 ++NumEdges; 1002 assert(NumEdges && "Must be at least one edge from Pred to BB!"); 1003 1004 // Add entries for all the phis. 1005 BasicBlock::iterator PNI = BB->begin(); 1006 do { 1007 unsigned AllocaNo = PhiToAllocaMap[APN]; 1008 1009 // Add N incoming values to the PHI node. 1010 for (unsigned i = 0; i != NumEdges; ++i) 1011 APN->addIncoming(IncomingVals[AllocaNo], Pred); 1012 1013 // The currently active variable for this block is now the PHI. 1014 IncomingVals[AllocaNo] = APN; 1015 1016 // Get the next phi node. 1017 ++PNI; 1018 APN = dyn_cast<PHINode>(PNI); 1019 if (APN == 0) break; 1020 1021 // Verify that it is missing entries. If not, it is not being inserted 1022 // by this mem2reg invocation so we want to ignore it. 1023 } while (APN->getNumOperands() == NewPHINumOperands); 1024 } 1025 } 1026 1027 // Don't revisit blocks. 1028 if (!Visited.insert(BB)) return; 1029 1030 for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { 1031 Instruction *I = II++; // get the instruction, increment iterator 1032 1033 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 1034 AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); 1035 if (!Src) continue; 1036 1037 DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); 1038 if (AI == AllocaLookup.end()) continue; 1039 1040 Value *V = IncomingVals[AI->second]; 1041 1042 // Anything using the load now uses the current value. 1043 LI->replaceAllUsesWith(V); 1044 if (AST && LI->getType()->isPointerTy()) 1045 AST->deleteValue(LI); 1046 BB->getInstList().erase(LI); 1047 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 1048 // Delete this instruction and mark the name as the current holder of the 1049 // value 1050 AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); 1051 if (!Dest) continue; 1052 1053 DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); 1054 if (ai == AllocaLookup.end()) 1055 continue; 1056 1057 // what value were we writing? 1058 IncomingVals[ai->second] = SI->getOperand(0); 1059 // Record debuginfo for the store before removing it. 1060 if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { 1061 if (!DIB) 1062 DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); 1063 ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); 1064 } 1065 BB->getInstList().erase(SI); 1066 } 1067 } 1068 1069 // 'Recurse' to our successors. 1070 succ_iterator I = succ_begin(BB), E = succ_end(BB); 1071 if (I == E) return; 1072 1073 // Keep track of the successors so we don't visit the same successor twice 1074 SmallPtrSet<BasicBlock*, 8> VisitedSuccs; 1075 1076 // Handle the first successor without using the worklist. 1077 VisitedSuccs.insert(*I); 1078 Pred = BB; 1079 BB = *I; 1080 ++I; 1081 1082 for (; I != E; ++I) 1083 if (VisitedSuccs.insert(*I)) 1084 Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); 1085 1086 goto NextIteration; 1087 } 1088 1089 /// PromoteMemToReg - Promote the specified list of alloca instructions into 1090 /// scalar registers, inserting PHI nodes as appropriate. This function does 1091 /// not modify the CFG of the function at all. All allocas must be from the 1092 /// same function. 1093 /// 1094 /// If AST is specified, the specified tracker is updated to reflect changes 1095 /// made to the IR. 1096 /// 1097 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1098 DominatorTree &DT, AliasSetTracker *AST) { 1099 // If there is nothing to do, bail out... 1100 if (Allocas.empty()) return; 1101 1102 PromoteMem2Reg(Allocas, DT, AST).run(); 1103 } 1104