//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references.  It promotes
// alloca instructions which only have loads and stores as uses.  An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
// The algorithm used here is based on:
//
//   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
//   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
//   Programming Languages
//   POPL '95. ACM, New York, NY, 62-73.
//
// It has been modified to not explicitly use the DJ graph data structure and to
// directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
#include <queue>
using namespace llvm;

STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");

namespace llvm {
// DenseMapInfo specialization for (BasicBlock, alloca index) pairs, so they can
// be used as keys of a DenseMap (see the NewPhiNodes map below).  The empty and
// tombstone keys use pointer values that can never be real BasicBlocks.
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
};
}

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
78 /// 79 bool llvm::isAllocaPromotable(const AllocaInst *AI) { 80 // FIXME: If the memory unit is of pointer or integer type, we can permit 81 // assignments to subsections of the memory unit. 82 83 // Only allow direct and non-volatile loads and stores... 84 for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); 85 UI != UE; ++UI) { // Loop over all of the uses of the alloca 86 const User *U = *UI; 87 if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { 88 if (LI->isVolatile()) 89 return false; 90 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 91 if (SI->getOperand(0) == AI) 92 return false; // Don't allow a store OF the AI, only INTO the AI. 93 if (SI->isVolatile()) 94 return false; 95 } else { 96 return false; 97 } 98 } 99 100 return true; 101 } 102 103 namespace { 104 struct AllocaInfo; 105 106 // Data package used by RenamePass() 107 class RenamePassData { 108 public: 109 typedef std::vector<Value *> ValVector; 110 111 RenamePassData() : BB(NULL), Pred(NULL), Values() {} 112 RenamePassData(BasicBlock *B, BasicBlock *P, 113 const ValVector &V) : BB(B), Pred(P), Values(V) {} 114 BasicBlock *BB; 115 BasicBlock *Pred; 116 ValVector Values; 117 118 void swap(RenamePassData &RHS) { 119 std::swap(BB, RHS.BB); 120 std::swap(Pred, RHS.Pred); 121 Values.swap(RHS.Values); 122 } 123 }; 124 125 /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of 126 /// load/store instructions in the block that directly load or store an alloca. 127 /// 128 /// This functionality is important because it avoids scanning large basic 129 /// blocks multiple times when promoting many allocas in the same block. 130 class LargeBlockInfo { 131 /// InstNumbers - For each instruction that we track, keep the index of the 132 /// instruction. The index starts out as the number of the instruction from 133 /// the start of the block. 
    DenseMap<const Instruction *, unsigned> InstNumbers;
  public:

    /// isInterestingInstruction - This code only looks at accesses to allocas.
    static bool isInterestingInstruction(const Instruction *I) {
      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
    }

    /// getInstructionIndex - Get or calculate the index of the specified
    /// instruction.
    unsigned getInstructionIndex(const Instruction *I) {
      assert(isInterestingInstruction(I) &&
             "Not a load/store to/from an alloca?");

      // If we already have this instruction number, return it.
      DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
      if (It != InstNumbers.end()) return It->second;

      // Scan the whole block to get the instruction.  This accumulates
      // information for every interesting instruction in the block, in order to
      // avoid gratuitous rescans.
      const BasicBlock *BB = I->getParent();
      unsigned InstNo = 0;
      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
           BBI != E; ++BBI)
        if (isInterestingInstruction(BBI))
          InstNumbers[BBI] = InstNo++;
      It = InstNumbers.find(I);

      assert(It != InstNumbers.end() && "Didn't insert instruction?");
      return It->second;
    }

    // deleteValue - Forget the cached index of I (call before erasing I).
    void deleteValue(const Instruction *I) {
      InstNumbers.erase(I);
    }

    void clear() {
      InstNumbers.clear();
    }
  };

  struct PromoteMem2Reg {
    /// Allocas - The alloca instructions being promoted.
    ///
    std::vector<AllocaInst*> Allocas;
    /// DT - Dominator tree of the function; used for PHI placement and
    /// dominance queries.
    DominatorTree &DT;
    /// DIB - Lazily created DIBuilder used to convert dbg.declare intrinsics
    /// into dbg.value intrinsics; owned by this object (freed in the dtor).
    DIBuilder *DIB;

    /// AST - An AliasSetTracker object to update.  If null, don't update it.
    ///
    AliasSetTracker *AST;

    /// AllocaLookup - Reverse mapping of Allocas.
    ///
    DenseMap<AllocaInst*, unsigned>  AllocaLookup;

    /// NewPhiNodes - The PhiNodes we're adding.
193 /// 194 DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; 195 196 /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas 197 /// it corresponds to. 198 DenseMap<PHINode*, unsigned> PhiToAllocaMap; 199 200 /// PointerAllocaValues - If we are updating an AliasSetTracker, then for 201 /// each alloca that is of pointer type, we keep track of what to copyValue 202 /// to the inserted PHI nodes here. 203 /// 204 std::vector<Value*> PointerAllocaValues; 205 206 /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare 207 /// intrinsic that describes it, if any, so that we can convert it to a 208 /// dbg.value intrinsic if the alloca gets promoted. 209 SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; 210 211 /// Visited - The set of basic blocks the renamer has already visited. 212 /// 213 SmallPtrSet<BasicBlock*, 16> Visited; 214 215 /// BBNumbers - Contains a stable numbering of basic blocks to avoid 216 /// non-determinstic behavior. 217 DenseMap<BasicBlock*, unsigned> BBNumbers; 218 219 /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. 220 DenseMap<DomTreeNode*, unsigned> DomLevels; 221 222 /// BBNumPreds - Lazily compute the number of predecessors a block has. 223 DenseMap<const BasicBlock*, unsigned> BBNumPreds; 224 public: 225 PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, 226 AliasSetTracker *ast) 227 : Allocas(A), DT(dt), DIB(0), AST(ast) {} 228 ~PromoteMem2Reg() { 229 delete DIB; 230 } 231 232 void run(); 233 234 /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. 
235 /// 236 bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { 237 return DT.dominates(BB1, BB2); 238 } 239 240 private: 241 void RemoveFromAllocasList(unsigned &AllocaIdx) { 242 Allocas[AllocaIdx] = Allocas.back(); 243 Allocas.pop_back(); 244 --AllocaIdx; 245 } 246 247 unsigned getNumPreds(const BasicBlock *BB) { 248 unsigned &NP = BBNumPreds[BB]; 249 if (NP == 0) 250 NP = std::distance(pred_begin(BB), pred_end(BB))+1; 251 return NP-1; 252 } 253 254 void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 255 AllocaInfo &Info); 256 void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 257 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 258 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); 259 260 void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, 261 LargeBlockInfo &LBI); 262 void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 263 LargeBlockInfo &LBI); 264 265 void RenamePass(BasicBlock *BB, BasicBlock *Pred, 266 RenamePassData::ValVector &IncVals, 267 std::vector<RenamePassData> &Worklist); 268 bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); 269 }; 270 271 struct AllocaInfo { 272 SmallVector<BasicBlock*, 32> DefiningBlocks; 273 SmallVector<BasicBlock*, 32> UsingBlocks; 274 275 StoreInst *OnlyStore; 276 BasicBlock *OnlyBlock; 277 bool OnlyUsedInOneBlock; 278 279 Value *AllocaPointerVal; 280 DbgDeclareInst *DbgDeclare; 281 282 void clear() { 283 DefiningBlocks.clear(); 284 UsingBlocks.clear(); 285 OnlyStore = 0; 286 OnlyBlock = 0; 287 OnlyUsedInOneBlock = true; 288 AllocaPointerVal = 0; 289 DbgDeclare = 0; 290 } 291 292 /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our 293 /// ivars. 294 void AnalyzeAlloca(AllocaInst *AI) { 295 clear(); 296 297 // As we scan the uses of the alloca instruction, keep track of stores, 298 // and decide whether all of the loads and stores to the alloca are within 299 // the same basic block. 
      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
           UI != E;)  {
        Instruction *User = cast<Instruction>(*UI++);

        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
          // Remember the basic blocks which define new values for the alloca
          DefiningBlocks.push_back(SI->getParent());
          AllocaPointerVal = SI->getOperand(0);
          OnlyStore = SI;
        } else {
          LoadInst *LI = cast<LoadInst>(User);
          // Otherwise it must be a load instruction, keep track of variable
          // reads.  (isAllocaPromotable guarantees uses are loads/stores only.)
          UsingBlocks.push_back(LI->getParent());
          AllocaPointerVal = LI;
        }

        if (OnlyUsedInOneBlock) {
          if (OnlyBlock == 0)
            OnlyBlock = User->getParent();
          else if (OnlyBlock != User->getParent())
            OnlyUsedInOneBlock = false;
        }
      }

      DbgDeclare = FindAllocaDbgDeclare(AI);
    }
  };

  typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;

  // DomTreeNodeCompare - Orders (node, level) pairs by dominator tree level,
  // for the priority queue used in DetermineInsertionPoint.
  struct DomTreeNodeCompare {
    bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
      return LHS.second < RHS.second;
    }
  };
}  // end of anonymous namespace


void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's
        // debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          if (!DIB)
            DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare) {
            if (!DIB)
              DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
            ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
          }
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed dominator tree levels, do so now.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode*, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();


  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree.  Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        // erase(I++): advance the iterator before erasing so it stays valid.
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    // Walk the leading run of PHIs with the same (deficient) incoming count;
    // those are exactly the PHIs this pass inserted.
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}


/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                   Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.  Swap-and-pop, then recheck index i against
        // the new, smaller bound.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              DomTreeNodeCompare> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
       E = DefBlocks.end(); I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode*, 32> Visited;
  SmallVector<DomTreeNode*, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree.  Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        // insert() returns false when SuccNode was already visited.
        if (!Visited.insert(SuccNode))
          continue;

        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  // Sort by the stable BB numbering so PHI insertion order is deterministic.
  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}

/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
                                              AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.  If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

namespace {

/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
/// first element of a pair.
struct StoreIndexSearchPredicate {
  bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
                  const std::pair<unsigned, StoreInst*> &RHS) {
    return LHS.first < RHS.first;
  }
};

}

/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
/// the basic block using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return true.  This is necessary in cases where, due to control flow, the
/// alloca is potentially undefined on some control flow paths.  e.g. code like
/// this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen.  This does not
  // significantly pessimize the small block case.  This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // If there are no stores to the alloca, just replace any loads with undef.
891 if (StoresByIndex.empty()) { 892 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 893 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 894 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 895 if (AST && LI->getType()->isPointerTy()) 896 AST->deleteValue(LI); 897 LBI.deleteValue(LI); 898 LI->eraseFromParent(); 899 } 900 return; 901 } 902 903 // Sort the stores by their index, making it efficient to do a lookup with a 904 // binary search. 905 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 906 907 // Walk all of the loads from this alloca, replacing them with the nearest 908 // store above them, if any. 909 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 910 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 911 if (!LI) continue; 912 913 unsigned LoadIdx = LBI.getInstructionIndex(LI); 914 915 // Find the nearest store that has a lower than this load. 916 StoresByIndexTy::iterator I = 917 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 918 std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), 919 StoreIndexSearchPredicate()); 920 921 // If there is no store before this load, then we can't promote this load. 922 if (I == StoresByIndex.begin()) { 923 // Can't handle this load, bail out. 924 Info.UsingBlocks.push_back(LI->getParent()); 925 continue; 926 } 927 928 // Otherwise, there was a store before this load, the load takes its value. 929 --I; 930 LI->replaceAllUsesWith(I->second->getOperand(0)); 931 if (AST && LI->getType()->isPointerTy()) 932 AST->deleteValue(LI); 933 LI->eraseFromParent(); 934 LBI.deleteValue(LI); 935 } 936 } 937 938 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 939 // Alloca returns true if there wasn't already a phi-node for that variable 940 // 941 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 942 unsigned &Version) { 943 // Look up the basic-block in question. 
  // operator[] default-constructs the slot to null on first access, so PN is
  // null exactly when no phi has been queued for (BB, AllocaNo) yet.
  PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're done!
  if (PN) return false;

  // Create a PhiNode using the dereferenced type... and add the phi-node to the
  // BasicBlock.  The phi is created with no incoming operands; RenamePass
  // fills them in later.  Version gives each phi for this alloca a distinct
  // name suffix.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       BB->begin());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;

  // Mirror the alloca's pointer value onto the new phi in the alias-set
  // tracker, if we are maintaining one.
  if (AST && PN->getType()->isPointerTy())
    AST->copyValue(PointerAllocaValues[AllocaNo], PN);

  return true;
}

// RenamePass - Recursively traverse the CFG of the function, renaming loads and
// stores to the allocas which we are promoting.  IncomingVals indicates what
// value each Alloca contains on exit from the predecessor block Pred.
// (The "recursion" on the first successor is turned into iteration via the
// NextIteration label; remaining successors go onto Worklist.)
//
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Pred may reach BB through several CFG edges (e.g. multiple switch
      // cases targeting the same block); the phi needs one entry per edge.
      unsigned NumEdges = 0;
      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
        if (*I == BB)
          ++NumEdges;
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.  Phis queued by QueuePhiNode were all
      // inserted at the front of the block, so they form a contiguous run.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.  (Phi operands above must still be added once per
  // incoming edge, which is why this check comes after the phi update.)
  if (!Visited.insert(BB)) return;

  // Rewrite every load/store of a promoted alloca in this block.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // Advance before erasing I below.

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src) continue;

      // Ignore loads from allocas we are not promoting.
      DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end()) continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);  // Keep the alias-set tracker in sync.
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value.
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest) continue;

      // Ignore stores to allocas we are not promoting.
      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // What value were we writing?  It becomes the live value of the alloca
      // for the rest of this block (and its successors).
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
        if (!DIB)
          // Lazily create the DIBuilder for the enclosing module.
          DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
      }
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E) return;

  // Keep track of the successors so we don't visit the same successor twice
  // from this block (duplicate edges were already handled by NumEdges above).
  SmallPtrSet<BasicBlock*, 8> VisitedSuccs;

  // Handle the first successor without using the worklist: tail-"call"
  // ourselves via the NextIteration label to avoid deep native recursion.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  // All remaining successors are deferred; note each gets its own copy of
  // IncomingVals inside RenamePassData.
  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  goto NextIteration;
}

/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate.  This function does
/// not modify the CFG of the function at all.  All allocas must be from the
/// same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
1083 /// 1084 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1085 DominatorTree &DT, AliasSetTracker *AST) { 1086 // If there is nothing to do, bail out... 1087 if (Allocas.empty()) return; 1088 1089 PromoteMem2Reg(Allocas, DT, AST).run(); 1090 } 1091