1 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file promotes memory references to be register references. It promotes 11 // alloca instructions which only have loads and stores as uses. An alloca is 12 // transformed by using iterated dominator frontiers to place PHI nodes, then 13 // traversing the function in depth-first order to rewrite loads and stores as 14 // appropriate. 15 // 16 // The algorithm used here is based on: 17 // 18 // Sreedhar and Gao. A linear time algorithm for placing phi-nodes. 19 // In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of 20 // Programming Languages 21 // POPL '95. ACM, New York, NY, 62-73. 22 // 23 // It has been modified to not explicitly use the DJ graph data structure and to 24 // directly compute pruned SSA using per-variable liveness information. 
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
#include <queue>
using namespace llvm;

STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");

namespace llvm {
/// DenseMapInfo specialization for (BasicBlock*, alloca-index) pairs.  This is
/// what lets the NewPhiNodes map below be keyed on such pairs.  The empty and
/// tombstone keys use pointer values (-1 / -2) that can never be real
/// BasicBlock addresses.
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    // Combine the pointer hash with the alloca index; doubling the index
    // keeps nearby (block, index) pairs from trivially colliding.
    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
};
}

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI) {   // Loop over all of the uses of the alloca
    const User *U = *UI;
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Note that atomic loads can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI)
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      // Note that atomic stores can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (SI->isVolatile())
        return false;
    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      // Lifetime markers are tolerated; they are stripped before promotion
      // (see removeLifetimeIntrinsicUsers).
      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
          II->getIntrinsicID() != Intrinsic::lifetime_end)
        return false;
    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // An i8* bitcast is OK only if its sole purpose is feeding lifetime
      // markers.
      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
        return false;
      if (!onlyUsedByLifetimeMarkers(BCI))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // Likewise for an all-zero-index GEP producing i8* that only feeds
      // lifetime markers.
      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
        return false;
      if (!GEPI->hasAllZeroIndices())
        return false;
      if (!onlyUsedByLifetimeMarkers(GEPI))
        return false;
    } else {
      return false;
    }
  }

  return true;
}

namespace {

struct AllocaInfo;

// Data package used by RenamePass().  Each worklist entry records the block to
// rename, the predecessor we arrived from (for PHI incoming edges), and the
// current incoming value for each alloca being promoted.
class RenamePassData {
public:
  typedef std::vector<Value *> ValVector;

  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
  RenamePassData(BasicBlock *B, BasicBlock *P,
                 const ValVector &V) : BB(B), Pred(P), Values(V) {}
  BasicBlock *BB;
  BasicBlock *Pred;
  ValVector Values;

  // Cheap exchange with another entry; used to pop worklist entries without
  // copying the Values vector.
  void swap(RenamePassData &RHS) {
    std::swap(BB, RHS.BB);
    std::swap(Pred, RHS.Pred);
    Values.swap(RHS.Values);
  }
};

/// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
/// load/store instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// InstNumbers - For each instruction that we track, keep the index of the
  /// instruction.  The index starts out as the number of the instruction from
  /// the start of the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;
public:

  /// isInterestingInstruction - This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// getInstructionIndex - Get or calculate the index of the specified
  /// instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end()) return It->second;

    // Scan the whole block to get the instruction.  This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
         BBI != E; ++BBI)
      if (isInterestingInstruction(BBI))
        InstNumbers[BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  // Forget the number of a (deleted) instruction so stale entries don't
  // accumulate in the map.
  void deleteValue(const Instruction *I) {
    InstNumbers.erase(I);
  }

  void clear() {
    InstNumbers.clear();
  }
};

struct PromoteMem2Reg {
  /// Allocas - The alloca instructions being promoted.
  ///
  std::vector<AllocaInst*> Allocas;
  DominatorTree &DT;
  // Lazily-created debug-info builder, owned by this object (deleted in the
  // destructor); only allocated if a dbg.declare must be converted.
  DIBuilder *DIB;

  /// AST - An AliasSetTracker object to update.  If null, don't update it.
  ///
  AliasSetTracker *AST;

  /// AllocaLookup - Reverse mapping of Allocas.
  ///
  DenseMap<AllocaInst*, unsigned> AllocaLookup;

  /// NewPhiNodes - The PhiNodes we're adding, keyed on (block, alloca index).
  ///
  DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;

  /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
  /// it corresponds to.
  DenseMap<PHINode*, unsigned> PhiToAllocaMap;

  /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
  /// each alloca that is of pointer type, we keep track of what to copyValue
  /// to the inserted PHI nodes here.
  ///
  std::vector<Value*> PointerAllocaValues;

  /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
  /// intrinsic that describes it, if any, so that we can convert it to a
  /// dbg.value intrinsic if the alloca gets promoted.
  SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;

  /// Visited - The set of basic blocks the renamer has already visited.
  ///
  SmallPtrSet<BasicBlock*, 16> Visited;

  /// BBNumbers - Contains a stable numbering of basic blocks to avoid
  /// non-deterministic behavior.
  DenseMap<BasicBlock*, unsigned> BBNumbers;

  /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
  DenseMap<DomTreeNode*, unsigned> DomLevels;

  /// BBNumPreds - Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
  PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
                 AliasSetTracker *ast)
    : Allocas(A), DT(dt), DIB(0), AST(ast) {}
  ~PromoteMem2Reg() {
    delete DIB;
  }

  void run();

  /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
  ///
  bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
    return DT.dominates(BB1, BB2);
  }

private:
  // Swap-and-pop removal from the Allocas list; decrements the index so the
  // caller's loop revisits the element moved into this slot.
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  // Memoized predecessor count.  The cached value is stored biased by +1 so
  // that a default-constructed (zero) DenseMap entry means "not computed yet".
  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = std::distance(pred_begin(BB), pred_end(BB))+1;
    return NP-1;
  }

  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                               AllocaInfo &Info);
  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                           SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);

  void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                LargeBlockInfo &LBI);
  void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                LargeBlockInfo &LBI);

  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

// Per-alloca analysis results gathered by AnalyzeAlloca and consumed by the
// fast paths and PHI-placement code in run().
struct AllocaInfo {
  SmallVector<BasicBlock*, 32> DefiningBlocks;  // Blocks containing stores.
  SmallVector<BasicBlock*, 32> UsingBlocks;     // Blocks containing loads.

  StoreInst *OnlyStore;        // The single store, if there is exactly one.
  BasicBlock *OnlyBlock;       // The single block with uses, if only one.
  bool OnlyUsedInOneBlock;

  Value *AllocaPointerVal;
  DbgDeclareInst *DbgDeclare;

  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    OnlyStore = 0;
    OnlyBlock = 0;
    OnlyUsedInOneBlock = true;
    AllocaPointerVal = 0;
    DbgDeclare = 0;
  }

  /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
  /// ivars.
  void AnalyzeAlloca(AllocaInst *AI) {
    clear();

    // As we scan the uses of the alloca instruction, keep track of stores,
    // and decide whether all of the loads and stores to the alloca are within
    // the same basic block.
    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
         UI != E;) {
      Instruction *User = cast<Instruction>(*UI++);

      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // Remember the basic blocks which define new values for the alloca
        DefiningBlocks.push_back(SI->getParent());
        AllocaPointerVal = SI->getOperand(0);
        OnlyStore = SI;
      } else {
        LoadInst *LI = cast<LoadInst>(User);
        // Otherwise it must be a load instruction, keep track of variable
        // reads.
        UsingBlocks.push_back(LI->getParent());
        AllocaPointerVal = LI;
      }

      if (OnlyUsedInOneBlock) {
        if (OnlyBlock == 0)
          OnlyBlock = User->getParent();
        else if (OnlyBlock != User->getParent())
          OnlyUsedInOneBlock = false;
      }
    }

    DbgDeclare = FindAllocaDbgDeclare(AI);
  }
};

typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;

// Orders (node, level) pairs by dominator-tree level.  Used with a
// priority_queue in DetermineInsertionPoint so the deepest nodes are popped
// first (i.e. processed from the bottom of the dominator tree upwards).
struct DomTreeNodeCompare {
  bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
    return LHS.second < RHS.second;
  }
};
} // end of anonymous namespace

/// removeLifetimeIntrinsicUsers - Strip the lifetime intrinsics (and the
/// bitcast/GEP instructions that feed them) off of AI, leaving only loads and
/// stores.
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
  // Knowing that this alloca is promotable, we know that it's safe to kill all
  // instructions except for load and store.

  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE;) {
    Instruction *I = cast<Instruction>(*UI);
    // Advance the iterator before erasing I, which would invalidate it.
    ++UI;
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
      continue;

    if (!I->getType()->isVoidTy()) {
      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
      // Follow the use/def chain to erase them now instead of leaving it for
      // dead code elimination later.
      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
           UI != UE;) {
        Instruction *Inst = cast<Instruction>(*UI);
        ++UI;
        Inst->eraseFromParent();
      }
    }
    I->eraseFromParent();
  }
}

/// run - Promote every alloca in Allocas: handle the trivial cases (dead,
/// single-store, single-block) directly, then place PHI nodes via iterated
/// dominance frontiers and rename loads/stores over the dominator tree.
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is
      // left.  UsingBlocks is empty iff every load was rewritten.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's
        // debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          if (!DIB)
            DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare) {
            if (!DIB)
              DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
            ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
          }
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed dominator tree levels, do so now (once per run;
    // the levels drive the IDF priority queue in DetermineInsertionPoint).
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode*, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();


  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    // Swap instead of copy to avoid duplicating the Values vector.
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree.  Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        // Post-increment before erase keeps the iterator valid.
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}


/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                   Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    // The loop must terminate: this block is known to contain a load or a
    // store of the alloca.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.  Swap-and-pop this worklist entry.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              DomTreeNodeCompare> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  // Seed the queue with every defining block, paired with its precomputed
  // dominator-tree level.
  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
       E = DefBlocks.end(); I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  // DFBlocks collects (stable BB number, block) pairs so the final PHI
  // insertion order is deterministic.  Note this Visited set (of dom-tree
  // nodes) is local and distinct from the renamer's member Visited set.
  SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode*, 32> Visited;
  SmallVector<DomTreeNode*, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree.  Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
794 if (SuccNode->getIDom() == Node) 795 continue; 796 797 unsigned SuccLevel = DomLevels[SuccNode]; 798 if (SuccLevel > RootLevel) 799 continue; 800 801 if (!Visited.insert(SuccNode)) 802 continue; 803 804 BasicBlock *SuccBB = SuccNode->getBlock(); 805 if (!LiveInBlocks.count(SuccBB)) 806 continue; 807 808 DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); 809 if (!DefBlocks.count(SuccBB)) 810 PQ.push(std::make_pair(SuccNode, SuccLevel)); 811 } 812 813 for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; 814 ++CI) { 815 if (!Visited.count(*CI)) 816 Worklist.push_back(*CI); 817 } 818 } 819 } 820 821 if (DFBlocks.size() > 1) 822 std::sort(DFBlocks.begin(), DFBlocks.end()); 823 824 unsigned CurrentVersion = 0; 825 for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) 826 QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); 827 } 828 829 /// RewriteSingleStoreAlloca - If there is only a single store to this value, 830 /// replace any loads of it that are directly dominated by the definition with 831 /// the value stored. 832 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 833 AllocaInfo &Info, 834 LargeBlockInfo &LBI) { 835 StoreInst *OnlyStore = Info.OnlyStore; 836 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 837 BasicBlock *StoreBB = OnlyStore->getParent(); 838 int StoreIndex = -1; 839 840 // Clear out UsingBlocks. We will reconstruct it here if needed. 841 Info.UsingBlocks.clear(); 842 843 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 844 Instruction *UserInst = cast<Instruction>(*UI++); 845 if (!isa<LoadInst>(UserInst)) { 846 assert(UserInst == OnlyStore && "Should only have load/stores"); 847 continue; 848 } 849 LoadInst *LI = cast<LoadInst>(UserInst); 850 851 // Okay, if we have a load from the alloca, we want to replace it with the 852 // only value stored to the alloca. We can do this if the value is 853 // dominated by the store. 
If not, we use the rest of the mem2reg machinery 854 // to insert the phi nodes as needed. 855 if (!StoringGlobalVal) { // Non-instructions are always dominated. 856 if (LI->getParent() == StoreBB) { 857 // If we have a use that is in the same block as the store, compare the 858 // indices of the two instructions to see which one came first. If the 859 // load came before the store, we can't handle it. 860 if (StoreIndex == -1) 861 StoreIndex = LBI.getInstructionIndex(OnlyStore); 862 863 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 864 // Can't handle this load, bail out. 865 Info.UsingBlocks.push_back(StoreBB); 866 continue; 867 } 868 869 } else if (LI->getParent() != StoreBB && 870 !dominates(StoreBB, LI->getParent())) { 871 // If the load and store are in different blocks, use BB dominance to 872 // check their relationships. If the store doesn't dom the use, bail 873 // out. 874 Info.UsingBlocks.push_back(LI->getParent()); 875 continue; 876 } 877 } 878 879 // Otherwise, we *can* safely rewrite this load. 880 Value *ReplVal = OnlyStore->getOperand(0); 881 // If the replacement value is the load, this must occur in unreachable 882 // code. 883 if (ReplVal == LI) 884 ReplVal = UndefValue::get(LI->getType()); 885 LI->replaceAllUsesWith(ReplVal); 886 if (AST && LI->getType()->isPointerTy()) 887 AST->deleteValue(LI); 888 LI->eraseFromParent(); 889 LBI.deleteValue(LI); 890 } 891 } 892 893 namespace { 894 895 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the 896 /// first element of a pair. 897 struct StoreIndexSearchPredicate { 898 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 899 const std::pair<unsigned, StoreInst*> &RHS) { 900 return LHS.first < RHS.first; 901 } 902 }; 903 904 } 905 906 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 907 /// block. 
/// If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
/// the basic block using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// the offending blocks are recorded in Info.UsingBlocks so the general
/// mem2reg machinery handles them.  This is necessary in cases where, due to
/// control flow, the alloca is potentially undefined on some control flow
/// paths.  e.g. code like this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen.  This does not
  // significantly pessimize the small block case.  This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // If there are no stores to the alloca, just replace any loads with undef.
  if (StoresByIndex.empty()) {
    // Note: the use iterator is advanced before the load is erased so the
    // deletion does not invalidate it.
    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
      if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
        if (AST && LI->getType()->isPointerTy())
          AST->deleteValue(LI);
        LBI.deleteValue(LI);
        LI->eraseFromParent();
      }
    return;
  }

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI) continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store whose index is lower than this load's, i.e. the
    // last store that executes before the load.
    StoresByIndexTy::iterator I =
      std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                       std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
                       StoreIndexSearchPredicate());

    // If there is no store before this load, then we can't promote this load.
    if (I == StoresByIndex.begin()) {
      // Can't handle this load, bail out.
      Info.UsingBlocks.push_back(LI->getParent());
      continue;
    }

    // Otherwise, there was a store before this load, the load takes its value.
    --I;
    LI->replaceAllUsesWith(I->second->getOperand(0));
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

// QueuePhiNode - Queues a phi-node to be added to a basic-block for a specific
// Alloca.  Returns true if there wasn't already a phi-node for that variable
// in that block.
//
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version) {
  // Look up the basic-block in question.
  // DenseMap operator[] default-constructs the slot (null) on first access,
  // so a non-null PN means a PHI was already queued for this (BB, alloca).
  PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're done!
  if (PN) return false;

  // Create a PhiNode using the dereferenced type... and add the phi-node to the
  // BasicBlock.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       BB->begin());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;

  // Keep the alias-set tracker (if any) in sync with the new value.
  if (AST && PN->getType()->isPointerTy())
    AST->copyValue(PointerAllocaValues[AllocaNo], PN);

  return true;
}

// RenamePass - Recursively traverse the CFG of the function, renaming loads and
// stores to the allocas which we are promoting.  IncomingVals indicates what
// value each Alloca contains on exit from the predecessor block Pred.
// The "recursion" is implemented iteratively: the first successor is handled
// via the goto loop below, and the remaining successors are pushed onto the
// explicit Worklist.
//
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Count the edges Pred->BB; a multi-edge (e.g. from a switch) needs one
      // incoming entry per edge.
      unsigned NumEdges = 0;
      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
        if (*I == BB)
          ++NumEdges;
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation, so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.  (Phi operands above are still added on every edge,
  // which is why that handling happens before this check.)
  if (!Visited.insert(BB)) return;

  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src) continue;

      // Only rewrite loads of allocas we are actually promoting.
      DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end()) continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest) continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.  DIB is created
      // lazily on the first store that needs it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
        if (!DIB)
          DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
      }
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E) return;

  // Keep track of the successors so we don't visit the same successor twice
  SmallPtrSet<BasicBlock*, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  // Remaining successors go onto the worklist with a copy of IncomingVals.
  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  // Manual tail call: loop back for the first successor instead of recursing,
  // avoiding stack overflow on deep CFGs.
  goto NextIteration;
}

/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate.  This function does
/// not modify the CFG of the function at all.  All allocas must be from the
/// same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
1132 /// 1133 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1134 DominatorTree &DT, AliasSetTracker *AST) { 1135 // If there is nothing to do, bail out... 1136 if (Allocas.empty()) return; 1137 1138 PromoteMem2Reg(Allocas, DT, AST).run(); 1139 } 1140