1 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file promotes memory references to be register references. It promotes 11 // alloca instructions which only have loads and stores as uses. An alloca is 12 // transformed by using dominator frontiers to place PHI nodes, then traversing 13 // the function in depth-first order to rewrite loads and stores as appropriate. 14 // This is just the standard SSA construction algorithm to construct "pruned" 15 // SSA form. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #define DEBUG_TYPE "mem2reg" 20 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 21 #include "llvm/Constants.h" 22 #include "llvm/DerivedTypes.h" 23 #include "llvm/Function.h" 24 #include "llvm/Instructions.h" 25 #include "llvm/IntrinsicInst.h" 26 #include "llvm/Metadata.h" 27 #include "llvm/Analysis/DebugInfo.h" 28 #include "llvm/Analysis/Dominators.h" 29 #include "llvm/Analysis/AliasSetTracker.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/Support/CFG.h" 36 #include <algorithm> 37 using namespace llvm; 38 39 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); 40 STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); 41 STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); 42 STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); 43 44 namespace llvm { 45 template<> 46 struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { 47 typedef std::pair<BasicBlock*, unsigned> EltTy; 48 static inline EltTy getEmptyKey() { 49 return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); 50 } 51 static inline EltTy getTombstoneKey() { 52 return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); 53 } 54 static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { 55 return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2; 56 } 57 static bool isEqual(const EltTy &LHS, const EltTy &RHS) { 58 return LHS == RHS; 59 } 60 }; 61 } 62 63 /// isAllocaPromotable - Return true if this alloca is legal for promotion. 64 /// This is true if there are only loads and stores to the alloca. 65 /// 66 bool llvm::isAllocaPromotable(const AllocaInst *AI) { 67 // FIXME: If the memory unit is of pointer or integer type, we can permit 68 // assignments to subsections of the memory unit. 69 70 // Only allow direct and non-volatile loads and stores... 71 for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); 72 UI != UE; ++UI) // Loop over all of the uses of the alloca 73 if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 74 if (LI->isVolatile()) 75 return false; 76 } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 77 if (SI->getOperand(0) == AI) 78 return false; // Don't allow a store OF the AI, only INTO the AI. 79 if (SI->isVolatile()) 80 return false; 81 } else { 82 return false; 83 } 84 85 return true; 86 } 87 88 /// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the 89 /// alloca 'V', if any. 90 static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { 91 if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) 92 for (Value::use_iterator UI = DebugNode->use_begin(), 93 E = DebugNode->use_end(); UI != E; ++UI) 94 if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) 95 return DDI; 96 97 return 0; 98 } 99 100 namespace { 101 struct AllocaInfo; 102 103 // Data package used by RenamePass() 104 class RenamePassData { 105 public: 106 typedef std::vector<Value *> ValVector; 107 108 RenamePassData() : BB(NULL), Pred(NULL), Values() {} 109 RenamePassData(BasicBlock *B, BasicBlock *P, 110 const ValVector &V) : BB(B), Pred(P), Values(V) {} 111 BasicBlock *BB; 112 BasicBlock *Pred; 113 ValVector Values; 114 115 void swap(RenamePassData &RHS) { 116 std::swap(BB, RHS.BB); 117 std::swap(Pred, RHS.Pred); 118 Values.swap(RHS.Values); 119 } 120 }; 121 122 /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of 123 /// load/store instructions in the block that directly load or store an alloca. 124 /// 125 /// This functionality is important because it avoids scanning large basic 126 /// blocks multiple times when promoting many allocas in the same block. 127 class LargeBlockInfo { 128 /// InstNumbers - For each instruction that we track, keep the index of the 129 /// instruction. The index starts out as the number of the instruction from 130 /// the start of the block. 131 DenseMap<const Instruction *, unsigned> InstNumbers; 132 public: 133 134 /// isInterestingInstruction - This code only looks at accesses to allocas. 135 static bool isInterestingInstruction(const Instruction *I) { 136 return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || 137 (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); 138 } 139 140 /// getInstructionIndex - Get or calculate the index of the specified 141 /// instruction. 142 unsigned getInstructionIndex(const Instruction *I) { 143 assert(isInterestingInstruction(I) && 144 "Not a load/store to/from an alloca?"); 145 146 // If we already have this instruction number, return it. 147 DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); 148 if (It != InstNumbers.end()) return It->second; 149 150 // Scan the whole block to get the instruction. This accumulates 151 // information for every interesting instruction in the block, in order to 152 // avoid gratuitus rescans. 153 const BasicBlock *BB = I->getParent(); 154 unsigned InstNo = 0; 155 for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); 156 BBI != E; ++BBI) 157 if (isInterestingInstruction(BBI)) 158 InstNumbers[BBI] = InstNo++; 159 It = InstNumbers.find(I); 160 161 assert(It != InstNumbers.end() && "Didn't insert instruction?"); 162 return It->second; 163 } 164 165 void deleteValue(const Instruction *I) { 166 InstNumbers.erase(I); 167 } 168 169 void clear() { 170 InstNumbers.clear(); 171 } 172 }; 173 174 struct PromoteMem2Reg { 175 /// Allocas - The alloca instructions being promoted. 176 /// 177 std::vector<AllocaInst*> Allocas; 178 DominatorTree &DT; 179 DominanceFrontier &DF; 180 DIFactory *DIF; 181 182 /// AST - An AliasSetTracker object to update. If null, don't update it. 183 /// 184 AliasSetTracker *AST; 185 186 /// AllocaLookup - Reverse mapping of Allocas. 187 /// 188 std::map<AllocaInst*, unsigned> AllocaLookup; 189 190 /// NewPhiNodes - The PhiNodes we're adding. 191 /// 192 DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; 193 194 /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas 195 /// it corresponds to. 196 DenseMap<PHINode*, unsigned> PhiToAllocaMap; 197 198 /// PointerAllocaValues - If we are updating an AliasSetTracker, then for 199 /// each alloca that is of pointer type, we keep track of what to copyValue 200 /// to the inserted PHI nodes here. 201 /// 202 std::vector<Value*> PointerAllocaValues; 203 204 /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare 205 /// intrinsic that describes it, if any, so that we can convert it to a 206 /// dbg.value intrinsic if the alloca gets promoted. 207 SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; 208 209 /// Visited - The set of basic blocks the renamer has already visited. 210 /// 211 SmallPtrSet<BasicBlock*, 16> Visited; 212 213 /// BBNumbers - Contains a stable numbering of basic blocks to avoid 214 /// non-determinstic behavior. 215 DenseMap<BasicBlock*, unsigned> BBNumbers; 216 217 /// BBNumPreds - Lazily compute the number of predecessors a block has. 218 DenseMap<const BasicBlock*, unsigned> BBNumPreds; 219 public: 220 PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, 221 DominanceFrontier &df, AliasSetTracker *ast) 222 : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} 223 ~PromoteMem2Reg() { 224 delete DIF; 225 } 226 227 void run(); 228 229 /// properlyDominates - Return true if I1 properly dominates I2. 230 /// 231 bool properlyDominates(Instruction *I1, Instruction *I2) const { 232 if (InvokeInst *II = dyn_cast<InvokeInst>(I1)) 233 I1 = II->getNormalDest()->begin(); 234 return DT.properlyDominates(I1->getParent(), I2->getParent()); 235 } 236 237 /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. 238 /// 239 bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { 240 return DT.dominates(BB1, BB2); 241 } 242 243 private: 244 void RemoveFromAllocasList(unsigned &AllocaIdx) { 245 Allocas[AllocaIdx] = Allocas.back(); 246 Allocas.pop_back(); 247 --AllocaIdx; 248 } 249 250 unsigned getNumPreds(const BasicBlock *BB) { 251 unsigned &NP = BBNumPreds[BB]; 252 if (NP == 0) 253 NP = std::distance(pred_begin(BB), pred_end(BB))+1; 254 return NP-1; 255 } 256 257 void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 258 AllocaInfo &Info); 259 void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 260 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 261 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); 262 263 void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, 264 LargeBlockInfo &LBI); 265 void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 266 LargeBlockInfo &LBI); 267 void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); 268 269 270 void RenamePass(BasicBlock *BB, BasicBlock *Pred, 271 RenamePassData::ValVector &IncVals, 272 std::vector<RenamePassData> &Worklist); 273 bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, 274 SmallPtrSet<PHINode*, 16> &InsertedPHINodes); 275 }; 276 277 struct AllocaInfo { 278 std::vector<BasicBlock*> DefiningBlocks; 279 std::vector<BasicBlock*> UsingBlocks; 280 281 StoreInst *OnlyStore; 282 BasicBlock *OnlyBlock; 283 bool OnlyUsedInOneBlock; 284 285 Value *AllocaPointerVal; 286 DbgDeclareInst *DbgDeclare; 287 288 void clear() { 289 DefiningBlocks.clear(); 290 UsingBlocks.clear(); 291 OnlyStore = 0; 292 OnlyBlock = 0; 293 OnlyUsedInOneBlock = true; 294 AllocaPointerVal = 0; 295 DbgDeclare = 0; 296 } 297 298 /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our 299 /// ivars. 300 void AnalyzeAlloca(AllocaInst *AI) { 301 clear(); 302 303 // As we scan the uses of the alloca instruction, keep track of stores, 304 // and decide whether all of the loads and stores to the alloca are within 305 // the same basic block. 306 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 307 UI != E;) { 308 Instruction *User = cast<Instruction>(*UI++); 309 310 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 311 // Remember the basic blocks which define new values for the alloca 312 DefiningBlocks.push_back(SI->getParent()); 313 AllocaPointerVal = SI->getOperand(0); 314 OnlyStore = SI; 315 } else { 316 LoadInst *LI = cast<LoadInst>(User); 317 // Otherwise it must be a load instruction, keep track of variable 318 // reads. 319 UsingBlocks.push_back(LI->getParent()); 320 AllocaPointerVal = LI; 321 } 322 323 if (OnlyUsedInOneBlock) { 324 if (OnlyBlock == 0) 325 OnlyBlock = User->getParent(); 326 else if (OnlyBlock != User->getParent()) 327 OnlyUsedInOneBlock = false; 328 } 329 } 330 331 DbgDeclare = FindAllocaDbgDeclare(AI); 332 } 333 }; 334 } // end of anonymous namespace 335 336 337 void PromoteMem2Reg::run() { 338 Function &F = *DF.getRoot()->getParent(); 339 340 if (AST) PointerAllocaValues.resize(Allocas.size()); 341 AllocaDbgDeclares.resize(Allocas.size()); 342 343 AllocaInfo Info; 344 LargeBlockInfo LBI; 345 346 for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { 347 AllocaInst *AI = Allocas[AllocaNum]; 348 349 assert(isAllocaPromotable(AI) && 350 "Cannot promote non-promotable alloca!"); 351 assert(AI->getParent()->getParent() == &F && 352 "All allocas should be in the same function, which is same as DF!"); 353 354 if (AI->use_empty()) { 355 // If there are no uses of the alloca, just delete it now. 356 if (AST) AST->deleteValue(AI); 357 AI->eraseFromParent(); 358 359 // Remove the alloca from the Allocas list, since it has been processed 360 RemoveFromAllocasList(AllocaNum); 361 ++NumDeadAlloca; 362 continue; 363 } 364 365 // Calculate the set of read and write-locations for each alloca. This is 366 // analogous to finding the 'uses' and 'definitions' of each variable. 367 Info.AnalyzeAlloca(AI); 368 369 // If there is only a single store to this value, replace any loads of 370 // it that are directly dominated by the definition with the value stored. 371 if (Info.DefiningBlocks.size() == 1) { 372 RewriteSingleStoreAlloca(AI, Info, LBI); 373 374 // Finally, after the scan, check to see if the store is all that is left. 375 if (Info.UsingBlocks.empty()) { 376 // Record debuginfo for the store and remove the declaration's debuginfo. 377 if (DbgDeclareInst *DDI = Info.DbgDeclare) { 378 ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); 379 DDI->eraseFromParent(); 380 } 381 // Remove the (now dead) store and alloca. 382 Info.OnlyStore->eraseFromParent(); 383 LBI.deleteValue(Info.OnlyStore); 384 385 if (AST) AST->deleteValue(AI); 386 AI->eraseFromParent(); 387 LBI.deleteValue(AI); 388 389 // The alloca has been processed, move on. 390 RemoveFromAllocasList(AllocaNum); 391 392 ++NumSingleStore; 393 continue; 394 } 395 } 396 397 // If the alloca is only read and written in one basic block, just perform a 398 // linear sweep over the block to eliminate it. 399 if (Info.OnlyUsedInOneBlock) { 400 PromoteSingleBlockAlloca(AI, Info, LBI); 401 402 // Finally, after the scan, check to see if the stores are all that is 403 // left. 404 if (Info.UsingBlocks.empty()) { 405 406 // Remove the (now dead) stores and alloca. 407 while (!AI->use_empty()) { 408 StoreInst *SI = cast<StoreInst>(AI->use_back()); 409 // Record debuginfo for the store before removing it. 410 if (DbgDeclareInst *DDI = Info.DbgDeclare) 411 ConvertDebugDeclareToDebugValue(DDI, SI); 412 SI->eraseFromParent(); 413 LBI.deleteValue(SI); 414 } 415 416 if (AST) AST->deleteValue(AI); 417 AI->eraseFromParent(); 418 LBI.deleteValue(AI); 419 420 // The alloca has been processed, move on. 421 RemoveFromAllocasList(AllocaNum); 422 423 // The alloca's debuginfo can be removed as well. 424 if (DbgDeclareInst *DDI = Info.DbgDeclare) 425 DDI->eraseFromParent(); 426 427 ++NumLocalPromoted; 428 continue; 429 } 430 } 431 432 // If we haven't computed a numbering for the BB's in the function, do so 433 // now. 434 if (BBNumbers.empty()) { 435 unsigned ID = 0; 436 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) 437 BBNumbers[I] = ID++; 438 } 439 440 // If we have an AST to keep updated, remember some pointer value that is 441 // stored into the alloca. 442 if (AST) 443 PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; 444 445 // Remember the dbg.declare intrinsic describing this alloca, if any. 446 if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; 447 448 // Keep the reverse mapping of the 'Allocas' array for the rename pass. 449 AllocaLookup[Allocas[AllocaNum]] = AllocaNum; 450 451 // At this point, we're committed to promoting the alloca using IDF's, and 452 // the standard SSA construction algorithm. Determine which blocks need PHI 453 // nodes and see if we can optimize out some work by avoiding insertion of 454 // dead phi nodes. 455 DetermineInsertionPoint(AI, AllocaNum, Info); 456 } 457 458 if (Allocas.empty()) 459 return; // All of the allocas must have been trivial! 460 461 LBI.clear(); 462 463 464 // Set the incoming values for the basic block to be null values for all of 465 // the alloca's. We do this in case there is a load of a value that has not 466 // been stored yet. In this case, it will get this null value. 467 // 468 RenamePassData::ValVector Values(Allocas.size()); 469 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) 470 Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); 471 472 // Walks all basic blocks in the function performing the SSA rename algorithm 473 // and inserting the phi nodes we marked as necessary 474 // 475 std::vector<RenamePassData> RenamePassWorkList; 476 RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); 477 do { 478 RenamePassData RPD; 479 RPD.swap(RenamePassWorkList.back()); 480 RenamePassWorkList.pop_back(); 481 // RenamePass may add new worklist entries. 482 RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); 483 } while (!RenamePassWorkList.empty()); 484 485 // The renamer uses the Visited set to avoid infinite loops. Clear it now. 486 Visited.clear(); 487 488 // Remove the allocas themselves from the function. 489 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 490 Instruction *A = Allocas[i]; 491 492 // If there are any uses of the alloca instructions left, they must be in 493 // sections of dead code that were not processed on the dominance frontier. 494 // Just delete the users now. 495 // 496 if (!A->use_empty()) 497 A->replaceAllUsesWith(UndefValue::get(A->getType())); 498 if (AST) AST->deleteValue(A); 499 A->eraseFromParent(); 500 } 501 502 // Remove alloca's dbg.declare instrinsics from the function. 503 for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) 504 if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) 505 DDI->eraseFromParent(); 506 507 // Loop over all of the PHI nodes and see if there are any that we can get 508 // rid of because they merge all of the same incoming values. This can 509 // happen due to undef values coming into the PHI nodes. This process is 510 // iterative, because eliminating one PHI node can cause others to be removed. 511 bool EliminatedAPHI = true; 512 while (EliminatedAPHI) { 513 EliminatedAPHI = false; 514 515 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 516 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { 517 PHINode *PN = I->second; 518 519 // If this PHI node merges one value and/or undefs, get the value. 520 if (Value *V = PN->hasConstantValue(&DT)) { 521 if (AST && PN->getType()->isPointerTy()) 522 AST->deleteValue(PN); 523 PN->replaceAllUsesWith(V); 524 PN->eraseFromParent(); 525 NewPhiNodes.erase(I++); 526 EliminatedAPHI = true; 527 continue; 528 } 529 ++I; 530 } 531 } 532 533 // At this point, the renamer has added entries to PHI nodes for all reachable 534 // code. Unfortunately, there may be unreachable blocks which the renamer 535 // hasn't traversed. If this is the case, the PHI nodes may not 536 // have incoming values for all predecessors. Loop over all PHI nodes we have 537 // created, inserting undef values if they are missing any incoming values. 538 // 539 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 540 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { 541 // We want to do this once per basic block. As such, only process a block 542 // when we find the PHI that is the first entry in the block. 543 PHINode *SomePHI = I->second; 544 BasicBlock *BB = SomePHI->getParent(); 545 if (&BB->front() != SomePHI) 546 continue; 547 548 // Only do work here if there the PHI nodes are missing incoming values. We 549 // know that all PHI nodes that were inserted in a block will have the same 550 // number of incoming values, so we can just check any of them. 551 if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) 552 continue; 553 554 // Get the preds for BB. 555 SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); 556 557 // Ok, now we know that all of the PHI nodes are missing entries for some 558 // basic blocks. Start by sorting the incoming predecessors for efficient 559 // access. 560 std::sort(Preds.begin(), Preds.end()); 561 562 // Now we loop through all BB's which have entries in SomePHI and remove 563 // them from the Preds list. 564 for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { 565 // Do a log(n) search of the Preds list for the entry we want. 566 SmallVector<BasicBlock*, 16>::iterator EntIt = 567 std::lower_bound(Preds.begin(), Preds.end(), 568 SomePHI->getIncomingBlock(i)); 569 assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& 570 "PHI node has entry for a block which is not a predecessor!"); 571 572 // Remove the entry 573 Preds.erase(EntIt); 574 } 575 576 // At this point, the blocks left in the preds list must have dummy 577 // entries inserted into every PHI nodes for the block. Update all the phi 578 // nodes in this block that we are inserting (there could be phis before 579 // mem2reg runs). 580 unsigned NumBadPreds = SomePHI->getNumIncomingValues(); 581 BasicBlock::iterator BBI = BB->begin(); 582 while ((SomePHI = dyn_cast<PHINode>(BBI++)) && 583 SomePHI->getNumIncomingValues() == NumBadPreds) { 584 Value *UndefVal = UndefValue::get(SomePHI->getType()); 585 for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) 586 SomePHI->addIncoming(UndefVal, Preds[pred]); 587 } 588 } 589 590 NewPhiNodes.clear(); 591 } 592 593 594 /// ComputeLiveInBlocks - Determine which blocks the value is live in. These 595 /// are blocks which lead to uses. Knowing this allows us to avoid inserting 596 /// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes 597 /// would be dead). 598 void PromoteMem2Reg:: 599 ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 600 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 601 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { 602 603 // To determine liveness, we must iterate through the predecessors of blocks 604 // where the def is live. Blocks are added to the worklist if we need to 605 // check their predecessors. Start with all the using blocks. 606 SmallVector<BasicBlock*, 64> LiveInBlockWorklist; 607 LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), 608 Info.UsingBlocks.begin(), Info.UsingBlocks.end()); 609 610 // If any of the using blocks is also a definition block, check to see if the 611 // definition occurs before or after the use. If it happens before the use, 612 // the value isn't really live-in. 613 for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { 614 BasicBlock *BB = LiveInBlockWorklist[i]; 615 if (!DefBlocks.count(BB)) continue; 616 617 // Okay, this is a block that both uses and defines the value. If the first 618 // reference to the alloca is a def (store), then we know it isn't live-in. 619 for (BasicBlock::iterator I = BB->begin(); ; ++I) { 620 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 621 if (SI->getOperand(1) != AI) continue; 622 623 // We found a store to the alloca before a load. The alloca is not 624 // actually live-in here. 625 LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); 626 LiveInBlockWorklist.pop_back(); 627 --i, --e; 628 break; 629 } 630 631 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 632 if (LI->getOperand(0) != AI) continue; 633 634 // Okay, we found a load before a store to the alloca. It is actually 635 // live into this block. 636 break; 637 } 638 } 639 } 640 641 // Now that we have a set of blocks where the phi is live-in, recursively add 642 // their predecessors until we find the full region the value is live. 643 while (!LiveInBlockWorklist.empty()) { 644 BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); 645 646 // The block really is live in here, insert it into the set. If already in 647 // the set, then it has already been processed. 648 if (!LiveInBlocks.insert(BB)) 649 continue; 650 651 // Since the value is live into BB, it is either defined in a predecessor or 652 // live into it to. Add the preds to the worklist unless they are a 653 // defining block. 654 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 655 BasicBlock *P = *PI; 656 657 // The value is not live into a predecessor if it defines the value. 658 if (DefBlocks.count(P)) 659 continue; 660 661 // Otherwise it is, add to the worklist. 662 LiveInBlockWorklist.push_back(P); 663 } 664 } 665 } 666 667 /// DetermineInsertionPoint - At this point, we're committed to promoting the 668 /// alloca using IDF's, and the standard SSA construction algorithm. Determine 669 /// which blocks need phi nodes and see if we can optimize out some work by 670 /// avoiding insertion of dead phi nodes. 671 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 672 AllocaInfo &Info) { 673 674 // Unique the set of defining blocks for efficient lookup. 675 SmallPtrSet<BasicBlock*, 32> DefBlocks; 676 DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); 677 678 // Determine which blocks the value is live in. These are blocks which lead 679 // to uses. 680 SmallPtrSet<BasicBlock*, 32> LiveInBlocks; 681 ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); 682 683 // Compute the locations where PhiNodes need to be inserted. Look at the 684 // dominance frontier of EACH basic-block we have a write in. 685 unsigned CurrentVersion = 0; 686 SmallPtrSet<PHINode*, 16> InsertedPHINodes; 687 std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; 688 while (!Info.DefiningBlocks.empty()) { 689 BasicBlock *BB = Info.DefiningBlocks.back(); 690 Info.DefiningBlocks.pop_back(); 691 692 // Look up the DF for this write, add it to defining blocks. 693 DominanceFrontier::const_iterator it = DF.find(BB); 694 if (it == DF.end()) continue; 695 696 const DominanceFrontier::DomSetType &S = it->second; 697 698 // In theory we don't need the indirection through the DFBlocks vector. 699 // In practice, the order of calling QueuePhiNode would depend on the 700 // (unspecified) ordering of basic blocks in the dominance frontier, 701 // which would give PHI nodes non-determinstic subscripts. Fix this by 702 // processing blocks in order of the occurance in the function. 703 for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), 704 PE = S.end(); P != PE; ++P) { 705 // If the frontier block is not in the live-in set for the alloca, don't 706 // bother processing it. 707 if (!LiveInBlocks.count(*P)) 708 continue; 709 710 DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); 711 } 712 713 // Sort by which the block ordering in the function. 714 if (DFBlocks.size() > 1) 715 std::sort(DFBlocks.begin(), DFBlocks.end()); 716 717 for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { 718 BasicBlock *BB = DFBlocks[i].second; 719 if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) 720 Info.DefiningBlocks.push_back(BB); 721 } 722 DFBlocks.clear(); 723 } 724 } 725 726 /// RewriteSingleStoreAlloca - If there is only a single store to this value, 727 /// replace any loads of it that are directly dominated by the definition with 728 /// the value stored. 729 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 730 AllocaInfo &Info, 731 LargeBlockInfo &LBI) { 732 StoreInst *OnlyStore = Info.OnlyStore; 733 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 734 BasicBlock *StoreBB = OnlyStore->getParent(); 735 int StoreIndex = -1; 736 737 // Clear out UsingBlocks. We will reconstruct it here if needed. 738 Info.UsingBlocks.clear(); 739 740 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 741 Instruction *UserInst = cast<Instruction>(*UI++); 742 if (!isa<LoadInst>(UserInst)) { 743 assert(UserInst == OnlyStore && "Should only have load/stores"); 744 continue; 745 } 746 LoadInst *LI = cast<LoadInst>(UserInst); 747 748 // Okay, if we have a load from the alloca, we want to replace it with the 749 // only value stored to the alloca. We can do this if the value is 750 // dominated by the store. If not, we use the rest of the mem2reg machinery 751 // to insert the phi nodes as needed. 752 if (!StoringGlobalVal) { // Non-instructions are always dominated. 753 if (LI->getParent() == StoreBB) { 754 // If we have a use that is in the same block as the store, compare the 755 // indices of the two instructions to see which one came first. If the 756 // load came before the store, we can't handle it. 757 if (StoreIndex == -1) 758 StoreIndex = LBI.getInstructionIndex(OnlyStore); 759 760 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 761 // Can't handle this load, bail out. 762 Info.UsingBlocks.push_back(StoreBB); 763 continue; 764 } 765 766 } else if (LI->getParent() != StoreBB && 767 !dominates(StoreBB, LI->getParent())) { 768 // If the load and store are in different blocks, use BB dominance to 769 // check their relationships. If the store doesn't dom the use, bail 770 // out. 771 Info.UsingBlocks.push_back(LI->getParent()); 772 continue; 773 } 774 } 775 776 // Otherwise, we *can* safely rewrite this load. 777 Value *ReplVal = OnlyStore->getOperand(0); 778 // If the replacement value is the load, this must occur in unreachable 779 // code. 780 if (ReplVal == LI) 781 ReplVal = UndefValue::get(LI->getType()); 782 LI->replaceAllUsesWith(ReplVal); 783 if (AST && LI->getType()->isPointerTy()) 784 AST->deleteValue(LI); 785 LI->eraseFromParent(); 786 LBI.deleteValue(LI); 787 } 788 } 789 790 namespace { 791 792 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the 793 /// first element of a pair. 794 struct StoreIndexSearchPredicate { 795 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 796 const std::pair<unsigned, StoreInst*> &RHS) { 797 return LHS.first < RHS.first; 798 } 799 }; 800 801 } 802 803 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 804 /// block. If this is the case, avoid traversing the CFG and inserting a lot of 805 /// potentially useless PHI nodes by just performing a single linear pass over 806 /// the basic block using the Alloca. 807 /// 808 /// If we cannot promote this alloca (because it is read before it is written), 809 /// return true. This is necessary in cases where, due to control flow, the 810 /// alloca is potentially undefined on some control flow paths. e.g. code like 811 /// this is potentially correct: 812 /// 813 /// for (...) { if (c) { A = undef; undef = B; } } 814 /// 815 /// ... so long as A is not used before undef is set. 816 /// 817 void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 818 LargeBlockInfo &LBI) { 819 // The trickiest case to handle is when we have large blocks. Because of this, 820 // this code is optimized assuming that large blocks happen. This does not 821 // significantly pessimize the small block case. This uses LargeBlockInfo to 822 // make it efficient to get the index of various operations in the block. 823 824 // Clear out UsingBlocks. We will reconstruct it here if needed. 825 Info.UsingBlocks.clear(); 826 827 // Walk the use-def list of the alloca, getting the locations of all stores. 828 typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; 829 StoresByIndexTy StoresByIndex; 830 831 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 832 UI != E; ++UI) 833 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) 834 StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); 835 836 // If there are no stores to the alloca, just replace any loads with undef. 837 if (StoresByIndex.empty()) { 838 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 839 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 840 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 841 if (AST && LI->getType()->isPointerTy()) 842 AST->deleteValue(LI); 843 LBI.deleteValue(LI); 844 LI->eraseFromParent(); 845 } 846 return; 847 } 848 849 // Sort the stores by their index, making it efficient to do a lookup with a 850 // binary search. 851 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 852 853 // Walk all of the loads from this alloca, replacing them with the nearest 854 // store above them, if any. 855 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 856 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 857 if (!LI) continue; 858 859 unsigned LoadIdx = LBI.getInstructionIndex(LI); 860 861 // Find the nearest store that has a lower than this load. 862 StoresByIndexTy::iterator I = 863 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 864 std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), 865 StoreIndexSearchPredicate()); 866 867 // If there is no store before this load, then we can't promote this load. 868 if (I == StoresByIndex.begin()) { 869 // Can't handle this load, bail out. 870 Info.UsingBlocks.push_back(LI->getParent()); 871 continue; 872 } 873 874 // Otherwise, there was a store before this load, the load takes its value. 875 --I; 876 LI->replaceAllUsesWith(I->second->getOperand(0)); 877 if (AST && LI->getType()->isPointerTy()) 878 AST->deleteValue(LI); 879 LI->eraseFromParent(); 880 LBI.deleteValue(LI); 881 } 882 } 883 884 // Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value 885 // that has an associated llvm.dbg.decl intrinsic. 886 void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, 887 StoreInst *SI) { 888 DIVariable DIVar(DDI->getVariable()); 889 if (!DIVar.Verify()) 890 return; 891 892 if (!DIF) 893 DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); 894 Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, 895 DIVar, SI); 896 897 // Propagate any debug metadata from the store onto the dbg.value. 898 if (MDNode *SIMD = SI->getMetadata("dbg")) 899 DbgVal->setMetadata("dbg", SIMD); 900 // Otherwise propagate debug metadata from dbg.declare. 901 else if (MDNode *MD = DDI->getMetadata("dbg")) 902 DbgVal->setMetadata("dbg", MD); 903 } 904 905 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 906 // Alloca returns true if there wasn't already a phi-node for that variable 907 // 908 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 909 unsigned &Version, 910 SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { 911 // Look up the basic-block in question. 912 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 913 914 // If the BB already has a phi node added for the i'th alloca then we're done! 915 if (PN) return false; 916 917 // Create a PhiNode using the dereferenced type... and add the phi-node to the 918 // BasicBlock. 919 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), 920 Allocas[AllocaNo]->getName() + "." + Twine(Version++), 921 BB->begin()); 922 ++NumPHIInsert; 923 PhiToAllocaMap[PN] = AllocaNo; 924 PN->reserveOperandSpace(getNumPreds(BB)); 925 926 InsertedPHINodes.insert(PN); 927 928 if (AST && PN->getType()->isPointerTy()) 929 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 930 931 return true; 932 } 933 934 // RenamePass - Recursively traverse the CFG of the function, renaming loads and 935 // stores to the allocas which we are promoting. IncomingVals indicates what 936 // value each Alloca contains on exit from the predecessor block Pred. 937 // 938 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 939 RenamePassData::ValVector &IncomingVals, 940 std::vector<RenamePassData> &Worklist) { 941 NextIteration: 942 // If we are inserting any phi nodes into this BB, they will already be in the 943 // block. 944 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 945 // If we have PHI nodes to update, compute the number of edges from Pred to 946 // BB. 947 if (PhiToAllocaMap.count(APN)) { 948 // We want to be able to distinguish between PHI nodes being inserted by 949 // this invocation of mem2reg from those phi nodes that already existed in 950 // the IR before mem2reg was run. We determine that APN is being inserted 951 // because it is missing incoming edges. All other PHI nodes being 952 // inserted by this pass of mem2reg will have the same number of incoming 953 // operands so far. Remember this count. 954 unsigned NewPHINumOperands = APN->getNumOperands(); 955 956 unsigned NumEdges = 0; 957 for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) 958 if (*I == BB) 959 ++NumEdges; 960 assert(NumEdges && "Must be at least one edge from Pred to BB!"); 961 962 // Add entries for all the phis. 963 BasicBlock::iterator PNI = BB->begin(); 964 do { 965 unsigned AllocaNo = PhiToAllocaMap[APN]; 966 967 // Add N incoming values to the PHI node. 968 for (unsigned i = 0; i != NumEdges; ++i) 969 APN->addIncoming(IncomingVals[AllocaNo], Pred); 970 971 // The currently active variable for this block is now the PHI. 972 IncomingVals[AllocaNo] = APN; 973 974 // Get the next phi node. 975 ++PNI; 976 APN = dyn_cast<PHINode>(PNI); 977 if (APN == 0) break; 978 979 // Verify that it is missing entries. If not, it is not being inserted 980 // by this mem2reg invocation so we want to ignore it. 981 } while (APN->getNumOperands() == NewPHINumOperands); 982 } 983 } 984 985 // Don't revisit blocks. 986 if (!Visited.insert(BB)) return; 987 988 for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { 989 Instruction *I = II++; // get the instruction, increment iterator 990 991 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 992 AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); 993 if (!Src) continue; 994 995 std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); 996 if (AI == AllocaLookup.end()) continue; 997 998 Value *V = IncomingVals[AI->second]; 999 1000 // Anything using the load now uses the current value. 1001 LI->replaceAllUsesWith(V); 1002 if (AST && LI->getType()->isPointerTy()) 1003 AST->deleteValue(LI); 1004 BB->getInstList().erase(LI); 1005 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 1006 // Delete this instruction and mark the name as the current holder of the 1007 // value 1008 AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); 1009 if (!Dest) continue; 1010 1011 std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); 1012 if (ai == AllocaLookup.end()) 1013 continue; 1014 1015 // what value were we writing? 1016 IncomingVals[ai->second] = SI->getOperand(0); 1017 // Record debuginfo for the store before removing it. 1018 if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) 1019 ConvertDebugDeclareToDebugValue(DDI, SI); 1020 BB->getInstList().erase(SI); 1021 } 1022 } 1023 1024 // 'Recurse' to our successors. 1025 succ_iterator I = succ_begin(BB), E = succ_end(BB); 1026 if (I == E) return; 1027 1028 // Keep track of the successors so we don't visit the same successor twice 1029 SmallPtrSet<BasicBlock*, 8> VisitedSuccs; 1030 1031 // Handle the first successor without using the worklist. 1032 VisitedSuccs.insert(*I); 1033 Pred = BB; 1034 BB = *I; 1035 ++I; 1036 1037 for (; I != E; ++I) 1038 if (VisitedSuccs.insert(*I)) 1039 Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); 1040 1041 goto NextIteration; 1042 } 1043 1044 /// PromoteMemToReg - Promote the specified list of alloca instructions into 1045 /// scalar registers, inserting PHI nodes as appropriate. This function makes 1046 /// use of DominanceFrontier information. This function does not modify the CFG 1047 /// of the function at all. All allocas must be from the same function. 1048 /// 1049 /// If AST is specified, the specified tracker is updated to reflect changes 1050 /// made to the IR. 1051 /// 1052 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1053 DominatorTree &DT, DominanceFrontier &DF, 1054 AliasSetTracker *AST) { 1055 // If there is nothing to do, bail out... 1056 if (Allocas.empty()) return; 1057 1058 PromoteMem2Reg(Allocas, DT, DF, AST).run(); 1059 } 1060