1 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file promotes memory references to be register references. It promotes 11 // alloca instructions which only have loads and stores as uses. An alloca is 12 // transformed by using dominator frontiers to place PHI nodes, then traversing 13 // the function in depth-first order to rewrite loads and stores as appropriate. 14 // This is just the standard SSA construction algorithm to construct "pruned" 15 // SSA form. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #define DEBUG_TYPE "mem2reg" 20 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 21 #include "llvm/Constants.h" 22 #include "llvm/DerivedTypes.h" 23 #include "llvm/Function.h" 24 #include "llvm/Instructions.h" 25 #include "llvm/IntrinsicInst.h" 26 #include "llvm/LLVMContext.h" 27 #include "llvm/Analysis/Dominators.h" 28 #include "llvm/Analysis/AliasSetTracker.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/SmallVector.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/Support/CFG.h" 35 #include "llvm/Support/Compiler.h" 36 #include <algorithm> 37 using namespace llvm; 38 39 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); 40 STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); 41 STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); 42 STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); 43 44 // Provide DenseMapInfo for all pointers. 45 namespace llvm { 46 template<> 47 struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { 48 typedef std::pair<BasicBlock*, unsigned> EltTy; 49 static inline EltTy getEmptyKey() { 50 return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); 51 } 52 static inline EltTy getTombstoneKey() { 53 return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); 54 } 55 static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { 56 return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2; 57 } 58 static bool isEqual(const EltTy &LHS, const EltTy &RHS) { 59 return LHS == RHS; 60 } 61 static bool isPod() { return true; } 62 }; 63 } 64 65 /// isAllocaPromotable - Return true if this alloca is legal for promotion. 66 /// This is true if there are only loads and stores to the alloca. 67 /// 68 bool llvm::isAllocaPromotable(const AllocaInst *AI) { 69 // FIXME: If the memory unit is of pointer or integer type, we can permit 70 // assignments to subsections of the memory unit. 71 72 // Only allow direct and non-volatile loads and stores... 73 for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end(); 74 UI != UE; ++UI) // Loop over all of the uses of the alloca 75 if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 76 if (LI->isVolatile()) 77 return false; 78 } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 79 if (SI->getOperand(0) == AI) 80 return false; // Don't allow a store OF the AI, only INTO the AI. 81 if (SI->isVolatile()) 82 return false; 83 } else if (const BitCastInst *BC = dyn_cast<BitCastInst>(*UI)) { 84 // A bitcast that does not feed into debug info inhibits promotion. 85 if (!BC->hasOneUse() || !isa<DbgInfoIntrinsic>(*BC->use_begin())) 86 return false; 87 // If the only use is by debug info, this alloca will not exist in 88 // non-debug code, so don't try to promote; this ensures the same 89 // codegen with debug info. Otherwise, debug info should not 90 // inhibit promotion (but we must examine other uses). 91 if (AI->hasOneUse()) 92 return false; 93 } else { 94 return false; 95 } 96 97 return true; 98 } 99 100 namespace { 101 struct AllocaInfo; 102 103 // Data package used by RenamePass() 104 class VISIBILITY_HIDDEN RenamePassData { 105 public: 106 typedef std::vector<Value *> ValVector; 107 108 RenamePassData() {} 109 RenamePassData(BasicBlock *B, BasicBlock *P, 110 const ValVector &V) : BB(B), Pred(P), Values(V) {} 111 BasicBlock *BB; 112 BasicBlock *Pred; 113 ValVector Values; 114 115 void swap(RenamePassData &RHS) { 116 std::swap(BB, RHS.BB); 117 std::swap(Pred, RHS.Pred); 118 Values.swap(RHS.Values); 119 } 120 }; 121 122 /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of 123 /// load/store instructions in the block that directly load or store an alloca. 124 /// 125 /// This functionality is important because it avoids scanning large basic 126 /// blocks multiple times when promoting many allocas in the same block. 127 class VISIBILITY_HIDDEN LargeBlockInfo { 128 /// InstNumbers - For each instruction that we track, keep the index of the 129 /// instruction. The index starts out as the number of the instruction from 130 /// the start of the block. 131 DenseMap<const Instruction *, unsigned> InstNumbers; 132 public: 133 134 /// isInterestingInstruction - This code only looks at accesses to allocas. 135 static bool isInterestingInstruction(const Instruction *I) { 136 return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || 137 (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); 138 } 139 140 /// getInstructionIndex - Get or calculate the index of the specified 141 /// instruction. 142 unsigned getInstructionIndex(const Instruction *I) { 143 assert(isInterestingInstruction(I) && 144 "Not a load/store to/from an alloca?"); 145 146 // If we already have this instruction number, return it. 147 DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); 148 if (It != InstNumbers.end()) return It->second; 149 150 // Scan the whole block to get the instruction. This accumulates 151 // information for every interesting instruction in the block, in order to 152 // avoid gratuitus rescans. 153 const BasicBlock *BB = I->getParent(); 154 unsigned InstNo = 0; 155 for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); 156 BBI != E; ++BBI) 157 if (isInterestingInstruction(BBI)) 158 InstNumbers[BBI] = InstNo++; 159 It = InstNumbers.find(I); 160 161 assert(It != InstNumbers.end() && "Didn't insert instruction?"); 162 return It->second; 163 } 164 165 void deleteValue(const Instruction *I) { 166 InstNumbers.erase(I); 167 } 168 169 void clear() { 170 InstNumbers.clear(); 171 } 172 }; 173 174 struct VISIBILITY_HIDDEN PromoteMem2Reg { 175 /// Allocas - The alloca instructions being promoted. 176 /// 177 std::vector<AllocaInst*> Allocas; 178 DominatorTree &DT; 179 DominanceFrontier &DF; 180 181 /// AST - An AliasSetTracker object to update. If null, don't update it. 182 /// 183 AliasSetTracker *AST; 184 185 LLVMContext &Context; 186 187 /// AllocaLookup - Reverse mapping of Allocas. 188 /// 189 std::map<AllocaInst*, unsigned> AllocaLookup; 190 191 /// NewPhiNodes - The PhiNodes we're adding. 192 /// 193 DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; 194 195 /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas 196 /// it corresponds to. 197 DenseMap<PHINode*, unsigned> PhiToAllocaMap; 198 199 /// PointerAllocaValues - If we are updating an AliasSetTracker, then for 200 /// each alloca that is of pointer type, we keep track of what to copyValue 201 /// to the inserted PHI nodes here. 202 /// 203 std::vector<Value*> PointerAllocaValues; 204 205 /// Visited - The set of basic blocks the renamer has already visited. 206 /// 207 SmallPtrSet<BasicBlock*, 16> Visited; 208 209 /// BBNumbers - Contains a stable numbering of basic blocks to avoid 210 /// non-determinstic behavior. 211 DenseMap<BasicBlock*, unsigned> BBNumbers; 212 213 /// BBNumPreds - Lazily compute the number of predecessors a block has. 214 DenseMap<const BasicBlock*, unsigned> BBNumPreds; 215 public: 216 PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, 217 DominanceFrontier &df, AliasSetTracker *ast, 218 LLVMContext &C) 219 : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {} 220 221 void run(); 222 223 /// properlyDominates - Return true if I1 properly dominates I2. 224 /// 225 bool properlyDominates(Instruction *I1, Instruction *I2) const { 226 if (InvokeInst *II = dyn_cast<InvokeInst>(I1)) 227 I1 = II->getNormalDest()->begin(); 228 return DT.properlyDominates(I1->getParent(), I2->getParent()); 229 } 230 231 /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. 232 /// 233 bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { 234 return DT.dominates(BB1, BB2); 235 } 236 237 private: 238 void RemoveFromAllocasList(unsigned &AllocaIdx) { 239 Allocas[AllocaIdx] = Allocas.back(); 240 Allocas.pop_back(); 241 --AllocaIdx; 242 } 243 244 unsigned getNumPreds(const BasicBlock *BB) { 245 unsigned &NP = BBNumPreds[BB]; 246 if (NP == 0) 247 NP = std::distance(pred_begin(BB), pred_end(BB))+1; 248 return NP-1; 249 } 250 251 void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 252 AllocaInfo &Info); 253 void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 254 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 255 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); 256 257 void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, 258 LargeBlockInfo &LBI); 259 void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 260 LargeBlockInfo &LBI); 261 262 263 void RenamePass(BasicBlock *BB, BasicBlock *Pred, 264 RenamePassData::ValVector &IncVals, 265 std::vector<RenamePassData> &Worklist); 266 bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, 267 SmallPtrSet<PHINode*, 16> &InsertedPHINodes); 268 }; 269 270 struct AllocaInfo { 271 std::vector<BasicBlock*> DefiningBlocks; 272 std::vector<BasicBlock*> UsingBlocks; 273 274 StoreInst *OnlyStore; 275 BasicBlock *OnlyBlock; 276 bool OnlyUsedInOneBlock; 277 278 Value *AllocaPointerVal; 279 280 void clear() { 281 DefiningBlocks.clear(); 282 UsingBlocks.clear(); 283 OnlyStore = 0; 284 OnlyBlock = 0; 285 OnlyUsedInOneBlock = true; 286 AllocaPointerVal = 0; 287 } 288 289 /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our 290 /// ivars. 291 void AnalyzeAlloca(AllocaInst *AI) { 292 clear(); 293 294 // As we scan the uses of the alloca instruction, keep track of stores, 295 // and decide whether all of the loads and stores to the alloca are within 296 // the same basic block. 297 for (Value::use_iterator U = AI->use_begin(), E = AI->use_end(); 298 U != E;) { 299 Instruction *User = cast<Instruction>(*U); 300 ++U; 301 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 302 // Remove any uses of this alloca in DbgInfoInstrinsics. 303 assert(BC->hasOneUse() && "Unexpected alloca uses!"); 304 DbgInfoIntrinsic *DI = cast<DbgInfoIntrinsic>(*BC->use_begin()); 305 DI->eraseFromParent(); 306 BC->eraseFromParent(); 307 continue; 308 } 309 else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 310 // Remember the basic blocks which define new values for the alloca 311 DefiningBlocks.push_back(SI->getParent()); 312 AllocaPointerVal = SI->getOperand(0); 313 OnlyStore = SI; 314 } else { 315 LoadInst *LI = cast<LoadInst>(User); 316 // Otherwise it must be a load instruction, keep track of variable 317 // reads. 318 UsingBlocks.push_back(LI->getParent()); 319 AllocaPointerVal = LI; 320 } 321 322 if (OnlyUsedInOneBlock) { 323 if (OnlyBlock == 0) 324 OnlyBlock = User->getParent(); 325 else if (OnlyBlock != User->getParent()) 326 OnlyUsedInOneBlock = false; 327 } 328 } 329 } 330 }; 331 } // end of anonymous namespace 332 333 334 void PromoteMem2Reg::run() { 335 Function &F = *DF.getRoot()->getParent(); 336 337 if (AST) PointerAllocaValues.resize(Allocas.size()); 338 339 AllocaInfo Info; 340 LargeBlockInfo LBI; 341 342 for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { 343 AllocaInst *AI = Allocas[AllocaNum]; 344 345 assert(isAllocaPromotable(AI) && 346 "Cannot promote non-promotable alloca!"); 347 assert(AI->getParent()->getParent() == &F && 348 "All allocas should be in the same function, which is same as DF!"); 349 350 if (AI->use_empty()) { 351 // If there are no uses of the alloca, just delete it now. 352 if (AST) AST->deleteValue(AI); 353 AI->eraseFromParent(); 354 355 // Remove the alloca from the Allocas list, since it has been processed 356 RemoveFromAllocasList(AllocaNum); 357 ++NumDeadAlloca; 358 continue; 359 } 360 361 // Calculate the set of read and write-locations for each alloca. This is 362 // analogous to finding the 'uses' and 'definitions' of each variable. 363 Info.AnalyzeAlloca(AI); 364 365 // If there is only a single store to this value, replace any loads of 366 // it that are directly dominated by the definition with the value stored. 367 if (Info.DefiningBlocks.size() == 1) { 368 RewriteSingleStoreAlloca(AI, Info, LBI); 369 370 // Finally, after the scan, check to see if the store is all that is left. 371 if (Info.UsingBlocks.empty()) { 372 // Remove the (now dead) store and alloca. 373 Info.OnlyStore->eraseFromParent(); 374 LBI.deleteValue(Info.OnlyStore); 375 376 if (AST) AST->deleteValue(AI); 377 AI->eraseFromParent(); 378 LBI.deleteValue(AI); 379 380 // The alloca has been processed, move on. 381 RemoveFromAllocasList(AllocaNum); 382 383 ++NumSingleStore; 384 continue; 385 } 386 } 387 388 // If the alloca is only read and written in one basic block, just perform a 389 // linear sweep over the block to eliminate it. 390 if (Info.OnlyUsedInOneBlock) { 391 PromoteSingleBlockAlloca(AI, Info, LBI); 392 393 // Finally, after the scan, check to see if the stores are all that is 394 // left. 395 if (Info.UsingBlocks.empty()) { 396 397 // Remove the (now dead) stores and alloca. 398 while (!AI->use_empty()) { 399 StoreInst *SI = cast<StoreInst>(AI->use_back()); 400 SI->eraseFromParent(); 401 LBI.deleteValue(SI); 402 } 403 404 if (AST) AST->deleteValue(AI); 405 AI->eraseFromParent(); 406 LBI.deleteValue(AI); 407 408 // The alloca has been processed, move on. 409 RemoveFromAllocasList(AllocaNum); 410 411 ++NumLocalPromoted; 412 continue; 413 } 414 } 415 416 // If we haven't computed a numbering for the BB's in the function, do so 417 // now. 418 if (BBNumbers.empty()) { 419 unsigned ID = 0; 420 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) 421 BBNumbers[I] = ID++; 422 } 423 424 // If we have an AST to keep updated, remember some pointer value that is 425 // stored into the alloca. 426 if (AST) 427 PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; 428 429 // Keep the reverse mapping of the 'Allocas' array for the rename pass. 430 AllocaLookup[Allocas[AllocaNum]] = AllocaNum; 431 432 // At this point, we're committed to promoting the alloca using IDF's, and 433 // the standard SSA construction algorithm. Determine which blocks need PHI 434 // nodes and see if we can optimize out some work by avoiding insertion of 435 // dead phi nodes. 436 DetermineInsertionPoint(AI, AllocaNum, Info); 437 } 438 439 if (Allocas.empty()) 440 return; // All of the allocas must have been trivial! 441 442 LBI.clear(); 443 444 445 // Set the incoming values for the basic block to be null values for all of 446 // the alloca's. We do this in case there is a load of a value that has not 447 // been stored yet. In this case, it will get this null value. 448 // 449 RenamePassData::ValVector Values(Allocas.size()); 450 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) 451 Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); 452 453 // Walks all basic blocks in the function performing the SSA rename algorithm 454 // and inserting the phi nodes we marked as necessary 455 // 456 std::vector<RenamePassData> RenamePassWorkList; 457 RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); 458 while (!RenamePassWorkList.empty()) { 459 RenamePassData RPD; 460 RPD.swap(RenamePassWorkList.back()); 461 RenamePassWorkList.pop_back(); 462 // RenamePass may add new worklist entries. 463 RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); 464 } 465 466 // The renamer uses the Visited set to avoid infinite loops. Clear it now. 467 Visited.clear(); 468 469 // Remove the allocas themselves from the function. 470 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 471 Instruction *A = Allocas[i]; 472 473 // If there are any uses of the alloca instructions left, they must be in 474 // sections of dead code that were not processed on the dominance frontier. 475 // Just delete the users now. 476 // 477 if (!A->use_empty()) 478 A->replaceAllUsesWith(UndefValue::get(A->getType())); 479 if (AST) AST->deleteValue(A); 480 A->eraseFromParent(); 481 } 482 483 484 // Loop over all of the PHI nodes and see if there are any that we can get 485 // rid of because they merge all of the same incoming values. This can 486 // happen due to undef values coming into the PHI nodes. This process is 487 // iterative, because eliminating one PHI node can cause others to be removed. 488 bool EliminatedAPHI = true; 489 while (EliminatedAPHI) { 490 EliminatedAPHI = false; 491 492 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 493 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { 494 PHINode *PN = I->second; 495 496 // If this PHI node merges one value and/or undefs, get the value. 497 if (Value *V = PN->hasConstantValue(&DT)) { 498 if (AST && isa<PointerType>(PN->getType())) 499 AST->deleteValue(PN); 500 PN->replaceAllUsesWith(V); 501 PN->eraseFromParent(); 502 NewPhiNodes.erase(I++); 503 EliminatedAPHI = true; 504 continue; 505 } 506 ++I; 507 } 508 } 509 510 // At this point, the renamer has added entries to PHI nodes for all reachable 511 // code. Unfortunately, there may be unreachable blocks which the renamer 512 // hasn't traversed. If this is the case, the PHI nodes may not 513 // have incoming values for all predecessors. Loop over all PHI nodes we have 514 // created, inserting undef values if they are missing any incoming values. 515 // 516 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 517 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { 518 // We want to do this once per basic block. As such, only process a block 519 // when we find the PHI that is the first entry in the block. 520 PHINode *SomePHI = I->second; 521 BasicBlock *BB = SomePHI->getParent(); 522 if (&BB->front() != SomePHI) 523 continue; 524 525 // Only do work here if there the PHI nodes are missing incoming values. We 526 // know that all PHI nodes that were inserted in a block will have the same 527 // number of incoming values, so we can just check any of them. 528 if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) 529 continue; 530 531 // Get the preds for BB. 532 SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); 533 534 // Ok, now we know that all of the PHI nodes are missing entries for some 535 // basic blocks. Start by sorting the incoming predecessors for efficient 536 // access. 537 std::sort(Preds.begin(), Preds.end()); 538 539 // Now we loop through all BB's which have entries in SomePHI and remove 540 // them from the Preds list. 541 for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { 542 // Do a log(n) search of the Preds list for the entry we want. 543 SmallVector<BasicBlock*, 16>::iterator EntIt = 544 std::lower_bound(Preds.begin(), Preds.end(), 545 SomePHI->getIncomingBlock(i)); 546 assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& 547 "PHI node has entry for a block which is not a predecessor!"); 548 549 // Remove the entry 550 Preds.erase(EntIt); 551 } 552 553 // At this point, the blocks left in the preds list must have dummy 554 // entries inserted into every PHI nodes for the block. Update all the phi 555 // nodes in this block that we are inserting (there could be phis before 556 // mem2reg runs). 557 unsigned NumBadPreds = SomePHI->getNumIncomingValues(); 558 BasicBlock::iterator BBI = BB->begin(); 559 while ((SomePHI = dyn_cast<PHINode>(BBI++)) && 560 SomePHI->getNumIncomingValues() == NumBadPreds) { 561 Value *UndefVal = UndefValue::get(SomePHI->getType()); 562 for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) 563 SomePHI->addIncoming(UndefVal, Preds[pred]); 564 } 565 } 566 567 NewPhiNodes.clear(); 568 } 569 570 571 /// ComputeLiveInBlocks - Determine which blocks the value is live in. These 572 /// are blocks which lead to uses. Knowing this allows us to avoid inserting 573 /// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes 574 /// would be dead). 575 void PromoteMem2Reg:: 576 ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 577 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 578 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { 579 580 // To determine liveness, we must iterate through the predecessors of blocks 581 // where the def is live. Blocks are added to the worklist if we need to 582 // check their predecessors. Start with all the using blocks. 583 SmallVector<BasicBlock*, 64> LiveInBlockWorklist; 584 LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), 585 Info.UsingBlocks.begin(), Info.UsingBlocks.end()); 586 587 // If any of the using blocks is also a definition block, check to see if the 588 // definition occurs before or after the use. If it happens before the use, 589 // the value isn't really live-in. 590 for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { 591 BasicBlock *BB = LiveInBlockWorklist[i]; 592 if (!DefBlocks.count(BB)) continue; 593 594 // Okay, this is a block that both uses and defines the value. If the first 595 // reference to the alloca is a def (store), then we know it isn't live-in. 596 for (BasicBlock::iterator I = BB->begin(); ; ++I) { 597 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 598 if (SI->getOperand(1) != AI) continue; 599 600 // We found a store to the alloca before a load. The alloca is not 601 // actually live-in here. 602 LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); 603 LiveInBlockWorklist.pop_back(); 604 --i, --e; 605 break; 606 } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 607 if (LI->getOperand(0) != AI) continue; 608 609 // Okay, we found a load before a store to the alloca. It is actually 610 // live into this block. 611 break; 612 } 613 } 614 } 615 616 // Now that we have a set of blocks where the phi is live-in, recursively add 617 // their predecessors until we find the full region the value is live. 618 while (!LiveInBlockWorklist.empty()) { 619 BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); 620 621 // The block really is live in here, insert it into the set. If already in 622 // the set, then it has already been processed. 623 if (!LiveInBlocks.insert(BB)) 624 continue; 625 626 // Since the value is live into BB, it is either defined in a predecessor or 627 // live into it to. Add the preds to the worklist unless they are a 628 // defining block. 629 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 630 BasicBlock *P = *PI; 631 632 // The value is not live into a predecessor if it defines the value. 633 if (DefBlocks.count(P)) 634 continue; 635 636 // Otherwise it is, add to the worklist. 637 LiveInBlockWorklist.push_back(P); 638 } 639 } 640 } 641 642 /// DetermineInsertionPoint - At this point, we're committed to promoting the 643 /// alloca using IDF's, and the standard SSA construction algorithm. Determine 644 /// which blocks need phi nodes and see if we can optimize out some work by 645 /// avoiding insertion of dead phi nodes. 646 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 647 AllocaInfo &Info) { 648 649 // Unique the set of defining blocks for efficient lookup. 650 SmallPtrSet<BasicBlock*, 32> DefBlocks; 651 DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); 652 653 // Determine which blocks the value is live in. These are blocks which lead 654 // to uses. 655 SmallPtrSet<BasicBlock*, 32> LiveInBlocks; 656 ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); 657 658 // Compute the locations where PhiNodes need to be inserted. Look at the 659 // dominance frontier of EACH basic-block we have a write in. 660 unsigned CurrentVersion = 0; 661 SmallPtrSet<PHINode*, 16> InsertedPHINodes; 662 std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; 663 while (!Info.DefiningBlocks.empty()) { 664 BasicBlock *BB = Info.DefiningBlocks.back(); 665 Info.DefiningBlocks.pop_back(); 666 667 // Look up the DF for this write, add it to defining blocks. 668 DominanceFrontier::const_iterator it = DF.find(BB); 669 if (it == DF.end()) continue; 670 671 const DominanceFrontier::DomSetType &S = it->second; 672 673 // In theory we don't need the indirection through the DFBlocks vector. 674 // In practice, the order of calling QueuePhiNode would depend on the 675 // (unspecified) ordering of basic blocks in the dominance frontier, 676 // which would give PHI nodes non-determinstic subscripts. Fix this by 677 // processing blocks in order of the occurance in the function. 678 for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), 679 PE = S.end(); P != PE; ++P) { 680 // If the frontier block is not in the live-in set for the alloca, don't 681 // bother processing it. 682 if (!LiveInBlocks.count(*P)) 683 continue; 684 685 DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); 686 } 687 688 // Sort by which the block ordering in the function. 689 if (DFBlocks.size() > 1) 690 std::sort(DFBlocks.begin(), DFBlocks.end()); 691 692 for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { 693 BasicBlock *BB = DFBlocks[i].second; 694 if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) 695 Info.DefiningBlocks.push_back(BB); 696 } 697 DFBlocks.clear(); 698 } 699 } 700 701 /// RewriteSingleStoreAlloca - If there is only a single store to this value, 702 /// replace any loads of it that are directly dominated by the definition with 703 /// the value stored. 704 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 705 AllocaInfo &Info, 706 LargeBlockInfo &LBI) { 707 StoreInst *OnlyStore = Info.OnlyStore; 708 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 709 BasicBlock *StoreBB = OnlyStore->getParent(); 710 int StoreIndex = -1; 711 712 // Clear out UsingBlocks. We will reconstruct it here if needed. 713 Info.UsingBlocks.clear(); 714 715 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 716 Instruction *UserInst = cast<Instruction>(*UI++); 717 if (!isa<LoadInst>(UserInst)) { 718 assert(UserInst == OnlyStore && "Should only have load/stores"); 719 continue; 720 } 721 LoadInst *LI = cast<LoadInst>(UserInst); 722 723 // Okay, if we have a load from the alloca, we want to replace it with the 724 // only value stored to the alloca. We can do this if the value is 725 // dominated by the store. If not, we use the rest of the mem2reg machinery 726 // to insert the phi nodes as needed. 727 if (!StoringGlobalVal) { // Non-instructions are always dominated. 728 if (LI->getParent() == StoreBB) { 729 // If we have a use that is in the same block as the store, compare the 730 // indices of the two instructions to see which one came first. If the 731 // load came before the store, we can't handle it. 732 if (StoreIndex == -1) 733 StoreIndex = LBI.getInstructionIndex(OnlyStore); 734 735 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 736 // Can't handle this load, bail out. 737 Info.UsingBlocks.push_back(StoreBB); 738 continue; 739 } 740 741 } else if (LI->getParent() != StoreBB && 742 !dominates(StoreBB, LI->getParent())) { 743 // If the load and store are in different blocks, use BB dominance to 744 // check their relationships. If the store doesn't dom the use, bail 745 // out. 746 Info.UsingBlocks.push_back(LI->getParent()); 747 continue; 748 } 749 } 750 751 // Otherwise, we *can* safely rewrite this load. 752 LI->replaceAllUsesWith(OnlyStore->getOperand(0)); 753 if (AST && isa<PointerType>(LI->getType())) 754 AST->deleteValue(LI); 755 LI->eraseFromParent(); 756 LBI.deleteValue(LI); 757 } 758 } 759 760 namespace { 761 762 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the 763 /// first element of a pair. 764 struct StoreIndexSearchPredicate { 765 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 766 const std::pair<unsigned, StoreInst*> &RHS) { 767 return LHS.first < RHS.first; 768 } 769 }; 770 771 } 772 773 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 774 /// block. If this is the case, avoid traversing the CFG and inserting a lot of 775 /// potentially useless PHI nodes by just performing a single linear pass over 776 /// the basic block using the Alloca. 777 /// 778 /// If we cannot promote this alloca (because it is read before it is written), 779 /// return true. This is necessary in cases where, due to control flow, the 780 /// alloca is potentially undefined on some control flow paths. e.g. code like 781 /// this is potentially correct: 782 /// 783 /// for (...) { if (c) { A = undef; undef = B; } } 784 /// 785 /// ... so long as A is not used before undef is set. 786 /// 787 void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 788 LargeBlockInfo &LBI) { 789 // The trickiest case to handle is when we have large blocks. Because of this, 790 // this code is optimized assuming that large blocks happen. This does not 791 // significantly pessimize the small block case. This uses LargeBlockInfo to 792 // make it efficient to get the index of various operations in the block. 793 794 // Clear out UsingBlocks. We will reconstruct it here if needed. 795 Info.UsingBlocks.clear(); 796 797 // Walk the use-def list of the alloca, getting the locations of all stores. 798 typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; 799 StoresByIndexTy StoresByIndex; 800 801 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 802 UI != E; ++UI) 803 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) 804 StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); 805 806 // If there are no stores to the alloca, just replace any loads with undef. 807 if (StoresByIndex.empty()) { 808 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 809 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 810 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 811 if (AST && isa<PointerType>(LI->getType())) 812 AST->deleteValue(LI); 813 LBI.deleteValue(LI); 814 LI->eraseFromParent(); 815 } 816 return; 817 } 818 819 // Sort the stores by their index, making it efficient to do a lookup with a 820 // binary search. 821 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 822 823 // Walk all of the loads from this alloca, replacing them with the nearest 824 // store above them, if any. 825 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 826 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 827 if (!LI) continue; 828 829 unsigned LoadIdx = LBI.getInstructionIndex(LI); 830 831 // Find the nearest store that has a lower than this load. 832 StoresByIndexTy::iterator I = 833 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 834 std::pair<unsigned, StoreInst*>(LoadIdx, 0), 835 StoreIndexSearchPredicate()); 836 837 // If there is no store before this load, then we can't promote this load. 838 if (I == StoresByIndex.begin()) { 839 // Can't handle this load, bail out. 840 Info.UsingBlocks.push_back(LI->getParent()); 841 continue; 842 } 843 844 // Otherwise, there was a store before this load, the load takes its value. 845 --I; 846 LI->replaceAllUsesWith(I->second->getOperand(0)); 847 if (AST && isa<PointerType>(LI->getType())) 848 AST->deleteValue(LI); 849 LI->eraseFromParent(); 850 LBI.deleteValue(LI); 851 } 852 } 853 854 855 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 856 // Alloca returns true if there wasn't already a phi-node for that variable 857 // 858 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 859 unsigned &Version, 860 SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { 861 // Look up the basic-block in question. 862 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 863 864 // If the BB already has a phi node added for the i'th alloca then we're done! 865 if (PN) return false; 866 867 // Create a PhiNode using the dereferenced type... and add the phi-node to the 868 // BasicBlock. 869 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), 870 Allocas[AllocaNo]->getName() + "." + Twine(Version++), 871 BB->begin()); 872 ++NumPHIInsert; 873 PhiToAllocaMap[PN] = AllocaNo; 874 PN->reserveOperandSpace(getNumPreds(BB)); 875 876 InsertedPHINodes.insert(PN); 877 878 if (AST && isa<PointerType>(PN->getType())) 879 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 880 881 return true; 882 } 883 884 // RenamePass - Recursively traverse the CFG of the function, renaming loads and 885 // stores to the allocas which we are promoting. IncomingVals indicates what 886 // value each Alloca contains on exit from the predecessor block Pred. 887 // 888 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 889 RenamePassData::ValVector &IncomingVals, 890 std::vector<RenamePassData> &Worklist) { 891 NextIteration: 892 // If we are inserting any phi nodes into this BB, they will already be in the 893 // block. 894 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 895 // If we have PHI nodes to update, compute the number of edges from Pred to 896 // BB. 897 if (PhiToAllocaMap.count(APN)) { 898 // We want to be able to distinguish between PHI nodes being inserted by 899 // this invocation of mem2reg from those phi nodes that already existed in 900 // the IR before mem2reg was run. We determine that APN is being inserted 901 // because it is missing incoming edges. All other PHI nodes being 902 // inserted by this pass of mem2reg will have the same number of incoming 903 // operands so far. Remember this count. 904 unsigned NewPHINumOperands = APN->getNumOperands(); 905 906 unsigned NumEdges = 0; 907 for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) 908 if (*I == BB) 909 ++NumEdges; 910 assert(NumEdges && "Must be at least one edge from Pred to BB!"); 911 912 // Add entries for all the phis. 913 BasicBlock::iterator PNI = BB->begin(); 914 do { 915 unsigned AllocaNo = PhiToAllocaMap[APN]; 916 917 // Add N incoming values to the PHI node. 918 for (unsigned i = 0; i != NumEdges; ++i) 919 APN->addIncoming(IncomingVals[AllocaNo], Pred); 920 921 // The currently active variable for this block is now the PHI. 922 IncomingVals[AllocaNo] = APN; 923 924 // Get the next phi node. 925 ++PNI; 926 APN = dyn_cast<PHINode>(PNI); 927 if (APN == 0) break; 928 929 // Verify that it is missing entries. If not, it is not being inserted 930 // by this mem2reg invocation so we want to ignore it. 931 } while (APN->getNumOperands() == NewPHINumOperands); 932 } 933 } 934 935 // Don't revisit blocks. 936 if (!Visited.insert(BB)) return; 937 938 for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { 939 Instruction *I = II++; // get the instruction, increment iterator 940 941 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 942 AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); 943 if (!Src) continue; 944 945 std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); 946 if (AI == AllocaLookup.end()) continue; 947 948 Value *V = IncomingVals[AI->second]; 949 950 // Anything using the load now uses the current value. 951 LI->replaceAllUsesWith(V); 952 if (AST && isa<PointerType>(LI->getType())) 953 AST->deleteValue(LI); 954 BB->getInstList().erase(LI); 955 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 956 // Delete this instruction and mark the name as the current holder of the 957 // value 958 AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); 959 if (!Dest) continue; 960 961 std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); 962 if (ai == AllocaLookup.end()) 963 continue; 964 965 // what value were we writing? 966 IncomingVals[ai->second] = SI->getOperand(0); 967 BB->getInstList().erase(SI); 968 } 969 } 970 971 // 'Recurse' to our successors. 972 succ_iterator I = succ_begin(BB), E = succ_end(BB); 973 if (I == E) return; 974 975 // Keep track of the successors so we don't visit the same successor twice 976 SmallPtrSet<BasicBlock*, 8> VisitedSuccs; 977 978 // Handle the first successor without using the worklist. 979 VisitedSuccs.insert(*I); 980 Pred = BB; 981 BB = *I; 982 ++I; 983 984 for (; I != E; ++I) 985 if (VisitedSuccs.insert(*I)) 986 Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); 987 988 goto NextIteration; 989 } 990 991 /// PromoteMemToReg - Promote the specified list of alloca instructions into 992 /// scalar registers, inserting PHI nodes as appropriate. This function makes 993 /// use of DominanceFrontier information. This function does not modify the CFG 994 /// of the function at all. All allocas must be from the same function. 995 /// 996 /// If AST is specified, the specified tracker is updated to reflect changes 997 /// made to the IR. 998 /// 999 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1000 DominatorTree &DT, DominanceFrontier &DF, 1001 LLVMContext &Context, AliasSetTracker *AST) { 1002 // If there is nothing to do, bail out... 1003 if (Allocas.empty()) return; 1004 1005 PromoteMem2Reg(Allocas, DT, DF, AST, Context).run(); 1006 } 1007