//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references.  It promotes
// alloca instructions which only have loads and stores as uses.  An alloca is
// transformed by using dominator frontiers to place PHI nodes, then traversing
// the function in depth-first order to rewrite loads and stores as appropriate.
// This is just the standard SSA construction algorithm to construct "pruned"
// SSA form.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include <algorithm>
using namespace llvm;

// Pass-wide statistics, reported with -stats.
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");

// Provide DenseMapInfo for all pointers.
namespace llvm {
// Specialization of DenseMapInfo for the (basic block, alloca index) pairs
// used as keys of the NewPhiNodes map below.
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
}

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI)   // Loop over all of the uses of the alloca
    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (SI->getOperand(0) == AI)
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      if (SI->isVolatile())
        return false;
    } else if (const BitCastInst *BC = dyn_cast<BitCastInst>(*UI)) {
      // A bitcast that does not feed into debug info inhibits promotion.
      if (!BC->hasOneUse() || !isa<DbgInfoIntrinsic>(*BC->use_begin()))
        return false;
      // If the only use is by debug info, this alloca will not exist in
      // non-debug code, so don't try to promote; this ensures the same
      // codegen with debug info.  Otherwise, debug info should not
      // inhibit promotion (but we must examine other uses).
      if (AI->hasOneUse())
        return false;
    } else {
      return false;
    }

  return true;
}

namespace {
  struct AllocaInfo;

  // Data package used by RenamePass() -- one pending (block, predecessor,
  // incoming-values) tuple on the rename worklist.
  class VISIBILITY_HIDDEN RenamePassData {
  public:
    typedef std::vector<Value *> ValVector;

    RenamePassData() {}
    RenamePassData(BasicBlock *B, BasicBlock *P,
                   const ValVector &V) : BB(B), Pred(P), Values(V) {}
    BasicBlock *BB;
    BasicBlock *Pred;
    ValVector Values;

    void swap(RenamePassData &RHS) {
      std::swap(BB, RHS.BB);
      std::swap(Pred, RHS.Pred);
      Values.swap(RHS.Values);
    }
  };

  /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
  /// load/store instructions in the block that directly load or store an
  /// alloca.
  ///
  /// This functionality is important because it avoids scanning large basic
  /// blocks multiple times when promoting many allocas in the same block.
  class VISIBILITY_HIDDEN LargeBlockInfo {
    /// InstNumbers - For each instruction that we track, keep the index of the
    /// instruction.  The index starts out as the number of the instruction from
    /// the start of the block.
    DenseMap<const Instruction *, unsigned> InstNumbers;
  public:

    /// isInterestingInstruction - This code only looks at accesses to allocas.
    static bool isInterestingInstruction(const Instruction *I) {
      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
    }

    /// getInstructionIndex - Get or calculate the index of the specified
    /// instruction.
    unsigned getInstructionIndex(const Instruction *I) {
      assert(isInterestingInstruction(I) &&
             "Not a load/store to/from an alloca?");

      // If we already have this instruction number, return it.
      DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
      if (It != InstNumbers.end()) return It->second;

      // Scan the whole block to get the instruction.  This accumulates
      // information for every interesting instruction in the block, in order to
      // avoid gratuitous rescans.
      const BasicBlock *BB = I->getParent();
      unsigned InstNo = 0;
      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
           BBI != E; ++BBI)
        if (isInterestingInstruction(BBI))
          InstNumbers[BBI] = InstNo++;
      It = InstNumbers.find(I);

      assert(It != InstNumbers.end() && "Didn't insert instruction?");
      return It->second;
    }

    // Drop a (deleted) instruction from the numbering.  The remaining numbers
    // keep their relative order, which is all callers rely on.
    void deleteValue(const Instruction *I) {
      InstNumbers.erase(I);
    }

    void clear() {
      InstNumbers.clear();
    }
  };

  struct VISIBILITY_HIDDEN PromoteMem2Reg {
    /// Allocas - The alloca instructions being promoted.
    ///
    std::vector<AllocaInst*> Allocas;
    DominatorTree &DT;
    DominanceFrontier &DF;

    /// AST - An AliasSetTracker object to update.  If null, don't update it.
    ///
    AliasSetTracker *AST;

    LLVMContext &Context;

    /// AllocaLookup - Reverse mapping of Allocas.
    ///
    std::map<AllocaInst*, unsigned> AllocaLookup;

    /// NewPhiNodes - The PhiNodes we're adding.
    ///
    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;

    /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
    /// it corresponds to.
    DenseMap<PHINode*, unsigned> PhiToAllocaMap;

    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
    /// each alloca that is of pointer type, we keep track of what to copyValue
    /// to the inserted PHI nodes here.
    ///
    std::vector<Value*> PointerAllocaValues;

    /// Visited - The set of basic blocks the renamer has already visited.
    ///
    SmallPtrSet<BasicBlock*, 16> Visited;

    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
    /// non-deterministic behavior.
    DenseMap<BasicBlock*, unsigned> BBNumbers;

    /// BBNumPreds - Lazily compute the number of predecessors a block has.
    DenseMap<const BasicBlock*, unsigned> BBNumPreds;
  public:
    PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
                   DominanceFrontier &df, AliasSetTracker *ast,
                   LLVMContext &C)
      : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {}

    void run();

    /// properlyDominates - Return true if I1 properly dominates I2.
    ///
    bool properlyDominates(Instruction *I1, Instruction *I2) const {
      // An invoke's result is only available in its normal destination, so
      // dominance questions start there rather than at the invoke itself.
      if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
        I1 = II->getNormalDest()->begin();
      return DT.properlyDominates(I1->getParent(), I2->getParent());
    }

    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
    ///
    bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
      return DT.dominates(BB1, BB2);
    }

  private:
    // Swap-with-back removal; decrements AllocaIdx so the caller's loop
    // revisits the slot that now holds the moved element.
    void RemoveFromAllocasList(unsigned &AllocaIdx) {
      Allocas[AllocaIdx] = Allocas.back();
      Allocas.pop_back();
      --AllocaIdx;
    }

    unsigned getNumPreds(const BasicBlock *BB) {
      unsigned &NP = BBNumPreds[BB];
      if (NP == 0)
        // The cached value is biased by one so that 0 can mean "not computed
        // yet" even for blocks that genuinely have zero predecessors.
        NP = std::distance(pred_begin(BB), pred_end(BB))+1;
      return NP-1;
    }

    void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                 AllocaInfo &Info);
    void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                             const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                             SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);

    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);
    void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);


    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                    RenamePassData::ValVector &IncVals,
                    std::vector<RenamePassData> &Worklist);
    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
                      SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
  };

  // Per-alloca use summary collected by AnalyzeAlloca and consumed by run().
  struct AllocaInfo {
    std::vector<BasicBlock*> DefiningBlocks;  // blocks containing a store
    std::vector<BasicBlock*> UsingBlocks;     // blocks containing a load

    StoreInst  *OnlyStore;        // last store seen (THE store if only one)
    BasicBlock *OnlyBlock;        // the single block, if OnlyUsedInOneBlock
    bool OnlyUsedInOneBlock;

    Value *AllocaPointerVal;      // a value to copyValue() onto new PHIs (AST)

    void clear() {
      DefiningBlocks.clear();
      UsingBlocks.clear();
      OnlyStore = 0;
      OnlyBlock = 0;
      OnlyUsedInOneBlock = true;
      AllocaPointerVal = 0;
    }

    /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
    /// ivars.
    void AnalyzeAlloca(AllocaInst *AI) {
      clear();

      // As we scan the uses of the alloca instruction, keep track of stores,
      // and decide whether all of the loads and stores to the alloca are within
      // the same basic block.
      for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
           U != E;) {
        Instruction *User = cast<Instruction>(*U);
        // Advance before possibly erasing User, so the iterator stays valid.
        ++U;
        if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
          // Remove any uses of this alloca in DbgInfoIntrinsics.
          assert(BC->hasOneUse() && "Unexpected alloca uses!");
          DbgInfoIntrinsic *DI = cast<DbgInfoIntrinsic>(*BC->use_begin());
          DI->eraseFromParent();
          BC->eraseFromParent();
          continue;
        }
        else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
          // Remember the basic blocks which define new values for the alloca
          DefiningBlocks.push_back(SI->getParent());
          AllocaPointerVal = SI->getOperand(0);
          OnlyStore = SI;
        } else {
          LoadInst *LI = cast<LoadInst>(User);
          // Otherwise it must be a load instruction, keep track of variable
          // reads.
          UsingBlocks.push_back(LI->getParent());
          AllocaPointerVal = LI;
        }

        if (OnlyUsedInOneBlock) {
          if (OnlyBlock == 0)
            OnlyBlock = User->getParent();
          else if (OnlyBlock != User->getParent())
            OnlyUsedInOneBlock = false;
        }
      }
    }
  };
}  // end of anonymous namespace


void PromoteMem2Reg::run() {
  Function &F = *DF.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();


  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = Context.getUndef(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  while (!RenamePassWorkList.empty()) {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  }

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // sections of dead code that were not processed on the dominance frontier.
    // Just delete the users now.
    //
    if (!A->use_empty())
      A->replaceAllUsesWith(Context.getUndef(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }


  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = PN->hasConstantValue(true)) {
        if (!isa<Instruction>(V) ||
            properlyDominates(cast<Instruction>(V), PN)) {
          if (AST && isa<PointerType>(PN->getType()))
            AST->deleteValue(PN);
          PN->replaceAllUsesWith(V);
          PN->eraseFromParent();
          // Post-increment so I stays valid across the erase of its entry.
          NewPhiNodes.erase(I++);
          EliminatedAPHI = true;
          continue;
        }
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = Context.getUndef(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}


/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
  LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
                             Info.UsingBlocks.begin(), Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    // Note: BB came from UsingBlocks, so it contains at least one load of AI;
    // this unbounded scan therefore always hits a load or store of AI.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.  Remove BB by swap-with-back, and re-examine
        // the element moved into slot i.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {

  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Compute the locations where PhiNodes need to be inserted.  Look at the
  // dominance frontier of EACH basic-block we have a write in.  DefiningBlocks
  // doubles as the worklist: newly-placed PHIs count as new defs (iterated
  // dominance frontier).
  unsigned CurrentVersion = 0;
  SmallPtrSet<PHINode*, 16> InsertedPHINodes;
  std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
  while (!Info.DefiningBlocks.empty()) {
    BasicBlock *BB = Info.DefiningBlocks.back();
    Info.DefiningBlocks.pop_back();

    // Look up the DF for this write, add it to defining blocks.
    DominanceFrontier::const_iterator it = DF.find(BB);
    if (it == DF.end()) continue;

    const DominanceFrontier::DomSetType &S = it->second;

    // In theory we don't need the indirection through the DFBlocks vector.
    // In practice, the order of calling QueuePhiNode would depend on the
    // (unspecified) ordering of basic blocks in the dominance frontier,
    // which would give PHI nodes non-deterministic subscripts.  Fix this by
    // processing blocks in order of their occurrence in the function.
    for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
         PE = S.end(); P != PE; ++P) {
      // If the frontier block is not in the live-in set for the alloca, don't
      // bother processing it.
      if (!LiveInBlocks.count(*P))
        continue;

      DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
    }

    // Sort by the block ordering in the function.
    if (DFBlocks.size() > 1)
      std::sort(DFBlocks.begin(), DFBlocks.end());

    for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
      BasicBlock *BB = DFBlocks[i].second;
      if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
        Info.DefiningBlocks.push_back(BB);
    }
    DFBlocks.clear();
  }
}

/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
708 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 709 AllocaInfo &Info, 710 LargeBlockInfo &LBI) { 711 StoreInst *OnlyStore = Info.OnlyStore; 712 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 713 BasicBlock *StoreBB = OnlyStore->getParent(); 714 int StoreIndex = -1; 715 716 // Clear out UsingBlocks. We will reconstruct it here if needed. 717 Info.UsingBlocks.clear(); 718 719 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 720 Instruction *UserInst = cast<Instruction>(*UI++); 721 if (!isa<LoadInst>(UserInst)) { 722 assert(UserInst == OnlyStore && "Should only have load/stores"); 723 continue; 724 } 725 LoadInst *LI = cast<LoadInst>(UserInst); 726 727 // Okay, if we have a load from the alloca, we want to replace it with the 728 // only value stored to the alloca. We can do this if the value is 729 // dominated by the store. If not, we use the rest of the mem2reg machinery 730 // to insert the phi nodes as needed. 731 if (!StoringGlobalVal) { // Non-instructions are always dominated. 732 if (LI->getParent() == StoreBB) { 733 // If we have a use that is in the same block as the store, compare the 734 // indices of the two instructions to see which one came first. If the 735 // load came before the store, we can't handle it. 736 if (StoreIndex == -1) 737 StoreIndex = LBI.getInstructionIndex(OnlyStore); 738 739 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 740 // Can't handle this load, bail out. 741 Info.UsingBlocks.push_back(StoreBB); 742 continue; 743 } 744 745 } else if (LI->getParent() != StoreBB && 746 !dominates(StoreBB, LI->getParent())) { 747 // If the load and store are in different blocks, use BB dominance to 748 // check their relationships. If the store doesn't dom the use, bail 749 // out. 750 Info.UsingBlocks.push_back(LI->getParent()); 751 continue; 752 } 753 } 754 755 // Otherwise, we *can* safely rewrite this load. 
756 LI->replaceAllUsesWith(OnlyStore->getOperand(0)); 757 if (AST && isa<PointerType>(LI->getType())) 758 AST->deleteValue(LI); 759 LI->eraseFromParent(); 760 LBI.deleteValue(LI); 761 } 762 } 763 764 765 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the 766 /// first element of a pair. 767 struct StoreIndexSearchPredicate { 768 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 769 const std::pair<unsigned, StoreInst*> &RHS) { 770 return LHS.first < RHS.first; 771 } 772 }; 773 774 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 775 /// block. If this is the case, avoid traversing the CFG and inserting a lot of 776 /// potentially useless PHI nodes by just performing a single linear pass over 777 /// the basic block using the Alloca. 778 /// 779 /// If we cannot promote this alloca (because it is read before it is written), 780 /// return true. This is necessary in cases where, due to control flow, the 781 /// alloca is potentially undefined on some control flow paths. e.g. code like 782 /// this is potentially correct: 783 /// 784 /// for (...) { if (c) { A = undef; undef = B; } } 785 /// 786 /// ... so long as A is not used before undef is set. 787 /// 788 void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 789 LargeBlockInfo &LBI) { 790 // The trickiest case to handle is when we have large blocks. Because of this, 791 // this code is optimized assuming that large blocks happen. This does not 792 // significantly pessimize the small block case. This uses LargeBlockInfo to 793 // make it efficient to get the index of various operations in the block. 794 795 // Clear out UsingBlocks. We will reconstruct it here if needed. 796 Info.UsingBlocks.clear(); 797 798 // Walk the use-def list of the alloca, getting the locations of all stores. 
799 typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; 800 StoresByIndexTy StoresByIndex; 801 802 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 803 UI != E; ++UI) 804 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) 805 StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); 806 807 // If there are no stores to the alloca, just replace any loads with undef. 808 if (StoresByIndex.empty()) { 809 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 810 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 811 LI->replaceAllUsesWith(Context.getUndef(LI->getType())); 812 if (AST && isa<PointerType>(LI->getType())) 813 AST->deleteValue(LI); 814 LBI.deleteValue(LI); 815 LI->eraseFromParent(); 816 } 817 return; 818 } 819 820 // Sort the stores by their index, making it efficient to do a lookup with a 821 // binary search. 822 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 823 824 // Walk all of the loads from this alloca, replacing them with the nearest 825 // store above them, if any. 826 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 827 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 828 if (!LI) continue; 829 830 unsigned LoadIdx = LBI.getInstructionIndex(LI); 831 832 // Find the nearest store that has a lower than this load. 833 StoresByIndexTy::iterator I = 834 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 835 std::pair<unsigned, StoreInst*>(LoadIdx, 0), 836 StoreIndexSearchPredicate()); 837 838 // If there is no store before this load, then we can't promote this load. 839 if (I == StoresByIndex.begin()) { 840 // Can't handle this load, bail out. 841 Info.UsingBlocks.push_back(LI->getParent()); 842 continue; 843 } 844 845 // Otherwise, there was a store before this load, the load takes its value. 
846 --I; 847 LI->replaceAllUsesWith(I->second->getOperand(0)); 848 if (AST && isa<PointerType>(LI->getType())) 849 AST->deleteValue(LI); 850 LI->eraseFromParent(); 851 LBI.deleteValue(LI); 852 } 853 } 854 855 856 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 857 // Alloca returns true if there wasn't already a phi-node for that variable 858 // 859 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 860 unsigned &Version, 861 SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { 862 // Look up the basic-block in question. 863 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 864 865 // If the BB already has a phi node added for the i'th alloca then we're done! 866 if (PN) return false; 867 868 // Create a PhiNode using the dereferenced type... and add the phi-node to the 869 // BasicBlock. 870 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), 871 Allocas[AllocaNo]->getName() + "." + 872 utostr(Version++), BB->begin()); 873 ++NumPHIInsert; 874 PhiToAllocaMap[PN] = AllocaNo; 875 PN->reserveOperandSpace(getNumPreds(BB)); 876 877 InsertedPHINodes.insert(PN); 878 879 if (AST && isa<PointerType>(PN->getType())) 880 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 881 882 return true; 883 } 884 885 // RenamePass - Recursively traverse the CFG of the function, renaming loads and 886 // stores to the allocas which we are promoting. IncomingVals indicates what 887 // value each Alloca contains on exit from the predecessor block Pred. 888 // 889 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 890 RenamePassData::ValVector &IncomingVals, 891 std::vector<RenamePassData> &Worklist) { 892 NextIteration: 893 // If we are inserting any phi nodes into this BB, they will already be in the 894 // block. 895 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 896 // If we have PHI nodes to update, compute the number of edges from Pred to 897 // BB. 
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Count how many CFG edges run from Pred to BB; a terminator such as a
      // switch can list the same successor more than once, and the PHI needs
      // one incoming entry per edge.
      unsigned NumEdges = 0;
      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
        if (*I == BB)
          ++NumEdges;
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node, one per edge from Pred.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation, so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB)) return;

  // Rewrite all loads and stores of promoted allocas in this block.  The
  // iterator is advanced before any deletion, so erasing the current
  // instruction is safe.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src) continue;   // Not a load of an alloca.

      // Skip loads of allocas that are not being promoted.
      std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end()) continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && isa<PointerType>(LI->getType()))
        AST->deleteValue(LI);   // Keep the alias set tracker in sync.
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest) continue;  // Not a store to an alloca.

      // Skip stores to allocas that are not being promoted.
      std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?  The stored operand becomes the live
      // value of the alloca for the remainder of this block.
      IncomingVals[ai->second] = SI->getOperand(0);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E) return;

  // Keep track of the successors so we don't visit the same successor twice
  SmallPtrSet<BasicBlock*, 8> VisitedSuccs;

  // Handle the first successor without using the worklist: reuse the current
  // frame and loop back via the 'goto' below instead of pushing a new entry.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  // Queue each remaining distinct successor for later processing.
  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  goto NextIteration;
}

/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate.  This function makes
/// use of DominanceFrontier information.
This function does not modify the CFG
/// of the function at all.  All allocas must be from the same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
                           DominatorTree &DT, DominanceFrontier &DF,
                           LLVMContext &Context, AliasSetTracker *AST) {
  // Only construct and run the promoter when there is actual work to do.
  if (!Allocas.empty())
    PromoteMem2Reg(Allocas, DT, DF, AST, Context).run();
}