//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references.  It promotes
// alloca instructions which only have loads and stores as uses.  An alloca is
// transformed by using dominator frontiers to place PHI nodes, then traversing
// the function in depth-first order to rewrite loads and stores as
// appropriate.  This is just the standard SSA construction algorithm to
// construct "pruned" SSA form.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumLocalPromoted, "Number of allocas promoted within one block");
STATISTIC(NumSingleStore,   "Number of allocas promoted with a single store");
STATISTIC(NumDeadAlloca,    "Number of dead allocas removed");
STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");

namespace llvm {
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
};
}

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI) {   // Loop over all of the uses of the alloca
    const User *U = *UI;
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI)
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      if (SI->isVolatile())
        return false;
    } else {
      return false;
    }
  }

  return true;
}
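
// For illustration, a hand-written IR fragment (@takes_ptr is a placeholder):
// the first alloca below is promotable, the second is not because its address
// escapes into a call.
//
//   %a = alloca i32                  ; only loaded from and stored to
//   store i32 1, i32* %a
//   %v = load i32* %a
//
//   %b = alloca i32                  ; address escapes: not promotable
//   call void @takes_ptr(i32* %b)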

/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
/// alloca 'V', if any.
static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) {
  if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1))
    for (Value::use_iterator UI = DebugNode->use_begin(),
         E = DebugNode->use_end(); UI != E; ++UI)
      if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
        return DDI;

  return 0;
}

namespace {
  struct AllocaInfo;

  // Data package used by RenamePass()
  class RenamePassData {
  public:
    typedef std::vector<Value *> ValVector;

    RenamePassData() : BB(NULL), Pred(NULL), Values() {}
    RenamePassData(BasicBlock *B, BasicBlock *P,
                   const ValVector &V) : BB(B), Pred(P), Values(V) {}
    BasicBlock *BB;
    BasicBlock *Pred;
    ValVector Values;

    void swap(RenamePassData &RHS) {
      std::swap(BB, RHS.BB);
      std::swap(Pred, RHS.Pred);
      Values.swap(RHS.Values);
    }
  };

  /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
  /// load/store instructions in the block that directly load or store an
  /// alloca.
  ///
  /// This functionality is important because it avoids scanning large basic
  /// blocks multiple times when promoting many allocas in the same block.
  class LargeBlockInfo {
    /// InstNumbers - For each instruction that we track, keep the index of the
    /// instruction.  The index starts out as the number of the instruction
    /// from the start of the block.
    DenseMap<const Instruction *, unsigned> InstNumbers;
  public:

    /// isInterestingInstruction - This code only looks at accesses to allocas.
    static bool isInterestingInstruction(const Instruction *I) {
      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
    }

    /// getInstructionIndex - Get or calculate the index of the specified
    /// instruction.
    unsigned getInstructionIndex(const Instruction *I) {
      assert(isInterestingInstruction(I) &&
             "Not a load/store to/from an alloca?");

      // If we already have this instruction number, return it.
      DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
      if (It != InstNumbers.end()) return It->second;

      // Scan the whole block to get the instruction.  This accumulates
      // information for every interesting instruction in the block, in order
      // to avoid gratuitous rescans.
      const BasicBlock *BB = I->getParent();
      unsigned InstNo = 0;
      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
           BBI != E; ++BBI)
        if (isInterestingInstruction(BBI))
          InstNumbers[BBI] = InstNo++;
      It = InstNumbers.find(I);

      assert(It != InstNumbers.end() && "Didn't insert instruction?");
      return It->second;
    }

    void deleteValue(const Instruction *I) {
      InstNumbers.erase(I);
    }

    void clear() {
      InstNumbers.clear();
    }
  };
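
  // Usage sketch (illustrative): repeated queries within one block cost a
  // single linear scan, e.g.
  //
  //   LargeBlockInfo LBI;
  //   unsigned StIdx = LBI.getInstructionIndex(SI); // scans SI's block once
  //   unsigned LdIdx = LBI.getInstructionIndex(LI); // cache hit, same block
  //   bool StoreIsEarlier = StIdx < LdIdx;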

  struct PromoteMem2Reg {
    /// Allocas - The alloca instructions being promoted.
    ///
    std::vector<AllocaInst*> Allocas;
    DominatorTree &DT;
    DominanceFrontier &DF;
    DIFactory *DIF;

    /// AST - An AliasSetTracker object to update.  If null, don't update it.
    ///
    AliasSetTracker *AST;

    /// AllocaLookup - Reverse mapping of Allocas.
    ///
    std::map<AllocaInst*, unsigned> AllocaLookup;

    /// NewPhiNodes - The PhiNodes we're adding.
    ///
    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;

    /// PhiToAllocaMap - For each PHI node, keep track of which entry in
    /// Allocas it corresponds to.
    DenseMap<PHINode*, unsigned> PhiToAllocaMap;

    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
    /// each alloca that is of pointer type, we keep track of what to copyValue
    /// to the inserted PHI nodes here.
    ///
    std::vector<Value*> PointerAllocaValues;

    /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
    /// intrinsic that describes it, if any, so that we can convert it to a
    /// dbg.value intrinsic if the alloca gets promoted.
    SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;

    /// Visited - The set of basic blocks the renamer has already visited.
    ///
    SmallPtrSet<BasicBlock*, 16> Visited;

    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
    /// non-deterministic behavior.
    DenseMap<BasicBlock*, unsigned> BBNumbers;

    /// BBNumPreds - Lazily compute the number of predecessors a block has.
    DenseMap<const BasicBlock*, unsigned> BBNumPreds;
  public:
    PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
                   DominanceFrontier &df, AliasSetTracker *ast)
      : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {}
    ~PromoteMem2Reg() {
      delete DIF;
    }

    void run();

    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
    ///
    bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
      return DT.dominates(BB1, BB2);
    }

  private:
    void RemoveFromAllocasList(unsigned &AllocaIdx) {
      Allocas[AllocaIdx] = Allocas.back();
      Allocas.pop_back();
      --AllocaIdx;
    }

    unsigned getNumPreds(const BasicBlock *BB) {
      // Note: a DenseMap lookup value-initializes missing entries to zero, so
      // zero is reserved to mean "not computed yet" and the count is stored
      // biased by one.
      unsigned &NP = BBNumPreds[BB];
      if (NP == 0)
        NP = std::distance(pred_begin(BB), pred_end(BB))+1;
      return NP-1;
    }

    void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                 AllocaInfo &Info);
    void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                             const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                             SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);

    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);
    void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);
    void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI);

    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                    RenamePassData::ValVector &IncVals,
                    std::vector<RenamePassData> &Worklist);
    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
                      SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
  };

  struct AllocaInfo {
    std::vector<BasicBlock*> DefiningBlocks;
    std::vector<BasicBlock*> UsingBlocks;

    StoreInst  *OnlyStore;
    BasicBlock *OnlyBlock;
    bool OnlyUsedInOneBlock;

    Value *AllocaPointerVal;
    DbgDeclareInst *DbgDeclare;

    void clear() {
      DefiningBlocks.clear();
      UsingBlocks.clear();
      OnlyStore = 0;
      OnlyBlock = 0;
      OnlyUsedInOneBlock = true;
      AllocaPointerVal = 0;
      DbgDeclare = 0;
    }

    /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
    /// ivars.
    void AnalyzeAlloca(AllocaInst *AI) {
      clear();

      // As we scan the uses of the alloca instruction, keep track of stores,
      // and decide whether all of the loads and stores to the alloca are
      // within the same basic block.
      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
           UI != E;) {
        Instruction *User = cast<Instruction>(*UI++);

        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
          // Remember the basic blocks which define new values for the alloca.
          DefiningBlocks.push_back(SI->getParent());
          AllocaPointerVal = SI->getOperand(0);
          OnlyStore = SI;
        } else {
          LoadInst *LI = cast<LoadInst>(User);
          // Otherwise it must be a load instruction; keep track of variable
          // reads.
          UsingBlocks.push_back(LI->getParent());
          AllocaPointerVal = LI;
        }

        if (OnlyUsedInOneBlock) {
          if (OnlyBlock == 0)
            OnlyBlock = User->getParent();
          else if (OnlyBlock != User->getParent())
            OnlyUsedInOneBlock = false;
        }
      }

      DbgDeclare = FindAllocaDbgDeclare(AI);
    }
  };
}  // end of anonymous namespace
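
// Overview of run(): unused allocas are deleted outright; a single-store
// alloca or an alloca used in only one block is handled by a cheap special
// case; everything else goes through full PHI placement over the (iterated)
// dominance frontier followed by the renaming walk.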

void PromoteMem2Reg::run() {
  Function &F = *DF.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is the same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed.
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is
      // left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's
        // debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }
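
    // Example for the single-store case above: source like
    // "int x = 42; ... use(x);" typically lowers to one store to x's alloca,
    // so each load dominated by that store is rewritten to the stored value
    // directly and no PHI nodes are needed.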

    // If the alloca is only read and written in one basic block, just perform
    // a linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare)
            ConvertDebugDeclareToDebugValue(DDI, SI);
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need
    // PHI nodes and see if we can optimize out some work by avoiding insertion
    // of dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be undef values for all of
  // the allocas.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this undef value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walk all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary.
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());
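
  // Design note: the explicit worklist plus swap() keeps this traversal
  // iterative, avoiding deep recursion on large CFGs and avoiding a copy of
  // each ValVector as entries are pulled off the worklist.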

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // sections of dead code that were not processed on the dominance frontier.
    // Just delete the users now.
    //
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove the allocas' dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be
  // removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = PN->hasConstantValue(&DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }
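
  // Example of why this must iterate: if "%p1 = phi [undef, %a], [%v, %b]"
  // simplifies to %v, then a second PHI that merged %p1 and %v now merges
  // only %v (and undefs), so it becomes removable on the next round.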

  // At this point, the renamer has added entries to PHI nodes for all
  // reachable code.  Unfortunately, there may be unreachable blocks which the
  // renamer hasn't traversed.  If this is the case, the PHI nodes may not have
  // incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We know
    // that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry.
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy entries
    // inserted into every PHI node for the block.  Update all the phi nodes in
    // this block that we are inserting (there could be phis before mem2reg
    // runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}

/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi
/// nodes would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                   Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the
    // first reference to the alloca is a def (store), then we know it isn't
    // live-in.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor
    // or live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}
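
// Worked example: in a chain A -> B -> C where A stores to the alloca and C
// loads from it, the worklist starts as {C}; C is marked live-in, its
// predecessor B is not a def block so it is marked live-in as well, and A is
// skipped because it defines the value.  Result: {B, C}.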

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {

  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Compute the locations where PhiNodes need to be inserted.  Look at the
  // dominance frontier of EACH basic-block we have a write in.
  unsigned CurrentVersion = 0;
  SmallPtrSet<PHINode*, 16> InsertedPHINodes;
  std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
  while (!Info.DefiningBlocks.empty()) {
    BasicBlock *BB = Info.DefiningBlocks.back();
    Info.DefiningBlocks.pop_back();

    // Look up the DF for this write, add it to defining blocks.
    DominanceFrontier::const_iterator it = DF.find(BB);
    if (it == DF.end()) continue;

    const DominanceFrontier::DomSetType &S = it->second;

    // In theory we don't need the indirection through the DFBlocks vector.
    // In practice, the order of calling QueuePhiNode would depend on the
    // (unspecified) ordering of basic blocks in the dominance frontier,
    // which would give PHI nodes non-deterministic subscripts.  Fix this by
    // processing blocks in order of their occurrence in the function.
    for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
         PE = S.end(); P != PE; ++P) {
      // If the frontier block is not in the live-in set for the alloca, don't
      // bother processing it.
      if (!LiveInBlocks.count(*P))
        continue;

      DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
    }

    // Sort by the block ordering in the function.
    if (DFBlocks.size() > 1)
      std::sort(DFBlocks.begin(), DFBlocks.end());

    for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
      BasicBlock *BB = DFBlocks[i].second;
      if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
        Info.DefiningBlocks.push_back(BB);
    }
    DFBlocks.clear();
  }
}
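
// Example: for a diamond CFG (entry branches to T and F, which both jump to
// 'join') with a store in T and a store in F, the dominance frontier of both
// T and F is {join}, so a single PHI is queued in 'join'.  Queuing a PHI turns
// 'join' into a defining block, which is why it is pushed back onto
// Info.DefiningBlocks above: that is the "iterated" part of the IDF.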

/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
                                              AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.  If not, we use the rest of the mem2reg
    // machinery to insert the phi nodes as needed.
    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

namespace {

/// StoreIndexSearchPredicate - This is a helper predicate used to search by
/// the first element of a pair.
struct StoreIndexSearchPredicate {
  bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
                  const std::pair<unsigned, StoreInst*> &RHS) {
    return LHS.first < RHS.first;
  }
};

}

/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
/// block.  If this is the case, avoid traversing the CFG and inserting a lot
/// of potentially useless PHI nodes by just performing a single linear pass
/// over the basic block using the alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// the offending blocks are left in Info.UsingBlocks so the caller falls back
/// to the general machinery.  This is necessary in cases where, due to control
/// flow, the alloca is potentially undefined on some control flow paths.  e.g.
/// code like this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
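/// For illustration, a hand-written fragment within one block:
///
///   store i32 1, i32* %a
///   %v1 = load i32* %a        ; rewritten to use the value 1
///   store i32 2, i32* %a
///   %v2 = load i32* %a        ; rewritten to use the value 2
///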
void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  // The trickiest case to handle is when we have large blocks.  Because of
  // this, this code is optimized assuming that large blocks happen.  This does
  // not significantly pessimize the small block case.  This uses
  // LargeBlockInfo to make it efficient to get the index of various operations
  // in the block.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // If there are no stores to the alloca, just replace any loads with undef.
  if (StoresByIndex.empty()) {
    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
      if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
        if (AST && LI->getType()->isPointerTy())
          AST->deleteValue(LI);
        LBI.deleteValue(LI);
        LI->eraseFromParent();
      }
    return;
  }

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI) continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has an index lower than this load.
    StoresByIndexTy::iterator I =
      std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                       std::pair<unsigned, StoreInst*>(LoadIdx,
                                                static_cast<StoreInst*>(0)),
                       StoreIndexSearchPredicate());

    // If there is no store before this load, then we can't promote this load.
    if (I == StoresByIndex.begin()) {
      // Can't handle this load, bail out.
      Info.UsingBlocks.push_back(LI->getParent());
      continue;
    }

    // Otherwise, there was a store before this load; the load takes its value.
    --I;
    LI->replaceAllUsesWith(I->second->getOperand(0));
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

// Inserts a llvm.dbg.value intrinsic before the stores to an alloca'd value
// that has an associated llvm.dbg.declare intrinsic.
void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                                     StoreInst *SI) {
  DIVariable DIVar(DDI->getVariable());
  if (!DIVar.Verify())
    return;

  if (!DIF)
    DIF = new DIFactory(*SI->getParent()->getParent()->getParent());
  Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0,
                                                     DIVar, SI);

  // Propagate any debug metadata from the store onto the dbg.value.
  DebugLoc SIDL = SI->getDebugLoc();
  if (!SIDL.isUnknown())
    DbgVal->setDebugLoc(SIDL);
  // Otherwise propagate debug metadata from dbg.declare.
  else
    DbgVal->setDebugLoc(DDI->getDebugLoc());
}
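
// In effect, once promotion deletes the store itself, the dbg.value emitted
// above is what tells a debugger that the source variable now holds the
// stored SSA value.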

// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
// alloca.  Returns true if there wasn't already a phi-node for that variable.
//
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version,
                                  SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
  // Look up the basic-block in question.
  PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're
  // done!
  if (PN) return false;

  // Create a PhiNode using the dereferenced type... and add the phi-node to
  // the BasicBlock.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       BB->begin());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;
  PN->reserveOperandSpace(getNumPreds(BB));

  InsertedPHINodes.insert(PN);

  if (AST && PN->getType()->isPointerTy())
    AST->copyValue(PointerAllocaValues[AllocaNo], PN);

  return true;
}
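
// Note: the PHI is created with zero incoming operands; RenamePass fills them
// in later, and it is exactly this "missing incoming edges" state that lets
// RenamePass tell pass-inserted PHIs apart from PHIs that existed before
// mem2reg ran.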

// RenamePass - Recursively traverse the CFG of the function, renaming loads
// and stores to the allocas which we are promoting.  IncomingVals indicates
// what value each Alloca contains on exit from the predecessor block Pred.
//
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in
  // the block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed
      // in the IR before mem2reg was run.  We determine that APN is being
      // inserted because it is missing incoming edges.  All other PHI nodes
      // being inserted by this pass of mem2reg will have the same number of
      // incoming operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      unsigned NumEdges = 0;
      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
        if (*I == BB)
          ++NumEdges;
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB)) return;

  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src) continue;

      std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end()) continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value.
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest) continue;

      std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // What value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DDI, SI);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E) return;

  // Keep track of the successors so we don't visit the same successor twice.
  SmallPtrSet<BasicBlock*, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  goto NextIteration;
}

/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate.  This function makes
/// use of DominanceFrontier information.  This function does not modify the
/// CFG of the function at all.  All allocas must be from the same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
                           DominatorTree &DT, DominanceFrontier &DF,
                           AliasSetTracker *AST) {
  // If there is nothing to do, bail out...
  if (Allocas.empty()) return;

  PromoteMem2Reg(Allocas, DT, DF, AST).run();
}
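
// Typical use, as a sketch only (collectPromotableAllocas is a hypothetical
// helper that gathers allocas passing isAllocaPromotable; the analyses come
// from a pass that required DominatorTree and DominanceFrontier):
//
//   std::vector<AllocaInst*> Allocas = collectPromotableAllocas(F);
//   if (!Allocas.empty())
//     PromoteMemToReg(Allocas, getAnalysis<DominatorTree>(),
//                     getAnalysis<DominanceFrontier>(), 0);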