1 //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs global common subexpression elimination on machine 11 // instructions using a scoped hash table based value numbering scheme. It 12 // must be run while the machine function is still in SSA form. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "machine-cse" 17 #include "llvm/CodeGen/Passes.h" 18 #include "llvm/CodeGen/MachineDominators.h" 19 #include "llvm/CodeGen/MachineInstr.h" 20 #include "llvm/CodeGen/MachineRegisterInfo.h" 21 #include "llvm/Analysis/AliasAnalysis.h" 22 #include "llvm/Target/TargetInstrInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/ScopedHashTable.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 29 using namespace llvm; 30 31 STATISTIC(NumCoalesces, "Number of copies coalesced"); 32 STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 33 STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); 34 35 namespace { 36 class MachineCSE : public MachineFunctionPass { 37 const TargetInstrInfo *TII; 38 const TargetRegisterInfo *TRI; 39 AliasAnalysis *AA; 40 MachineDominatorTree *DT; 41 MachineRegisterInfo *MRI; 42 public: 43 static char ID; // Pass identification 44 MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {} 45 46 virtual bool runOnMachineFunction(MachineFunction &MF); 47 48 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 49 AU.setPreservesCFG(); 50 MachineFunctionPass::getAnalysisUsage(AU); 51 AU.addRequired<AliasAnalysis>(); 52 AU.addRequired<MachineDominatorTree>(); 53 AU.addPreserved<MachineDominatorTree>(); 54 } 55 56 private: 57 const unsigned LookAheadLimit; 58 typedef ScopedHashTableScope<MachineInstr*, unsigned, 59 MachineInstrExpressionTrait> ScopeType; 60 DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; 61 ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; 62 SmallVector<MachineInstr*, 64> Exps; 63 unsigned CurrVN; 64 65 bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 66 bool isPhysDefTriviallyDead(unsigned Reg, 67 MachineBasicBlock::const_iterator I, 68 MachineBasicBlock::const_iterator E) const ; 69 bool hasLivePhysRegDefUse(const MachineInstr *MI, 70 const MachineBasicBlock *MBB, 71 unsigned &PhysDef) const; 72 bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 73 unsigned PhysDef) const; 74 bool isCSECandidate(MachineInstr *MI); 75 bool isProfitableToCSE(unsigned CSReg, unsigned Reg, 76 MachineInstr *CSMI, MachineInstr *MI); 77 void EnterScope(MachineBasicBlock *MBB); 78 void ExitScope(MachineBasicBlock *MBB); 79 bool ProcessBlock(MachineBasicBlock *MBB); 80 void ExitScopeIfDone(MachineDomTreeNode *Node, 81 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 82 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 83 bool PerformCSE(MachineDomTreeNode *Node); 84 }; 85 } // end anonymous namespace 86 87 char MachineCSE::ID = 0; 88 static RegisterPass<MachineCSE> 89 X("machine-cse", "Machine Common Subexpression Elimination"); 90 91 FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } 92 93 bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, 94 MachineBasicBlock *MBB) { 95 bool Changed = false; 96 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 97 MachineOperand &MO = MI->getOperand(i); 98 if (!MO.isReg() || !MO.isUse()) 99 continue; 100 unsigned Reg = MO.getReg(); 101 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 102 continue; 103 if (!MRI->hasOneUse(Reg)) 104 // Only coalesce single use copies. This ensure the copy will be 105 // deleted. 106 continue; 107 MachineInstr *DefMI = MRI->getVRegDef(Reg); 108 if (DefMI->getParent() != MBB) 109 continue; 110 unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 111 if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && 112 TargetRegisterInfo::isVirtualRegister(SrcReg) && 113 !SrcSubIdx && !DstSubIdx) { 114 const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); 115 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 116 const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); 117 if (!NewRC) 118 continue; 119 DEBUG(dbgs() << "Coalescing: " << *DefMI); 120 DEBUG(dbgs() << "*** to: " << *MI); 121 MO.setReg(SrcReg); 122 MRI->clearKillFlags(SrcReg); 123 if (NewRC != SRC) 124 MRI->setRegClass(SrcReg, NewRC); 125 DefMI->eraseFromParent(); 126 ++NumCoalesces; 127 Changed = true; 128 } 129 130 if (!DefMI->isCopy()) 131 continue; 132 SrcReg = DefMI->getOperand(1).getReg(); 133 if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 134 continue; 135 if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) 136 continue; 137 const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); 138 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 139 const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); 140 if (!NewRC) 141 continue; 142 DEBUG(dbgs() << "Coalescing: " << *DefMI); 143 DEBUG(dbgs() << "*** to: " << *MI); 144 MO.setReg(SrcReg); 145 MRI->clearKillFlags(SrcReg); 146 if (NewRC != SRC) 147 MRI->setRegClass(SrcReg, NewRC); 148 DefMI->eraseFromParent(); 149 ++NumCoalesces; 150 Changed = true; 151 } 152 153 return Changed; 154 } 155 156 bool 157 MachineCSE::isPhysDefTriviallyDead(unsigned Reg, 158 MachineBasicBlock::const_iterator I, 159 MachineBasicBlock::const_iterator E) const { 160 unsigned LookAheadLeft = LookAheadLimit; 161 while (LookAheadLeft) { 162 // Skip over dbg_value's. 163 while (I != E && I->isDebugValue()) 164 ++I; 165 166 if (I == E) 167 // Reached end of block, register is obviously dead. 168 return true; 169 170 bool SeenDef = false; 171 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 172 const MachineOperand &MO = I->getOperand(i); 173 if (!MO.isReg() || !MO.getReg()) 174 continue; 175 if (!TRI->regsOverlap(MO.getReg(), Reg)) 176 continue; 177 if (MO.isUse()) 178 // Found a use! 179 return false; 180 SeenDef = true; 181 } 182 if (SeenDef) 183 // See a def of Reg (or an alias) before encountering any use, it's 184 // trivially dead. 185 return true; 186 187 --LookAheadLeft; 188 ++I; 189 } 190 return false; 191 } 192 193 /// hasLivePhysRegDefUse - Return true if the specified instruction read / write 194 /// physical registers (except for dead defs of physical registers). It also 195 /// returns the physical register def by reference if it's the only one and the 196 /// instruction does not uses a physical register. 197 bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, 198 const MachineBasicBlock *MBB, 199 unsigned &PhysDef) const { 200 PhysDef = 0; 201 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 202 const MachineOperand &MO = MI->getOperand(i); 203 if (!MO.isReg()) 204 continue; 205 unsigned Reg = MO.getReg(); 206 if (!Reg) 207 continue; 208 if (TargetRegisterInfo::isVirtualRegister(Reg)) 209 continue; 210 if (MO.isUse()) { 211 // Can't touch anything to read a physical register. 212 PhysDef = 0; 213 return true; 214 } 215 if (MO.isDead()) 216 // If the def is dead, it's ok. 217 continue; 218 // Ok, this is a physical register def that's not marked "dead". That's 219 // common since this pass is run before livevariables. We can scan 220 // forward a few instructions and check if it is obviously dead. 221 if (PhysDef) { 222 // Multiple physical register defs. These are rare, forget about it. 223 PhysDef = 0; 224 return true; 225 } 226 PhysDef = Reg; 227 } 228 229 if (PhysDef) { 230 MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); 231 if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) 232 return true; 233 } 234 return false; 235 } 236 237 bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 238 unsigned PhysDef) const { 239 // For now conservatively returns false if the common subexpression is 240 // not in the same basic block as the given instruction. 241 MachineBasicBlock *MBB = MI->getParent(); 242 if (CSMI->getParent() != MBB) 243 return false; 244 MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); 245 MachineBasicBlock::const_iterator E = MI; 246 unsigned LookAheadLeft = LookAheadLimit; 247 while (LookAheadLeft) { 248 // Skip over dbg_value's. 249 while (I != E && I->isDebugValue()) 250 ++I; 251 252 if (I == E) 253 return true; 254 if (I->modifiesRegister(PhysDef, TRI)) 255 return false; 256 257 --LookAheadLeft; 258 ++I; 259 } 260 261 return false; 262 } 263 264 static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) { 265 unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 266 return MI->isCopyLike() || 267 TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); 268 } 269 270 bool MachineCSE::isCSECandidate(MachineInstr *MI) { 271 if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 272 MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) 273 return false; 274 275 // Ignore copies. 276 if (isCopy(MI, TII)) 277 return false; 278 279 // Ignore stuff that we obviously can't move. 280 const TargetInstrDesc &TID = MI->getDesc(); 281 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 282 TID.hasUnmodeledSideEffects()) 283 return false; 284 285 if (TID.mayLoad()) { 286 // Okay, this instruction does a load. As a refinement, we allow the target 287 // to decide whether the loaded value is actually a constant. If so, we can 288 // actually use it as a load. 289 if (!MI->isInvariantLoad(AA)) 290 // FIXME: we should be able to hoist loads with no other side effects if 291 // there are no other instructions which can change memory in this loop. 292 // This is a trivial form of alias analysis. 293 return false; 294 } 295 return true; 296 } 297 298 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 299 /// common expression that defines Reg. 300 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, 301 MachineInstr *CSMI, MachineInstr *MI) { 302 // FIXME: Heuristics that works around the lack the live range splitting. 303 304 // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an 305 // immediate predecessor. We don't want to increase register pressure and end up 306 // causing other computation to be spilled. 307 if (MI->getDesc().isAsCheapAsAMove()) { 308 MachineBasicBlock *CSBB = CSMI->getParent(); 309 MachineBasicBlock *BB = MI->getParent(); 310 if (CSBB != BB && 311 find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) 312 return false; 313 } 314 315 // Heuristics #2: If the expression doesn't not use a vr and the only use 316 // of the redundant computation are copies, do not cse. 317 bool HasVRegUse = false; 318 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 319 const MachineOperand &MO = MI->getOperand(i); 320 if (MO.isReg() && MO.isUse() && MO.getReg() && 321 TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 322 HasVRegUse = true; 323 break; 324 } 325 } 326 if (!HasVRegUse) { 327 bool HasNonCopyUse = false; 328 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 329 E = MRI->use_nodbg_end(); I != E; ++I) { 330 MachineInstr *Use = &*I; 331 // Ignore copies. 332 if (!isCopy(Use, TII)) { 333 HasNonCopyUse = true; 334 break; 335 } 336 } 337 if (!HasNonCopyUse) 338 return false; 339 } 340 341 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 342 // it unless the defined value is already used in the BB of the new use. 343 bool HasPHI = false; 344 SmallPtrSet<MachineBasicBlock*, 4> CSBBs; 345 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), 346 E = MRI->use_nodbg_end(); I != E; ++I) { 347 MachineInstr *Use = &*I; 348 HasPHI |= Use->isPHI(); 349 CSBBs.insert(Use->getParent()); 350 } 351 352 if (!HasPHI) 353 return true; 354 return CSBBs.count(MI->getParent()); 355 } 356 357 void MachineCSE::EnterScope(MachineBasicBlock *MBB) { 358 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 359 ScopeType *Scope = new ScopeType(VNT); 360 ScopeMap[MBB] = Scope; 361 } 362 363 void MachineCSE::ExitScope(MachineBasicBlock *MBB) { 364 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 365 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 366 assert(SI != ScopeMap.end()); 367 ScopeMap.erase(SI); 368 delete SI->second; 369 } 370 371 bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { 372 bool Changed = false; 373 374 SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; 375 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { 376 MachineInstr *MI = &*I; 377 ++I; 378 379 if (!isCSECandidate(MI)) 380 continue; 381 382 bool DefPhys = false; 383 bool FoundCSE = VNT.count(MI); 384 if (!FoundCSE) { 385 // Look for trivial copy coalescing opportunities. 386 if (PerformTrivialCoalescing(MI, MBB)) { 387 // After coalescing MI itself may become a copy. 388 if (isCopy(MI, TII)) 389 continue; 390 FoundCSE = VNT.count(MI); 391 } 392 } 393 // FIXME: commute commutable instructions? 394 395 // If the instruction defines a physical register and the value *may* be 396 // used, then it's not safe to replace it with a common subexpression. 397 unsigned PhysDef = 0; 398 if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { 399 FoundCSE = false; 400 401 // ... Unless the CS is local and it also defines the physical register 402 // which is not clobbered in between. 403 if (PhysDef) { 404 unsigned CSVN = VNT.lookup(MI); 405 MachineInstr *CSMI = Exps[CSVN]; 406 if (PhysRegDefReaches(CSMI, MI, PhysDef)) { 407 FoundCSE = true; 408 DefPhys = true; 409 } 410 } 411 } 412 413 if (!FoundCSE) { 414 VNT.insert(MI, CurrVN++); 415 Exps.push_back(MI); 416 continue; 417 } 418 419 // Found a common subexpression, eliminate it. 420 unsigned CSVN = VNT.lookup(MI); 421 MachineInstr *CSMI = Exps[CSVN]; 422 DEBUG(dbgs() << "Examining: " << *MI); 423 DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 424 425 // Check if it's profitable to perform this CSE. 426 bool DoCSE = true; 427 unsigned NumDefs = MI->getDesc().getNumDefs(); 428 for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { 429 MachineOperand &MO = MI->getOperand(i); 430 if (!MO.isReg() || !MO.isDef()) 431 continue; 432 unsigned OldReg = MO.getReg(); 433 unsigned NewReg = CSMI->getOperand(i).getReg(); 434 if (OldReg == NewReg) 435 continue; 436 assert(TargetRegisterInfo::isVirtualRegister(OldReg) && 437 TargetRegisterInfo::isVirtualRegister(NewReg) && 438 "Do not CSE physical register defs!"); 439 if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { 440 DoCSE = false; 441 break; 442 } 443 CSEPairs.push_back(std::make_pair(OldReg, NewReg)); 444 --NumDefs; 445 } 446 447 // Actually perform the elimination. 448 if (DoCSE) { 449 for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { 450 MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); 451 MRI->clearKillFlags(CSEPairs[i].second); 452 } 453 MI->eraseFromParent(); 454 ++NumCSEs; 455 if (DefPhys) 456 ++NumPhysCSEs; 457 } else { 458 DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 459 VNT.insert(MI, CurrVN++); 460 Exps.push_back(MI); 461 } 462 CSEPairs.clear(); 463 } 464 465 return Changed; 466 } 467 468 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 469 /// dominator tree node if its a leaf or all of its children are done. Walk 470 /// up the dominator tree to destroy ancestors which are now done. 471 void 472 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, 473 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 474 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 475 if (OpenChildren[Node]) 476 return; 477 478 // Pop scope. 479 ExitScope(Node->getBlock()); 480 481 // Now traverse upwards to pop ancestors whose offsprings are all done. 482 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 483 unsigned Left = --OpenChildren[Parent]; 484 if (Left != 0) 485 break; 486 ExitScope(Parent->getBlock()); 487 Node = Parent; 488 } 489 } 490 491 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { 492 SmallVector<MachineDomTreeNode*, 32> Scopes; 493 SmallVector<MachineDomTreeNode*, 8> WorkList; 494 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 495 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 496 497 // Perform a DFS walk to determine the order of visit. 498 WorkList.push_back(Node); 499 do { 500 Node = WorkList.pop_back_val(); 501 Scopes.push_back(Node); 502 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 503 unsigned NumChildren = Children.size(); 504 OpenChildren[Node] = NumChildren; 505 for (unsigned i = 0; i != NumChildren; ++i) { 506 MachineDomTreeNode *Child = Children[i]; 507 ParentMap[Child] = Node; 508 WorkList.push_back(Child); 509 } 510 } while (!WorkList.empty()); 511 512 // Now perform CSE. 513 bool Changed = false; 514 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 515 MachineDomTreeNode *Node = Scopes[i]; 516 MachineBasicBlock *MBB = Node->getBlock(); 517 EnterScope(MBB); 518 Changed |= ProcessBlock(MBB); 519 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 520 ExitScopeIfDone(Node, OpenChildren, ParentMap); 521 } 522 523 return Changed; 524 } 525 526 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 527 TII = MF.getTarget().getInstrInfo(); 528 TRI = MF.getTarget().getRegisterInfo(); 529 MRI = &MF.getRegInfo(); 530 AA = &getAnalysis<AliasAnalysis>(); 531 DT = &getAnalysis<MachineDominatorTree>(); 532 return PerformCSE(DT->getRootNode()); 533 } 534