1 //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs global common subexpression elimination on machine 11 // instructions using a scoped hash table based value numbering scheme. It 12 // must be run while the machine function is still in SSA form. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "machine-cse" 17 #include "llvm/CodeGen/Passes.h" 18 #include "llvm/CodeGen/MachineDominators.h" 19 #include "llvm/CodeGen/MachineInstr.h" 20 #include "llvm/CodeGen/MachineRegisterInfo.h" 21 #include "llvm/Analysis/AliasAnalysis.h" 22 #include "llvm/Target/TargetInstrInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/ScopedHashTable.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 29 using namespace llvm; 30 31 STATISTIC(NumCoalesces, "Number of copies coalesced"); 32 STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 33 STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); 34 35 namespace { 36 class MachineCSE : public MachineFunctionPass { 37 const TargetInstrInfo *TII; 38 const TargetRegisterInfo *TRI; 39 AliasAnalysis *AA; 40 MachineDominatorTree *DT; 41 MachineRegisterInfo *MRI; 42 public: 43 static char ID; // Pass identification 44 MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {} 45 46 virtual bool runOnMachineFunction(MachineFunction &MF); 47 48 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 49 AU.setPreservesCFG(); 50 MachineFunctionPass::getAnalysisUsage(AU); 51 AU.addRequired<AliasAnalysis>(); 52 AU.addRequired<MachineDominatorTree>(); 53 AU.addPreserved<MachineDominatorTree>(); 54 } 55 56 private: 57 const unsigned LookAheadLimit; 58 typedef ScopedHashTableScope<MachineInstr*, unsigned, 59 MachineInstrExpressionTrait> ScopeType; 60 DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; 61 ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; 62 SmallVector<MachineInstr*, 64> Exps; 63 unsigned CurrVN; 64 65 bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 66 bool isPhysDefTriviallyDead(unsigned Reg, 67 MachineBasicBlock::const_iterator I, 68 MachineBasicBlock::const_iterator E) const ; 69 bool hasLivePhysRegDefUse(const MachineInstr *MI, 70 const MachineBasicBlock *MBB, 71 unsigned &PhysDef) const; 72 bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 73 unsigned PhysDef) const; 74 bool isCSECandidate(MachineInstr *MI); 75 bool isProfitableToCSE(unsigned CSReg, unsigned Reg, 76 MachineInstr *CSMI, MachineInstr *MI); 77 void EnterScope(MachineBasicBlock *MBB); 78 void ExitScope(MachineBasicBlock *MBB); 79 bool ProcessBlock(MachineBasicBlock *MBB); 80 void ExitScopeIfDone(MachineDomTreeNode *Node, 81 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 82 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 83 bool PerformCSE(MachineDomTreeNode *Node); 84 }; 85 } // end anonymous namespace 86 87 char MachineCSE::ID = 0; 88 static RegisterPass<MachineCSE> 89 X("machine-cse", "Machine Common Subexpression Elimination"); 90 91 FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } 92 93 bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, 94 MachineBasicBlock *MBB) { 95 bool Changed = false; 96 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 97 MachineOperand &MO = MI->getOperand(i); 98 if (!MO.isReg() || !MO.isUse()) 99 continue; 100 unsigned Reg = MO.getReg(); 101 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 102 continue; 103 if (!MRI->hasOneUse(Reg)) 104 // Only coalesce single use copies. This ensure the copy will be 105 // deleted. 106 continue; 107 MachineInstr *DefMI = MRI->getVRegDef(Reg); 108 if (DefMI->getParent() != MBB) 109 continue; 110 unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 111 if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && 112 TargetRegisterInfo::isVirtualRegister(SrcReg) && 113 !SrcSubIdx && !DstSubIdx) { 114 const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); 115 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 116 const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); 117 if (!NewRC) 118 continue; 119 DEBUG(dbgs() << "Coalescing: " << *DefMI); 120 DEBUG(dbgs() << "*** to: " << *MI); 121 MO.setReg(SrcReg); 122 MRI->clearKillFlags(SrcReg); 123 if (NewRC != SRC) 124 MRI->setRegClass(SrcReg, NewRC); 125 DefMI->eraseFromParent(); 126 ++NumCoalesces; 127 Changed = true; 128 } 129 } 130 131 return Changed; 132 } 133 134 bool 135 MachineCSE::isPhysDefTriviallyDead(unsigned Reg, 136 MachineBasicBlock::const_iterator I, 137 MachineBasicBlock::const_iterator E) const { 138 unsigned LookAheadLeft = LookAheadLimit; 139 while (LookAheadLeft) { 140 // Skip over dbg_value's. 141 while (I != E && I->isDebugValue()) 142 ++I; 143 144 if (I == E) 145 // Reached end of block, register is obviously dead. 146 return true; 147 148 bool SeenDef = false; 149 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 150 const MachineOperand &MO = I->getOperand(i); 151 if (!MO.isReg() || !MO.getReg()) 152 continue; 153 if (!TRI->regsOverlap(MO.getReg(), Reg)) 154 continue; 155 if (MO.isUse()) 156 // Found a use! 157 return false; 158 SeenDef = true; 159 } 160 if (SeenDef) 161 // See a def of Reg (or an alias) before encountering any use, it's 162 // trivially dead. 163 return true; 164 165 --LookAheadLeft; 166 ++I; 167 } 168 return false; 169 } 170 171 /// hasLivePhysRegDefUse - Return true if the specified instruction read / write 172 /// physical registers (except for dead defs of physical registers). It also 173 /// returns the physical register def by reference if it's the only one and the 174 /// instruction does not uses a physical register. 175 bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, 176 const MachineBasicBlock *MBB, 177 unsigned &PhysDef) const { 178 PhysDef = 0; 179 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 180 const MachineOperand &MO = MI->getOperand(i); 181 if (!MO.isReg()) 182 continue; 183 unsigned Reg = MO.getReg(); 184 if (!Reg) 185 continue; 186 if (TargetRegisterInfo::isVirtualRegister(Reg)) 187 continue; 188 if (MO.isUse()) { 189 // Can't touch anything to read a physical register. 190 PhysDef = 0; 191 return true; 192 } 193 if (MO.isDead()) 194 // If the def is dead, it's ok. 195 continue; 196 // Ok, this is a physical register def that's not marked "dead". That's 197 // common since this pass is run before livevariables. We can scan 198 // forward a few instructions and check if it is obviously dead. 199 if (PhysDef) { 200 // Multiple physical register defs. These are rare, forget about it. 201 PhysDef = 0; 202 return true; 203 } 204 PhysDef = Reg; 205 } 206 207 if (PhysDef) { 208 MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); 209 if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) 210 return true; 211 } 212 return false; 213 } 214 215 bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 216 unsigned PhysDef) const { 217 // For now conservatively returns false if the common subexpression is 218 // not in the same basic block as the given instruction. 219 MachineBasicBlock *MBB = MI->getParent(); 220 if (CSMI->getParent() != MBB) 221 return false; 222 MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); 223 MachineBasicBlock::const_iterator E = MI; 224 unsigned LookAheadLeft = LookAheadLimit; 225 while (LookAheadLeft) { 226 // Skip over dbg_value's. 227 while (I != E && I->isDebugValue()) 228 ++I; 229 230 if (I == E) 231 return true; 232 if (I->modifiesRegister(PhysDef, TRI)) 233 return false; 234 235 --LookAheadLeft; 236 ++I; 237 } 238 239 return false; 240 } 241 242 static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) { 243 unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 244 return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) || 245 MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg(); 246 } 247 248 bool MachineCSE::isCSECandidate(MachineInstr *MI) { 249 if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 250 MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) 251 return false; 252 253 // Ignore copies. 254 if (isCopy(MI, TII)) 255 return false; 256 257 // Ignore stuff that we obviously can't move. 258 const TargetInstrDesc &TID = MI->getDesc(); 259 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 260 TID.hasUnmodeledSideEffects()) 261 return false; 262 263 if (TID.mayLoad()) { 264 // Okay, this instruction does a load. As a refinement, we allow the target 265 // to decide whether the loaded value is actually a constant. If so, we can 266 // actually use it as a load. 267 if (!MI->isInvariantLoad(AA)) 268 // FIXME: we should be able to hoist loads with no other side effects if 269 // there are no other instructions which can change memory in this loop. 270 // This is a trivial form of alias analysis. 271 return false; 272 } 273 return true; 274 } 275 276 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 277 /// common expression that defines Reg. 278 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, 279 MachineInstr *CSMI, MachineInstr *MI) { 280 // FIXME: Heuristics that works around the lack the live range splitting. 281 282 // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an 283 // immediate predecessor. We don't want to increase register pressure and end up 284 // causing other computation to be spilled. 285 if (MI->getDesc().isAsCheapAsAMove()) { 286 MachineBasicBlock *CSBB = CSMI->getParent(); 287 MachineBasicBlock *BB = MI->getParent(); 288 if (CSBB != BB && 289 find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) 290 return false; 291 } 292 293 // Heuristics #2: If the expression doesn't not use a vr and the only use 294 // of the redundant computation are copies, do not cse. 295 bool HasVRegUse = false; 296 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 297 const MachineOperand &MO = MI->getOperand(i); 298 if (MO.isReg() && MO.isUse() && MO.getReg() && 299 TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 300 HasVRegUse = true; 301 break; 302 } 303 } 304 if (!HasVRegUse) { 305 bool HasNonCopyUse = false; 306 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 307 E = MRI->use_nodbg_end(); I != E; ++I) { 308 MachineInstr *Use = &*I; 309 // Ignore copies. 310 if (!isCopy(Use, TII)) { 311 HasNonCopyUse = true; 312 break; 313 } 314 } 315 if (!HasNonCopyUse) 316 return false; 317 } 318 319 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 320 // it unless the defined value is already used in the BB of the new use. 321 bool HasPHI = false; 322 SmallPtrSet<MachineBasicBlock*, 4> CSBBs; 323 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), 324 E = MRI->use_nodbg_end(); I != E; ++I) { 325 MachineInstr *Use = &*I; 326 HasPHI |= Use->isPHI(); 327 CSBBs.insert(Use->getParent()); 328 } 329 330 if (!HasPHI) 331 return true; 332 return CSBBs.count(MI->getParent()); 333 } 334 335 void MachineCSE::EnterScope(MachineBasicBlock *MBB) { 336 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 337 ScopeType *Scope = new ScopeType(VNT); 338 ScopeMap[MBB] = Scope; 339 } 340 341 void MachineCSE::ExitScope(MachineBasicBlock *MBB) { 342 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 343 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 344 assert(SI != ScopeMap.end()); 345 ScopeMap.erase(SI); 346 delete SI->second; 347 } 348 349 bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { 350 bool Changed = false; 351 352 SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; 353 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { 354 MachineInstr *MI = &*I; 355 ++I; 356 357 if (!isCSECandidate(MI)) 358 continue; 359 360 bool DefPhys = false; 361 bool FoundCSE = VNT.count(MI); 362 if (!FoundCSE) { 363 // Look for trivial copy coalescing opportunities. 364 if (PerformTrivialCoalescing(MI, MBB)) { 365 // After coalescing MI itself may become a copy. 366 if (isCopy(MI, TII)) 367 continue; 368 FoundCSE = VNT.count(MI); 369 } 370 } 371 // FIXME: commute commutable instructions? 372 373 // If the instruction defines a physical register and the value *may* be 374 // used, then it's not safe to replace it with a common subexpression. 375 unsigned PhysDef = 0; 376 if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { 377 FoundCSE = false; 378 379 // ... Unless the CS is local and it also defines the physical register 380 // which is not clobbered in between. 381 if (PhysDef) { 382 unsigned CSVN = VNT.lookup(MI); 383 MachineInstr *CSMI = Exps[CSVN]; 384 if (PhysRegDefReaches(CSMI, MI, PhysDef)) { 385 FoundCSE = true; 386 DefPhys = true; 387 } 388 } 389 } 390 391 if (!FoundCSE) { 392 VNT.insert(MI, CurrVN++); 393 Exps.push_back(MI); 394 continue; 395 } 396 397 // Found a common subexpression, eliminate it. 398 unsigned CSVN = VNT.lookup(MI); 399 MachineInstr *CSMI = Exps[CSVN]; 400 DEBUG(dbgs() << "Examining: " << *MI); 401 DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 402 403 // Check if it's profitable to perform this CSE. 404 bool DoCSE = true; 405 unsigned NumDefs = MI->getDesc().getNumDefs(); 406 for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { 407 MachineOperand &MO = MI->getOperand(i); 408 if (!MO.isReg() || !MO.isDef()) 409 continue; 410 unsigned OldReg = MO.getReg(); 411 unsigned NewReg = CSMI->getOperand(i).getReg(); 412 if (OldReg == NewReg) 413 continue; 414 assert(TargetRegisterInfo::isVirtualRegister(OldReg) && 415 TargetRegisterInfo::isVirtualRegister(NewReg) && 416 "Do not CSE physical register defs!"); 417 if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { 418 DoCSE = false; 419 break; 420 } 421 CSEPairs.push_back(std::make_pair(OldReg, NewReg)); 422 --NumDefs; 423 } 424 425 // Actually perform the elimination. 426 if (DoCSE) { 427 for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { 428 MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); 429 MRI->clearKillFlags(CSEPairs[i].second); 430 } 431 MI->eraseFromParent(); 432 ++NumCSEs; 433 if (DefPhys) 434 ++NumPhysCSEs; 435 } else { 436 DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 437 VNT.insert(MI, CurrVN++); 438 Exps.push_back(MI); 439 } 440 CSEPairs.clear(); 441 } 442 443 return Changed; 444 } 445 446 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 447 /// dominator tree node if its a leaf or all of its children are done. Walk 448 /// up the dominator tree to destroy ancestors which are now done. 449 void 450 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, 451 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 452 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 453 if (OpenChildren[Node]) 454 return; 455 456 // Pop scope. 457 ExitScope(Node->getBlock()); 458 459 // Now traverse upwards to pop ancestors whose offsprings are all done. 460 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 461 unsigned Left = --OpenChildren[Parent]; 462 if (Left != 0) 463 break; 464 ExitScope(Parent->getBlock()); 465 Node = Parent; 466 } 467 } 468 469 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { 470 SmallVector<MachineDomTreeNode*, 32> Scopes; 471 SmallVector<MachineDomTreeNode*, 8> WorkList; 472 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 473 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 474 475 // Perform a DFS walk to determine the order of visit. 476 WorkList.push_back(Node); 477 do { 478 Node = WorkList.pop_back_val(); 479 Scopes.push_back(Node); 480 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 481 unsigned NumChildren = Children.size(); 482 OpenChildren[Node] = NumChildren; 483 for (unsigned i = 0; i != NumChildren; ++i) { 484 MachineDomTreeNode *Child = Children[i]; 485 ParentMap[Child] = Node; 486 WorkList.push_back(Child); 487 } 488 } while (!WorkList.empty()); 489 490 // Now perform CSE. 491 bool Changed = false; 492 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 493 MachineDomTreeNode *Node = Scopes[i]; 494 MachineBasicBlock *MBB = Node->getBlock(); 495 EnterScope(MBB); 496 Changed |= ProcessBlock(MBB); 497 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 498 ExitScopeIfDone(Node, OpenChildren, ParentMap); 499 } 500 501 return Changed; 502 } 503 504 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 505 TII = MF.getTarget().getInstrInfo(); 506 TRI = MF.getTarget().getRegisterInfo(); 507 MRI = &MF.getRegInfo(); 508 AA = &getAnalysis<AliasAnalysis>(); 509 DT = &getAnalysis<MachineDominatorTree>(); 510 return PerformCSE(DT->getRootNode()); 511 } 512