//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");

namespace {

class MachineCSE : public MachineFunctionPass {
    // Target hooks and analyses, cached in runOnMachineFunction.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    AliasAnalysis *AA;
    MachineDominatorTree *DT;
    MachineRegisterInfo *MRI;

  public:
    static char ID; // Pass identification

    MachineCSE() : MachineFunctionPass(ID) {
      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      // The pass only removes/rewrites instructions; it never changes the CFG.
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreservedID(MachineLoopInfoID);
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
    }

    void releaseMemory() override {
      ScopeMap.clear();
      Exps.clear();
    }

  private:
    using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                            ScopedHashTableVal<MachineInstr *, unsigned>>;
    using ScopedHTType =
        ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                        AllocatorTy>;
    using ScopeType = ScopedHTType::ScopeTy;

    // How many instructions to scan forward when deciding whether a physreg
    // def is trivially dead; set from TII in runOnMachineFunction.
    unsigned LookAheadLimit = 0;
    // Open hash-table scope per basic block, created in EnterScope and
    // destroyed in ExitScope.
    DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
    // Scoped value-numbering table: expression (MachineInstr) -> value number.
    ScopedHTType VNT;
    // Exps[VN] is the defining instruction for value number VN.
    SmallVector<MachineInstr *, 64> Exps;
    unsigned CurrVN = 0;

    bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                       MachineBasicBlock *MBB);
    bool isPhysDefTriviallyDead(unsigned Reg,
                                MachineBasicBlock::const_iterator I,
                                MachineBasicBlock::const_iterator E) const;
    bool hasLivePhysRegDefUses(const MachineInstr *MI,
                               const MachineBasicBlock *MBB,
                               SmallSet<unsigned,8> &PhysRefs,
                               SmallVectorImpl<unsigned> &PhysDefs,
                               bool &PhysUseDef) const;
    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                          SmallSet<unsigned,8> &PhysRefs,
                          SmallVectorImpl<unsigned> &PhysDefs,
                          bool &NonLocal) const;
    bool isCSECandidate(MachineInstr *MI);
    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                           MachineInstr *CSMI, MachineInstr *MI);
    void EnterScope(MachineBasicBlock *MBB);
    void ExitScope(MachineBasicBlock *MBB);
    bool ProcessBlock(MachineBasicBlock *MBB);
    void ExitScopeIfDone(MachineDomTreeNode *Node,
                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
    bool PerformCSE(MachineDomTreeNode *Node);
  };

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Remember single-use state before we rewrite MO below.
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    unsigned SrcReg = DefMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    // Replacing Reg with SrcReg is only legal if SrcReg can be constrained to
    // Reg's register class.
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if (!MRI->constrainRegClass(SrcReg, RC))
      continue;
    DEBUG(dbgs() << "Coalescing: " << *DefMI);
    DEBUG(dbgs() << "*** to: " << *MI);
    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    // Kill flags on SrcReg may now be stale since its live range grew.
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

/// isPhysDefTriviallyDead - Scan up to LookAheadLimit instructions in [I, E)
/// and return true if a def of Reg (or an overlapping register / clobbering
/// regmask) is seen before any use of it. Reaching E or exhausting the
/// look-ahead budget conservatively returns false.
bool
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
                                   MachineBasicBlock::const_iterator I,
                                   MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      // A regmask clobber counts as a def of Reg.
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // See a def of Reg (or an alias) before encountering any use, it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers).
/// PhysRefs is filled with all referenced physregs and their aliases;
/// PhysDefs with the non-trivially-dead physreg defs; PhysUseDef is set when
/// the instruction both uses and defines the same physreg.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned,8> &PhysRefs,
                                       SmallVectorImpl<unsigned> &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading constant physregs is ok.
    if (!MRI->isConstantPhysReg(Reg))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(Reg);
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

/// PhysRegDefsReach - Return true if the physreg values defined/used by CSMI
/// still reach MI, i.e. no instruction between CSMI and MI (within the
/// look-ahead limit) defines any register in PhysRefs or carries a regmask.
/// NonLocal is set when the scan had to cross from CSMI's block into MI's.
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned,8> &PhysRefs,
                                  SmallVectorImpl<unsigned> &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugValue())
      ++I;

    if (I == EE) {
      // Ran off the end of CSMI's block: continue the scan at the top of
      // MI's (sole-successor) block.
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      // Reached MI with no intervening clobber.
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

/// isCSECandidate - Return true if MI is an instruction this pass may try to
/// eliminate: not position/PHI/inline-asm/copy-like, no stores, calls,
/// terminators or unmodeled side effects, and any load must be an invariant,
/// dereferenceable one.
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugValue())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
    if (!MI->isDereferenceableInvariantLoad(AA))
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds CSEed value may
  // be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineInstr *CSMI, MachineInstr *MI) {
  // FIXME: Heuristics that work around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
    }
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      if (!CSUses.count(&MI)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *CSBB = CSMI->getParent();
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a vr and the only uses
  // of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
  for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= MI.isPHI();
    CSBBs.insert(MI.getParent());
  }

  if (!HasPHI)
    return true;
  return CSBBs.count(MI->getParent());
}

/// EnterScope - Open a new value-numbering scope for MBB and remember it in
/// ScopeMap so ExitScope can tear it down.
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

/// ExitScope - Destroy the value-numbering scope associated with MBB,
/// dropping all value numbers inserted while it was open.
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

/// ProcessBlock - CSE the instructions of MBB against the current
/// value-numbering table. Returns true if anything changed.
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    // Advance now: MI may be erased below.
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(*MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    SmallVector<unsigned, 2> PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      // Not redundant: record MI as the canonical instruction for its value.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    DEBUG(dbgs() << "Examining: " << *MI);
    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getDesc().getNumDefs() +
                       MI->getDesc().getNumImplicitDefs();

    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the old instruction cannot exist
      // within the register class of the new instruction.
      const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
      if (!MRI->constrainRegClass(NewReg, OldRC)) {
        DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... %NZCV<imp-def>       <- CSMI
      //   csinc ... %NZCV<imp-use,kill>  <- this kill flag isn't valid anymore
      //   subs  ... %NZCV<imp-def>       <- MI, to be eliminated
      //   csinc ... %NZCV<imp-use,kill>
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %NZCV), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          unsigned LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn))
            MBB->addLiveIn(LiveIn);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      // Not worth eliminating: keep MI as a new canonical expression.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    // Reset per-instruction scratch state before the next iteration.
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                        DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offspring are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

/// PerformCSE - Walk the dominator tree rooted at Node in DFS preorder,
/// running ProcessBlock on each block inside a matching value-numbering
/// scope so expressions from dominators are visible in dominated blocks.
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    OpenChildren[Node] = Children.size();
    for (MachineDomTreeNode *Child : Children)
      WorkList.push_back(Child);
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlock(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

/// runOnMachineFunction - Cache the target hooks and analyses, then run CSE
/// over the whole function starting at the dominator tree root.
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  return PerformCSE(DT->getRootNode());
}