//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");

namespace {

class MachineCSE : public MachineFunctionPass {
  // Target hooks and analyses, cached in runOnMachineFunction. Not owned.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  AliasAnalysis *AA;
  MachineDominatorTree *DT;
  MachineRegisterInfo *MRI;

public:
  static char ID; // Pass identification

  MachineCSE() : MachineFunctionPass(ID) {
    initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  void releaseMemory() override {
    ScopeMap.clear();
    Exps.clear();
  }

private:
  // Value-numbering table: keys are MachineInstrs hashed/compared by their
  // expression (MachineInstrExpressionTrait), values are value numbers.
  // ScopedHashTable lets a scope be pushed per basic block and popped (with
  // all its insertions) when the dominator-tree walk leaves that block.
  using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                          ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;

  // Max number of instructions to scan when proving a physreg def dead or
  // unclobbered; initialized from TII in runOnMachineFunction.
  unsigned LookAheadLimit = 0;
  // Open hash-table scope for each block currently on the dom-tree walk path.
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  ScopedHTType VNT;
  // Exps[VN] is the instruction that was assigned value number VN.
  SmallVector<MachineInstr *, 64> Exps;
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                     MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(unsigned Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<unsigned,8> &PhysRefs,
                             SmallVectorImpl<unsigned> &PhysDefs,
                             bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        SmallSet<unsigned,8> &PhysRefs,
                        SmallVectorImpl<unsigned> &PhysDefs,
                        bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                         MachineInstr *CSMI, MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlock(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);
};

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  // For each virtual-register use of MI whose def is a full-register COPY
  // from another vreg, rewrite the use to read the COPY's source directly.
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Record single-use status before rewriting: if MI was the only
    // (non-debug) user, the COPY becomes dead and can be erased below.
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    unsigned SrcReg = DefMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way find a constrained subregister
    // class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    // Give up unless SrcReg's register attributes (class/bank/type) can be
    // constrained to satisfy Reg's.
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "*** to: " << *MI);

    // Collect matching debug values.
    SmallVector<MachineInstr *, 2> DbgValues;
    DefMI->collectDebugValues(DbgValues);
    // Propagate SrcReg to debug value instructions.
    for (auto *DBI : DbgValues)
      DBI->getOperand(0).setReg(SrcReg);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    // SrcReg's live range is extended past any previous kill point, so old
    // kill flags may now be wrong.
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

/// Return true if a def of physical register Reg at I is provably dead:
/// scanning forward at most LookAheadLimit non-debug instructions, a def of
/// Reg (or an overlapping register, or a clobbering regmask) is seen before
/// any use. Conservatively returns false when the end of the block or the
/// look-ahead limit is reached first.
bool
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
                                   MachineBasicBlock::const_iterator I,
                                   MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      // A regmask clobber counts as a def of Reg.
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // See a def of Reg (or an alias) before encountering any use, it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned,8> &PhysRefs,
                                       SmallVectorImpl<unsigned> &PhysDefs,
                                       bool &PhysUseDef) const{
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
      // Insert the register and everything aliasing it, so a later clobber of
      // any alias is detected by a simple set lookup.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(Reg);
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

/// Return true if the physical registers in PhysRefs are not clobbered by any
/// instruction between CSMI and MI, so MI may reuse CSMI's result. The scan
/// walks forward from CSMI at most LookAheadLimit non-debug instructions; when
/// CSMI is in MI's sole predecessor block the scan continues into MI's block,
/// and NonLocal is set to report that the walk crossed a block boundary.
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned,8> &PhysRefs,
                                  SmallVectorImpl<unsigned> &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      // Fell off the end of CSMI's block: resume at the top of MI's block
      // (only legal in the cross-MBB case, and only once).
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      // Reached MI without seeing a clobber of any register in PhysRefs.
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

/// Return true if MI is an instruction this pass will consider for CSE:
/// not a label/PHI/implicit-def/kill/inline-asm/debug instruction, not
/// copy-like, movable (no store, call, terminator, or unmodeled side
/// effects), and any load it performs is an invariant, dereferenceable one.
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
    if (!MI->isDereferenceableInvariantLoad(AA))
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds CSEed value may
  // be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineInstr *CSMI, MachineInstr *MI) {
  // FIXME: Heuristics that works around the lack the live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
    }
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      if (!CSUses.count(&MI)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *CSBB = CSMI->getParent();
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a vr and the only use
  // of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    // CSReg is already live in MI's block, so reusing it costs nothing.
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}

/// Push a new value-numbering scope for MBB; recorded in ScopeMap so it can be
/// popped by ExitScope when the dominator-tree walk is done with this block.
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

/// Pop (delete) the value-numbering scope opened for MBB by EnterScope,
/// removing all hash-table entries inserted while it was open.
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

/// Perform CSE on a single basic block: value-number each candidate
/// instruction and, when a dominating equivalent exists in the scoped table,
/// replace this instruction's results with the earlier ones and erase it.
/// Returns true if the block was changed.
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  // Advance the iterator before touching MI: MI may be erased below.
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(*MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    SmallVector<unsigned, 2> PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      // No dominating equivalent: record MI as the canonical instruction
      // for a fresh value number.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getNumDefs();

    // Pair up each def of MI with the corresponding def of CSMI; stop early
    // once all defs have been seen.
    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          unsigned LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn))
            MBB->addLiveIn(LiveIn);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      // Not profitable: keep MI and make it the canonical instruction for a
      // new value number instead.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                        DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offspring are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

/// Drive CSE over the dominator subtree rooted at Node: visit blocks in DFS
/// (pre-order) so each block sees the hash-table scopes of all its dominators,
/// popping scopes as subtrees complete. Returns true if anything changed.
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    OpenChildren[Node] = Children.size();
    for (MachineDomTreeNode *Child : Children)
      WorkList.push_back(Child);
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlock(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

/// Pass entry point: cache target hooks and analyses, then run CSE over the
/// whole dominator tree starting at the function's entry block.
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  return PerformCSE(DT->getRootNode());
}