1 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs global common subexpression elimination on machine 10 // instructions using a scoped hash table based value numbering scheme. It 11 // must be run while the machine function is still in SSA form. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/ScopedHashTable.h" 17 #include "llvm/ADT/SmallPtrSet.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/Analysis/AliasAnalysis.h" 22 #include "llvm/CodeGen/MachineBasicBlock.h" 23 #include "llvm/CodeGen/MachineDominators.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineOperand.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/CodeGen/TargetInstrInfo.h" 31 #include "llvm/CodeGen/TargetOpcodes.h" 32 #include "llvm/CodeGen/TargetRegisterInfo.h" 33 #include "llvm/CodeGen/TargetSubtargetInfo.h" 34 #include "llvm/MC/MCInstrDesc.h" 35 #include "llvm/MC/MCRegisterInfo.h" 36 #include "llvm/Pass.h" 37 #include "llvm/Support/Allocator.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/RecyclingAllocator.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <cassert> 42 #include <iterator> 43 #include <utility> 44 #include <vector> 45 46 using namespace llvm; 47 48 #define DEBUG_TYPE "machine-cse" 49 50 STATISTIC(NumCoalesces, "Number of copies coalesced"); 51 
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");

namespace {

/// Performs global common subexpression elimination on machine instructions
/// while the function is still in SSA form. Instructions are value-numbered
/// in a scoped hash table whose scopes follow the dominator tree, so a
/// candidate is only ever matched against an equivalent instruction that
/// dominates it.
class MachineCSE : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  AliasAnalysis *AA;
  MachineDominatorTree *DT;
  MachineRegisterInfo *MRI;

public:
  static char ID; // Pass identification

  MachineCSE() : MachineFunctionPass(ID) {
    initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  void releaseMemory() override {
    ScopeMap.clear();
    Exps.clear();
  }

private:
  using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                          ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;

  // Maximum number of instructions scanned when deciding whether a physreg
  // def is trivially dead; fetched from TII in runOnMachineFunction.
  unsigned LookAheadLimit = 0;

  // Open hash-table scope for each block on the current dominator-tree walk;
  // created by EnterScope, destroyed by ExitScope.
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;

  // Value-number table: maps an instruction (by expression identity) to a
  // value number, which indexes into Exps.
  ScopedHTType VNT;

  // Exps[VN] is the instruction that defines value number VN.
  SmallVector<MachineInstr *, 64> Exps;

  // Next value number to hand out.
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                     MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(unsigned Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<unsigned,8> &PhysRefs,
                             SmallVectorImpl<unsigned> &PhysDefs,
                             bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        SmallSet<unsigned,8> &PhysRefs,
                        SmallVectorImpl<unsigned> &PhysDefs,
                        bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                         MachineInstr *CSMI, MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlock(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);
};

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->operands()) {
    // Only virtual-register uses are candidates for rewriting.
    if (!MO.isReg() || !MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Query the use count before MO.setReg below mutates Reg's use list.
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    // Only propagate full virtual-to-virtual copies: no physreg source and
    // no subregister index on either side.
    unsigned SrcReg = DefMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    // The rewrite is only valid if SrcReg can take on Reg's register
    // class/bank/type constraints.
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "*** to: " << *MI);

    // Update matching debug values.
    DefMI->changeDebugValuesDefReg(SrcReg);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    // SrcReg's live range was extended past any previous kill, so the old
    // kill flags are no longer trustworthy.
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

/// Return true if physical register \p Reg is provably dead at iterator \p I:
/// it is fully redefined (or regmask-clobbered) with no intervening read
/// within the next LookAheadLimit instructions of [I, E). Conservatively
/// returns false when the scan hits the block end or the look-ahead budget.
bool
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
                                   MachineBasicBlock::const_iterator I,
                                   MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      // A regmask (e.g. on a call) that clobbers Reg counts as a def.
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // See a def of Reg (or an alias) before encountering any use, it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

/// Return true if \p Reg is a caller-preserved physical register (per the
/// target) or a constant physreg; reading such a register is safe for CSE.
static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI) {
  // MachineRegisterInfo::isConstantPhysReg directly called by
  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
  // most (if not all) targets freeze reserved registers right after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}

/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
/// physical registers (except for dead defs of physical registers).
/// It also
/// returns the physical register defs by reference (in \p PhysDefs), and all
/// physreg references (uses and live defs, expanded to aliases) in
/// \p PhysRefs; \p PhysUseDef is set when the instruction both uses and
/// defines the same physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned,8> &PhysRefs,
                                       SmallVectorImpl<unsigned> &PhysDefs,
                                       bool &PhysUseDef) const{
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
      // Insert the register and all of its aliases so overlapping defs in
      // later instructions are caught by a simple set lookup.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(Reg);
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

/// Return true if the physical registers in \p PhysRefs defined by \p CSMI
/// are not clobbered between \p CSMI and \p MI, so the earlier defs still
/// reach \p MI. \p NonLocal is set when the scan crossed from CSMI's block
/// into MI's block.
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned,8> &PhysRefs,
                                  SmallVectorImpl<unsigned> &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  // Scan forward from just after CSMI up to MI (possibly continuing into
  // MBB when CSMI is in the sole predecessor), within LookAheadLimit steps.
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      // Reached MI with no intervening clobber of any referenced physreg.
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

/// Return true if \p MI is an instruction this pass may try to eliminate:
/// not position/PHI/inline-asm/debug, not a copy, free of unmodeled side
/// effects, and any load it performs is invariant and dereferenceable.
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
    if (!MI->isDereferenceableInvariantLoad(AA))
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds CSEed value may
  // be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineInstr *CSMI, MachineInstr *MI) {
  // FIXME: Heuristics that work around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    // Pressure cannot increase if every (non-debug) user of Reg already
    // uses CSReg as well.  NOTE: the loop variable MI intentionally shadows
    // the MI parameter here.
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
    }
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      if (!CSUses.count(&MI)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *CSBB = CSMI->getParent();
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a vr and the only use
  // of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    // CSReg is already live in MI's block; reusing it cannot hurt there.
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}

/// Open a new value-number scope for \p MBB; popped by ExitScope.
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

/// Destroy \p MBB's scope, removing its value numbers from VNT.
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

/// Value-number the instructions of \p MBB and eliminate those that match an
/// expression already in the (dominating) scoped table. Returns true if the
/// block was changed.
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    // Advance now: MI may be erased below.
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(*MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    SmallVector<unsigned, 2> PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      // No match: record MI as a new expression and move on.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getNumDefs();

    // Pair up each def of MI with the corresponding def of CSMI; the
    // "NumDefs &&" condition stops the walk once all defs are handled.
    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          unsigned LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn))
            MBB->addLiveIn(LiveIn);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      // Not profitable / not legal after all: keep MI and give it its own
      // value number.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    // Reset per-instruction scratch state for the next iteration.
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                            DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offspring are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

/// Drive the CSE over the dominator subtree rooted at \p Node: visit blocks
/// in dominator pre-order, opening a value-number scope per block so that a
/// block only sees expressions from its dominators. Returns true if any
/// block was changed.
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    OpenChildren[Node] = Children.size();
    for (MachineDomTreeNode *Child : Children)
      WorkList.push_back(Child);
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlock(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

/// Pass entry point: cache the target hooks and analyses, then run CSE over
/// the whole dominator tree of \p MF.
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  return PerformCSE(DT->getRootNode());
}