//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
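//
// Expressions are keyed on an instruction's opcode and operands via
// MachineInstrExpressionTrait, and the value-number table is scoped along the
// machine dominator tree, so a value computed in one block can only replace
// re-computations in blocks that it dominates.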
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs,      "Number of common subexpressions eliminated");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexprs eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CSes eliminated");
STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");

namespace {

  class MachineCSE : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    AliasAnalysis *AA;
    MachineDominatorTree *DT;
    MachineRegisterInfo *MRI;

  public:
    static char ID; // Pass identification

    MachineCSE() : MachineFunctionPass(ID) {
      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreservedID(MachineLoopInfoID);
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
    }

    void releaseMemory() override {
      ScopeMap.clear();
      Exps.clear();
    }

  private:
    using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                            ScopedHashTableVal<MachineInstr *, unsigned>>;
    using ScopedHTType =
        ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                        AllocatorTy>;
    using ScopeType = ScopedHTType::ScopeTy;

    unsigned LookAheadLimit = 0;
    DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
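    // VNT maps an instruction's expression to its value number within the
    // currently open scopes; Exps maps each value number back to the defining
    // instruction, and CurrVN is the next value number to assign.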
    ScopedHTType VNT;
    SmallVector<MachineInstr *, 64> Exps;
    unsigned CurrVN = 0;

    bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                       MachineBasicBlock *MBB);
    bool isPhysDefTriviallyDead(unsigned Reg,
                                MachineBasicBlock::const_iterator I,
                                MachineBasicBlock::const_iterator E) const;
    bool hasLivePhysRegDefUses(const MachineInstr *MI,
                               const MachineBasicBlock *MBB,
                               SmallSet<unsigned,8> &PhysRefs,
                               SmallVectorImpl<unsigned> &PhysDefs,
                               bool &PhysUseDef) const;
    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                          SmallSet<unsigned,8> &PhysRefs,
                          SmallVectorImpl<unsigned> &PhysDefs,
                          bool &NonLocal) const;
    bool isCSECandidate(MachineInstr *MI);
    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                           MachineInstr *CSMI, MachineInstr *MI);
    void EnterScope(MachineBasicBlock *MBB);
    void ExitScope(MachineBasicBlock *MBB);
    bool ProcessBlock(MachineBasicBlock *MBB);
    void ExitScopeIfDone(MachineDomTreeNode *Node,
                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
    bool PerformCSE(MachineDomTreeNode *Node);
  };

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
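///
/// For example (schematically, with %0 and %1 standing for arbitrary virtual
/// registers):
///   %1 = COPY %0
///   ... = ADD %1, ...
/// The use of %1 in the ADD is rewritten to use %0, and if that was the only
/// non-debug use of %1, the COPY is erased.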
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    unsigned SrcReg = DefMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                     SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "***     to: " << *MI);

    // Update matching debug values.
    DefMI->changeDebugValuesDefReg(SrcReg);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

bool
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
                                   MachineBasicBlock::const_iterator I,
                                   MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // We saw a def of Reg (or an alias) before encountering any use; it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI) {
  // MachineRegisterInfo::isConstantPhysReg, which is called directly by
  // isCallerPreservedOrConstPhysReg, expects the reserved registers to be
  // frozen. That doesn't cause a problem post-ISel as most (if not all)
  // targets freeze reserved registers right after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}

/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers). It
/// also returns the physical register def by reference if it's the only one
/// and the instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned,8> &PhysRefs,
                                       SmallVectorImpl<unsigned> &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before LiveVariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(Reg);
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned,8> &PhysRefs,
                                  SmallVectorImpl<unsigned> &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
        // Avoid extending the live ranges of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // safely CSE the load.
    if (!MI->isDereferenceableInvariantLoad(AA))
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds the CSEed
  // value may be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineInstr *CSMI, MachineInstr *MI) {
  // FIXME: Heuristics that work around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
    }
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      if (!CSUses.count(&MI)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *CSBB = CSMI->getParent();
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a virtual register and the
  // only uses of the redundant computation are copies, do not CSE.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}

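// EnterScope/ExitScope bracket the processing of a block during the dominator
// tree walk in PerformCSE: value numbers inserted into VNT while a block's
// scope is open are discarded when the scope is deleted, so an expression is
// only visible to blocks dominated by its definition.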
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(*MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    SmallVector<unsigned, 2> PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block,
      // the physical registers it defines are not clobbered in between,
      // and the physical register uses were not clobbered either.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getNumDefs();

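    // MI and CSMI are identical expressions, so their operands correspond
    // positionally; pair each def of MI with the def at the same operand
    // index in CSMI.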
    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI. If a def is not dead at MI,
      // we should make sure it is not dead at CSMI either.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // NewReg may have been dead (unused) before, but it is used now; clear
        // the Dead flag on its definition.
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI. If a def is not dead at MI,
      // make sure it is not dead at CSMI either.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to the
        // live-in list of MBB.
        while (!PhysDefs.empty()) {
          unsigned LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn))
            MBB->addLiveIn(LiveIn);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
/// given dominator tree node if it's a leaf or all of its children are done.
/// Walk up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                        DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose children are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
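  // OpenChildren records, for each node visited in the DFS below, how many of
  // its dominator-tree children are still unprocessed; ExitScopeIfDone pops a
  // node's scope once this count reaches zero.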

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    OpenChildren[Node] = Children.size();
    for (MachineDomTreeNode *Child : Children)
      WorkList.push_back(Child);
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlock(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  return PerformCSE(DT->getRootNode());
}