1*4ba319b5SDimitry Andric //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
2*4ba319b5SDimitry Andric //
3*4ba319b5SDimitry Andric // The LLVM Compiler Infrastructure
4*4ba319b5SDimitry Andric //
5*4ba319b5SDimitry Andric // This file is distributed under the University of Illinois Open Source
6*4ba319b5SDimitry Andric // License. See LICENSE.TXT for details.
7*4ba319b5SDimitry Andric //
8*4ba319b5SDimitry Andric //===----------------------------------------------------------------------===//
9*4ba319b5SDimitry Andric //
10*4ba319b5SDimitry Andric /// \file Break False Dependency pass.
11*4ba319b5SDimitry Andric ///
12*4ba319b5SDimitry Andric /// Some instructions have false dependencies which cause unnecessary stalls.
13*4ba319b5SDimitry Andric /// For exmaple, instructions that only write part of a register, and implicitly
14*4ba319b5SDimitry Andric /// need to read the other parts of the register. This may cause unwanted
15*4ba319b5SDimitry Andric /// stalls preventing otherwise unrelated instructions from executing in
16*4ba319b5SDimitry Andric /// parallel in an out-of-order CPU.
17*4ba319b5SDimitry Andric /// This pass is aimed at identifying and avoiding these depepndencies when
18*4ba319b5SDimitry Andric /// possible.
19*4ba319b5SDimitry Andric //
20*4ba319b5SDimitry Andric //===----------------------------------------------------------------------===//
21*4ba319b5SDimitry Andric
22*4ba319b5SDimitry Andric #include "llvm/CodeGen/LivePhysRegs.h"
23*4ba319b5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
24*4ba319b5SDimitry Andric #include "llvm/CodeGen/ReachingDefAnalysis.h"
25*4ba319b5SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
26*4ba319b5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
27*4ba319b5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
28*4ba319b5SDimitry Andric
29*4ba319b5SDimitry Andric
30*4ba319b5SDimitry Andric using namespace llvm;
31*4ba319b5SDimitry Andric
32*4ba319b5SDimitry Andric namespace llvm {
33*4ba319b5SDimitry Andric
34*4ba319b5SDimitry Andric class BreakFalseDeps : public MachineFunctionPass {
35*4ba319b5SDimitry Andric private:
36*4ba319b5SDimitry Andric MachineFunction *MF;
37*4ba319b5SDimitry Andric const TargetInstrInfo *TII;
38*4ba319b5SDimitry Andric const TargetRegisterInfo *TRI;
39*4ba319b5SDimitry Andric RegisterClassInfo RegClassInfo;
40*4ba319b5SDimitry Andric
41*4ba319b5SDimitry Andric /// List of undefined register reads in this block in forward order.
42*4ba319b5SDimitry Andric std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;
43*4ba319b5SDimitry Andric
44*4ba319b5SDimitry Andric /// Storage for register unit liveness.
45*4ba319b5SDimitry Andric LivePhysRegs LiveRegSet;
46*4ba319b5SDimitry Andric
47*4ba319b5SDimitry Andric ReachingDefAnalysis *RDA;
48*4ba319b5SDimitry Andric
49*4ba319b5SDimitry Andric public:
50*4ba319b5SDimitry Andric static char ID; // Pass identification, replacement for typeid
51*4ba319b5SDimitry Andric
BreakFalseDeps()52*4ba319b5SDimitry Andric BreakFalseDeps() : MachineFunctionPass(ID) {
53*4ba319b5SDimitry Andric initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry());
54*4ba319b5SDimitry Andric }
55*4ba319b5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const56*4ba319b5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
57*4ba319b5SDimitry Andric AU.setPreservesAll();
58*4ba319b5SDimitry Andric AU.addRequired<ReachingDefAnalysis>();
59*4ba319b5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
60*4ba319b5SDimitry Andric }
61*4ba319b5SDimitry Andric
62*4ba319b5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
63*4ba319b5SDimitry Andric
getRequiredProperties() const64*4ba319b5SDimitry Andric MachineFunctionProperties getRequiredProperties() const override {
65*4ba319b5SDimitry Andric return MachineFunctionProperties().set(
66*4ba319b5SDimitry Andric MachineFunctionProperties::Property::NoVRegs);
67*4ba319b5SDimitry Andric }
68*4ba319b5SDimitry Andric
69*4ba319b5SDimitry Andric private:
70*4ba319b5SDimitry Andric /// Process he given basic block.
71*4ba319b5SDimitry Andric void processBasicBlock(MachineBasicBlock *MBB);
72*4ba319b5SDimitry Andric
73*4ba319b5SDimitry Andric /// Update def-ages for registers defined by MI.
74*4ba319b5SDimitry Andric /// Also break dependencies on partial defs and undef uses.
75*4ba319b5SDimitry Andric void processDefs(MachineInstr *MI);
76*4ba319b5SDimitry Andric
77*4ba319b5SDimitry Andric /// Helps avoid false dependencies on undef registers by updating the
78*4ba319b5SDimitry Andric /// machine instructions' undef operand to use a register that the instruction
79*4ba319b5SDimitry Andric /// is truly dependent on, or use a register with clearance higher than Pref.
80*4ba319b5SDimitry Andric /// Returns true if it was able to find a true dependency, thus not requiring
81*4ba319b5SDimitry Andric /// a dependency breaking instruction regardless of clearance.
82*4ba319b5SDimitry Andric bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
83*4ba319b5SDimitry Andric unsigned Pref);
84*4ba319b5SDimitry Andric
85*4ba319b5SDimitry Andric /// Return true to if it makes sense to break dependence on a partial
86*4ba319b5SDimitry Andric /// def or undef use.
87*4ba319b5SDimitry Andric bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref);
88*4ba319b5SDimitry Andric
89*4ba319b5SDimitry Andric /// Break false dependencies on undefined register reads.
90*4ba319b5SDimitry Andric /// Walk the block backward computing precise liveness. This is expensive, so
91*4ba319b5SDimitry Andric /// we only do it on demand. Note that the occurrence of undefined register
92*4ba319b5SDimitry Andric /// reads that should be broken is very rare, but when they occur we may have
93*4ba319b5SDimitry Andric /// many in a single block.
94*4ba319b5SDimitry Andric void processUndefReads(MachineBasicBlock *);
95*4ba319b5SDimitry Andric };
96*4ba319b5SDimitry Andric
97*4ba319b5SDimitry Andric } // namespace llvm
98*4ba319b5SDimitry Andric
99*4ba319b5SDimitry Andric #define DEBUG_TYPE "break-false-deps"
100*4ba319b5SDimitry Andric
101*4ba319b5SDimitry Andric char BreakFalseDeps::ID = 0;
102*4ba319b5SDimitry Andric INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)103*4ba319b5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
104*4ba319b5SDimitry Andric INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
105*4ba319b5SDimitry Andric
106*4ba319b5SDimitry Andric FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); }
107*4ba319b5SDimitry Andric
pickBestRegisterForUndef(MachineInstr * MI,unsigned OpIdx,unsigned Pref)108*4ba319b5SDimitry Andric bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
109*4ba319b5SDimitry Andric unsigned Pref) {
110*4ba319b5SDimitry Andric MachineOperand &MO = MI->getOperand(OpIdx);
111*4ba319b5SDimitry Andric assert(MO.isUndef() && "Expected undef machine operand");
112*4ba319b5SDimitry Andric
113*4ba319b5SDimitry Andric unsigned OriginalReg = MO.getReg();
114*4ba319b5SDimitry Andric
115*4ba319b5SDimitry Andric // Update only undef operands that have reg units that are mapped to one root.
116*4ba319b5SDimitry Andric for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
117*4ba319b5SDimitry Andric unsigned NumRoots = 0;
118*4ba319b5SDimitry Andric for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) {
119*4ba319b5SDimitry Andric NumRoots++;
120*4ba319b5SDimitry Andric if (NumRoots > 1)
121*4ba319b5SDimitry Andric return false;
122*4ba319b5SDimitry Andric }
123*4ba319b5SDimitry Andric }
124*4ba319b5SDimitry Andric
125*4ba319b5SDimitry Andric // Get the undef operand's register class
126*4ba319b5SDimitry Andric const TargetRegisterClass *OpRC =
127*4ba319b5SDimitry Andric TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
128*4ba319b5SDimitry Andric
129*4ba319b5SDimitry Andric // If the instruction has a true dependency, we can hide the false depdency
130*4ba319b5SDimitry Andric // behind it.
131*4ba319b5SDimitry Andric for (MachineOperand &CurrMO : MI->operands()) {
132*4ba319b5SDimitry Andric if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
133*4ba319b5SDimitry Andric !OpRC->contains(CurrMO.getReg()))
134*4ba319b5SDimitry Andric continue;
135*4ba319b5SDimitry Andric // We found a true dependency - replace the undef register with the true
136*4ba319b5SDimitry Andric // dependency.
137*4ba319b5SDimitry Andric MO.setReg(CurrMO.getReg());
138*4ba319b5SDimitry Andric return true;
139*4ba319b5SDimitry Andric }
140*4ba319b5SDimitry Andric
141*4ba319b5SDimitry Andric // Go over all registers in the register class and find the register with
142*4ba319b5SDimitry Andric // max clearance or clearance higher than Pref.
143*4ba319b5SDimitry Andric unsigned MaxClearance = 0;
144*4ba319b5SDimitry Andric unsigned MaxClearanceReg = OriginalReg;
145*4ba319b5SDimitry Andric ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
146*4ba319b5SDimitry Andric for (MCPhysReg Reg : Order) {
147*4ba319b5SDimitry Andric unsigned Clearance = RDA->getClearance(MI, Reg);
148*4ba319b5SDimitry Andric if (Clearance <= MaxClearance)
149*4ba319b5SDimitry Andric continue;
150*4ba319b5SDimitry Andric MaxClearance = Clearance;
151*4ba319b5SDimitry Andric MaxClearanceReg = Reg;
152*4ba319b5SDimitry Andric
153*4ba319b5SDimitry Andric if (MaxClearance > Pref)
154*4ba319b5SDimitry Andric break;
155*4ba319b5SDimitry Andric }
156*4ba319b5SDimitry Andric
157*4ba319b5SDimitry Andric // Update the operand if we found a register with better clearance.
158*4ba319b5SDimitry Andric if (MaxClearanceReg != OriginalReg)
159*4ba319b5SDimitry Andric MO.setReg(MaxClearanceReg);
160*4ba319b5SDimitry Andric
161*4ba319b5SDimitry Andric return false;
162*4ba319b5SDimitry Andric }
163*4ba319b5SDimitry Andric
shouldBreakDependence(MachineInstr * MI,unsigned OpIdx,unsigned Pref)164*4ba319b5SDimitry Andric bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
165*4ba319b5SDimitry Andric unsigned Pref) {
166*4ba319b5SDimitry Andric unsigned reg = MI->getOperand(OpIdx).getReg();
167*4ba319b5SDimitry Andric unsigned Clearance = RDA->getClearance(MI, reg);
168*4ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
169*4ba319b5SDimitry Andric
170*4ba319b5SDimitry Andric if (Pref > Clearance) {
171*4ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << ": Break dependency.\n");
172*4ba319b5SDimitry Andric return true;
173*4ba319b5SDimitry Andric }
174*4ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << ": OK .\n");
175*4ba319b5SDimitry Andric return false;
176*4ba319b5SDimitry Andric }
177*4ba319b5SDimitry Andric
processDefs(MachineInstr * MI)178*4ba319b5SDimitry Andric void BreakFalseDeps::processDefs(MachineInstr *MI) {
179*4ba319b5SDimitry Andric assert(!MI->isDebugInstr() && "Won't process debug values");
180*4ba319b5SDimitry Andric
181*4ba319b5SDimitry Andric // Break dependence on undef uses. Do this before updating LiveRegs below.
182*4ba319b5SDimitry Andric unsigned OpNum;
183*4ba319b5SDimitry Andric unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
184*4ba319b5SDimitry Andric if (Pref) {
185*4ba319b5SDimitry Andric bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
186*4ba319b5SDimitry Andric // We don't need to bother trying to break a dependency if this
187*4ba319b5SDimitry Andric // instruction has a true dependency on that register through another
188*4ba319b5SDimitry Andric // operand - we'll have to wait for it to be available regardless.
189*4ba319b5SDimitry Andric if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
190*4ba319b5SDimitry Andric UndefReads.push_back(std::make_pair(MI, OpNum));
191*4ba319b5SDimitry Andric }
192*4ba319b5SDimitry Andric
193*4ba319b5SDimitry Andric const MCInstrDesc &MCID = MI->getDesc();
194*4ba319b5SDimitry Andric for (unsigned i = 0,
195*4ba319b5SDimitry Andric e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
196*4ba319b5SDimitry Andric i != e; ++i) {
197*4ba319b5SDimitry Andric MachineOperand &MO = MI->getOperand(i);
198*4ba319b5SDimitry Andric if (!MO.isReg() || !MO.getReg())
199*4ba319b5SDimitry Andric continue;
200*4ba319b5SDimitry Andric if (MO.isUse())
201*4ba319b5SDimitry Andric continue;
202*4ba319b5SDimitry Andric // Check clearance before partial register updates.
203*4ba319b5SDimitry Andric unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
204*4ba319b5SDimitry Andric if (Pref && shouldBreakDependence(MI, i, Pref))
205*4ba319b5SDimitry Andric TII->breakPartialRegDependency(*MI, i, TRI);
206*4ba319b5SDimitry Andric }
207*4ba319b5SDimitry Andric }
208*4ba319b5SDimitry Andric
processUndefReads(MachineBasicBlock * MBB)209*4ba319b5SDimitry Andric void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
210*4ba319b5SDimitry Andric if (UndefReads.empty())
211*4ba319b5SDimitry Andric return;
212*4ba319b5SDimitry Andric
213*4ba319b5SDimitry Andric // Collect this block's live out register units.
214*4ba319b5SDimitry Andric LiveRegSet.init(*TRI);
215*4ba319b5SDimitry Andric // We do not need to care about pristine registers as they are just preserved
216*4ba319b5SDimitry Andric // but not actually used in the function.
217*4ba319b5SDimitry Andric LiveRegSet.addLiveOutsNoPristines(*MBB);
218*4ba319b5SDimitry Andric
219*4ba319b5SDimitry Andric MachineInstr *UndefMI = UndefReads.back().first;
220*4ba319b5SDimitry Andric unsigned OpIdx = UndefReads.back().second;
221*4ba319b5SDimitry Andric
222*4ba319b5SDimitry Andric for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
223*4ba319b5SDimitry Andric // Update liveness, including the current instruction's defs.
224*4ba319b5SDimitry Andric LiveRegSet.stepBackward(I);
225*4ba319b5SDimitry Andric
226*4ba319b5SDimitry Andric if (UndefMI == &I) {
227*4ba319b5SDimitry Andric if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
228*4ba319b5SDimitry Andric TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
229*4ba319b5SDimitry Andric
230*4ba319b5SDimitry Andric UndefReads.pop_back();
231*4ba319b5SDimitry Andric if (UndefReads.empty())
232*4ba319b5SDimitry Andric return;
233*4ba319b5SDimitry Andric
234*4ba319b5SDimitry Andric UndefMI = UndefReads.back().first;
235*4ba319b5SDimitry Andric OpIdx = UndefReads.back().second;
236*4ba319b5SDimitry Andric }
237*4ba319b5SDimitry Andric }
238*4ba319b5SDimitry Andric }
239*4ba319b5SDimitry Andric
processBasicBlock(MachineBasicBlock * MBB)240*4ba319b5SDimitry Andric void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) {
241*4ba319b5SDimitry Andric UndefReads.clear();
242*4ba319b5SDimitry Andric // If this block is not done, it makes little sense to make any decisions
243*4ba319b5SDimitry Andric // based on clearance information. We need to make a second pass anyway,
244*4ba319b5SDimitry Andric // and by then we'll have better information, so we can avoid doing the work
245*4ba319b5SDimitry Andric // to try and break dependencies now.
246*4ba319b5SDimitry Andric for (MachineInstr &MI : *MBB) {
247*4ba319b5SDimitry Andric if (!MI.isDebugInstr())
248*4ba319b5SDimitry Andric processDefs(&MI);
249*4ba319b5SDimitry Andric }
250*4ba319b5SDimitry Andric processUndefReads(MBB);
251*4ba319b5SDimitry Andric }
252*4ba319b5SDimitry Andric
runOnMachineFunction(MachineFunction & mf)253*4ba319b5SDimitry Andric bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
254*4ba319b5SDimitry Andric if (skipFunction(mf.getFunction()))
255*4ba319b5SDimitry Andric return false;
256*4ba319b5SDimitry Andric MF = &mf;
257*4ba319b5SDimitry Andric TII = MF->getSubtarget().getInstrInfo();
258*4ba319b5SDimitry Andric TRI = MF->getSubtarget().getRegisterInfo();
259*4ba319b5SDimitry Andric RDA = &getAnalysis<ReachingDefAnalysis>();
260*4ba319b5SDimitry Andric
261*4ba319b5SDimitry Andric RegClassInfo.runOnMachineFunction(mf);
262*4ba319b5SDimitry Andric
263*4ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
264*4ba319b5SDimitry Andric
265*4ba319b5SDimitry Andric // Traverse the basic blocks.
266*4ba319b5SDimitry Andric for (MachineBasicBlock &MBB : mf) {
267*4ba319b5SDimitry Andric processBasicBlock(&MBB);
268*4ba319b5SDimitry Andric }
269*4ba319b5SDimitry Andric
270*4ba319b5SDimitry Andric return false;
271*4ba319b5SDimitry Andric }
272