1*b5893f02SDimitry Andric //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
2*b5893f02SDimitry Andric //
3*b5893f02SDimitry Andric //                     The LLVM Compiler Infrastructure
4*b5893f02SDimitry Andric //
5*b5893f02SDimitry Andric // This file is distributed under the University of Illinois Open Source
6*b5893f02SDimitry Andric // License. See LICENSE.TXT for details.
7*b5893f02SDimitry Andric //
8*b5893f02SDimitry Andric //===----------------------------------------------------------------------===//
9*b5893f02SDimitry Andric /// \file
10*b5893f02SDimitry Andric /// This pass inserts changes to the Mode register settings as required.
11*b5893f02SDimitry Andric /// Note that currently it only deals with the Double Precision Floating Point
12*b5893f02SDimitry Andric /// rounding mode setting, but is intended to be generic enough to be easily
13*b5893f02SDimitry Andric /// expanded.
14*b5893f02SDimitry Andric ///
15*b5893f02SDimitry Andric //===----------------------------------------------------------------------===//
16*b5893f02SDimitry Andric //
17*b5893f02SDimitry Andric #include "AMDGPU.h"
18*b5893f02SDimitry Andric #include "AMDGPUInstrInfo.h"
19*b5893f02SDimitry Andric #include "AMDGPUSubtarget.h"
20*b5893f02SDimitry Andric #include "SIInstrInfo.h"
21*b5893f02SDimitry Andric #include "SIMachineFunctionInfo.h"
22*b5893f02SDimitry Andric #include "llvm/ADT/Statistic.h"
23*b5893f02SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
24*b5893f02SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
25*b5893f02SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
26*b5893f02SDimitry Andric #include "llvm/IR/Constants.h"
27*b5893f02SDimitry Andric #include "llvm/IR/Function.h"
28*b5893f02SDimitry Andric #include "llvm/IR/LLVMContext.h"
29*b5893f02SDimitry Andric #include "llvm/Support/Debug.h"
30*b5893f02SDimitry Andric #include "llvm/Support/raw_ostream.h"
31*b5893f02SDimitry Andric #include "llvm/Target/TargetMachine.h"
32*b5893f02SDimitry Andric #include <queue>
33*b5893f02SDimitry Andric 
34*b5893f02SDimitry Andric #define DEBUG_TYPE "si-mode-register"
35*b5893f02SDimitry Andric 
36*b5893f02SDimitry Andric STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
37*b5893f02SDimitry Andric 
38*b5893f02SDimitry Andric using namespace llvm;
39*b5893f02SDimitry Andric 
// A partial description of the Mode register: which bits have a known value
// and what that value is.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value.
  unsigned Mask;
  // Mode holds the values of the known bits. Invariant: (Mode & ~Mask) == 0.
  unsigned Mode;

  // The null Status: nothing is known / nothing is required.
  Status() : Mask(0), Mode(0) {}

  // Build a Status from a mask and a mode value. The stored Mode is clipped to
  // Mask so that bits outside the mask never leak into later comparisons.
  // Note: the clipping is done in the initializer list on purpose; inside the
  // constructor body the names Mask/Mode refer to the parameters, not the
  // members.
  Status(unsigned Mask, unsigned Mode) : Mask(Mask), Mode(Mode & Mask) {}

  // Merge two status values such that only values that don't conflict are
  // preserved; where both define a bit, the value from S wins.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  // Merge an unknown value by using the unknown value's mask to remove bits
  // from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  // Intersect two Status values to produce a mode and mask that is a subset
  // of both values: a bit survives only if it is known in both and both agree
  // on its value.
  Status intersect(const Status &S) const {
    // (Mode ^ ~S.Mode) has a '1' wherever the two modes hold the same bit.
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  // Produce the delta required to change the Mode to the required Mode S:
  // the bits S needs that either differ from ours or are unknown to us.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  // True if every bit required by S is known here and already has the value
  // S requires.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  // True if S can be folded into this Status without conflict: either the two
  // masks are disjoint or S is already satisfied.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
91*b5893f02SDimitry Andric 
92*b5893f02SDimitry Andric class BlockData {
93*b5893f02SDimitry Andric public:
94*b5893f02SDimitry Andric   // The Status that represents the mode register settings required by the
95*b5893f02SDimitry Andric   // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
96*b5893f02SDimitry Andric   Status Require;
97*b5893f02SDimitry Andric 
98*b5893f02SDimitry Andric   // The Status that represents the net changes to the Mode register made by
99*b5893f02SDimitry Andric   // this block, Calculated in Phase 1.
100*b5893f02SDimitry Andric   Status Change;
101*b5893f02SDimitry Andric 
102*b5893f02SDimitry Andric   // The Status that represents the mode register settings on exit from this
103*b5893f02SDimitry Andric   // block. Calculated in Phase 2.
104*b5893f02SDimitry Andric   Status Exit;
105*b5893f02SDimitry Andric 
106*b5893f02SDimitry Andric   // The Status that represents the intersection of exit Mode register settings
107*b5893f02SDimitry Andric   // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
108*b5893f02SDimitry Andric   Status Pred;
109*b5893f02SDimitry Andric 
110*b5893f02SDimitry Andric   // In Phase 1 we record the first instruction that has a mode requirement,
111*b5893f02SDimitry Andric   // which is used in Phase 3 if we need to insert a mode change.
112*b5893f02SDimitry Andric   MachineInstr *FirstInsertionPoint;
113*b5893f02SDimitry Andric 
BlockData()114*b5893f02SDimitry Andric   BlockData() : FirstInsertionPoint(nullptr) {};
115*b5893f02SDimitry Andric };
116*b5893f02SDimitry Andric 
117*b5893f02SDimitry Andric namespace {
118*b5893f02SDimitry Andric 
119*b5893f02SDimitry Andric class SIModeRegister : public MachineFunctionPass {
120*b5893f02SDimitry Andric public:
121*b5893f02SDimitry Andric   static char ID;
122*b5893f02SDimitry Andric 
123*b5893f02SDimitry Andric   std::vector<std::unique_ptr<BlockData>> BlockInfo;
124*b5893f02SDimitry Andric   std::queue<MachineBasicBlock *> Phase2List;
125*b5893f02SDimitry Andric 
126*b5893f02SDimitry Andric   // The default mode register setting currently only caters for the floating
127*b5893f02SDimitry Andric   // point double precision rounding mode.
128*b5893f02SDimitry Andric   // We currently assume the default rounding mode is Round to Nearest
129*b5893f02SDimitry Andric   // NOTE: this should come from a per function rounding mode setting once such
130*b5893f02SDimitry Andric   // a setting exists.
131*b5893f02SDimitry Andric   unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
132*b5893f02SDimitry Andric   Status DefaultStatus =
133*b5893f02SDimitry Andric       Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
134*b5893f02SDimitry Andric 
135*b5893f02SDimitry Andric public:
SIModeRegister()136*b5893f02SDimitry Andric   SIModeRegister() : MachineFunctionPass(ID) {}
137*b5893f02SDimitry Andric 
138*b5893f02SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
139*b5893f02SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const140*b5893f02SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
141*b5893f02SDimitry Andric     AU.setPreservesCFG();
142*b5893f02SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
143*b5893f02SDimitry Andric   }
144*b5893f02SDimitry Andric 
145*b5893f02SDimitry Andric   void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
146*b5893f02SDimitry Andric 
147*b5893f02SDimitry Andric   void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
148*b5893f02SDimitry Andric 
149*b5893f02SDimitry Andric   void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
150*b5893f02SDimitry Andric 
151*b5893f02SDimitry Andric   Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
152*b5893f02SDimitry Andric 
153*b5893f02SDimitry Andric   void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
154*b5893f02SDimitry Andric                     const SIInstrInfo *TII, Status InstrMode);
155*b5893f02SDimitry Andric };
156*b5893f02SDimitry Andric } // End anonymous namespace.
157*b5893f02SDimitry Andric 
158*b5893f02SDimitry Andric INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
159*b5893f02SDimitry Andric                 "Insert required mode register values", false, false)
160*b5893f02SDimitry Andric 
161*b5893f02SDimitry Andric char SIModeRegister::ID = 0;
162*b5893f02SDimitry Andric 
163*b5893f02SDimitry Andric char &llvm::SIModeRegisterID = SIModeRegister::ID;
164*b5893f02SDimitry Andric 
createSIModeRegisterPass()165*b5893f02SDimitry Andric FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
166*b5893f02SDimitry Andric 
167*b5893f02SDimitry Andric // Determine the Mode register setting required for this instruction.
168*b5893f02SDimitry Andric // Instructions which don't use the Mode register return a null Status.
169*b5893f02SDimitry Andric // Note this currently only deals with instructions that use the floating point
170*b5893f02SDimitry Andric // double precision setting.
getInstructionMode(MachineInstr & MI,const SIInstrInfo * TII)171*b5893f02SDimitry Andric Status SIModeRegister::getInstructionMode(MachineInstr &MI,
172*b5893f02SDimitry Andric                                           const SIInstrInfo *TII) {
173*b5893f02SDimitry Andric   if (TII->usesFPDPRounding(MI)) {
174*b5893f02SDimitry Andric     switch (MI.getOpcode()) {
175*b5893f02SDimitry Andric     case AMDGPU::V_INTERP_P1LL_F16:
176*b5893f02SDimitry Andric     case AMDGPU::V_INTERP_P1LV_F16:
177*b5893f02SDimitry Andric     case AMDGPU::V_INTERP_P2_F16:
178*b5893f02SDimitry Andric       // f16 interpolation instructions need double precision round to zero
179*b5893f02SDimitry Andric       return Status(FP_ROUND_MODE_DP(3),
180*b5893f02SDimitry Andric                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
181*b5893f02SDimitry Andric     default:
182*b5893f02SDimitry Andric       return DefaultStatus;
183*b5893f02SDimitry Andric     }
184*b5893f02SDimitry Andric   }
185*b5893f02SDimitry Andric   return Status();
186*b5893f02SDimitry Andric }
187*b5893f02SDimitry Andric 
188*b5893f02SDimitry Andric // Insert a setreg instruction to update the Mode register.
189*b5893f02SDimitry Andric // It is possible (though unlikely) for an instruction to require a change to
190*b5893f02SDimitry Andric // the value of disjoint parts of the Mode register when we don't know the
191*b5893f02SDimitry Andric // value of the intervening bits. In that case we need to use more than one
192*b5893f02SDimitry Andric // setreg instruction.
insertSetreg(MachineBasicBlock & MBB,MachineInstr * MI,const SIInstrInfo * TII,Status InstrMode)193*b5893f02SDimitry Andric void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
194*b5893f02SDimitry Andric                                   const SIInstrInfo *TII, Status InstrMode) {
195*b5893f02SDimitry Andric   while (InstrMode.Mask) {
196*b5893f02SDimitry Andric     unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
197*b5893f02SDimitry Andric     unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
198*b5893f02SDimitry Andric     unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
199*b5893f02SDimitry Andric     BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
200*b5893f02SDimitry Andric         .addImm(Value)
201*b5893f02SDimitry Andric         .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
202*b5893f02SDimitry Andric                 (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
203*b5893f02SDimitry Andric                 (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
204*b5893f02SDimitry Andric     ++NumSetregInserted;
205*b5893f02SDimitry Andric     InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
206*b5893f02SDimitry Andric   }
207*b5893f02SDimitry Andric }
208*b5893f02SDimitry Andric 
209*b5893f02SDimitry Andric // In Phase 1 we iterate through the instructions of the block and for each
210*b5893f02SDimitry Andric // instruction we get its mode usage. If the instruction uses the Mode register
211*b5893f02SDimitry Andric // we:
212*b5893f02SDimitry Andric // - update the Change status, which tracks the changes to the Mode register
213*b5893f02SDimitry Andric //   made by this block
214*b5893f02SDimitry Andric // - if this instruction's requirements are compatible with the current setting
215*b5893f02SDimitry Andric //   of the Mode register we merge the modes
216*b5893f02SDimitry Andric // - if it isn't compatible and an InsertionPoint isn't set, then we set the
217*b5893f02SDimitry Andric //   InsertionPoint to the current instruction, and we remember the current
218*b5893f02SDimitry Andric //   mode
219*b5893f02SDimitry Andric // - if it isn't compatible and InsertionPoint is set we insert a seteg before
220*b5893f02SDimitry Andric //   that instruction (unless this instruction forms part of the block's
221*b5893f02SDimitry Andric //   entry requirements in which case the insertion is deferred until Phase 3
222*b5893f02SDimitry Andric //   when predecessor exit values are known), and move the insertion point to
223*b5893f02SDimitry Andric //   this instruction
224*b5893f02SDimitry Andric // - if this is a setreg instruction we treat it as an incompatible instruction.
225*b5893f02SDimitry Andric //   This is sub-optimal but avoids some nasty corner cases, and is expected to
226*b5893f02SDimitry Andric //   occur very rarely.
227*b5893f02SDimitry Andric // - on exit we have set the Require, Change, and initial Exit modes.
processBlockPhase1(MachineBasicBlock & MBB,const SIInstrInfo * TII)228*b5893f02SDimitry Andric void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
229*b5893f02SDimitry Andric                                         const SIInstrInfo *TII) {
230*b5893f02SDimitry Andric   auto NewInfo = llvm::make_unique<BlockData>();
231*b5893f02SDimitry Andric   MachineInstr *InsertionPoint = nullptr;
232*b5893f02SDimitry Andric   // RequirePending is used to indicate whether we are collecting the initial
233*b5893f02SDimitry Andric   // requirements for the block, and need to defer the first InsertionPoint to
234*b5893f02SDimitry Andric   // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
235*b5893f02SDimitry Andric   // we discover an explict setreg that means this block doesn't have any
236*b5893f02SDimitry Andric   // initial requirements.
237*b5893f02SDimitry Andric   bool RequirePending = true;
238*b5893f02SDimitry Andric   Status IPChange;
239*b5893f02SDimitry Andric   for (MachineInstr &MI : MBB) {
240*b5893f02SDimitry Andric     Status InstrMode = getInstructionMode(MI, TII);
241*b5893f02SDimitry Andric     if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
242*b5893f02SDimitry Andric         (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
243*b5893f02SDimitry Andric       // We preserve any explicit mode register setreg instruction we encounter,
244*b5893f02SDimitry Andric       // as we assume it has been inserted by a higher authority (this is
245*b5893f02SDimitry Andric       // likely to be a very rare occurrence).
246*b5893f02SDimitry Andric       unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
247*b5893f02SDimitry Andric       if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
248*b5893f02SDimitry Andric           AMDGPU::Hwreg::ID_MODE)
249*b5893f02SDimitry Andric         continue;
250*b5893f02SDimitry Andric 
251*b5893f02SDimitry Andric       unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
252*b5893f02SDimitry Andric                         AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
253*b5893f02SDimitry Andric                        1;
254*b5893f02SDimitry Andric       unsigned Offset =
255*b5893f02SDimitry Andric           (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
256*b5893f02SDimitry Andric       unsigned Mask = ((1 << Width) - 1) << Offset;
257*b5893f02SDimitry Andric 
258*b5893f02SDimitry Andric       // If an InsertionPoint is set we will insert a setreg there.
259*b5893f02SDimitry Andric       if (InsertionPoint) {
260*b5893f02SDimitry Andric         insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
261*b5893f02SDimitry Andric         InsertionPoint = nullptr;
262*b5893f02SDimitry Andric       }
263*b5893f02SDimitry Andric       // If this is an immediate then we know the value being set, but if it is
264*b5893f02SDimitry Andric       // not an immediate then we treat the modified bits of the mode register
265*b5893f02SDimitry Andric       // as unknown.
266*b5893f02SDimitry Andric       if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
267*b5893f02SDimitry Andric         unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
268*b5893f02SDimitry Andric         unsigned Mode = (Val << Offset) & Mask;
269*b5893f02SDimitry Andric         Status Setreg = Status(Mask, Mode);
270*b5893f02SDimitry Andric         // If we haven't already set the initial requirements for the block we
271*b5893f02SDimitry Andric         // don't need to as the requirements start from this explicit setreg.
272*b5893f02SDimitry Andric         RequirePending = false;
273*b5893f02SDimitry Andric         NewInfo->Change = NewInfo->Change.merge(Setreg);
274*b5893f02SDimitry Andric       } else {
275*b5893f02SDimitry Andric         NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
276*b5893f02SDimitry Andric       }
277*b5893f02SDimitry Andric     } else if (!NewInfo->Change.isCompatible(InstrMode)) {
278*b5893f02SDimitry Andric       // This instruction uses the Mode register and its requirements aren't
279*b5893f02SDimitry Andric       // compatible with the current mode.
280*b5893f02SDimitry Andric       if (InsertionPoint) {
281*b5893f02SDimitry Andric         // If the required mode change cannot be included in the current
282*b5893f02SDimitry Andric         // InsertionPoint changes, we need a setreg and start a new
283*b5893f02SDimitry Andric         // InsertionPoint.
284*b5893f02SDimitry Andric         if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
285*b5893f02SDimitry Andric           if (RequirePending) {
286*b5893f02SDimitry Andric             // This is the first insertionPoint in the block so we will defer
287*b5893f02SDimitry Andric             // the insertion of the setreg to Phase 3 where we know whether or
288*b5893f02SDimitry Andric             // not it is actually needed.
289*b5893f02SDimitry Andric             NewInfo->FirstInsertionPoint = InsertionPoint;
290*b5893f02SDimitry Andric             NewInfo->Require = NewInfo->Change;
291*b5893f02SDimitry Andric             RequirePending = false;
292*b5893f02SDimitry Andric           } else {
293*b5893f02SDimitry Andric             insertSetreg(MBB, InsertionPoint, TII,
294*b5893f02SDimitry Andric                          IPChange.delta(NewInfo->Change));
295*b5893f02SDimitry Andric             IPChange = NewInfo->Change;
296*b5893f02SDimitry Andric           }
297*b5893f02SDimitry Andric           // Set the new InsertionPoint
298*b5893f02SDimitry Andric           InsertionPoint = &MI;
299*b5893f02SDimitry Andric         }
300*b5893f02SDimitry Andric         NewInfo->Change = NewInfo->Change.merge(InstrMode);
301*b5893f02SDimitry Andric       } else {
302*b5893f02SDimitry Andric         // No InsertionPoint is currently set - this is either the first in
303*b5893f02SDimitry Andric         // the block or we have previously seen an explicit setreg.
304*b5893f02SDimitry Andric         InsertionPoint = &MI;
305*b5893f02SDimitry Andric         IPChange = NewInfo->Change;
306*b5893f02SDimitry Andric         NewInfo->Change = NewInfo->Change.merge(InstrMode);
307*b5893f02SDimitry Andric       }
308*b5893f02SDimitry Andric     }
309*b5893f02SDimitry Andric   }
310*b5893f02SDimitry Andric   if (RequirePending) {
311*b5893f02SDimitry Andric     // If we haven't yet set the initial requirements for the block we set them
312*b5893f02SDimitry Andric     // now.
313*b5893f02SDimitry Andric     NewInfo->FirstInsertionPoint = InsertionPoint;
314*b5893f02SDimitry Andric     NewInfo->Require = NewInfo->Change;
315*b5893f02SDimitry Andric   } else if (InsertionPoint) {
316*b5893f02SDimitry Andric     // We need to insert a setreg at the InsertionPoint
317*b5893f02SDimitry Andric     insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
318*b5893f02SDimitry Andric   }
319*b5893f02SDimitry Andric   NewInfo->Exit = NewInfo->Change;
320*b5893f02SDimitry Andric   BlockInfo[MBB.getNumber()] = std::move(NewInfo);
321*b5893f02SDimitry Andric }
322*b5893f02SDimitry Andric 
323*b5893f02SDimitry Andric // In Phase 2 we revisit each block and calculate the common Mode register
324*b5893f02SDimitry Andric // value provided by all predecessor blocks. If the Exit value for the block
325*b5893f02SDimitry Andric // is changed, then we add the successor blocks to the worklist so that the
326*b5893f02SDimitry Andric // exit value is propagated.
processBlockPhase2(MachineBasicBlock & MBB,const SIInstrInfo * TII)327*b5893f02SDimitry Andric void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
328*b5893f02SDimitry Andric                                         const SIInstrInfo *TII) {
329*b5893f02SDimitry Andric //  BlockData *BI = BlockInfo[MBB.getNumber()];
330*b5893f02SDimitry Andric   unsigned ThisBlock = MBB.getNumber();
331*b5893f02SDimitry Andric   if (MBB.pred_empty()) {
332*b5893f02SDimitry Andric     // There are no predecessors, so use the default starting status.
333*b5893f02SDimitry Andric     BlockInfo[ThisBlock]->Pred = DefaultStatus;
334*b5893f02SDimitry Andric   } else {
335*b5893f02SDimitry Andric     // Build a status that is common to all the predecessors by intersecting
336*b5893f02SDimitry Andric     // all the predecessor exit status values.
337*b5893f02SDimitry Andric     MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
338*b5893f02SDimitry Andric     MachineBasicBlock &PB = *(*P);
339*b5893f02SDimitry Andric     BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
340*b5893f02SDimitry Andric 
341*b5893f02SDimitry Andric     for (P = std::next(P); P != E; P = std::next(P)) {
342*b5893f02SDimitry Andric       MachineBasicBlock *Pred = *P;
343*b5893f02SDimitry Andric       BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
344*b5893f02SDimitry Andric     }
345*b5893f02SDimitry Andric   }
346*b5893f02SDimitry Andric   Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
347*b5893f02SDimitry Andric   if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
348*b5893f02SDimitry Andric     BlockInfo[ThisBlock]->Exit = TmpStatus;
349*b5893f02SDimitry Andric     // Add the successors to the work list so we can propagate the changed exit
350*b5893f02SDimitry Andric     // status.
351*b5893f02SDimitry Andric     for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
352*b5893f02SDimitry Andric                                           E = MBB.succ_end();
353*b5893f02SDimitry Andric          S != E; S = std::next(S)) {
354*b5893f02SDimitry Andric       MachineBasicBlock &B = *(*S);
355*b5893f02SDimitry Andric       Phase2List.push(&B);
356*b5893f02SDimitry Andric     }
357*b5893f02SDimitry Andric   }
358*b5893f02SDimitry Andric }
359*b5893f02SDimitry Andric 
360*b5893f02SDimitry Andric // In Phase 3 we revisit each block and if it has an insertion point defined we
361*b5893f02SDimitry Andric // check whether the predecessor mode meets the block's entry requirements. If
362*b5893f02SDimitry Andric // not we insert an appropriate setreg instruction to modify the Mode register.
processBlockPhase3(MachineBasicBlock & MBB,const SIInstrInfo * TII)363*b5893f02SDimitry Andric void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
364*b5893f02SDimitry Andric                                         const SIInstrInfo *TII) {
365*b5893f02SDimitry Andric //  BlockData *BI = BlockInfo[MBB.getNumber()];
366*b5893f02SDimitry Andric   unsigned ThisBlock = MBB.getNumber();
367*b5893f02SDimitry Andric   if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
368*b5893f02SDimitry Andric     Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
369*b5893f02SDimitry Andric     if (BlockInfo[ThisBlock]->FirstInsertionPoint)
370*b5893f02SDimitry Andric       insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
371*b5893f02SDimitry Andric     else
372*b5893f02SDimitry Andric       insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
373*b5893f02SDimitry Andric   }
374*b5893f02SDimitry Andric }
375*b5893f02SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)376*b5893f02SDimitry Andric bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
377*b5893f02SDimitry Andric   BlockInfo.resize(MF.getNumBlockIDs());
378*b5893f02SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
379*b5893f02SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
380*b5893f02SDimitry Andric 
381*b5893f02SDimitry Andric   // Processing is performed in a number of phases
382*b5893f02SDimitry Andric 
383*b5893f02SDimitry Andric   // Phase 1 - determine the initial mode required by each block, and add setreg
384*b5893f02SDimitry Andric   // instructions for intra block requirements.
385*b5893f02SDimitry Andric   for (MachineBasicBlock &BB : MF)
386*b5893f02SDimitry Andric     processBlockPhase1(BB, TII);
387*b5893f02SDimitry Andric 
388*b5893f02SDimitry Andric   // Phase 2 - determine the exit mode from each block. We add all blocks to the
389*b5893f02SDimitry Andric   // list here, but will also add any that need to be revisited during Phase 2
390*b5893f02SDimitry Andric   // processing.
391*b5893f02SDimitry Andric   for (MachineBasicBlock &BB : MF)
392*b5893f02SDimitry Andric     Phase2List.push(&BB);
393*b5893f02SDimitry Andric   while (!Phase2List.empty()) {
394*b5893f02SDimitry Andric     processBlockPhase2(*Phase2List.front(), TII);
395*b5893f02SDimitry Andric     Phase2List.pop();
396*b5893f02SDimitry Andric   }
397*b5893f02SDimitry Andric 
398*b5893f02SDimitry Andric   // Phase 3 - add an initial setreg to each block where the required entry mode
399*b5893f02SDimitry Andric   // is not satisfied by the exit mode of all its predecessors.
400*b5893f02SDimitry Andric   for (MachineBasicBlock &BB : MF)
401*b5893f02SDimitry Andric     processBlockPhase3(BB, TII);
402*b5893f02SDimitry Andric 
403*b5893f02SDimitry Andric   BlockInfo.clear();
404*b5893f02SDimitry Andric 
405*b5893f02SDimitry Andric   return NumSetregInserted > 0;
406*b5893f02SDimitry Andric }
407