//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass inserts changes to the Mode register settings as required.
/// Note that currently it only deals with the Double Precision Floating Point
/// rounding mode setting, but is intended to be generic enough to be easily
/// expanded.
///
//===----------------------------------------------------------------------===//
//
17*b5893f02SDimitry Andric #include "AMDGPU.h"
18*b5893f02SDimitry Andric #include "AMDGPUInstrInfo.h"
19*b5893f02SDimitry Andric #include "AMDGPUSubtarget.h"
20*b5893f02SDimitry Andric #include "SIInstrInfo.h"
21*b5893f02SDimitry Andric #include "SIMachineFunctionInfo.h"
22*b5893f02SDimitry Andric #include "llvm/ADT/Statistic.h"
23*b5893f02SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
24*b5893f02SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
25*b5893f02SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
26*b5893f02SDimitry Andric #include "llvm/IR/Constants.h"
27*b5893f02SDimitry Andric #include "llvm/IR/Function.h"
28*b5893f02SDimitry Andric #include "llvm/IR/LLVMContext.h"
29*b5893f02SDimitry Andric #include "llvm/Support/Debug.h"
30*b5893f02SDimitry Andric #include "llvm/Support/raw_ostream.h"
31*b5893f02SDimitry Andric #include "llvm/Target/TargetMachine.h"
32*b5893f02SDimitry Andric #include <queue>
33*b5893f02SDimitry Andric
34*b5893f02SDimitry Andric #define DEBUG_TYPE "si-mode-register"
35*b5893f02SDimitry Andric
36*b5893f02SDimitry Andric STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
37*b5893f02SDimitry Andric
38*b5893f02SDimitry Andric using namespace llvm;
39*b5893f02SDimitry Andric
/// A partial description of the Mode register: which bits have a known value
/// (Mask) and what that value is (Mode).
///
/// Invariant maintained by the constructors: Mode never has a bit set outside
/// of Mask, so two Status values describing the same knowledge always compare
/// equal.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value
  unsigned Mask;
  // The values of the known bits; bits not covered by Mask are always zero.
  unsigned Mode;

  /// Construct a Status in which no bit is known.
  Status() : Mask(0), Mode(0) {}

  /// Construct a Status from a mask and a mode value. Bits of \p NewMode that
  /// lie outside \p NewMask are discarded.
  ///
  /// The parameters are deliberately not named Mask/Mode: with shadowing
  /// names a statement such as `Mode &= Mask;` in the body would modify the
  /// parameter rather than the member and leave the member unmasked.
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  /// Merge two status values; for bits known in both, the value from \p S
  /// takes precedence.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  /// Merge an unknown value by using the unknown value's mask to remove bits
  /// from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  /// Intersect two Status values to produce a mode and mask that is a subset
  /// of both values: a bit stays known only if it is known in both operands
  /// and has the same value in both.
  Status intersect(const Status &S) const {
    // (Mode ^ ~S.Mode) has a '1' wherever the two mode bits are equal.
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  /// Produce the delta required to change this Mode to the required Mode
  /// \p S: the bits of S that either differ from this value or are not known
  /// here.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  /// True if every bit known in \p S is also known here with the same value.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  /// True if \p S either shares no known bits with this value or is
  /// compatible with it.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
91*b5893f02SDimitry Andric
92*b5893f02SDimitry Andric class BlockData {
93*b5893f02SDimitry Andric public:
94*b5893f02SDimitry Andric // The Status that represents the mode register settings required by the
95*b5893f02SDimitry Andric // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
96*b5893f02SDimitry Andric Status Require;
97*b5893f02SDimitry Andric
98*b5893f02SDimitry Andric // The Status that represents the net changes to the Mode register made by
99*b5893f02SDimitry Andric // this block, Calculated in Phase 1.
100*b5893f02SDimitry Andric Status Change;
101*b5893f02SDimitry Andric
102*b5893f02SDimitry Andric // The Status that represents the mode register settings on exit from this
103*b5893f02SDimitry Andric // block. Calculated in Phase 2.
104*b5893f02SDimitry Andric Status Exit;
105*b5893f02SDimitry Andric
106*b5893f02SDimitry Andric // The Status that represents the intersection of exit Mode register settings
107*b5893f02SDimitry Andric // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
108*b5893f02SDimitry Andric Status Pred;
109*b5893f02SDimitry Andric
110*b5893f02SDimitry Andric // In Phase 1 we record the first instruction that has a mode requirement,
111*b5893f02SDimitry Andric // which is used in Phase 3 if we need to insert a mode change.
112*b5893f02SDimitry Andric MachineInstr *FirstInsertionPoint;
113*b5893f02SDimitry Andric
BlockData()114*b5893f02SDimitry Andric BlockData() : FirstInsertionPoint(nullptr) {};
115*b5893f02SDimitry Andric };
116*b5893f02SDimitry Andric
117*b5893f02SDimitry Andric namespace {
118*b5893f02SDimitry Andric
119*b5893f02SDimitry Andric class SIModeRegister : public MachineFunctionPass {
120*b5893f02SDimitry Andric public:
121*b5893f02SDimitry Andric static char ID;
122*b5893f02SDimitry Andric
123*b5893f02SDimitry Andric std::vector<std::unique_ptr<BlockData>> BlockInfo;
124*b5893f02SDimitry Andric std::queue<MachineBasicBlock *> Phase2List;
125*b5893f02SDimitry Andric
126*b5893f02SDimitry Andric // The default mode register setting currently only caters for the floating
127*b5893f02SDimitry Andric // point double precision rounding mode.
128*b5893f02SDimitry Andric // We currently assume the default rounding mode is Round to Nearest
129*b5893f02SDimitry Andric // NOTE: this should come from a per function rounding mode setting once such
130*b5893f02SDimitry Andric // a setting exists.
131*b5893f02SDimitry Andric unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
132*b5893f02SDimitry Andric Status DefaultStatus =
133*b5893f02SDimitry Andric Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
134*b5893f02SDimitry Andric
135*b5893f02SDimitry Andric public:
SIModeRegister()136*b5893f02SDimitry Andric SIModeRegister() : MachineFunctionPass(ID) {}
137*b5893f02SDimitry Andric
138*b5893f02SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
139*b5893f02SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const140*b5893f02SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
141*b5893f02SDimitry Andric AU.setPreservesCFG();
142*b5893f02SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
143*b5893f02SDimitry Andric }
144*b5893f02SDimitry Andric
145*b5893f02SDimitry Andric void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
146*b5893f02SDimitry Andric
147*b5893f02SDimitry Andric void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
148*b5893f02SDimitry Andric
149*b5893f02SDimitry Andric void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
150*b5893f02SDimitry Andric
151*b5893f02SDimitry Andric Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
152*b5893f02SDimitry Andric
153*b5893f02SDimitry Andric void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
154*b5893f02SDimitry Andric const SIInstrInfo *TII, Status InstrMode);
155*b5893f02SDimitry Andric };
156*b5893f02SDimitry Andric } // End anonymous namespace.
157*b5893f02SDimitry Andric
158*b5893f02SDimitry Andric INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
159*b5893f02SDimitry Andric "Insert required mode register values", false, false)
160*b5893f02SDimitry Andric
161*b5893f02SDimitry Andric char SIModeRegister::ID = 0;
162*b5893f02SDimitry Andric
163*b5893f02SDimitry Andric char &llvm::SIModeRegisterID = SIModeRegister::ID;
164*b5893f02SDimitry Andric
createSIModeRegisterPass()165*b5893f02SDimitry Andric FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
166*b5893f02SDimitry Andric
167*b5893f02SDimitry Andric // Determine the Mode register setting required for this instruction.
168*b5893f02SDimitry Andric // Instructions which don't use the Mode register return a null Status.
169*b5893f02SDimitry Andric // Note this currently only deals with instructions that use the floating point
170*b5893f02SDimitry Andric // double precision setting.
getInstructionMode(MachineInstr & MI,const SIInstrInfo * TII)171*b5893f02SDimitry Andric Status SIModeRegister::getInstructionMode(MachineInstr &MI,
172*b5893f02SDimitry Andric const SIInstrInfo *TII) {
173*b5893f02SDimitry Andric if (TII->usesFPDPRounding(MI)) {
174*b5893f02SDimitry Andric switch (MI.getOpcode()) {
175*b5893f02SDimitry Andric case AMDGPU::V_INTERP_P1LL_F16:
176*b5893f02SDimitry Andric case AMDGPU::V_INTERP_P1LV_F16:
177*b5893f02SDimitry Andric case AMDGPU::V_INTERP_P2_F16:
178*b5893f02SDimitry Andric // f16 interpolation instructions need double precision round to zero
179*b5893f02SDimitry Andric return Status(FP_ROUND_MODE_DP(3),
180*b5893f02SDimitry Andric FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
181*b5893f02SDimitry Andric default:
182*b5893f02SDimitry Andric return DefaultStatus;
183*b5893f02SDimitry Andric }
184*b5893f02SDimitry Andric }
185*b5893f02SDimitry Andric return Status();
186*b5893f02SDimitry Andric }
187*b5893f02SDimitry Andric
188*b5893f02SDimitry Andric // Insert a setreg instruction to update the Mode register.
189*b5893f02SDimitry Andric // It is possible (though unlikely) for an instruction to require a change to
190*b5893f02SDimitry Andric // the value of disjoint parts of the Mode register when we don't know the
191*b5893f02SDimitry Andric // value of the intervening bits. In that case we need to use more than one
192*b5893f02SDimitry Andric // setreg instruction.
insertSetreg(MachineBasicBlock & MBB,MachineInstr * MI,const SIInstrInfo * TII,Status InstrMode)193*b5893f02SDimitry Andric void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
194*b5893f02SDimitry Andric const SIInstrInfo *TII, Status InstrMode) {
195*b5893f02SDimitry Andric while (InstrMode.Mask) {
196*b5893f02SDimitry Andric unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
197*b5893f02SDimitry Andric unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
198*b5893f02SDimitry Andric unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
199*b5893f02SDimitry Andric BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
200*b5893f02SDimitry Andric .addImm(Value)
201*b5893f02SDimitry Andric .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
202*b5893f02SDimitry Andric (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
203*b5893f02SDimitry Andric (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
204*b5893f02SDimitry Andric ++NumSetregInserted;
205*b5893f02SDimitry Andric InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
206*b5893f02SDimitry Andric }
207*b5893f02SDimitry Andric }
208*b5893f02SDimitry Andric
209*b5893f02SDimitry Andric // In Phase 1 we iterate through the instructions of the block and for each
210*b5893f02SDimitry Andric // instruction we get its mode usage. If the instruction uses the Mode register
211*b5893f02SDimitry Andric // we:
212*b5893f02SDimitry Andric // - update the Change status, which tracks the changes to the Mode register
213*b5893f02SDimitry Andric // made by this block
214*b5893f02SDimitry Andric // - if this instruction's requirements are compatible with the current setting
215*b5893f02SDimitry Andric // of the Mode register we merge the modes
216*b5893f02SDimitry Andric // - if it isn't compatible and an InsertionPoint isn't set, then we set the
217*b5893f02SDimitry Andric // InsertionPoint to the current instruction, and we remember the current
218*b5893f02SDimitry Andric // mode
219*b5893f02SDimitry Andric // - if it isn't compatible and InsertionPoint is set we insert a seteg before
220*b5893f02SDimitry Andric // that instruction (unless this instruction forms part of the block's
221*b5893f02SDimitry Andric // entry requirements in which case the insertion is deferred until Phase 3
222*b5893f02SDimitry Andric // when predecessor exit values are known), and move the insertion point to
223*b5893f02SDimitry Andric // this instruction
224*b5893f02SDimitry Andric // - if this is a setreg instruction we treat it as an incompatible instruction.
225*b5893f02SDimitry Andric // This is sub-optimal but avoids some nasty corner cases, and is expected to
226*b5893f02SDimitry Andric // occur very rarely.
227*b5893f02SDimitry Andric // - on exit we have set the Require, Change, and initial Exit modes.
processBlockPhase1(MachineBasicBlock & MBB,const SIInstrInfo * TII)228*b5893f02SDimitry Andric void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
229*b5893f02SDimitry Andric const SIInstrInfo *TII) {
230*b5893f02SDimitry Andric auto NewInfo = llvm::make_unique<BlockData>();
231*b5893f02SDimitry Andric MachineInstr *InsertionPoint = nullptr;
232*b5893f02SDimitry Andric // RequirePending is used to indicate whether we are collecting the initial
233*b5893f02SDimitry Andric // requirements for the block, and need to defer the first InsertionPoint to
234*b5893f02SDimitry Andric // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
235*b5893f02SDimitry Andric // we discover an explict setreg that means this block doesn't have any
236*b5893f02SDimitry Andric // initial requirements.
237*b5893f02SDimitry Andric bool RequirePending = true;
238*b5893f02SDimitry Andric Status IPChange;
239*b5893f02SDimitry Andric for (MachineInstr &MI : MBB) {
240*b5893f02SDimitry Andric Status InstrMode = getInstructionMode(MI, TII);
241*b5893f02SDimitry Andric if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
242*b5893f02SDimitry Andric (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
243*b5893f02SDimitry Andric // We preserve any explicit mode register setreg instruction we encounter,
244*b5893f02SDimitry Andric // as we assume it has been inserted by a higher authority (this is
245*b5893f02SDimitry Andric // likely to be a very rare occurrence).
246*b5893f02SDimitry Andric unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
247*b5893f02SDimitry Andric if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
248*b5893f02SDimitry Andric AMDGPU::Hwreg::ID_MODE)
249*b5893f02SDimitry Andric continue;
250*b5893f02SDimitry Andric
251*b5893f02SDimitry Andric unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
252*b5893f02SDimitry Andric AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
253*b5893f02SDimitry Andric 1;
254*b5893f02SDimitry Andric unsigned Offset =
255*b5893f02SDimitry Andric (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
256*b5893f02SDimitry Andric unsigned Mask = ((1 << Width) - 1) << Offset;
257*b5893f02SDimitry Andric
258*b5893f02SDimitry Andric // If an InsertionPoint is set we will insert a setreg there.
259*b5893f02SDimitry Andric if (InsertionPoint) {
260*b5893f02SDimitry Andric insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
261*b5893f02SDimitry Andric InsertionPoint = nullptr;
262*b5893f02SDimitry Andric }
263*b5893f02SDimitry Andric // If this is an immediate then we know the value being set, but if it is
264*b5893f02SDimitry Andric // not an immediate then we treat the modified bits of the mode register
265*b5893f02SDimitry Andric // as unknown.
266*b5893f02SDimitry Andric if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
267*b5893f02SDimitry Andric unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
268*b5893f02SDimitry Andric unsigned Mode = (Val << Offset) & Mask;
269*b5893f02SDimitry Andric Status Setreg = Status(Mask, Mode);
270*b5893f02SDimitry Andric // If we haven't already set the initial requirements for the block we
271*b5893f02SDimitry Andric // don't need to as the requirements start from this explicit setreg.
272*b5893f02SDimitry Andric RequirePending = false;
273*b5893f02SDimitry Andric NewInfo->Change = NewInfo->Change.merge(Setreg);
274*b5893f02SDimitry Andric } else {
275*b5893f02SDimitry Andric NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
276*b5893f02SDimitry Andric }
277*b5893f02SDimitry Andric } else if (!NewInfo->Change.isCompatible(InstrMode)) {
278*b5893f02SDimitry Andric // This instruction uses the Mode register and its requirements aren't
279*b5893f02SDimitry Andric // compatible with the current mode.
280*b5893f02SDimitry Andric if (InsertionPoint) {
281*b5893f02SDimitry Andric // If the required mode change cannot be included in the current
282*b5893f02SDimitry Andric // InsertionPoint changes, we need a setreg and start a new
283*b5893f02SDimitry Andric // InsertionPoint.
284*b5893f02SDimitry Andric if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
285*b5893f02SDimitry Andric if (RequirePending) {
286*b5893f02SDimitry Andric // This is the first insertionPoint in the block so we will defer
287*b5893f02SDimitry Andric // the insertion of the setreg to Phase 3 where we know whether or
288*b5893f02SDimitry Andric // not it is actually needed.
289*b5893f02SDimitry Andric NewInfo->FirstInsertionPoint = InsertionPoint;
290*b5893f02SDimitry Andric NewInfo->Require = NewInfo->Change;
291*b5893f02SDimitry Andric RequirePending = false;
292*b5893f02SDimitry Andric } else {
293*b5893f02SDimitry Andric insertSetreg(MBB, InsertionPoint, TII,
294*b5893f02SDimitry Andric IPChange.delta(NewInfo->Change));
295*b5893f02SDimitry Andric IPChange = NewInfo->Change;
296*b5893f02SDimitry Andric }
297*b5893f02SDimitry Andric // Set the new InsertionPoint
298*b5893f02SDimitry Andric InsertionPoint = &MI;
299*b5893f02SDimitry Andric }
300*b5893f02SDimitry Andric NewInfo->Change = NewInfo->Change.merge(InstrMode);
301*b5893f02SDimitry Andric } else {
302*b5893f02SDimitry Andric // No InsertionPoint is currently set - this is either the first in
303*b5893f02SDimitry Andric // the block or we have previously seen an explicit setreg.
304*b5893f02SDimitry Andric InsertionPoint = &MI;
305*b5893f02SDimitry Andric IPChange = NewInfo->Change;
306*b5893f02SDimitry Andric NewInfo->Change = NewInfo->Change.merge(InstrMode);
307*b5893f02SDimitry Andric }
308*b5893f02SDimitry Andric }
309*b5893f02SDimitry Andric }
310*b5893f02SDimitry Andric if (RequirePending) {
311*b5893f02SDimitry Andric // If we haven't yet set the initial requirements for the block we set them
312*b5893f02SDimitry Andric // now.
313*b5893f02SDimitry Andric NewInfo->FirstInsertionPoint = InsertionPoint;
314*b5893f02SDimitry Andric NewInfo->Require = NewInfo->Change;
315*b5893f02SDimitry Andric } else if (InsertionPoint) {
316*b5893f02SDimitry Andric // We need to insert a setreg at the InsertionPoint
317*b5893f02SDimitry Andric insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
318*b5893f02SDimitry Andric }
319*b5893f02SDimitry Andric NewInfo->Exit = NewInfo->Change;
320*b5893f02SDimitry Andric BlockInfo[MBB.getNumber()] = std::move(NewInfo);
321*b5893f02SDimitry Andric }
322*b5893f02SDimitry Andric
323*b5893f02SDimitry Andric // In Phase 2 we revisit each block and calculate the common Mode register
324*b5893f02SDimitry Andric // value provided by all predecessor blocks. If the Exit value for the block
325*b5893f02SDimitry Andric // is changed, then we add the successor blocks to the worklist so that the
326*b5893f02SDimitry Andric // exit value is propagated.
processBlockPhase2(MachineBasicBlock & MBB,const SIInstrInfo * TII)327*b5893f02SDimitry Andric void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
328*b5893f02SDimitry Andric const SIInstrInfo *TII) {
329*b5893f02SDimitry Andric // BlockData *BI = BlockInfo[MBB.getNumber()];
330*b5893f02SDimitry Andric unsigned ThisBlock = MBB.getNumber();
331*b5893f02SDimitry Andric if (MBB.pred_empty()) {
332*b5893f02SDimitry Andric // There are no predecessors, so use the default starting status.
333*b5893f02SDimitry Andric BlockInfo[ThisBlock]->Pred = DefaultStatus;
334*b5893f02SDimitry Andric } else {
335*b5893f02SDimitry Andric // Build a status that is common to all the predecessors by intersecting
336*b5893f02SDimitry Andric // all the predecessor exit status values.
337*b5893f02SDimitry Andric MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
338*b5893f02SDimitry Andric MachineBasicBlock &PB = *(*P);
339*b5893f02SDimitry Andric BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
340*b5893f02SDimitry Andric
341*b5893f02SDimitry Andric for (P = std::next(P); P != E; P = std::next(P)) {
342*b5893f02SDimitry Andric MachineBasicBlock *Pred = *P;
343*b5893f02SDimitry Andric BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
344*b5893f02SDimitry Andric }
345*b5893f02SDimitry Andric }
346*b5893f02SDimitry Andric Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
347*b5893f02SDimitry Andric if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
348*b5893f02SDimitry Andric BlockInfo[ThisBlock]->Exit = TmpStatus;
349*b5893f02SDimitry Andric // Add the successors to the work list so we can propagate the changed exit
350*b5893f02SDimitry Andric // status.
351*b5893f02SDimitry Andric for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
352*b5893f02SDimitry Andric E = MBB.succ_end();
353*b5893f02SDimitry Andric S != E; S = std::next(S)) {
354*b5893f02SDimitry Andric MachineBasicBlock &B = *(*S);
355*b5893f02SDimitry Andric Phase2List.push(&B);
356*b5893f02SDimitry Andric }
357*b5893f02SDimitry Andric }
358*b5893f02SDimitry Andric }
359*b5893f02SDimitry Andric
360*b5893f02SDimitry Andric // In Phase 3 we revisit each block and if it has an insertion point defined we
361*b5893f02SDimitry Andric // check whether the predecessor mode meets the block's entry requirements. If
362*b5893f02SDimitry Andric // not we insert an appropriate setreg instruction to modify the Mode register.
processBlockPhase3(MachineBasicBlock & MBB,const SIInstrInfo * TII)363*b5893f02SDimitry Andric void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
364*b5893f02SDimitry Andric const SIInstrInfo *TII) {
365*b5893f02SDimitry Andric // BlockData *BI = BlockInfo[MBB.getNumber()];
366*b5893f02SDimitry Andric unsigned ThisBlock = MBB.getNumber();
367*b5893f02SDimitry Andric if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
368*b5893f02SDimitry Andric Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
369*b5893f02SDimitry Andric if (BlockInfo[ThisBlock]->FirstInsertionPoint)
370*b5893f02SDimitry Andric insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
371*b5893f02SDimitry Andric else
372*b5893f02SDimitry Andric insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
373*b5893f02SDimitry Andric }
374*b5893f02SDimitry Andric }
375*b5893f02SDimitry Andric
runOnMachineFunction(MachineFunction & MF)376*b5893f02SDimitry Andric bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
377*b5893f02SDimitry Andric BlockInfo.resize(MF.getNumBlockIDs());
378*b5893f02SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
379*b5893f02SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
380*b5893f02SDimitry Andric
381*b5893f02SDimitry Andric // Processing is performed in a number of phases
382*b5893f02SDimitry Andric
383*b5893f02SDimitry Andric // Phase 1 - determine the initial mode required by each block, and add setreg
384*b5893f02SDimitry Andric // instructions for intra block requirements.
385*b5893f02SDimitry Andric for (MachineBasicBlock &BB : MF)
386*b5893f02SDimitry Andric processBlockPhase1(BB, TII);
387*b5893f02SDimitry Andric
388*b5893f02SDimitry Andric // Phase 2 - determine the exit mode from each block. We add all blocks to the
389*b5893f02SDimitry Andric // list here, but will also add any that need to be revisited during Phase 2
390*b5893f02SDimitry Andric // processing.
391*b5893f02SDimitry Andric for (MachineBasicBlock &BB : MF)
392*b5893f02SDimitry Andric Phase2List.push(&BB);
393*b5893f02SDimitry Andric while (!Phase2List.empty()) {
394*b5893f02SDimitry Andric processBlockPhase2(*Phase2List.front(), TII);
395*b5893f02SDimitry Andric Phase2List.pop();
396*b5893f02SDimitry Andric }
397*b5893f02SDimitry Andric
398*b5893f02SDimitry Andric // Phase 3 - add an initial setreg to each block where the required entry mode
399*b5893f02SDimitry Andric // is not satisfied by the exit mode of all its predecessors.
400*b5893f02SDimitry Andric for (MachineBasicBlock &BB : MF)
401*b5893f02SDimitry Andric processBlockPhase3(BB, TII);
402*b5893f02SDimitry Andric
403*b5893f02SDimitry Andric BlockInfo.clear();
404*b5893f02SDimitry Andric
405*b5893f02SDimitry Andric return NumSetregInserted > 0;
406*b5893f02SDimitry Andric }
407