1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements hazard recognizers for scheduling on GCN processors.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "GCNHazardRecognizer.h"
15 #include "AMDGPUSubtarget.h"
16 #include "SIInstrInfo.h"
17 #include "llvm/CodeGen/ScheduleDAG.h"
18 #include "llvm/Support/Debug.h"
19 
20 using namespace llvm;
21 
22 //===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
24 //===----------------------------------------------------------------------===//
25 
26 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
27   CurrCycleInstr(nullptr),
28   MF(MF),
29   ST(MF.getSubtarget<SISubtarget>()) {
30   MaxLookAhead = 5;
31 }
32 
33 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
34   EmitInstruction(SU->getInstr());
35 }
36 
37 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
38   CurrCycleInstr = MI;
39 }
40 
41 static bool isDivFMas(unsigned Opcode) {
42   return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
43 }
44 
45 static bool isSGetReg(unsigned Opcode) {
46   return Opcode == AMDGPU::S_GETREG_B32;
47 }
48 
49 static bool isSSetReg(unsigned Opcode) {
50   return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
51 }
52 
53 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
54 
55   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
56                                                      AMDGPU::OpName::simm16);
57   return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
58 }
59 
60 ScheduleHazardRecognizer::HazardType
61 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
62   MachineInstr *MI = SU->getInstr();
63 
64   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
65     return NoopHazard;
66 
67   if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
68     return NoopHazard;
69 
70   if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
71     return NoopHazard;
72 
73   if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
74     return NoopHazard;
75 
76   if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
77     return NoopHazard;
78 
79   return NoHazard;
80 }
81 
82 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
83   return PreEmitNoops(SU->getInstr());
84 }
85 
86 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
87   if (SIInstrInfo::isSMRD(*MI))
88     return std::max(0, checkSMRDHazards(MI));
89 
90   if (SIInstrInfo::isVMEM(*MI))
91     return std::max(0, checkVMEMHazards(MI));
92 
93   if (SIInstrInfo::isDPP(*MI))
94     return std::max(0, checkDPPHazards(MI));
95 
96   if (isDivFMas(MI->getOpcode()))
97     return std::max(0, checkDivFMasHazards(MI));
98 
99   if (isSGetReg(MI->getOpcode()))
100     return std::max(0, checkGetRegHazards(MI));
101 
102   return 0;
103 }
104 
105 void GCNHazardRecognizer::EmitNoop() {
106   EmittedInstrs.push_front(nullptr);
107 }
108 
109 void GCNHazardRecognizer::AdvanceCycle() {
110 
111   // When the scheduler detects a stall, it will call AdvanceCycle() without
112   // emitting any instructions.
113   if (!CurrCycleInstr)
114     return;
115 
116   const SIInstrInfo *TII = ST.getInstrInfo();
117   unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
118 
119   // Keep track of emitted instructions
120   EmittedInstrs.push_front(CurrCycleInstr);
121 
122   // Add a nullptr for each additional wait state after the first.  Make sure
123   // not to add more than getMaxLookAhead() items to the list, since we
124   // truncate the list to that size right after this loop.
125   for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
126        i < e; ++i) {
127     EmittedInstrs.push_front(nullptr);
128   }
129 
130   // getMaxLookahead() is the largest number of wait states we will ever need
131   // to insert, so there is no point in keeping track of more than that many
132   // wait states.
133   EmittedInstrs.resize(getMaxLookAhead());
134 
135   CurrCycleInstr = nullptr;
136 }
137 
138 void GCNHazardRecognizer::RecedeCycle() {
139   llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
140 }
141 
142 //===----------------------------------------------------------------------===//
143 // Helper Functions
144 //===----------------------------------------------------------------------===//
145 
146 int GCNHazardRecognizer::getWaitStatesSinceDef(
147     unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
148   const SIRegisterInfo *TRI = ST.getRegisterInfo();
149 
150   int WaitStates = -1;
151   for (MachineInstr *MI : EmittedInstrs) {
152     ++WaitStates;
153     if (!MI || !IsHazardDef(MI))
154       continue;
155     if (MI->modifiesRegister(Reg, TRI))
156       return WaitStates;
157   }
158   return std::numeric_limits<int>::max();
159 }
160 
161 int GCNHazardRecognizer::getWaitStatesSinceSetReg(
162     function_ref<bool(MachineInstr *)> IsHazard) {
163 
164   int WaitStates = -1;
165   for (MachineInstr *MI : EmittedInstrs) {
166     ++WaitStates;
167     if (!MI || !isSSetReg(MI->getOpcode()) || !IsHazard(MI))
168       continue;
169     return WaitStates;
170   }
171   return std::numeric_limits<int>::max();
172 }
173 
174 //===----------------------------------------------------------------------===//
175 // No-op Hazard Detection
176 //===----------------------------------------------------------------------===//
177 
178 static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops,
179                          std::set<unsigned> &Set) {
180   for (const MachineOperand &Op : Ops) {
181     if (Op.isReg())
182       Set.insert(Op.getReg());
183   }
184 }
185 
186 int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
187   // SMEM soft clause are only present on VI+
188   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
189     return 0;
190 
191   // A soft-clause is any group of consecutive SMEM instructions.  The
192   // instructions in this group may return out of order and/or may be
193   // replayed (i.e. the same instruction issued more than once).
194   //
195   // In order to handle these situations correctly we need to make sure
196   // that when a clause has more than one instruction, no instruction in the
197   // clause writes to a register that is read another instruction in the clause
198   // (including itself). If we encounter this situaion, we need to break the
199   // clause by inserting a non SMEM instruction.
200 
201   std::set<unsigned> ClauseDefs;
202   std::set<unsigned> ClauseUses;
203 
204   for (MachineInstr *MI : EmittedInstrs) {
205 
206     // When we hit a non-SMEM instruction then we have passed the start of the
207     // clause and we can stop.
208     if (!MI || !SIInstrInfo::isSMRD(*MI))
209       break;
210 
211     addRegsToSet(MI->defs(), ClauseDefs);
212     addRegsToSet(MI->uses(), ClauseUses);
213   }
214 
215   if (ClauseDefs.empty())
216     return 0;
217 
218   // FIXME: When we support stores, we need to make sure not to put loads and
219   // stores in the same clause if they use the same address.  For now, just
220   // start a new clause whenever we see a store.
221   if (SMEM->mayStore())
222     return 1;
223 
224   addRegsToSet(SMEM->defs(), ClauseDefs);
225   addRegsToSet(SMEM->uses(), ClauseUses);
226 
227   std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size()));
228   std::vector<unsigned>::iterator End;
229 
230   End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(),
231                               ClauseUses.begin(), ClauseUses.end(), Result.begin());
232 
233   // If the set of defs and uses intersect then we cannot add this instruction
234   // to the clause, so we have a hazard.
235   if (End != Result.begin())
236     return 1;
237 
238   return 0;
239 }
240 
241 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
242   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
243   const SIInstrInfo *TII = ST.getInstrInfo();
244   int WaitStatesNeeded = 0;
245 
246   WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
247 
248   // This SMRD hazard only affects SI.
249   if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
250     return WaitStatesNeeded;
251 
252   // A read of an SGPR by SMRD instruction requires 4 wait states when the
253   // SGPR was written by a VALU instruction.
254   int SmrdSgprWaitStates = 4;
255   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
256 
257   for (const MachineOperand &Use : SMRD->uses()) {
258     if (!Use.isReg())
259       continue;
260     int WaitStatesNeededForUse =
261         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
262     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
263   }
264   return WaitStatesNeeded;
265 }
266 
267 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
268   const SIInstrInfo *TII = ST.getInstrInfo();
269 
270   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
271     return 0;
272 
273   const SIRegisterInfo &TRI = TII->getRegisterInfo();
274 
275   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
276   // SGPR was written by a VALU Instruction.
277   int VmemSgprWaitStates = 5;
278   int WaitStatesNeeded = 0;
279   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
280 
281   for (const MachineOperand &Use : VMEM->uses()) {
282     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
283       continue;
284 
285     int WaitStatesNeededForUse =
286         VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
287     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
288   }
289   return WaitStatesNeeded;
290 }
291 
292 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
293   const SIRegisterInfo *TRI = ST.getRegisterInfo();
294 
295   // Check for DPP VGPR read after VALU VGPR write.
296   int DppVgprWaitStates = 2;
297   int WaitStatesNeeded = 0;
298 
299   for (const MachineOperand &Use : DPP->uses()) {
300     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
301       continue;
302     int WaitStatesNeededForUse =
303         DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
304     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
305   }
306 
307   return WaitStatesNeeded;
308 }
309 
310 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
311   const SIInstrInfo *TII = ST.getInstrInfo();
312 
313   // v_div_fmas requires 4 wait states after a write to vcc from a VALU
314   // instruction.
315   const int DivFMasWaitStates = 4;
316   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
317   int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
318 
319   return DivFMasWaitStates - WaitStatesNeeded;
320 }
321 
322 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
323   const SIInstrInfo *TII = ST.getInstrInfo();
324   unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
325 
326   const int GetRegWaitStates = 2;
327   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
328     return GetRegHWReg == getHWReg(TII, *MI);
329   };
330   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
331 
332   return GetRegWaitStates - WaitStatesNeeded;
333 }
334