1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This contains a MachineSchedStrategy implementation for maximizing wave
12 /// occupancy on GCN hardware.
13 //===----------------------------------------------------------------------===//
14 
15 #include "GCNSchedStrategy.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/CodeGen/RegisterClassInfo.h"
21 
22 #define DEBUG_TYPE "misched"
23 
24 using namespace llvm;
25 
26 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
27     const MachineSchedContext *C) :
28     GenericScheduler(C) { }
29 
30 static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
31                             const MachineFunction &MF) {
32 
33   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
34   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
35   unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
36                                       ST.getOccupancyWithNumVGPRs(VGPRs));
37   return std::min(MinRegOccupancy,
38                   ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
39                                                   *MF.getFunction()));
40 }
41 
// Initialize \p Cand to represent scheduling \p SU at the \p AtTop boundary,
// recording SGPR/VGPR excess and critical pressure deltas so tryCandidate()
// can penalize choices that would spill or reduce wave occupancy.
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     int SGPRPressure,
                                     int VGPRPressure,
                                     int SGPRExcessLimit,
                                     int VGPRExcessLimit,
                                     int SGPRCriticalLimit,
                                     int VGPRCriticalLimit) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those function a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  std::vector<unsigned> Pressure;
  std::vector<unsigned> MaxPressure;

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  // Pressure on the two GCN register files after scheduling SU.
  int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
  int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs.  This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const int MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;


  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits.  We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.
  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;
  VGPRExcessLimit -= ErrorMargin;
  SGPRExcessLimit -= ErrorMargin;

  // We only need to update the RPDelta for instructions that increase
  // register pressure.  Instructions that decrease or keep reg pressure
  // the same will be marked as RegExcess in tryCandidate() when they
  // are compared with instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit.  When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // has the same cost, so we don't need to prefer one over the other.

  VGPRCriticalLimit -= ErrorMargin;
  SGPRCriticalLimit -= ErrorMargin;

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  // Report whichever register file is closest to (or furthest beyond) its
  // critical limit.
  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
131 
132 // This function is mostly cut and pasted from
133 // GenericScheduler::pickNodeFromQueue()
134 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
135                                          const CandPolicy &ZonePolicy,
136                                          const RegPressureTracker &RPTracker,
137                                          SchedCandidate &Cand) {
138   const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
139   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
140   ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
141   unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
142   unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
143   unsigned SGPRExcessLimit =
144       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
145   unsigned VGPRExcessLimit =
146       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
147   unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
148   unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true);
149   unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves);
150 
151   ReadyQueue &Q = Zone.Available;
152   for (SUnit *SU : Q) {
153 
154     SchedCandidate TryCand(ZonePolicy);
155     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
156                   SGPRPressure, VGPRPressure,
157                   SGPRExcessLimit, VGPRExcessLimit,
158                   SGPRCriticalLimit, VGPRCriticalLimit);
159     // Pass SchedBoundary only when comparing nodes from the same boundary.
160     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
161     GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
162     if (TryCand.Reason != NoCand) {
163       // Initialize resource delta if needed in case future heuristics query it.
164       if (TryCand.ResDelta == SchedResourceDelta())
165         TryCand.initResourceDelta(Zone.DAG, SchedModel);
166       Cand.setBest(TryCand);
167     }
168   }
169 }
170 
171 static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
172   switch (Reason) {
173   default:
174     return Reason;
175   case GenericSchedulerBase::RegCritical:
176   case GenericSchedulerBase::RegExcess:
177     return -Reason;
178  }
179 }
180 
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional(), with a GCN-specific tie-break
// that favors whichever boundary's candidate relieves register pressure.
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(BotCand));
  }

  // Check if the top Q has a better candidate.
  DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(TopCand));
  }

  // Pick best from BotCand and TopCand.
  DEBUG(
    dbgs() << "Top Cand: ";
    traceCandidate(TopCand);
    dbgs() << "Bot Cand: ";
    traceCandidate(BotCand);
  );
  SchedCandidate Cand;
  if (TopCand.Reason == BotCand.Reason) {
    // Same reason on both sides: let the generic comparison break the tie.
    // TopCand.Reason is temporarily cleared so tryCandidate() can signal a
    // win by setting it; restore it afterwards if TopCand did not win.
    Cand = BotCand;
    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
    TopCand.Reason = NoCand;
    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
    if (TopCand.Reason != NoCand) {
      Cand.setBest(TopCand);
    } else {
      TopCand.Reason = TopReason;
    }
  } else {
    // Different reasons: prefer a candidate that does not increase excess
    // pressure, then one that does not increase critical pressure, and
    // finally fall back to the bidirectional reason ranking.
    if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = BotCand;
    } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = BotCand;
    } else {
      int TopRank = getBidirectionalReasonRank(TopCand.Reason);
      int BotRank = getBidirectionalReasonRank(BotCand.Reason);
      if (TopRank > BotRank) {
        Cand = TopCand;
      } else {
        Cand = BotCand;
      }
    }
  }
  DEBUG(
    dbgs() << "Picking: ";
    traceCandidate(Cand);
  );

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}
270 
// This function is mostly cut and pasted from
// GenericScheduler::pickNode().  Selects the next SUnit to schedule,
// honoring the region's forced direction if one is set, and removes the
// chosen node from the ready queues.  Returns nullptr when the region is
// fully scheduled.
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    // Region exhausted; both zones' queues must already be drained.
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  // Loop until we find a node that has not already been scheduled (a stale
  // candidate may be picked up from a previous iteration).
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  // A node can be ready at both boundaries; drop it from whichever queues
  // currently hold it.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}
314