//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Scheduler strategy and multi-stage scheduling driver for the AMDGPU GCN
/// backend, aimed at maximizing kernel occupancy.
//
//===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14 #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16 #include "GCNRegPressure.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/CodeGen/MachineScheduler.h"
19
20 namespace llvm {
21
22 class SIMachineFunctionInfo;
23 class SIRegisterInfo;
24 class GCNSubtarget;
25
26 /// This is a minimal scheduler strategy. The main difference between this
27 /// and the GenericScheduler is that GCNSchedStrategy uses different
28 /// heuristics to determine excess/critical pressure sets. Its goal is to
29 /// maximize kernel occupancy (i.e. maximum number of waves per simd).
30 class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
31 SUnit *pickNodeBidirectional(bool &IsTopNode);
32
33 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
34 const RegPressureTracker &RPTracker,
35 SchedCandidate &Cand);
36
37 void initCandidate(SchedCandidate &Cand, SUnit *SU,
38 bool AtTop, const RegPressureTracker &RPTracker,
39 const SIRegisterInfo *SRI,
40 unsigned SGPRPressure, unsigned VGPRPressure);
41
42 std::vector<unsigned> Pressure;
43
44 std::vector<unsigned> MaxPressure;
45
46 unsigned SGPRExcessLimit;
47
48 unsigned VGPRExcessLimit;
49
50 unsigned TargetOccupancy;
51
52 MachineFunction *MF;
53
54 public:
55 // schedule() have seen a clustered memory operation. Set it to false
56 // before a region scheduling to know if the region had such clusters.
57 bool HasClusteredNodes;
58
59 // schedule() have seen an excess register pressure and had to track
60 // register pressure for actual scheduling heuristics.
61 bool HasExcessPressure;
62
63 unsigned SGPRCriticalLimit;
64
65 unsigned VGPRCriticalLimit;
66
67 GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
68
69 SUnit *pickNode(bool &IsTopNode) override;
70
71 void initialize(ScheduleDAGMI *DAG) override;
72
getTargetOccupancy()73 unsigned getTargetOccupancy() { return TargetOccupancy; }
74
setTargetOccupancy(unsigned Occ)75 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
76 };
77
/// Identifies the scheduling stages GCNScheduleDAGMILive runs, in order.
/// Enumerators take consecutive values starting at 0; LastStage aliases the
/// final real stage so iteration code can detect the end.
enum class GCNSchedStageID : unsigned {
  InitialSchedule,
  UnclusteredReschedule,
  ClusteredLowOccupancyReschedule,
  PreRARematerialize,
  LastStage = PreRARematerialize
};
85
#ifndef NDEBUG
// Debug-only printer for a scheduling stage ID (definition not in this file).
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif
89
90 inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) {
91 assert(Stage != GCNSchedStageID::PreRARematerialize);
92 Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
93 return Stage;
94 }
95
/// Returns the stage ID that follows \p Stage, leaving \p Stage unchanged.
/// No range check is performed; callers are expected to stay within the enum.
inline GCNSchedStageID nextStage(const GCNSchedStageID Stage) {
  const unsigned Ordinal = static_cast<unsigned>(Stage);
  return static_cast<GCNSchedStageID>(Ordinal + 1);
}
99
100 inline bool operator>(GCNSchedStageID &LHS, GCNSchedStageID &RHS) {
101 return static_cast<unsigned>(LHS) > static_cast<unsigned>(RHS);
102 }
103
// Scheduling DAG driver that runs the GCN scheduling stages over every region
// of the function, caching per-region live-ins and register pressure.
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class InitialScheduleStage;
  friend class UnclusteredRescheduleStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records if a region is not yet scheduled, or schedule has been reverted,
  // or we generally desire to reschedule it.
  BitVector RescheduleRegions;

  // Record regions which use clustered loads/stores.
  BitVector RegionsWithClusters;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Regions that have the same occupancy as the latest MinOccupancy
  BitVector RegionsWithMinOcc;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // Live-in sets keyed by MachineInstr — presumably each region's first
  // instruction; confirm against getBBLiveInMap() in the implementation file.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Builds the live-in map above.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  // Run each GCNSchedStageID stage over the function in turn.
  void runSchedStages();

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
174
// GCNSchedStage is the base class for the scheduling stages applied to a
// function by GCNScheduleDAGMILive.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNMaxOccupancySchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  // Which stage of the pipeline this object implements.
  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  // Protected: instances are created only through the derived stage classes.
  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current stage
  // should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  virtual void finalizeGCNRegion();

  // Check result of scheduling.
  void checkScheduling();

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  // Move on to the next region to be scheduled.
  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
241
242 class InitialScheduleStage : public GCNSchedStage {
243 public:
244 void finalizeGCNRegion() override;
245
246 bool shouldRevertScheduling(unsigned WavesAfter) override;
247
InitialScheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)248 InitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
249 : GCNSchedStage(StageID, DAG) {}
250 };
251
252 class UnclusteredRescheduleStage : public GCNSchedStage {
253 private:
254 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
255
256 public:
257 bool initGCNSchedStage() override;
258
259 void finalizeGCNSchedStage() override;
260
261 bool initGCNRegion() override;
262
263 bool shouldRevertScheduling(unsigned WavesAfter) override;
264
UnclusteredRescheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)265 UnclusteredRescheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
266 : GCNSchedStage(StageID, DAG) {}
267 };
268
269 // Retry function scheduling if we found resulting occupancy and it is
270 // lower than used for other scheduling passes. This will give more freedom
271 // to schedule low register pressure blocks.
272 class ClusteredLowOccStage : public GCNSchedStage {
273 public:
274 bool initGCNSchedStage() override;
275
276 bool initGCNRegion() override;
277
278 bool shouldRevertScheduling(unsigned WavesAfter) override;
279
ClusteredLowOccStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)280 ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
281 : GCNSchedStage(StageID, DAG) {}
282 };
283
284 class PreRARematStage : public GCNSchedStage {
285 private:
286 // Each region at MinOccupancy will have their own list of trivially
287 // rematerializable instructions we can remat to reduce RP. The list maps an
288 // instruction to the position we should remat before, usually the MI using
289 // the rematerializable instruction.
290 MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
291 RematerializableInsts;
292
293 // Map a trivially remateriazable def to a list of regions at MinOccupancy
294 // that has the defined reg as a live-in.
295 DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
296
297 // Collect all trivially rematerializable VGPR instructions with a single def
298 // and single use outside the defining block into RematerializableInsts.
299 void collectRematerializableInstructions();
300
301 bool isTriviallyReMaterializable(const MachineInstr &MI);
302
303 // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
304 // Attempt to reduce RP of VGPR by sinking trivially rematerializable
305 // instructions. Returns true if we were able to sink instruction(s).
306 bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
307 const TargetInstrInfo *TII);
308
309 public:
310 bool initGCNSchedStage() override;
311
312 bool initGCNRegion() override;
313
314 bool shouldRevertScheduling(unsigned WavesAfter) override;
315
PreRARematStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)316 PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
317 : GCNSchedStage(StageID, DAG) {}
318 };
319
320 } // End namespace llvm
321
322 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
323