1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This contains a MachineSchedStrategy implementation for maximizing wave
11 /// occupancy on GCN hardware.
12 ///
13 /// This pass will apply multiple scheduling stages to the same function.
14 /// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15 /// entry point for the scheduling of those regions is
16 /// GCNScheduleDAGMILive::runSchedStages.
///
18 /// Generally, the reason for having multiple scheduling stages is to account
19 /// for the kernel-wide effect of register usage on occupancy. Usually, only a
20 /// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
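///
/// In the current implementation the stages, in order, are InitialSchedule,
/// UnclusteredReschedule, ClusteredLowOccupancyReschedule and
/// PreRARematerialize (see GCNScheduleDAGMILive::runSchedStages).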
23 ///
24 //===----------------------------------------------------------------------===//
25
26 #include "GCNSchedStrategy.h"
27 #include "SIMachineFunctionInfo.h"
28 #include "llvm/CodeGen/RegisterClassInfo.h"
29
30 #define DEBUG_TYPE "machine-scheduler"
31
32 using namespace llvm;
33
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasClusteredNodes(false), HasExcessPressure(false) {}
38
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
40 GenericScheduler::initialize(DAG);
41
42 MF = &DAG->MF;
43
44 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
45
46 // FIXME: This is also necessary, because some passes that run after
47 // scheduling and before regalloc increase register pressure.
48 const unsigned ErrorMargin = 3;
49
50 SGPRExcessLimit =
51 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
52 VGPRExcessLimit =
53 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
54
55 SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
57 // achieve for this function. This effectively sets a lower bound on the
58 // 'Critical' register limits in the scheduler.
59 TargetOccupancy = MFI.getOccupancy();
60 SGPRCriticalLimit =
61 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
62 VGPRCriticalLimit =
63 std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
64
65 // Subtract error margin from register limits and avoid overflow.
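  // (If a limit is smaller than ErrorMargin, the unsigned subtraction wraps
  // around and std::min keeps the original, unmodified limit.)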
66 SGPRCriticalLimit =
67 std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
68 VGPRCriticalLimit =
69 std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
70 SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
71 VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
72 }
73
void GCNMaxOccupancySchedStrategy::initCandidate(
    SchedCandidate &Cand, SUnit *SU, bool AtTop,
    const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI,
    unsigned SGPRPressure, unsigned VGPRPressure) {
79
80 Cand.SU = SU;
81 Cand.AtTop = AtTop;
82
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const reference.
85 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
86
87 Pressure.clear();
88 MaxPressure.clear();
89
90 if (AtTop)
91 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
92 else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
95 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
96 }
97
98 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
99 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
100
101 // If two instructions increase the pressure of different register sets
102 // by the same amount, the generic scheduler will prefer to schedule the
103 // instruction that increases the set with the least amount of registers,
104 // which in our case would be SGPRs. This is rarely what we want, so
105 // when we report excess/critical register pressure, we do it either
106 // only for VGPRs or only for SGPRs.
107
108 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
109 const unsigned MaxVGPRPressureInc = 16;
110 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
111 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
112
113
114 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
115 // to increase the likelihood we don't go over the limits. We should improve
116 // the analysis to look through dependencies to find the path with the least
117 // register pressure.
118
119 // We only need to update the RPDelta for instructions that increase register
120 // pressure. Instructions that decrease or keep reg pressure the same will be
121 // marked as RegExcess in tryCandidate() when they are compared with
122 // instructions that increase the register pressure.
123 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
124 HasExcessPressure = true;
125 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
126 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
127 }
128
129 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
130 HasExcessPressure = true;
131 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
132 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
133 }
134
  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure has
  // the same cost, so we don't need to prefer one over the other.
139
140 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
141 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
142
143 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
144 HasExcessPressure = true;
145 if (SGPRDelta > VGPRDelta) {
146 Cand.RPDelta.CriticalMax =
147 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
148 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
149 } else {
150 Cand.RPDelta.CriticalMax =
151 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
152 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
153 }
154 }
155 }
156
157 // This function is mostly cut and pasted from
158 // GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(
    SchedBoundary &Zone, const CandPolicy &ZonePolicy,
    const RegPressureTracker &RPTracker, SchedCandidate &Cand) {
163 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
164 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
165 unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
166 unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
167 ReadyQueue &Q = Zone.Available;
168 for (SUnit *SU : Q) {
169
170 SchedCandidate TryCand(ZonePolicy);
171 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
172 SGPRPressure, VGPRPressure);
173 // Pass SchedBoundary only when comparing nodes from the same boundary.
174 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
175 GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
176 if (TryCand.Reason != NoCand) {
177 // Initialize resource delta if needed in case future heuristics query it.
178 if (TryCand.ResDelta == SchedResourceDelta())
179 TryCand.initResourceDelta(Zone.DAG, SchedModel);
180 Cand.setBest(TryCand);
181 LLVM_DEBUG(traceCandidate(Cand));
182 }
183 }
184 }
185
186 // This function is mostly cut and pasted from
187 // GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
189 // Schedule as far as possible in the direction of no choice. This is most
190 // efficient, but also provides the best heuristics for CriticalPSets.
191 if (SUnit *SU = Bot.pickOnlyChoice()) {
192 IsTopNode = false;
193 return SU;
194 }
195 if (SUnit *SU = Top.pickOnlyChoice()) {
196 IsTopNode = true;
197 return SU;
198 }
199 // Set the bottom-up policy based on the state of the current bottom zone and
200 // the instructions outside the zone, including the top zone.
201 CandPolicy BotPolicy;
202 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
203 // Set the top-down policy based on the state of the current top zone and
204 // the instructions outside the zone, including the bottom zone.
205 CandPolicy TopPolicy;
206 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
207
208 // See if BotCand is still valid (because we previously scheduled from Top).
209 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
210 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
211 BotCand.Policy != BotPolicy) {
212 BotCand.reset(CandPolicy());
213 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
214 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
215 } else {
216 LLVM_DEBUG(traceCandidate(BotCand));
217 #ifndef NDEBUG
218 if (VerifyScheduling) {
219 SchedCandidate TCand;
220 TCand.reset(CandPolicy());
221 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
222 assert(TCand.SU == BotCand.SU &&
223 "Last pick result should correspond to re-picking right now");
224 }
225 #endif
226 }
227
228 // Check if the top Q has a better candidate.
229 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
230 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
231 TopCand.Policy != TopPolicy) {
232 TopCand.reset(CandPolicy());
233 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
234 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
235 } else {
236 LLVM_DEBUG(traceCandidate(TopCand));
237 #ifndef NDEBUG
238 if (VerifyScheduling) {
239 SchedCandidate TCand;
240 TCand.reset(CandPolicy());
241 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
242 assert(TCand.SU == TopCand.SU &&
243 "Last pick result should correspond to re-picking right now");
244 }
245 #endif
246 }
247
248 // Pick best from BotCand and TopCand.
249 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
250 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
251 SchedCandidate Cand = BotCand;
252 TopCand.Reason = NoCand;
253 GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
254 if (TopCand.Reason != NoCand) {
255 Cand.setBest(TopCand);
256 }
257 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
258
259 IsTopNode = Cand.AtTop;
260 return Cand.SU;
261 }
262
263 // This function is mostly cut and pasted from
264 // GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
266 if (DAG->top() == DAG->bottom()) {
267 assert(Top.Available.empty() && Top.Pending.empty() &&
268 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
269 return nullptr;
270 }
271 SUnit *SU;
272 do {
273 if (RegionPolicy.OnlyTopDown) {
274 SU = Top.pickOnlyChoice();
275 if (!SU) {
276 CandPolicy NoPolicy;
277 TopCand.reset(NoPolicy);
278 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
279 assert(TopCand.Reason != NoCand && "failed to find a candidate");
280 SU = TopCand.SU;
281 }
282 IsTopNode = true;
283 } else if (RegionPolicy.OnlyBottomUp) {
284 SU = Bot.pickOnlyChoice();
285 if (!SU) {
286 CandPolicy NoPolicy;
287 BotCand.reset(NoPolicy);
288 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
289 assert(BotCand.Reason != NoCand && "failed to find a candidate");
290 SU = BotCand.SU;
291 }
292 IsTopNode = false;
293 } else {
294 SU = pickNodeBidirectional(IsTopNode);
295 }
296 } while (SU->isScheduled);
297
298 if (SU->isTopReady())
299 Top.removeReady(SU);
300 if (SU->isBottomReady())
301 Bot.removeReady(SU);
302
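  // Remember whether any scheduled memory operation is part of a load/store
  // cluster; regions without clusters can skip the unclustered reschedule
  // stage.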
303 if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
304 for (SDep &Dep : SU->Preds) {
305 if (Dep.isCluster()) {
306 HasClusteredNodes = true;
307 break;
308 }
309 }
310 }
311
312 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
313 << *SU->getInstr());
314 return SU;
315 }
316
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
319 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
320 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
321 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
322
323 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
324 }
325
void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::runSchedStages, which is invoked from
  // finalizeSchedule().
329 Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
330 }
331
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
334 GCNDownwardRPTracker RPTracker(*LIS);
335 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
336 return RPTracker.moveMaxPressure();
337 }
338
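// Walk MBB from top to bottom with a GCNDownwardRPTracker and cache the
// live-in set and maximum register pressure for each scheduling region of the
// block. If the block has a single not-yet-scheduled successor, its live-ins
// are forwarded through MBBLiveIns so they do not have to be recomputed.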
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
341 GCNDownwardRPTracker RPTracker(*LIS);
342
  // If the block has only one successor, the live-ins of that successor are
  // the live-outs of the current block. We can reuse the calculated live set
  // if the successor will be sent to scheduling after the current block.
346 const MachineBasicBlock *OnlySucc = nullptr;
347 if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
348 SlotIndexes *Ind = LIS->getSlotIndexes();
349 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
350 OnlySucc = *MBB->succ_begin();
351 }
352
353 // Scheduler sends regions from the end of the block upwards.
354 size_t CurRegion = RegionIdx;
355 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
356 if (Regions[CurRegion].first->getParent() != MBB)
357 break;
358 --CurRegion;
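  // CurRegion now indexes the last region recorded for MBB; since the
  // scheduler records regions bottom-up within a block, this is the topmost
  // region of the block.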
359
360 auto I = MBB->begin();
361 auto LiveInIt = MBBLiveIns.find(MBB);
362 auto &Rgn = Regions[CurRegion];
363 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
364 if (LiveInIt != MBBLiveIns.end()) {
365 auto LiveIn = std::move(LiveInIt->second);
366 RPTracker.reset(*MBB->begin(), &LiveIn);
367 MBBLiveIns.erase(LiveInIt);
368 } else {
369 I = Rgn.first;
370 auto LRS = BBLiveInMap.lookup(NonDbgMI);
371 #ifdef EXPENSIVE_CHECKS
372 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
373 #endif
374 RPTracker.reset(*I, &LRS);
375 }
376
377 for (;;) {
378 I = RPTracker.getNext();
379
380 if (Regions[CurRegion].first == I || NonDbgMI == I) {
381 LiveIns[CurRegion] = RPTracker.getLiveRegs();
382 RPTracker.clearMaxPressure();
383 }
384
385 if (Regions[CurRegion].second == I) {
386 Pressure[CurRegion] = RPTracker.moveMaxPressure();
387 if (CurRegion-- == RegionIdx)
388 break;
389 }
390 RPTracker.advanceToNext();
391 RPTracker.advanceBeforeNext();
392 }
393
394 if (OnlySucc) {
395 if (I != MBB->end()) {
396 RPTracker.advanceToNext();
397 RPTracker.advance(MBB->end());
398 }
399 RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
400 RPTracker.advanceBeforeNext();
401 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
402 }
403 }
404
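// Build a map from the first non-debug instruction of each block's first
// scheduling region to the set of registers live before it.
// computeBlockPressure uses this map to seed its pressure tracker when no
// cached live-ins are available for the block.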
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
407 assert(!Regions.empty());
408 std::vector<MachineInstr *> BBStarters;
409 BBStarters.reserve(Regions.size());
410 auto I = Regions.rbegin(), E = Regions.rend();
411 auto *BB = I->first->getParent();
412 do {
413 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
414 BBStarters.push_back(MI);
415 do {
416 ++I;
417 } while (I != E && I->first->getParent() == BB);
418 } while (I != E);
419 return getLiveRegMap(BBStarters, false /*After*/, *LIS);
420 }
421
void GCNScheduleDAGMILive::finalizeSchedule() {
423 // Start actual scheduling here. This function is called by the base
424 // MachineScheduler after all regions have been recorded by
425 // GCNScheduleDAGMILive::schedule().
426 LiveIns.resize(Regions.size());
427 Pressure.resize(Regions.size());
428 RescheduleRegions.resize(Regions.size());
429 RegionsWithClusters.resize(Regions.size());
430 RegionsWithHighRP.resize(Regions.size());
431 RegionsWithMinOcc.resize(Regions.size());
432 RescheduleRegions.set();
433 RegionsWithClusters.reset();
434 RegionsWithHighRP.reset();
435 RegionsWithMinOcc.reset();
436
437 runSchedStages();
438 }
439
void GCNScheduleDAGMILive::runSchedStages() {
441 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
442 InitialScheduleStage S0(GCNSchedStageID::InitialSchedule, *this);
443 UnclusteredRescheduleStage S1(GCNSchedStageID::UnclusteredReschedule, *this);
444 ClusteredLowOccStage S2(GCNSchedStageID::ClusteredLowOccupancyReschedule,
445 *this);
446 PreRARematStage S3(GCNSchedStageID::PreRARematerialize, *this);
447 GCNSchedStage *SchedStages[] = {&S0, &S1, &S2, &S3};
448
449 if (!Regions.empty())
450 BBLiveInMap = getBBLiveInMap();
451
452 for (auto *Stage : SchedStages) {
453 if (!Stage->initGCNSchedStage())
454 continue;
455
456 for (auto Region : Regions) {
457 RegionBegin = Region.first;
458 RegionEnd = Region.second;
459 // Setup for scheduling the region and check whether it should be skipped.
460 if (!Stage->initGCNRegion()) {
461 Stage->advanceRegion();
462 exitRegion();
463 continue;
464 }
465
466 ScheduleDAGMILive::schedule();
467 Stage->finalizeGCNRegion();
468 }
469
470 Stage->finalizeGCNSchedStage();
471 }
472 }
473
474 #ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
476 switch (StageID) {
477 case GCNSchedStageID::InitialSchedule:
478 OS << "Initial Schedule";
479 break;
480 case GCNSchedStageID::UnclusteredReschedule:
481 OS << "Unclustered Reschedule";
482 break;
483 case GCNSchedStageID::ClusteredLowOccupancyReschedule:
484 OS << "Clustered Low Occupancy Reschedule";
485 break;
486 case GCNSchedStageID::PreRARematerialize:
487 OS << "Pre-RA Rematerialize";
488 break;
489 }
490 return OS;
491 }
492 #endif
493
GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNMaxOccupancySchedStrategy &>(*DAG.SchedImpl)),
      MF(DAG.MF), MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
497
bool GCNSchedStage::initGCNSchedStage() {
499 if (!DAG.LIS)
500 return false;
501
502 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
503 return true;
504 }
505
bool UnclusteredRescheduleStage::initGCNSchedStage() {
507 if (!GCNSchedStage::initGCNSchedStage())
508 return false;
509
510 if (DAG.RescheduleRegions.none())
511 return false;
512
513 SavedMutations.swap(DAG.Mutations);
514
515 LLVM_DEBUG(dbgs() << "Retrying function scheduling without clustering.\n");
516 return true;
517 }
518
bool ClusteredLowOccStage::initGCNSchedStage() {
520 if (!GCNSchedStage::initGCNSchedStage())
521 return false;
522
523 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
524 // been dropped. All regions will have already been scheduled with the ideal
525 // occupancy targets.
526 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
527 return false;
528
529 LLVM_DEBUG(
530 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
531 << DAG.MinOccupancy << ".\n");
532 return true;
533 }
534
bool PreRARematStage::initGCNSchedStage() {
536 if (!GCNSchedStage::initGCNSchedStage())
537 return false;
538
539 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
540 return false;
541
542 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
543 // Check maximum occupancy
544 if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
545 DAG.MinOccupancy)
546 return false;
547
  // FIXME: This pass will invalidate the cached MBBLiveIns for regions in
  // between the defs and the region we sank the def to. Cached pressure for
  // regions where a def was sunk from will also be invalidated. This will
  // need to be fixed if there is another pass after this pass.
552
553 collectRematerializableInstructions();
554 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
555 return false;
556
557 LLVM_DEBUG(
558 dbgs() << "Retrying function scheduling with improved occupancy of "
559 << DAG.MinOccupancy << " from rematerializing\n");
560 return true;
561 }
562
void GCNSchedStage::finalizeGCNSchedStage() {
564 DAG.finishBlock();
565 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
566 }
567
void UnclusteredRescheduleStage::finalizeGCNSchedStage() {
569 SavedMutations.swap(DAG.Mutations);
570
571 GCNSchedStage::finalizeGCNSchedStage();
572 }
573
bool GCNSchedStage::initGCNRegion() {
575 // Check whether this new region is also a new block.
576 if (DAG.RegionBegin->getParent() != CurrentMBB)
577 setupNewBlock();
578
579 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
580 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
581
582 // Skip empty scheduling regions (0 or 1 schedulable instructions).
583 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
584 return false;
585
586 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
587 LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
588 << " " << CurrentMBB->getName()
589 << "\n From: " << *DAG.begin() << " To: ";
590 if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
591 else dbgs() << "End";
592 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
593
594 // Save original instruction order before scheduling for possible revert.
595 Unsched.clear();
596 Unsched.reserve(DAG.NumRegionInstrs);
597 for (auto &I : DAG)
598 Unsched.push_back(&I);
599
600 PressureBefore = DAG.Pressure[RegionIdx];
601
602 LLVM_DEBUG(
603 dbgs() << "Pressure before scheduling:\nRegion live-ins:";
604 GCNRPTracker::printLiveRegs(dbgs(), DAG.LiveIns[RegionIdx], DAG.MRI);
605 dbgs() << "Region live-in pressure: ";
606 llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]).print(dbgs());
607 dbgs() << "Region register pressure: "; PressureBefore.print(dbgs()));
608
609 // Set HasClusteredNodes to true for late stages where we have already
610 // collected it. That way pickNode() will not scan SDep's when not needed.
611 S.HasClusteredNodes = StageID > GCNSchedStageID::InitialSchedule;
612 S.HasExcessPressure = false;
613
614 return true;
615 }
616
bool UnclusteredRescheduleStage::initGCNRegion() {
618 if (!DAG.RescheduleRegions[RegionIdx])
619 return false;
620
621 return GCNSchedStage::initGCNRegion();
622 }
623
bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the last
  // stage because it had no clusters, or if we found it was testing critical
  // register pressure limits in the unclustered reschedule stage. The latter
  // is because we may not have been able to raise the min occupancy in the
  // previous stage, so the region may be overly constrained even if it was
  // already rescheduled.
631 if (!DAG.RegionsWithClusters[RegionIdx] && !DAG.RegionsWithHighRP[RegionIdx])
632 return false;
633
634 return GCNSchedStage::initGCNRegion();
635 }
636
bool PreRARematStage::initGCNRegion() {
638 if (!DAG.RescheduleRegions[RegionIdx])
639 return false;
640
641 return GCNSchedStage::initGCNRegion();
642 }
643
void GCNSchedStage::setupNewBlock() {
645 if (CurrentMBB)
646 DAG.finishBlock();
647
648 CurrentMBB = DAG.RegionBegin->getParent();
649 DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage, real RP will be collected after scheduling.
652 if (StageID == GCNSchedStageID::InitialSchedule)
653 DAG.computeBlockPressure(RegionIdx, CurrentMBB);
654 }
655
void GCNSchedStage::finalizeGCNRegion() {
657 DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
658 DAG.RescheduleRegions[RegionIdx] = false;
659 if (S.HasExcessPressure)
660 DAG.RegionsWithHighRP[RegionIdx] = true;
661
662 // Revert scheduling if we have dropped occupancy or there is some other
663 // reason that the original schedule is better.
664 checkScheduling();
665
666 DAG.exitRegion();
667 RegionIdx++;
668 }
669
void InitialScheduleStage::finalizeGCNRegion() {
671 // Record which regions have clustered nodes for the next unclustered
672 // reschedule stage.
673 assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
674 if (S.HasClusteredNodes)
675 DAG.RegionsWithClusters[RegionIdx] = true;
676
677 GCNSchedStage::finalizeGCNRegion();
678 }
679
void GCNSchedStage::checkScheduling() {
681 // Check the results of scheduling.
682 PressureAfter = DAG.getRealRegPressure(RegionIdx);
683 LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
684 PressureAfter.print(dbgs()));
685
686 if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
687 PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
688 DAG.Pressure[RegionIdx] = PressureAfter;
689 DAG.RegionsWithMinOcc[RegionIdx] =
690 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
691
    // Early out if we have achieved the occupancy target.
693 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
694 return;
695 }
696
697 unsigned WavesAfter =
698 std::min(S.getTargetOccupancy(), PressureAfter.getOccupancy(ST));
699 unsigned WavesBefore =
700 std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
701 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
702 << ", after " << WavesAfter << ".\n");
703
704 // We may not be able to keep the current target occupancy because of the just
705 // scheduled region. We might still be able to revert scheduling if the
706 // occupancy before was higher, or if the current schedule has register
707 // pressure higher than the excess limits which could lead to more spilling.
708 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
709
710 // Allow memory bound functions to drop to 4 waves if not limited by an
711 // attribute.
712 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
713 WavesAfter >= MFI.getMinAllowedOccupancy()) {
714 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
715 << MFI.getMinAllowedOccupancy() << " waves\n");
716 NewOccupancy = WavesAfter;
717 }
718
719 if (NewOccupancy < DAG.MinOccupancy) {
720 DAG.MinOccupancy = NewOccupancy;
721 MFI.limitOccupancy(DAG.MinOccupancy);
722 DAG.RegionsWithMinOcc.reset();
723 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
724 << DAG.MinOccupancy << ".\n");
725 }
726
727 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
728 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
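  // If the region now exceeds the function's absolute register limits, flag
  // it for rescheduling and record the high register pressure.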
729 if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
730 PressureAfter.getAGPRNum() > MaxVGPRs ||
731 PressureAfter.getSGPRNum() > MaxSGPRs) {
732 DAG.RescheduleRegions[RegionIdx] = true;
733 DAG.RegionsWithHighRP[RegionIdx] = true;
734 }
735
736 // Revert if this region's schedule would cause a drop in occupancy or
737 // spilling.
738 if (shouldRevertScheduling(WavesAfter)) {
739 revertScheduling();
740 } else {
741 DAG.Pressure[RegionIdx] = PressureAfter;
742 DAG.RegionsWithMinOcc[RegionIdx] =
743 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
744 }
745 }
746
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
748 if (WavesAfter < DAG.MinOccupancy)
749 return true;
750
751 return false;
752 }
753
bool InitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
755 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
756 return true;
757
758 if (mayCauseSpilling(WavesAfter))
759 return true;
760
761 assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
762 // Don't reschedule the region in the next stage if it doesn't have clusters.
763 if (!DAG.RegionsWithClusters[RegionIdx])
764 DAG.RescheduleRegions[RegionIdx] = false;
765
766 return false;
767 }
768
bool UnclusteredRescheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
770 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
771 return true;
772
  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
775 if (!PressureAfter.less(ST, PressureBefore)) {
776 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
777 return true;
778 }
779
780 return false;
781 }
782
bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
784 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
785 return true;
786
787 if (mayCauseSpilling(WavesAfter))
788 return true;
789
790 return false;
791 }
792
bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
794 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
795 return true;
796
797 if (mayCauseSpilling(WavesAfter))
798 return true;
799
800 return false;
801 }
802
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
804 if (WavesAfter <= MFI.getMinWavesPerEU() &&
805 !PressureAfter.less(ST, PressureBefore) &&
806 DAG.RescheduleRegions[RegionIdx]) {
807 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
808 return true;
809 }
810
811 return false;
812 }
813
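// Restore the original (pre-scheduling) instruction order of the current
// region, updating LiveIntervals and operand flags as instructions are moved
// back, then recompute the region boundaries and re-place debug values.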
void GCNSchedStage::revertScheduling() {
815 DAG.RegionsWithMinOcc[RegionIdx] =
816 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
817 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
818 DAG.RescheduleRegions[RegionIdx] =
819 DAG.RegionsWithClusters[RegionIdx] ||
820 (nextStage(StageID)) != GCNSchedStageID::UnclusteredReschedule;
821 DAG.RegionEnd = DAG.RegionBegin;
822 int SkippedDebugInstr = 0;
823 for (MachineInstr *MI : Unsched) {
824 if (MI->isDebugInstr()) {
825 ++SkippedDebugInstr;
826 continue;
827 }
828
829 if (MI->getIterator() != DAG.RegionEnd) {
830 DAG.BB->remove(MI);
831 DAG.BB->insert(DAG.RegionEnd, MI);
832 if (!MI->isDebugInstr())
833 DAG.LIS->handleMove(*MI, true);
834 }
835
836 // Reset read-undef flags and update them later.
837 for (auto &Op : MI->operands())
838 if (Op.isReg() && Op.isDef())
839 Op.setIsUndef(false);
840 RegisterOperands RegOpers;
841 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
842 if (!MI->isDebugInstr()) {
843 if (DAG.ShouldTrackLaneMasks) {
844 // Adjust liveness and add missing dead+read-undef flags.
845 SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
846 RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
847 } else {
848 // Adjust for missing dead-def flags.
849 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
850 }
851 }
852 DAG.RegionEnd = MI->getIterator();
853 ++DAG.RegionEnd;
854 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
855 }
856
  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past the debug instrs to the actual end of the scheduling region.
860 while (SkippedDebugInstr-- > 0)
861 ++DAG.RegionEnd;
862
863 // If Unsched.front() instruction is a debug instruction, this will actually
864 // shrink the region since we moved all debug instructions to the end of the
865 // block. Find the first instruction that is not a debug instruction.
866 DAG.RegionBegin = Unsched.front()->getIterator();
867 if (DAG.RegionBegin->isDebugInstr()) {
868 for (MachineInstr *MI : Unsched) {
869 if (MI->isDebugInstr())
870 continue;
871 DAG.RegionBegin = MI->getIterator();
872 break;
873 }
874 }
875
876 // Then move the debug instructions back into their correct place and set
877 // RegionBegin and RegionEnd if needed.
878 DAG.placeDebugValues();
879
880 DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
881 }
882
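// Find single-def, single-use VGPR values that are trivially rematerializable,
// are defined in a different block than their only use, and are live-in to at
// least one region currently at MinOccupancy. These defs are the candidates
// that sinkTriviallyRematInsts may later rematerialize next to their use.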
void PreRARematStage::collectRematerializableInstructions() {
884 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
885 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
886 Register Reg = Register::index2VirtReg(I);
887 if (!DAG.LIS->hasInterval(Reg))
888 continue;
889
890 // TODO: Handle AGPR and SGPR rematerialization
891 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
892 !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
893 continue;
894
895 MachineOperand *Op = DAG.MRI.getOneDef(Reg);
896 MachineInstr *Def = Op->getParent();
897 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
898 continue;
899
900 MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
901 if (Def->getParent() == UseI->getParent())
902 continue;
903
904 // We are only collecting defs that are defined in another block and are
905 // live-through or used inside regions at MinOccupancy. This means that the
906 // register must be in the live-in set for the region.
907 bool AddedToRematList = false;
908 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
909 auto It = DAG.LiveIns[I].find(Reg);
910 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
911 if (DAG.RegionsWithMinOcc[I]) {
912 RematerializableInsts[I][Def] = UseI;
913 AddedToRematList = true;
914 }
915
916 // Collect regions with rematerializable reg as live-in to avoid
917 // searching later when updating RP.
918 RematDefToLiveInRegions[Def].push_back(I);
919 }
920 }
921 if (!AddedToRematList)
922 RematDefToLiveInRegions.erase(Def);
923 }
924 }
925
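// Try to raise occupancy by rematerializing the collected defs directly before
// their single use. Region boundaries, live-ins and pressure are updated on
// temporary copies first; the changes are committed (and MinOccupancy bumped)
// only if every impacted region at MinOccupancy is improved, otherwise the
// newly inserted instructions are removed again.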
bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
928 // Temporary copies of cached variables we will be modifying and replacing if
929 // sinking succeeds.
930 SmallVector<
931 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
932 NewRegions;
933 DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
934 DenseMap<unsigned, GCNRegPressure> NewPressure;
935 BitVector NewRescheduleRegions;
936 LiveIntervals *LIS = DAG.LIS;
937
938 NewRegions.resize(DAG.Regions.size());
939 NewRescheduleRegions.resize(DAG.Regions.size());
940
  // Collect only regions that have a rematerializable def as a live-in.
942 SmallSet<unsigned, 16> ImpactedRegions;
943 for (const auto &It : RematDefToLiveInRegions)
944 ImpactedRegions.insert(It.second.begin(), It.second.end());
945
946 // Make copies of register pressure and live-ins cache that will be updated
947 // as we rematerialize.
948 for (auto Idx : ImpactedRegions) {
949 NewPressure[Idx] = DAG.Pressure[Idx];
950 NewLiveIns[Idx] = DAG.LiveIns[Idx];
951 }
952 NewRegions = DAG.Regions;
953 NewRescheduleRegions.reset();
954
955 DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
956 bool Improved = false;
957 for (auto I : ImpactedRegions) {
958 if (!DAG.RegionsWithMinOcc[I])
959 continue;
960
961 Improved = false;
962 int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
963 int SGPRUsage = NewPressure[I].getSGPRNum();
964
965 // TODO: Handle occupancy drop due to AGPR and SGPR.
966 // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
967 if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
968 break;
969
970 // The occupancy of this region could have been improved by a previous
971 // iteration's sinking of defs.
972 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
973 NewRescheduleRegions[I] = true;
974 Improved = true;
975 continue;
976 }
977
978 // First check if we have enough trivially rematerializable instructions to
979 // improve occupancy. Optimistically assume all instructions we are able to
980 // sink decreased RP.
981 int TotalSinkableRegs = 0;
982 for (const auto &It : RematerializableInsts[I]) {
983 MachineInstr *Def = It.first;
984 Register DefReg = Def->getOperand(0).getReg();
985 TotalSinkableRegs +=
986 SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
987 }
988 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
989 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
990 // If in the most optimistic scenario, we cannot improve occupancy, then do
991 // not attempt to sink any instructions.
992 if (OptimisticOccupancy <= DAG.MinOccupancy)
993 break;
994
995 unsigned ImproveOccupancy = 0;
996 SmallVector<MachineInstr *, 4> SinkedDefs;
997 for (auto &It : RematerializableInsts[I]) {
998 MachineInstr *Def = It.first;
999 MachineBasicBlock::iterator InsertPos =
1000 MachineBasicBlock::iterator(It.second);
1001 Register Reg = Def->getOperand(0).getReg();
1002 // Rematerialize MI to its use block. Since we are only rematerializing
1003 // instructions that do not have any virtual reg uses, we do not need to
1004 // call LiveRangeEdit::allUsesAvailableAt() and
1005 // LiveRangeEdit::canRematerializeAt().
1006 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1007 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1008 MachineInstr *NewMI = &*(--InsertPos);
1009 LIS->InsertMachineInstrInMaps(*NewMI);
1010 LIS->removeInterval(Reg);
1011 LIS->createAndComputeVirtRegInterval(Reg);
1012 InsertedMIToOldDef[NewMI] = Def;
1013
      // Update region boundaries in the scheduling region we sank from, since
      // we may sink an instruction that was at the beginning or end of its
      // region.
1016 DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1017 /*Removing =*/true);
1018
      // Update region boundaries in the region we sank to.
1020 DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1021
1022 LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
1026 for (auto Idx : RematDefToLiveInRegions[Def]) {
1027 NewLiveIns[Idx].erase(Reg);
1028 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1029 // Def is live-through and not used in this block.
1030 NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1031 } else {
1032 // Def is used and rematerialized into this block.
1033 GCNDownwardRPTracker RPT(*LIS);
1034 auto *NonDbgMI = &*skipDebugInstructionsForward(
1035 NewRegions[Idx].first, NewRegions[Idx].second);
1036 RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1037 RPT.advance(NewRegions[Idx].second);
1038 NewPressure[Idx] = RPT.moveMaxPressure();
1039 }
1040 }
1041
1042 SinkedDefs.push_back(Def);
1043 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1044 if (ImproveOccupancy > DAG.MinOccupancy)
1045 break;
1046 }
1047
    // Remove defs we just sank from all regions' lists of sinkable defs.
1049 for (auto &Def : SinkedDefs)
1050 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1051 RematerializableInsts[TrackedIdx].erase(Def);
1052
1053 if (ImproveOccupancy <= DAG.MinOccupancy)
1054 break;
1055
1056 NewRescheduleRegions[I] = true;
1057 Improved = true;
1058 }
1059
1060 if (!Improved) {
1061 // Occupancy was not improved for all regions that were at MinOccupancy.
1062 // Undo sinking and remove newly rematerialized instructions.
1063 for (auto &Entry : InsertedMIToOldDef) {
1064 MachineInstr *MI = Entry.first;
1065 MachineInstr *OldMI = Entry.second;
1066 Register Reg = MI->getOperand(0).getReg();
1067 LIS->RemoveMachineInstrFromMaps(*MI);
1068 MI->eraseFromParent();
1069 OldMI->clearRegisterDeads(Reg);
1070 LIS->removeInterval(Reg);
1071 LIS->createAndComputeVirtRegInterval(Reg);
1072 }
1073 return false;
1074 }
1075
1076 // Occupancy was improved for all regions.
1077 for (auto &Entry : InsertedMIToOldDef) {
1078 MachineInstr *MI = Entry.first;
1079 MachineInstr *OldMI = Entry.second;
1080
1081 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1082 DAG.BBLiveInMap.erase(OldMI);
1083
1084 // Remove OldMI and update LIS
1085 Register Reg = MI->getOperand(0).getReg();
1086 LIS->RemoveMachineInstrFromMaps(*OldMI);
1087 OldMI->eraseFromParent();
1088 LIS->removeInterval(Reg);
1089 LIS->createAndComputeVirtRegInterval(Reg);
1090 }
1091
1092 // Update live-ins, register pressure, and regions caches.
1093 for (auto Idx : ImpactedRegions) {
1094 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1095 DAG.Pressure[Idx] = NewPressure[Idx];
1096 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1097 }
1098 DAG.Regions = NewRegions;
1099 DAG.RescheduleRegions = NewRescheduleRegions;
1100
1101 SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1102 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1103
1104 return true;
1105 }
1106
1107 // Copied from MachineLICM
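// A def counts as trivially rematerializable here only if the target reports
// it as such and it has no virtual register uses, so it can be re-executed at
// its use point without extending any other live range.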
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1109 if (!DAG.TII->isTriviallyReMaterializable(MI))
1110 return false;
1111
1112 for (const MachineOperand &MO : MI.operands())
1113 if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
1114 return false;
1115
1116 return true;
1117 }
1118
1119 // When removing, we will have to check both beginning and ending of the region.
1120 // When inserting, we will only have to check if we are inserting NewMI in front
1121 // of a scheduling region and do not need to check the ending since we will only
1122 // ever be inserting before an already existing MI.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1127 unsigned I = 0, E = RegionBoundaries.size();
1128 // Search for first region of the block where MI is located
1129 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1130 ++I;
1131
1132 for (; I != E; ++I) {
1133 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1134 return;
1135
1136 if (Removing && MI == RegionBoundaries[I].first &&
1137 MI == RegionBoundaries[I].second) {
1138 // MI is in a region with size 1, after removing, the region will be
1139 // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
1140 RegionBoundaries[I] =
1141 std::make_pair(MI->getParent()->end(), MI->getParent()->end());
1142 return;
1143 }
1144 if (MI == RegionBoundaries[I].first) {
1145 if (Removing)
1146 RegionBoundaries[I] =
1147 std::make_pair(std::next(MI), RegionBoundaries[I].second);
1148 else
1149 // Inserted NewMI in front of region, set new RegionBegin to NewMI
1150 RegionBoundaries[I] = std::make_pair(MachineBasicBlock::iterator(NewMI),
1151 RegionBoundaries[I].second);
1152 return;
1153 }
1154 if (Removing && MI == RegionBoundaries[I].second) {
1155 RegionBoundaries[I] =
1156 std::make_pair(RegionBoundaries[I].first, std::prev(MI));
1157 return;
1158 }
1159 }
1160 }
1161