//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;

  SGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
  VGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
  if (TargetOccupancy) {
    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
  } else {
    SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::SReg_32);
    VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::VGPR_32);
  }

  SGPRCriticalLimit -= ErrorMargin;
  VGPRCriticalLimit -= ErrorMargin;
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const reference to
  // the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs.
  // This is rarely what we want, so when we report excess/critical register
  // pressure, we do it either only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
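      // (As of this writing, GenericScheduler::tryCandidate() only initializes
      // ResDelta when it is comparing nodes within a single SchedBoundary, so
      // with ZoneArg == nullptr the delta may still be default-constructed at
      // this point.)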
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
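  // When the two zones picked their candidates for different reasons, this
  // strategy deviates from GenericScheduler::pickNodeBidirectional(): a
  // candidate chosen for RegExcess/RegCritical that does not actually push
  // pressure above the corresponding limit (UnitInc <= 0) is preferred
  // outright; otherwise the pick with the higher-priority reason wins below.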
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand;
  if (TopCand.Reason == BotCand.Reason) {
    Cand = BotCand;
    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
    TopCand.Reason = NoCand;
    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
    if (TopCand.Reason != NoCand) {
      Cand.setBest(TopCand);
    } else {
      TopCand.Reason = TopReason;
    }
  } else {
    if (TopCand.Reason == RegExcess &&
        TopCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegExcess &&
               BotCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = BotCand;
    } else if (TopCand.Reason == RegCritical &&
               TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegCritical &&
               BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = BotCand;
    } else {
      if (BotCand.Reason > TopCand.Reason) {
        Cand = TopCand;
      } else {
        Cand = BotCand;
      }
    }
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                        std::unique_ptr<MachineSchedStrategy> S) :
  ScheduleDAGMILive(C, std::move(S)),
  ST(MF.getSubtarget<GCNSubtarget>()),
  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
  StartingOccupancy(MFI.getOccupancy()),
  MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  if (Stage == Collect) {
    // Just record regions at the first pass.
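    // The recorded regions are actually scheduled later, from
    // finalizeSchedule(), once every region of the function has been seen.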
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure: ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }

  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  RescheduleRegions[RegionIdx] = false;

  if (!LIS)
    return;

  // Check the results of scheduling.
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
  unsigned Occ = MFI.getOccupancy();
  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We could not keep the current target occupancy because of the region just
  // scheduled. Record the new occupancy for the next scheduling cycle.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
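  // (SIMachineFunctionInfo::getMinAllowedOccupancy() normally returns the
  // function's current occupancy; for functions flagged as memory bound or
  // needing a wave limiter it is relaxed to at most 4 waves.)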
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }
  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs)
    RescheduleRegions[RegionIdx] = true;

  if (WavesAfter >= MinOccupancy) {
    if (Stage == UnclusteredReschedule &&
        !PressureAfter.less(ST, PressureBefore)) {
      LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    } else if (WavesAfter > MFI.getMinWavesPerEU() ||
               PressureAfter.less(ST, PressureBefore) ||
               !RescheduleRegions[RegionIdx]) {
      Pressure[RegionIdx] = PressureAfter;
      return;
    } else {
      LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    }
  }

  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RescheduleRegions[RegionIdx] = true;
  RegionEnd = RegionBegin;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr())
      continue;

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  placeDebugValues();
}

GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be scheduled after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
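  // Within a block, Regions[] therefore lists regions from the bottom of the
  // block to the top; find the topmost region of MBB so the downward tracker
  // below can walk the block in program order.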
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
    RPTracker.reset(*I, &LRS);
  }

  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RescheduleRegions.set();

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > InitialSchedule) {
      if (!LIS)
        break;

      // Retry function scheduling if the resulting occupancy is lower than
      // the occupancy used for the first scheduling pass. This will give more
      // freedom to schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().
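      // Stages are tried in order: Collect, InitialSchedule,
      // UnclusteredReschedule, ClusteredLowOccupancyReschedule (see the Stage
      // values declared in GCNSchedStrategy.h).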

      if (Stage == UnclusteredReschedule) {
        if (RescheduleRegions.none())
          continue;
        LLVM_DEBUG(dbgs() <<
                   "Retrying function scheduling without clustering.\n");
      }

      if (Stage == ClusteredLowOccupancyReschedule) {
        if (StartingOccupancy <= MinOccupancy)
          break;

        LLVM_DEBUG(
            dbgs()
            << "Retrying function scheduling with lowest recorded occupancy "
            << MinOccupancy << ".\n");

        S.setTargetOccupancy(MinOccupancy);
      }
    }

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

    for (auto Region : Regions) {
      if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
        continue;

      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n From: " << *begin()
                        << " To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);
}