1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This contains a MachineSchedStrategy implementation for maximizing wave 11 /// occupancy on GCN hardware. 12 //===----------------------------------------------------------------------===// 13 14 #include "GCNSchedStrategy.h" 15 #include "SIMachineFunctionInfo.h" 16 17 #define DEBUG_TYPE "machine-scheduler" 18 19 using namespace llvm; 20 21 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( 22 const MachineSchedContext *C) : 23 GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false), 24 HasExcessPressure(false), MF(nullptr) { } 25 26 void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) { 27 GenericScheduler::initialize(DAG); 28 29 MF = &DAG->MF; 30 31 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 32 33 // FIXME: This is also necessary, because some passes that run after 34 // scheduling and before regalloc increase register pressure. 35 const unsigned ErrorMargin = 3; 36 37 SGPRExcessLimit = 38 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass); 39 VGPRExcessLimit = 40 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); 41 42 SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); 43 // Set the initial TargetOccupnacy to the maximum occupancy that we can 44 // achieve for this function. This effectively sets a lower bound on the 45 // 'Critical' register limits in the scheduler. 46 TargetOccupancy = MFI.getOccupancy(); 47 SGPRCriticalLimit = 48 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit); 49 VGPRCriticalLimit = 50 std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit); 51 52 // Subtract error margin from register limits and avoid overflow. 53 SGPRCriticalLimit = 54 std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit); 55 VGPRCriticalLimit = 56 std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit); 57 SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit); 58 VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit); 59 } 60 61 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, 62 bool AtTop, const RegPressureTracker &RPTracker, 63 const SIRegisterInfo *SRI, 64 unsigned SGPRPressure, 65 unsigned VGPRPressure) { 66 67 Cand.SU = SU; 68 Cand.AtTop = AtTop; 69 70 // getDownwardPressure() and getUpwardPressure() make temporary changes to 71 // the tracker, so we need to pass those function a non-const copy. 72 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 73 74 Pressure.clear(); 75 MaxPressure.clear(); 76 77 if (AtTop) 78 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); 79 else { 80 // FIXME: I think for bottom up scheduling, the register pressure is cached 81 // and can be retrieved by DAG->getPressureDif(SU). 82 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); 83 } 84 85 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; 86 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; 87 88 // If two instructions increase the pressure of different register sets 89 // by the same amount, the generic scheduler will prefer to schedule the 90 // instruction that increases the set with the least amount of registers, 91 // which in our case would be SGPRs. This is rarely what we want, so 92 // when we report excess/critical register pressure, we do it either 93 // only for VGPRs or only for SGPRs. 94 95 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. 96 const unsigned MaxVGPRPressureInc = 16; 97 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; 98 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; 99 100 101 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold 102 // to increase the likelihood we don't go over the limits. We should improve 103 // the analysis to look through dependencies to find the path with the least 104 // register pressure. 105 106 // We only need to update the RPDelta for instructions that increase register 107 // pressure. Instructions that decrease or keep reg pressure the same will be 108 // marked as RegExcess in tryCandidate() when they are compared with 109 // instructions that increase the register pressure. 110 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { 111 HasExcessPressure = true; 112 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32); 113 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); 114 } 115 116 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { 117 HasExcessPressure = true; 118 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32); 119 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit); 120 } 121 122 // Register pressure is considered 'CRITICAL' if it is approaching a value 123 // that would reduce the wave occupancy for the execution unit. When 124 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both 125 // has the same cost, so we don't need to prefer one over the other. 126 127 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; 128 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; 129 130 if (SGPRDelta >= 0 || VGPRDelta >= 0) { 131 HasExcessPressure = true; 132 if (SGPRDelta > VGPRDelta) { 133 Cand.RPDelta.CriticalMax = 134 PressureChange(AMDGPU::RegisterPressureSets::SReg_32); 135 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); 136 } else { 137 Cand.RPDelta.CriticalMax = 138 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32); 139 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); 140 } 141 } 142 } 143 144 // This function is mostly cut and pasted from 145 // GenericScheduler::pickNodeFromQueue() 146 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, 147 const CandPolicy &ZonePolicy, 148 const RegPressureTracker &RPTracker, 149 SchedCandidate &Cand) { 150 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); 151 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); 152 unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; 153 unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; 154 ReadyQueue &Q = Zone.Available; 155 for (SUnit *SU : Q) { 156 157 SchedCandidate TryCand(ZonePolicy); 158 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, 159 SGPRPressure, VGPRPressure); 160 // Pass SchedBoundary only when comparing nodes from the same boundary. 161 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; 162 GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); 163 if (TryCand.Reason != NoCand) { 164 // Initialize resource delta if needed in case future heuristics query it. 165 if (TryCand.ResDelta == SchedResourceDelta()) 166 TryCand.initResourceDelta(Zone.DAG, SchedModel); 167 Cand.setBest(TryCand); 168 LLVM_DEBUG(traceCandidate(Cand)); 169 } 170 } 171 } 172 173 // This function is mostly cut and pasted from 174 // GenericScheduler::pickNodeBidirectional() 175 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { 176 // Schedule as far as possible in the direction of no choice. This is most 177 // efficient, but also provides the best heuristics for CriticalPSets. 178 if (SUnit *SU = Bot.pickOnlyChoice()) { 179 IsTopNode = false; 180 return SU; 181 } 182 if (SUnit *SU = Top.pickOnlyChoice()) { 183 IsTopNode = true; 184 return SU; 185 } 186 // Set the bottom-up policy based on the state of the current bottom zone and 187 // the instructions outside the zone, including the top zone. 188 CandPolicy BotPolicy; 189 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); 190 // Set the top-down policy based on the state of the current top zone and 191 // the instructions outside the zone, including the bottom zone. 192 CandPolicy TopPolicy; 193 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); 194 195 // See if BotCand is still valid (because we previously scheduled from Top). 196 LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); 197 if (!BotCand.isValid() || BotCand.SU->isScheduled || 198 BotCand.Policy != BotPolicy) { 199 BotCand.reset(CandPolicy()); 200 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); 201 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 202 } else { 203 LLVM_DEBUG(traceCandidate(BotCand)); 204 #ifndef NDEBUG 205 if (VerifyScheduling) { 206 SchedCandidate TCand; 207 TCand.reset(CandPolicy()); 208 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); 209 assert(TCand.SU == BotCand.SU && 210 "Last pick result should correspond to re-picking right now"); 211 } 212 #endif 213 } 214 215 // Check if the top Q has a better candidate. 216 LLVM_DEBUG(dbgs() << "Picking from Top:\n"); 217 if (!TopCand.isValid() || TopCand.SU->isScheduled || 218 TopCand.Policy != TopPolicy) { 219 TopCand.reset(CandPolicy()); 220 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); 221 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 222 } else { 223 LLVM_DEBUG(traceCandidate(TopCand)); 224 #ifndef NDEBUG 225 if (VerifyScheduling) { 226 SchedCandidate TCand; 227 TCand.reset(CandPolicy()); 228 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); 229 assert(TCand.SU == TopCand.SU && 230 "Last pick result should correspond to re-picking right now"); 231 } 232 #endif 233 } 234 235 // Pick best from BotCand and TopCand. 236 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand); 237 dbgs() << "Bot Cand: "; traceCandidate(BotCand);); 238 SchedCandidate Cand = BotCand; 239 TopCand.Reason = NoCand; 240 GenericScheduler::tryCandidate(Cand, TopCand, nullptr); 241 if (TopCand.Reason != NoCand) { 242 Cand.setBest(TopCand); 243 } 244 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand);); 245 246 IsTopNode = Cand.AtTop; 247 return Cand.SU; 248 } 249 250 // This function is mostly cut and pasted from 251 // GenericScheduler::pickNode() 252 SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { 253 if (DAG->top() == DAG->bottom()) { 254 assert(Top.Available.empty() && Top.Pending.empty() && 255 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 256 return nullptr; 257 } 258 SUnit *SU; 259 do { 260 if (RegionPolicy.OnlyTopDown) { 261 SU = Top.pickOnlyChoice(); 262 if (!SU) { 263 CandPolicy NoPolicy; 264 TopCand.reset(NoPolicy); 265 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); 266 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 267 SU = TopCand.SU; 268 } 269 IsTopNode = true; 270 } else if (RegionPolicy.OnlyBottomUp) { 271 SU = Bot.pickOnlyChoice(); 272 if (!SU) { 273 CandPolicy NoPolicy; 274 BotCand.reset(NoPolicy); 275 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); 276 assert(BotCand.Reason != NoCand && "failed to find a candidate"); 277 SU = BotCand.SU; 278 } 279 IsTopNode = false; 280 } else { 281 SU = pickNodeBidirectional(IsTopNode); 282 } 283 } while (SU->isScheduled); 284 285 if (SU->isTopReady()) 286 Top.removeReady(SU); 287 if (SU->isBottomReady()) 288 Bot.removeReady(SU); 289 290 if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) { 291 for (SDep &Dep : SU->Preds) { 292 if (Dep.isCluster()) { 293 HasClusteredNodes = true; 294 break; 295 } 296 } 297 } 298 299 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " 300 << *SU->getInstr()); 301 return SU; 302 } 303 304 GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, 305 std::unique_ptr<MachineSchedStrategy> S) : 306 ScheduleDAGMILive(C, std::move(S)), 307 ST(MF.getSubtarget<GCNSubtarget>()), 308 MFI(*MF.getInfo<SIMachineFunctionInfo>()), 309 StartingOccupancy(MFI.getOccupancy()), 310 MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) { 311 312 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); 313 } 314 315 void GCNScheduleDAGMILive::schedule() { 316 if (Stage == Collect) { 317 // Just record regions at the first pass. 318 Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); 319 return; 320 } 321 322 std::vector<MachineInstr*> Unsched; 323 Unsched.reserve(NumRegionInstrs); 324 for (auto &I : *this) { 325 Unsched.push_back(&I); 326 } 327 328 GCNRegPressure PressureBefore; 329 if (LIS) { 330 PressureBefore = Pressure[RegionIdx]; 331 332 LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; 333 GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); 334 dbgs() << "Region live-in pressure: "; 335 llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); 336 dbgs() << "Region register pressure: "; 337 PressureBefore.print(dbgs())); 338 } 339 340 GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; 341 // Set HasClusteredNodes to true for late stages where we have already 342 // collected it. That way pickNode() will not scan SDep's when not needed. 343 S.HasClusteredNodes = Stage > InitialSchedule; 344 S.HasExcessPressure = false; 345 ScheduleDAGMILive::schedule(); 346 Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); 347 RescheduleRegions[RegionIdx] = false; 348 if (Stage == InitialSchedule && S.HasClusteredNodes) 349 RegionsWithClusters[RegionIdx] = true; 350 if (S.HasExcessPressure) 351 RegionsWithHighRP[RegionIdx] = true; 352 353 if (!LIS) 354 return; 355 356 // Check the results of scheduling. 357 auto PressureAfter = getRealRegPressure(); 358 359 LLVM_DEBUG(dbgs() << "Pressure after scheduling: "; 360 PressureAfter.print(dbgs())); 361 362 if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && 363 PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) { 364 Pressure[RegionIdx] = PressureAfter; 365 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); 366 return; 367 } 368 369 unsigned WavesAfter = 370 std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST)); 371 unsigned WavesBefore = 372 std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST)); 373 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore 374 << ", after " << WavesAfter << ".\n"); 375 376 // We may not be able to keep the current target occupancy because of the just 377 // scheduled region. We might still be able to revert scheduling if the 378 // occupancy before was higher, or if the current schedule has register 379 // pressure higher than the excess limits which could lead to more spilling. 380 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); 381 // Allow memory bound functions to drop to 4 waves if not limited by an 382 // attribute. 383 if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy && 384 WavesAfter >= MFI.getMinAllowedOccupancy()) { 385 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to " 386 << MFI.getMinAllowedOccupancy() << " waves\n"); 387 NewOccupancy = WavesAfter; 388 } 389 390 if (NewOccupancy < MinOccupancy) { 391 MinOccupancy = NewOccupancy; 392 MFI.limitOccupancy(MinOccupancy); 393 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " 394 << MinOccupancy << ".\n"); 395 } 396 397 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF); 398 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF); 399 if (PressureAfter.getVGPRNum(false) > MaxVGPRs || 400 PressureAfter.getAGPRNum() > MaxVGPRs || 401 PressureAfter.getSGPRNum() > MaxSGPRs) { 402 RescheduleRegions[RegionIdx] = true; 403 RegionsWithHighRP[RegionIdx] = true; 404 } 405 406 // If this condition is true, then either the occupancy before and after 407 // scheduling is the same, or we are allowing the occupancy to drop because 408 // the function is memory bound. Even if we are OK with the current occupancy, 409 // we still need to verify that we will not introduce any extra chance of 410 // spilling. 411 if (WavesAfter >= MinOccupancy) { 412 if (Stage == UnclusteredReschedule && 413 !PressureAfter.less(ST, PressureBefore)) { 414 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n"); 415 } else if (WavesAfter > MFI.getMinWavesPerEU() || 416 PressureAfter.less(ST, PressureBefore) || 417 !RescheduleRegions[RegionIdx]) { 418 Pressure[RegionIdx] = PressureAfter; 419 if (!RegionsWithClusters[RegionIdx] && 420 (Stage + 1) == UnclusteredReschedule) 421 RescheduleRegions[RegionIdx] = false; 422 return; 423 } else { 424 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n"); 425 } 426 } 427 428 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); 429 RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] || 430 (Stage + 1) != UnclusteredReschedule; 431 RegionEnd = RegionBegin; 432 int SkippedDebugInstr = 0; 433 for (MachineInstr *MI : Unsched) { 434 if (MI->isDebugInstr()) { 435 ++SkippedDebugInstr; 436 continue; 437 } 438 439 if (MI->getIterator() != RegionEnd) { 440 BB->remove(MI); 441 BB->insert(RegionEnd, MI); 442 if (!MI->isDebugInstr()) 443 LIS->handleMove(*MI, true); 444 } 445 // Reset read-undef flags and update them later. 446 for (auto &Op : MI->operands()) 447 if (Op.isReg() && Op.isDef()) 448 Op.setIsUndef(false); 449 RegisterOperands RegOpers; 450 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); 451 if (!MI->isDebugInstr()) { 452 if (ShouldTrackLaneMasks) { 453 // Adjust liveness and add missing dead+read-undef flags. 454 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); 455 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); 456 } else { 457 // Adjust for missing dead-def flags. 458 RegOpers.detectDeadDefs(*MI, *LIS); 459 } 460 } 461 RegionEnd = MI->getIterator(); 462 ++RegionEnd; 463 LLVM_DEBUG(dbgs() << "Scheduling " << *MI); 464 } 465 466 // After reverting schedule, debug instrs will now be at the end of the block 467 // and RegionEnd will point to the first debug instr. Increment RegionEnd 468 // pass debug instrs to the actual end of the scheduling region. 469 while (SkippedDebugInstr-- > 0) 470 ++RegionEnd; 471 472 // If Unsched.front() instruction is a debug instruction, this will actually 473 // shrink the region since we moved all debug instructions to the end of the 474 // block. Find the first instruction that is not a debug instruction. 475 RegionBegin = Unsched.front()->getIterator(); 476 if (RegionBegin->isDebugInstr()) { 477 for (MachineInstr *MI : Unsched) { 478 if (MI->isDebugInstr()) 479 continue; 480 RegionBegin = MI->getIterator(); 481 break; 482 } 483 } 484 485 // Then move the debug instructions back into their correct place and set 486 // RegionBegin and RegionEnd if needed. 487 placeDebugValues(); 488 489 Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); 490 } 491 492 GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { 493 GCNDownwardRPTracker RPTracker(*LIS); 494 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]); 495 return RPTracker.moveMaxPressure(); 496 } 497 498 void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { 499 GCNDownwardRPTracker RPTracker(*LIS); 500 501 // If the block has the only successor then live-ins of that successor are 502 // live-outs of the current block. We can reuse calculated live set if the 503 // successor will be sent to scheduling past current block. 504 const MachineBasicBlock *OnlySucc = nullptr; 505 if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) { 506 SlotIndexes *Ind = LIS->getSlotIndexes(); 507 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin())) 508 OnlySucc = *MBB->succ_begin(); 509 } 510 511 // Scheduler sends regions from the end of the block upwards. 512 size_t CurRegion = RegionIdx; 513 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion) 514 if (Regions[CurRegion].first->getParent() != MBB) 515 break; 516 --CurRegion; 517 518 auto I = MBB->begin(); 519 auto LiveInIt = MBBLiveIns.find(MBB); 520 auto &Rgn = Regions[CurRegion]; 521 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second); 522 if (LiveInIt != MBBLiveIns.end()) { 523 auto LiveIn = std::move(LiveInIt->second); 524 RPTracker.reset(*MBB->begin(), &LiveIn); 525 MBBLiveIns.erase(LiveInIt); 526 } else { 527 I = Rgn.first; 528 auto LRS = BBLiveInMap.lookup(NonDbgMI); 529 #ifdef EXPENSIVE_CHECKS 530 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS)); 531 #endif 532 RPTracker.reset(*I, &LRS); 533 } 534 535 for ( ; ; ) { 536 I = RPTracker.getNext(); 537 538 if (Regions[CurRegion].first == I || NonDbgMI == I) { 539 LiveIns[CurRegion] = RPTracker.getLiveRegs(); 540 RPTracker.clearMaxPressure(); 541 } 542 543 if (Regions[CurRegion].second == I) { 544 Pressure[CurRegion] = RPTracker.moveMaxPressure(); 545 if (CurRegion-- == RegionIdx) 546 break; 547 } 548 RPTracker.advanceToNext(); 549 RPTracker.advanceBeforeNext(); 550 } 551 552 if (OnlySucc) { 553 if (I != MBB->end()) { 554 RPTracker.advanceToNext(); 555 RPTracker.advance(MBB->end()); 556 } 557 RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); 558 RPTracker.advanceBeforeNext(); 559 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); 560 } 561 } 562 563 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> 564 GCNScheduleDAGMILive::getBBLiveInMap() const { 565 assert(!Regions.empty()); 566 std::vector<MachineInstr *> BBStarters; 567 BBStarters.reserve(Regions.size()); 568 auto I = Regions.rbegin(), E = Regions.rend(); 569 auto *BB = I->first->getParent(); 570 do { 571 auto *MI = &*skipDebugInstructionsForward(I->first, I->second); 572 BBStarters.push_back(MI); 573 do { 574 ++I; 575 } while (I != E && I->first->getParent() == BB); 576 } while (I != E); 577 return getLiveRegMap(BBStarters, false /*After*/, *LIS); 578 } 579 580 void GCNScheduleDAGMILive::finalizeSchedule() { 581 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); 582 583 LiveIns.resize(Regions.size()); 584 Pressure.resize(Regions.size()); 585 RescheduleRegions.resize(Regions.size()); 586 RegionsWithClusters.resize(Regions.size()); 587 RegionsWithHighRP.resize(Regions.size()); 588 RescheduleRegions.set(); 589 RegionsWithClusters.reset(); 590 RegionsWithHighRP.reset(); 591 592 if (!Regions.empty()) 593 BBLiveInMap = getBBLiveInMap(); 594 595 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations; 596 597 do { 598 Stage++; 599 RegionIdx = 0; 600 MachineBasicBlock *MBB = nullptr; 601 602 if (Stage > InitialSchedule) { 603 if (!LIS) 604 break; 605 606 // Retry function scheduling if we found resulting occupancy and it is 607 // lower than used for first pass scheduling. This will give more freedom 608 // to schedule low register pressure blocks. 609 // Code is partially copied from MachineSchedulerBase::scheduleRegions(). 610 611 if (Stage == UnclusteredReschedule) { 612 if (RescheduleRegions.none()) 613 continue; 614 LLVM_DEBUG(dbgs() << 615 "Retrying function scheduling without clustering.\n"); 616 } 617 618 if (Stage == ClusteredLowOccupancyReschedule) { 619 if (StartingOccupancy <= MinOccupancy) 620 break; 621 622 LLVM_DEBUG( 623 dbgs() 624 << "Retrying function scheduling with lowest recorded occupancy " 625 << MinOccupancy << ".\n"); 626 } 627 } 628 629 if (Stage == UnclusteredReschedule) 630 SavedMutations.swap(Mutations); 631 632 for (auto Region : Regions) { 633 if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) || 634 (Stage == ClusteredLowOccupancyReschedule && 635 !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) { 636 637 ++RegionIdx; 638 continue; 639 } 640 641 RegionBegin = Region.first; 642 RegionEnd = Region.second; 643 644 if (RegionBegin->getParent() != MBB) { 645 if (MBB) finishBlock(); 646 MBB = RegionBegin->getParent(); 647 startBlock(MBB); 648 if (Stage == InitialSchedule) 649 computeBlockPressure(MBB); 650 } 651 652 unsigned NumRegionInstrs = std::distance(begin(), end()); 653 enterRegion(MBB, begin(), end(), NumRegionInstrs); 654 655 // Skip empty scheduling regions (0 or 1 schedulable instructions). 656 if (begin() == end() || begin() == std::prev(end())) { 657 exitRegion(); 658 continue; 659 } 660 661 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); 662 LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " " 663 << MBB->getName() << "\n From: " << *begin() 664 << " To: "; 665 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; 666 else dbgs() << "End"; 667 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); 668 669 schedule(); 670 671 exitRegion(); 672 ++RegionIdx; 673 } 674 finishBlock(); 675 676 if (Stage == UnclusteredReschedule) 677 SavedMutations.swap(Mutations); 678 } while (Stage != LastStage); 679 } 680