1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This contains a MachineSchedStrategy implementation for maximizing wave 12 /// occupancy on GCN hardware. 13 //===----------------------------------------------------------------------===// 14 15 #include "GCNSchedStrategy.h" 16 #include "AMDGPUSubtarget.h" 17 #include "SIInstrInfo.h" 18 #include "SIMachineFunctionInfo.h" 19 #include "SIRegisterInfo.h" 20 #include "llvm/CodeGen/RegisterClassInfo.h" 21 22 #define DEBUG_TYPE "misched" 23 24 using namespace llvm; 25 26 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( 27 const MachineSchedContext *C) : 28 GenericScheduler(C) { } 29 30 static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, 31 const MachineFunction &MF) { 32 33 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 34 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 35 unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs), 36 ST.getOccupancyWithNumVGPRs(VGPRs)); 37 return std::min(MinRegOccupancy, 38 ST.getOccupancyWithLocalMemSize(MFI->getLDSSize())); 39 } 40 41 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, 42 bool AtTop, const RegPressureTracker &RPTracker, 43 const SIRegisterInfo *SRI, 44 int SGPRPressure, 45 int VGPRPressure, 46 int SGPRExcessLimit, 47 int VGPRExcessLimit, 48 int SGPRCriticalLimit, 49 int VGPRCriticalLimit) { 50 51 Cand.SU = SU; 52 Cand.AtTop = AtTop; 53 54 // getDownwardPressure() and getUpwardPressure() make temporary changes to 55 // the the tracker, so we need to pass those function a non-const copy. 56 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 57 58 std::vector<unsigned> Pressure; 59 std::vector<unsigned> MaxPressure; 60 61 if (AtTop) 62 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); 63 else { 64 // FIXME: I think for bottom up scheduling, the register pressure is cached 65 // and can be retrieved by DAG->getPressureDif(SU). 66 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); 67 } 68 69 int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; 70 int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; 71 72 // If two instructions increase the pressure of different register sets 73 // by the same amount, the generic scheduler will prefer to schedule the 74 // instruction that increases the set with the least amount of registers, 75 // which in our case would be SGPRs. This is rarely what we want, so 76 // when we report excess/critical register pressure, we do it either 77 // only for VGPRs or only for SGPRs. 78 79 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. 80 const int MaxVGPRPressureInc = 16; 81 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; 82 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; 83 84 85 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold 86 // to increase the likelihood we don't go over the limits. We should improve 87 // the analysis to look through dependencies to find the path with the least 88 // register pressure. 89 // FIXME: This is also necessary, because some passes that run after 90 // scheduling and before regalloc increase register pressure. 91 const int ErrorMargin = 3; 92 VGPRExcessLimit -= ErrorMargin; 93 SGPRExcessLimit -= ErrorMargin; 94 95 // We only need to update the RPDelata for instructions that increase 96 // register pressure. Instructions that decrease or keep reg pressure 97 // the same will be marked as RegExcess in tryCandidate() when they 98 // are compared with instructions that increase the register pressure. 99 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { 100 Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); 101 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); 102 } 103 104 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { 105 Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet()); 106 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit); 107 } 108 109 // Register pressure is considered 'CRITICAL' if it is approaching a value 110 // that would reduce the wave occupancy for the execution unit. When 111 // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both 112 // has the same cost, so we don't need to prefer one over the other. 113 114 VGPRCriticalLimit -= ErrorMargin; 115 SGPRCriticalLimit -= ErrorMargin; 116 117 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; 118 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; 119 120 if (SGPRDelta >= 0 || VGPRDelta >= 0) { 121 if (SGPRDelta > VGPRDelta) { 122 Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet()); 123 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); 124 } else { 125 Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet()); 126 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); 127 } 128 } 129 } 130 131 // This function is mostly cut and pasted from 132 // GenericScheduler::pickNodeFromQueue() 133 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, 134 const CandPolicy &ZonePolicy, 135 const RegPressureTracker &RPTracker, 136 SchedCandidate &Cand) { 137 const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>(); 138 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); 139 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); 140 unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()]; 141 unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()]; 142 unsigned SGPRExcessLimit = 143 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass); 144 unsigned VGPRExcessLimit = 145 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); 146 unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); 147 unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves); 148 unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves); 149 150 ReadyQueue &Q = Zone.Available; 151 for (SUnit *SU : Q) { 152 153 SchedCandidate TryCand(ZonePolicy); 154 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, 155 SGPRPressure, VGPRPressure, 156 SGPRExcessLimit, VGPRExcessLimit, 157 SGPRCriticalLimit, VGPRCriticalLimit); 158 // Pass SchedBoundary only when comparing nodes from the same boundary. 159 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; 160 GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); 161 if (TryCand.Reason != NoCand) { 162 // Initialize resource delta if needed in case future heuristics query it. 163 if (TryCand.ResDelta == SchedResourceDelta()) 164 TryCand.initResourceDelta(Zone.DAG, SchedModel); 165 Cand.setBest(TryCand); 166 } 167 } 168 } 169 170 static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) { 171 switch (Reason) { 172 default: 173 return Reason; 174 case GenericSchedulerBase::RegCritical: 175 case GenericSchedulerBase::RegExcess: 176 return -Reason; 177 } 178 } 179 180 // This function is mostly cut and pasted from 181 // GenericScheduler::pickNodeBidirectional() 182 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { 183 // Schedule as far as possible in the direction of no choice. This is most 184 // efficient, but also provides the best heuristics for CriticalPSets. 185 if (SUnit *SU = Bot.pickOnlyChoice()) { 186 IsTopNode = false; 187 return SU; 188 } 189 if (SUnit *SU = Top.pickOnlyChoice()) { 190 IsTopNode = true; 191 return SU; 192 } 193 // Set the bottom-up policy based on the state of the current bottom zone and 194 // the instructions outside the zone, including the top zone. 195 CandPolicy BotPolicy; 196 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); 197 // Set the top-down policy based on the state of the current top zone and 198 // the instructions outside the zone, including the bottom zone. 199 CandPolicy TopPolicy; 200 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); 201 202 // See if BotCand is still valid (because we previously scheduled from Top). 203 DEBUG(dbgs() << "Picking from Bot:\n"); 204 if (!BotCand.isValid() || BotCand.SU->isScheduled || 205 BotCand.Policy != BotPolicy) { 206 BotCand.reset(CandPolicy()); 207 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); 208 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 209 } else { 210 DEBUG(traceCandidate(BotCand)); 211 } 212 213 // Check if the top Q has a better candidate. 214 DEBUG(dbgs() << "Picking from Top:\n"); 215 if (!TopCand.isValid() || TopCand.SU->isScheduled || 216 TopCand.Policy != TopPolicy) { 217 TopCand.reset(CandPolicy()); 218 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); 219 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 220 } else { 221 DEBUG(traceCandidate(TopCand)); 222 } 223 224 // Pick best from BotCand and TopCand. 225 DEBUG( 226 dbgs() << "Top Cand: "; 227 traceCandidate(BotCand); 228 dbgs() << "Bot Cand: "; 229 traceCandidate(TopCand); 230 ); 231 SchedCandidate Cand; 232 if (TopCand.Reason == BotCand.Reason) { 233 Cand = BotCand; 234 GenericSchedulerBase::CandReason TopReason = TopCand.Reason; 235 TopCand.Reason = NoCand; 236 GenericScheduler::tryCandidate(Cand, TopCand, nullptr); 237 if (TopCand.Reason != NoCand) { 238 Cand.setBest(TopCand); 239 } else { 240 TopCand.Reason = TopReason; 241 } 242 } else { 243 if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) { 244 Cand = TopCand; 245 } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) { 246 Cand = BotCand; 247 } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) { 248 Cand = TopCand; 249 } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) { 250 Cand = BotCand; 251 } else { 252 int TopRank = getBidirectionalReasonRank(TopCand.Reason); 253 int BotRank = getBidirectionalReasonRank(BotCand.Reason); 254 if (TopRank > BotRank) { 255 Cand = TopCand; 256 } else { 257 Cand = BotCand; 258 } 259 } 260 } 261 DEBUG( 262 dbgs() << "Picking: "; 263 traceCandidate(Cand); 264 ); 265 266 IsTopNode = Cand.AtTop; 267 return Cand.SU; 268 } 269 270 // This function is mostly cut and pasted from 271 // GenericScheduler::pickNode() 272 SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { 273 if (DAG->top() == DAG->bottom()) { 274 assert(Top.Available.empty() && Top.Pending.empty() && 275 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 276 return nullptr; 277 } 278 SUnit *SU; 279 do { 280 if (RegionPolicy.OnlyTopDown) { 281 SU = Top.pickOnlyChoice(); 282 if (!SU) { 283 CandPolicy NoPolicy; 284 TopCand.reset(NoPolicy); 285 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); 286 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 287 SU = TopCand.SU; 288 } 289 IsTopNode = true; 290 } else if (RegionPolicy.OnlyBottomUp) { 291 SU = Bot.pickOnlyChoice(); 292 if (!SU) { 293 CandPolicy NoPolicy; 294 BotCand.reset(NoPolicy); 295 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); 296 assert(BotCand.Reason != NoCand && "failed to find a candidate"); 297 SU = BotCand.SU; 298 } 299 IsTopNode = false; 300 } else { 301 SU = pickNodeBidirectional(IsTopNode); 302 } 303 } while (SU->isScheduled); 304 305 if (SU->isTopReady()) 306 Top.removeReady(SU); 307 if (SU->isBottomReady()) 308 Bot.removeReady(SU); 309 310 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); 311 return SU; 312 } 313