//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "misched"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C) { }

static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
                            const MachineFunction &MF) {

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
                                      ST.getOccupancyWithNumVGPRs(VGPRs));
  return std::min(MinRegOccupancy,
                  ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
                                                  *MF.getFunction()));
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     int SGPRPressure,
                                     int VGPRPressure,
                                     int SGPRExcessLimit,
                                     int VGPRExcessLimit,
                                     int SGPRCriticalLimit,
                                     int VGPRCriticalLimit) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  std::vector<unsigned> Pressure;
  std::vector<unsigned> MaxPressure;

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
  int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const int MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.
  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;
  VGPRExcessLimit -= ErrorMargin;
  SGPRExcessLimit -= ErrorMargin;

  // We only need to update the RPDelta for instructions that increase
  // register pressure. Instructions that decrease or keep reg pressure
  // the same will be marked as RegExcess in tryCandidate() when they
  // are compared with instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.

  VGPRCriticalLimit -= ErrorMargin;
  SGPRCriticalLimit -= ErrorMargin;

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
  unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
  unsigned SGPRExcessLimit =
    Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  unsigned VGPRExcessLimit =
    Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
  unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
  unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true);
  unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves);

  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure,
                  SGPRExcessLimit, VGPRExcessLimit,
                  SGPRCriticalLimit, VGPRCriticalLimit);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
    }
  }
}

static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
  switch (Reason) {
  default:
    return Reason;
  case GenericSchedulerBase::RegCritical:
  case GenericSchedulerBase::RegExcess:
    return -Reason;
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(BotCand));
  }

  // Check if the top Q has a better candidate.
  DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(TopCand));
  }

  // Pick best from BotCand and TopCand.
  DEBUG(
    dbgs() << "Top Cand: ";
    traceCandidate(TopCand);
    dbgs() << "Bot Cand: ";
    traceCandidate(BotCand);
  );
  SchedCandidate Cand;
  if (TopCand.Reason == BotCand.Reason) {
    Cand = BotCand;
    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
    TopCand.Reason = NoCand;
    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
    if (TopCand.Reason != NoCand) {
      Cand.setBest(TopCand);
    } else {
      TopCand.Reason = TopReason;
    }
  } else {
    if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = BotCand;
    } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = BotCand;
    } else {
      int TopRank = getBidirectionalReasonRank(TopCand.Reason);
      int BotRank = getBidirectionalReasonRank(BotCand.Reason);
      if (TopRank > BotRank) {
        Cand = TopCand;
      } else {
        Cand = BotCand;
      }
    }
  }
  DEBUG(
    dbgs() << "Picking: ";
    traceCandidate(Cand);
  );

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}