1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements hazard recognizers for scheduling on GCN processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "GCNHazardRecognizer.h" 15 #include "AMDGPUSubtarget.h" 16 #include "SIInstrInfo.h" 17 #include "llvm/CodeGen/ScheduleDAG.h" 18 #include "llvm/Support/Debug.h" 19 20 using namespace llvm; 21 22 //===----------------------------------------------------------------------===// 23 // Hazard Recoginizer Implementation 24 //===----------------------------------------------------------------------===// 25 26 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 27 CurrCycleInstr(nullptr), 28 MF(MF), 29 ST(MF.getSubtarget<SISubtarget>()) { 30 MaxLookAhead = 5; 31 } 32 33 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 34 EmitInstruction(SU->getInstr()); 35 } 36 37 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 38 CurrCycleInstr = MI; 39 } 40 41 static bool isDivFMas(unsigned Opcode) { 42 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64; 43 } 44 45 static bool isSGetReg(unsigned Opcode) { 46 return Opcode == AMDGPU::S_GETREG_B32; 47 } 48 49 static bool isSSetReg(unsigned Opcode) { 50 return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32; 51 } 52 53 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 54 55 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 56 AMDGPU::OpName::simm16); 57 return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 58 } 59 60 ScheduleHazardRecognizer::HazardType 61 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 62 MachineInstr *MI = SU->getInstr(); 63 64 if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 65 return NoopHazard; 66 67 if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) 68 return NoopHazard; 69 70 if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 71 return NoopHazard; 72 73 if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 74 return NoopHazard; 75 76 if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 77 return NoopHazard; 78 79 return NoHazard; 80 } 81 82 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) { 83 return PreEmitNoops(SU->getInstr()); 84 } 85 86 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 87 if (SIInstrInfo::isSMRD(*MI)) 88 return std::max(0, checkSMRDHazards(MI)); 89 90 if (SIInstrInfo::isVMEM(*MI)) 91 return std::max(0, checkVMEMHazards(MI)); 92 93 if (SIInstrInfo::isDPP(*MI)) 94 return std::max(0, checkDPPHazards(MI)); 95 96 if (isDivFMas(MI->getOpcode())) 97 return std::max(0, checkDivFMasHazards(MI)); 98 99 if (isSGetReg(MI->getOpcode())) 100 return std::max(0, checkGetRegHazards(MI)); 101 102 return 0; 103 } 104 105 void GCNHazardRecognizer::EmitNoop() { 106 EmittedInstrs.push_front(nullptr); 107 } 108 109 void GCNHazardRecognizer::AdvanceCycle() { 110 111 // When the scheduler detects a stall, it will call AdvanceCycle() without 112 // emitting any instructions. 113 if (!CurrCycleInstr) 114 return; 115 116 const SIInstrInfo *TII = ST.getInstrInfo(); 117 unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr); 118 119 // Keep track of emitted instructions 120 EmittedInstrs.push_front(CurrCycleInstr); 121 122 // Add a nullptr for each additional wait state after the first. Make sure 123 // not to add more than getMaxLookAhead() items to the list, since we 124 // truncate the list to that size right after this loop. 125 for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 126 i < e; ++i) { 127 EmittedInstrs.push_front(nullptr); 128 } 129 130 // getMaxLookahead() is the largest number of wait states we will ever need 131 // to insert, so there is no point in keeping track of more than that many 132 // wait states. 133 EmittedInstrs.resize(getMaxLookAhead()); 134 135 CurrCycleInstr = nullptr; 136 } 137 138 void GCNHazardRecognizer::RecedeCycle() { 139 llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 140 } 141 142 //===----------------------------------------------------------------------===// 143 // Helper Functions 144 //===----------------------------------------------------------------------===// 145 146 int GCNHazardRecognizer::getWaitStatesSinceDef( 147 unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { 148 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 149 150 int WaitStates = -1; 151 for (MachineInstr *MI : EmittedInstrs) { 152 ++WaitStates; 153 if (!MI || !IsHazardDef(MI)) 154 continue; 155 if (MI->modifiesRegister(Reg, TRI)) 156 return WaitStates; 157 } 158 return std::numeric_limits<int>::max(); 159 } 160 161 int GCNHazardRecognizer::getWaitStatesSinceSetReg( 162 function_ref<bool(MachineInstr *)> IsHazard) { 163 164 int WaitStates = -1; 165 for (MachineInstr *MI : EmittedInstrs) { 166 ++WaitStates; 167 if (!MI || !isSSetReg(MI->getOpcode()) || !IsHazard(MI)) 168 continue; 169 return WaitStates; 170 } 171 return std::numeric_limits<int>::max(); 172 } 173 174 //===----------------------------------------------------------------------===// 175 // No-op Hazard Detection 176 //===----------------------------------------------------------------------===// 177 178 static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops, 179 std::set<unsigned> &Set) { 180 for (const MachineOperand &Op : Ops) { 181 if (Op.isReg()) 182 Set.insert(Op.getReg()); 183 } 184 } 185 186 int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { 187 // SMEM soft clause are only present on VI+ 188 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 189 return 0; 190 191 // A soft-clause is any group of consecutive SMEM instructions. The 192 // instructions in this group may return out of order and/or may be 193 // replayed (i.e. the same instruction issued more than once). 194 // 195 // In order to handle these situations correctly we need to make sure 196 // that when a clause has more than one instruction, no instruction in the 197 // clause writes to a register that is read another instruction in the clause 198 // (including itself). If we encounter this situaion, we need to break the 199 // clause by inserting a non SMEM instruction. 200 201 std::set<unsigned> ClauseDefs; 202 std::set<unsigned> ClauseUses; 203 204 for (MachineInstr *MI : EmittedInstrs) { 205 206 // When we hit a non-SMEM instruction then we have passed the start of the 207 // clause and we can stop. 208 if (!MI || !SIInstrInfo::isSMRD(*MI)) 209 break; 210 211 addRegsToSet(MI->defs(), ClauseDefs); 212 addRegsToSet(MI->uses(), ClauseUses); 213 } 214 215 if (ClauseDefs.empty()) 216 return 0; 217 218 // FIXME: When we support stores, we need to make sure not to put loads and 219 // stores in the same clause if they use the same address. For now, just 220 // start a new clause whenever we see a store. 221 if (SMEM->mayStore()) 222 return 1; 223 224 addRegsToSet(SMEM->defs(), ClauseDefs); 225 addRegsToSet(SMEM->uses(), ClauseUses); 226 227 std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size())); 228 std::vector<unsigned>::iterator End; 229 230 End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(), 231 ClauseUses.begin(), ClauseUses.end(), Result.begin()); 232 233 // If the set of defs and uses intersect then we cannot add this instruction 234 // to the clause, so we have a hazard. 235 if (End != Result.begin()) 236 return 1; 237 238 return 0; 239 } 240 241 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 242 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 243 const SIInstrInfo *TII = ST.getInstrInfo(); 244 int WaitStatesNeeded = 0; 245 246 WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD); 247 248 // This SMRD hazard only affects SI. 249 if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS) 250 return WaitStatesNeeded; 251 252 // A read of an SGPR by SMRD instruction requires 4 wait states when the 253 // SGPR was written by a VALU instruction. 254 int SmrdSgprWaitStates = 4; 255 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 256 257 for (const MachineOperand &Use : SMRD->uses()) { 258 if (!Use.isReg()) 259 continue; 260 int WaitStatesNeededForUse = 261 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 262 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 263 } 264 return WaitStatesNeeded; 265 } 266 267 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 268 const SIInstrInfo *TII = ST.getInstrInfo(); 269 270 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 271 return 0; 272 273 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 274 275 // A read of an SGPR by a VMEM instruction requires 5 wait states when the 276 // SGPR was written by a VALU Instruction. 277 int VmemSgprWaitStates = 5; 278 int WaitStatesNeeded = 0; 279 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 280 281 for (const MachineOperand &Use : VMEM->uses()) { 282 if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 283 continue; 284 285 int WaitStatesNeededForUse = 286 VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 287 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 288 } 289 return WaitStatesNeeded; 290 } 291 292 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 293 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 294 295 // Check for DPP VGPR read after VALU VGPR write. 296 int DppVgprWaitStates = 2; 297 int WaitStatesNeeded = 0; 298 299 for (const MachineOperand &Use : DPP->uses()) { 300 if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 301 continue; 302 int WaitStatesNeededForUse = 303 DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg()); 304 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 305 } 306 307 return WaitStatesNeeded; 308 } 309 310 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 311 const SIInstrInfo *TII = ST.getInstrInfo(); 312 313 // v_div_fmas requires 4 wait states after a write to vcc from a VALU 314 // instruction. 315 const int DivFMasWaitStates = 4; 316 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 317 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn); 318 319 return DivFMasWaitStates - WaitStatesNeeded; 320 } 321 322 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 323 const SIInstrInfo *TII = ST.getInstrInfo(); 324 unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 325 326 const int GetRegWaitStates = 2; 327 auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) { 328 return GetRegHWReg == getHWReg(TII, *MI); 329 }; 330 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 331 332 return GetRegWaitStates - WaitStatesNeeded; 333 } 334