1cb6ba62dSTom Stellard //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2cb6ba62dSTom Stellard //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cb6ba62dSTom Stellard //
7cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
8cb6ba62dSTom Stellard //
9cb6ba62dSTom Stellard // This file implements hazard recognizers for scheduling on GCN processors.
10cb6ba62dSTom Stellard //
11cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
12cb6ba62dSTom Stellard 
13734bb7bbSEugene Zelenko #include "GCNHazardRecognizer.h"
14560d7e04Sdfukalov #include "GCNSubtarget.h"
15560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
161e15adbaSAustin Kerbow #include "SIMachineFunctionInfo.h"
17734bb7bbSEugene Zelenko #include "llvm/CodeGen/MachineFunction.h"
18cb6ba62dSTom Stellard #include "llvm/CodeGen/ScheduleDAG.h"
196a87e9b0Sdfukalov #include "llvm/Support/TargetParser.h"
20cb6ba62dSTom Stellard 
21cb6ba62dSTom Stellard using namespace llvm;
22cb6ba62dSTom Stellard 
231e15adbaSAustin Kerbow namespace {
241e15adbaSAustin Kerbow 
251e15adbaSAustin Kerbow struct MFMAPaddingRatioParser : public cl::parser<unsigned> {
MFMAPaddingRatioParser__anon1717562d0111::MFMAPaddingRatioParser261e15adbaSAustin Kerbow   MFMAPaddingRatioParser(cl::Option &O) : cl::parser<unsigned>(O) {}
271e15adbaSAustin Kerbow 
parse__anon1717562d0111::MFMAPaddingRatioParser281e15adbaSAustin Kerbow   bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
291e15adbaSAustin Kerbow     if (Arg.getAsInteger(0, Value))
301e15adbaSAustin Kerbow       return O.error("'" + Arg + "' value invalid for uint argument!");
311e15adbaSAustin Kerbow 
321e15adbaSAustin Kerbow     if (Value > 100)
331e15adbaSAustin Kerbow       return O.error("'" + Arg + "' value must be in the range [0, 100]!");
341e15adbaSAustin Kerbow 
351e15adbaSAustin Kerbow     return false;
361e15adbaSAustin Kerbow   }
371e15adbaSAustin Kerbow };
381e15adbaSAustin Kerbow 
391e15adbaSAustin Kerbow } // end anonymous namespace
401e15adbaSAustin Kerbow 
411e15adbaSAustin Kerbow static cl::opt<unsigned, false, MFMAPaddingRatioParser>
421e15adbaSAustin Kerbow     MFMAPaddingRatio("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden,
431e15adbaSAustin Kerbow                      cl::desc("Fill a percentage of the latency between "
441e15adbaSAustin Kerbow                               "neighboring MFMA with s_nops."));
451e15adbaSAustin Kerbow 
46cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
476527b2a4SSebastian Neubauer // Hazard Recognizer Implementation
48cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
49cb6ba62dSTom Stellard 
50e0c382a9SPiotr Sobczak static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
51e0c382a9SPiotr Sobczak                                                  const GCNSubtarget &ST);
52e0c382a9SPiotr Sobczak 
GCNHazardRecognizer(const MachineFunction & MF)53cb6ba62dSTom Stellard GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
54f92ed696SStanislav Mekhanoshin   IsHazardRecognizerMode(false),
55cb6ba62dSTom Stellard   CurrCycleInstr(nullptr),
5643e92fe3SMatt Arsenault   MF(MF),
575bfbae5cSTom Stellard   ST(MF.getSubtarget<GCNSubtarget>()),
5803c67d1eSMatt Arsenault   TII(*ST.getInstrInfo()),
5903c67d1eSMatt Arsenault   TRI(TII.getRegisterInfo()),
6003c67d1eSMatt Arsenault   ClauseUses(TRI.getNumRegUnits()),
6103c67d1eSMatt Arsenault   ClauseDefs(TRI.getNumRegUnits()) {
62a8d9d507SStanislav Mekhanoshin   MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 19 : 5;
637d2019bbSStanislav Mekhanoshin   TSchedModel.init(&ST);
64e0c382a9SPiotr Sobczak   RunLdsBranchVmemWARHazardFixup = shouldRunLdsBranchVmemWARHazardFixup(MF, ST);
65cb6ba62dSTom Stellard }
66cb6ba62dSTom Stellard 
Reset()67de518673SAustin Kerbow void GCNHazardRecognizer::Reset() {
68de518673SAustin Kerbow   EmittedInstrs.clear();
69de518673SAustin Kerbow }
70de518673SAustin Kerbow 
EmitInstruction(SUnit * SU)71cb6ba62dSTom Stellard void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
72cb6ba62dSTom Stellard   EmitInstruction(SU->getInstr());
73cb6ba62dSTom Stellard }
74cb6ba62dSTom Stellard 
EmitInstruction(MachineInstr * MI)75cb6ba62dSTom Stellard void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
76cb6ba62dSTom Stellard   CurrCycleInstr = MI;
77cb6ba62dSTom Stellard }
78cb6ba62dSTom Stellard 
isDivFMas(unsigned Opcode)795ab6154dSTom Stellard static bool isDivFMas(unsigned Opcode) {
80314e29edSJoe Nash   return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64;
815ab6154dSTom Stellard }
825ab6154dSTom Stellard 
isSGetReg(unsigned Opcode)83961811c9STom Stellard static bool isSGetReg(unsigned Opcode) {
84961811c9STom Stellard   return Opcode == AMDGPU::S_GETREG_B32;
85961811c9STom Stellard }
86961811c9STom Stellard 
isSSetReg(unsigned Opcode)87961811c9STom Stellard static bool isSSetReg(unsigned Opcode) {
8890777e29SJay Foad   switch (Opcode) {
8990777e29SJay Foad   case AMDGPU::S_SETREG_B32:
9090777e29SJay Foad   case AMDGPU::S_SETREG_B32_mode:
9190777e29SJay Foad   case AMDGPU::S_SETREG_IMM32_B32:
9290777e29SJay Foad   case AMDGPU::S_SETREG_IMM32_B32_mode:
9390777e29SJay Foad     return true;
9490777e29SJay Foad   }
9590777e29SJay Foad   return false;
96961811c9STom Stellard }
97961811c9STom Stellard 
isRWLane(unsigned Opcode)9804051b5fSTom Stellard static bool isRWLane(unsigned Opcode) {
9904051b5fSTom Stellard   return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
10004051b5fSTom Stellard }
10104051b5fSTom Stellard 
isRFE(unsigned Opcode)102aea899e2STom Stellard static bool isRFE(unsigned Opcode) {
103aea899e2STom Stellard   return Opcode == AMDGPU::S_RFE_B64;
104aea899e2STom Stellard }
105aea899e2STom Stellard 
isSMovRel(unsigned Opcode)106e823d92fSMatt Arsenault static bool isSMovRel(unsigned Opcode) {
10759ece95fSMatt Arsenault   switch (Opcode) {
10859ece95fSMatt Arsenault   case AMDGPU::S_MOVRELS_B32:
10959ece95fSMatt Arsenault   case AMDGPU::S_MOVRELS_B64:
11059ece95fSMatt Arsenault   case AMDGPU::S_MOVRELD_B32:
11159ece95fSMatt Arsenault   case AMDGPU::S_MOVRELD_B64:
11259ece95fSMatt Arsenault     return true;
11359ece95fSMatt Arsenault   default:
11459ece95fSMatt Arsenault     return false;
115e823d92fSMatt Arsenault   }
116e823d92fSMatt Arsenault }
117e823d92fSMatt Arsenault 
isDGEMM(unsigned Opcode)118a8d9d507SStanislav Mekhanoshin static bool isDGEMM(unsigned Opcode) {
11964838ba3SStanislav Mekhanoshin   return AMDGPU::getMAIIsDGEMM(Opcode);
120a8d9d507SStanislav Mekhanoshin }
121a8d9d507SStanislav Mekhanoshin 
isXDL(const GCNSubtarget & ST,const MachineInstr & MI)122a8d9d507SStanislav Mekhanoshin static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
123a8d9d507SStanislav Mekhanoshin   unsigned Opcode = MI.getOpcode();
124a8d9d507SStanislav Mekhanoshin 
125a8d9d507SStanislav Mekhanoshin   if (!SIInstrInfo::isMAI(MI) ||
126a8d9d507SStanislav Mekhanoshin       isDGEMM(Opcode) ||
127a8d9d507SStanislav Mekhanoshin       Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
128a8d9d507SStanislav Mekhanoshin       Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
129a8d9d507SStanislav Mekhanoshin     return false;
130a8d9d507SStanislav Mekhanoshin 
131cad9de71SStanislav Mekhanoshin   if (!ST.hasGFX940Insts())
132a8d9d507SStanislav Mekhanoshin     return true;
133cad9de71SStanislav Mekhanoshin 
134cad9de71SStanislav Mekhanoshin   return AMDGPU::getMAIIsGFX940XDL(Opcode);
135a8d9d507SStanislav Mekhanoshin }
136a8d9d507SStanislav Mekhanoshin 
isSendMsgTraceDataOrGDS(const SIInstrInfo & TII,const MachineInstr & MI)137c5cec5e1SMarek Olsak static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
138c5cec5e1SMarek Olsak                                     const MachineInstr &MI) {
139c5cec5e1SMarek Olsak   if (TII.isAlwaysGDS(MI.getOpcode()))
140c5cec5e1SMarek Olsak     return true;
141c5cec5e1SMarek Olsak 
142a41351e3SMatt Arsenault   switch (MI.getOpcode()) {
143a41351e3SMatt Arsenault   case AMDGPU::S_SENDMSG:
144a41351e3SMatt Arsenault   case AMDGPU::S_SENDMSGHALT:
145a41351e3SMatt Arsenault   case AMDGPU::S_TTRACEDATA:
146a41351e3SMatt Arsenault     return true;
147c5cec5e1SMarek Olsak   // These DS opcodes don't support GDS.
148c5cec5e1SMarek Olsak   case AMDGPU::DS_NOP:
149c5cec5e1SMarek Olsak   case AMDGPU::DS_PERMUTE_B32:
150c5cec5e1SMarek Olsak   case AMDGPU::DS_BPERMUTE_B32:
151c5cec5e1SMarek Olsak     return false;
152a41351e3SMatt Arsenault   default:
153c5cec5e1SMarek Olsak     if (TII.isDS(MI.getOpcode())) {
154c5cec5e1SMarek Olsak       int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
155c5cec5e1SMarek Olsak                                            AMDGPU::OpName::gds);
156c5cec5e1SMarek Olsak       if (MI.getOperand(GDS).getImm())
157c5cec5e1SMarek Olsak         return true;
158c5cec5e1SMarek Olsak     }
159a41351e3SMatt Arsenault     return false;
160a41351e3SMatt Arsenault   }
161a41351e3SMatt Arsenault }
162a41351e3SMatt Arsenault 
isPermlane(const MachineInstr & MI)1635f581c9fSStanislav Mekhanoshin static bool isPermlane(const MachineInstr &MI) {
1645f581c9fSStanislav Mekhanoshin   unsigned Opcode = MI.getOpcode();
165314e29edSJoe Nash   return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
166314e29edSJoe Nash          Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
1675f581c9fSStanislav Mekhanoshin }
1685f581c9fSStanislav Mekhanoshin 
isLdsDma(const MachineInstr & MI)16963f21f4cSStanislav Mekhanoshin static bool isLdsDma(const MachineInstr &MI) {
17063f21f4cSStanislav Mekhanoshin   return SIInstrInfo::isVALU(MI) &&
17163f21f4cSStanislav Mekhanoshin          (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI));
17263f21f4cSStanislav Mekhanoshin }
17363f21f4cSStanislav Mekhanoshin 
getHWReg(const SIInstrInfo * TII,const MachineInstr & RegInstr)174aea899e2STom Stellard static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
175961811c9STom Stellard   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
176961811c9STom Stellard                                                      AMDGPU::OpName::simm16);
177961811c9STom Stellard   return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
178961811c9STom Stellard }
179961811c9STom Stellard 
180cb6ba62dSTom Stellard ScheduleHazardRecognizer::HazardType
getHazardType(SUnit * SU,int Stalls)181cb6ba62dSTom Stellard GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
182cb6ba62dSTom Stellard   MachineInstr *MI = SU->getInstr();
183ebdcef20SAustin Kerbow   // If we are not in "HazardRecognizerMode" and therefore not being run from
184ebdcef20SAustin Kerbow   // the scheduler, track possible stalls from hazards but don't insert noops.
185ebdcef20SAustin Kerbow   auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
186ebdcef20SAustin Kerbow 
1878a3d3a9aSAustin Kerbow   if (MI->isBundle())
1888a3d3a9aSAustin Kerbow    return NoHazard;
189cb6ba62dSTom Stellard 
1905c190d05SAaron Ballman   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
191ebdcef20SAustin Kerbow     return HazardType;
192cb6ba62dSTom Stellard 
19351d1415aSStanislav Mekhanoshin   if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
194ebdcef20SAustin Kerbow     return HazardType;
19551d1415aSStanislav Mekhanoshin 
196bdf7f81bSStanislav Mekhanoshin   if (checkFPAtomicToDenormModeHazard(MI) > 0)
197ebdcef20SAustin Kerbow     return HazardType;
198bdf7f81bSStanislav Mekhanoshin 
19951d1415aSStanislav Mekhanoshin   if (ST.hasNoDataDepHazard())
20051d1415aSStanislav Mekhanoshin     return NoHazard;
20151d1415aSStanislav Mekhanoshin 
202ed745839SJay Foad   // FIXME: Should flat be considered vmem?
203ed745839SJay Foad   if ((SIInstrInfo::isVMEM(*MI) ||
204ed745839SJay Foad        SIInstrInfo::isFLAT(*MI))
205ed745839SJay Foad       && checkVMEMHazards(MI) > 0)
206ed745839SJay Foad     return HazardType;
207ed745839SJay Foad 
208b133fbb9STom Stellard   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
209ebdcef20SAustin Kerbow     return HazardType;
210b133fbb9STom Stellard 
211a27007ebSTom Stellard   if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
212ebdcef20SAustin Kerbow     return HazardType;
213a27007ebSTom Stellard 
2145ab6154dSTom Stellard   if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
215ebdcef20SAustin Kerbow     return HazardType;
2165ab6154dSTom Stellard 
21704051b5fSTom Stellard   if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
218ebdcef20SAustin Kerbow     return HazardType;
21904051b5fSTom Stellard 
220a8d9d507SStanislav Mekhanoshin   if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
221a8d9d507SStanislav Mekhanoshin        SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
222a8d9d507SStanislav Mekhanoshin        SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
223a8d9d507SStanislav Mekhanoshin     return HazardType;
224a8d9d507SStanislav Mekhanoshin 
225961811c9STom Stellard   if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
226ebdcef20SAustin Kerbow     return HazardType;
227961811c9STom Stellard 
22830d30824STom Stellard   if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
229ebdcef20SAustin Kerbow     return HazardType;
23030d30824STom Stellard 
231aea899e2STom Stellard   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
232ebdcef20SAustin Kerbow     return HazardType;
233aea899e2STom Stellard 
23463f21f4cSStanislav Mekhanoshin   if (((ST.hasReadM0MovRelInterpHazard() &&
23563f21f4cSStanislav Mekhanoshin         (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
23663f21f4cSStanislav Mekhanoshin        (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
23763f21f4cSStanislav Mekhanoshin        (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
23863f21f4cSStanislav Mekhanoshin        (ST.hasReadM0LdsDirectHazard() &&
23963f21f4cSStanislav Mekhanoshin         MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
240e823d92fSMatt Arsenault       checkReadM0Hazards(MI) > 0)
241ebdcef20SAustin Kerbow     return HazardType;
242e823d92fSMatt Arsenault 
2437d2019bbSStanislav Mekhanoshin   if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
244ebdcef20SAustin Kerbow     return HazardType;
2457d2019bbSStanislav Mekhanoshin 
24643a38dc2SStanislav Mekhanoshin   if ((SIInstrInfo::isVMEM(*MI) ||
24743a38dc2SStanislav Mekhanoshin        SIInstrInfo::isFLAT(*MI) ||
24843a38dc2SStanislav Mekhanoshin        SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
249ebdcef20SAustin Kerbow     return HazardType;
2507d2019bbSStanislav Mekhanoshin 
251d29f24acSMark Searles   if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
252ebdcef20SAustin Kerbow     return HazardType;
253d29f24acSMark Searles 
254cb6ba62dSTom Stellard   return NoHazard;
255cb6ba62dSTom Stellard }
256cb6ba62dSTom Stellard 
insertNoopsInBundle(MachineInstr * MI,const SIInstrInfo & TII,unsigned Quantity)2578b127a86SAustin Kerbow static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
2588b127a86SAustin Kerbow                                 unsigned Quantity) {
2598b127a86SAustin Kerbow   while (Quantity > 0) {
26069f5105fSJay Foad     unsigned Arg = std::min(Quantity, 8u);
26169f5105fSJay Foad     Quantity -= Arg;
2628a3d3a9aSAustin Kerbow     BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
26369f5105fSJay Foad         .addImm(Arg - 1);
2648b127a86SAustin Kerbow   }
2658a3d3a9aSAustin Kerbow }
2668a3d3a9aSAustin Kerbow 
2671e15adbaSAustin Kerbow unsigned
getMFMAPipelineWaitStates(const MachineInstr & MI) const2681e15adbaSAustin Kerbow GCNHazardRecognizer::getMFMAPipelineWaitStates(const MachineInstr &MI) const {
2691e15adbaSAustin Kerbow   const MCSchedClassDesc *SC = TSchedModel.resolveSchedClass(&MI);
2701e15adbaSAustin Kerbow   assert(TSchedModel.getWriteProcResBegin(SC) !=
2711e15adbaSAustin Kerbow          TSchedModel.getWriteProcResEnd(SC));
2721e15adbaSAustin Kerbow   return TSchedModel.getWriteProcResBegin(SC)->Cycles;
2731e15adbaSAustin Kerbow }
2741e15adbaSAustin Kerbow 
processBundle()2758a3d3a9aSAustin Kerbow void GCNHazardRecognizer::processBundle() {
2768a3d3a9aSAustin Kerbow   MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
2778a3d3a9aSAustin Kerbow   MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
2788a3d3a9aSAustin Kerbow   // Check bundled MachineInstr's for hazards.
2798a3d3a9aSAustin Kerbow   for (; MI != E && MI->isInsideBundle(); ++MI) {
2808a3d3a9aSAustin Kerbow     CurrCycleInstr = &*MI;
2818a3d3a9aSAustin Kerbow     unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
2828a3d3a9aSAustin Kerbow 
2838b127a86SAustin Kerbow     if (IsHazardRecognizerMode) {
2848a3d3a9aSAustin Kerbow       fixHazards(CurrCycleInstr);
2858a3d3a9aSAustin Kerbow 
2868b127a86SAustin Kerbow       insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
2878b127a86SAustin Kerbow     }
2888a3d3a9aSAustin Kerbow 
2898a3d3a9aSAustin Kerbow     // It’s unnecessary to track more than MaxLookAhead instructions. Since we
2908a3d3a9aSAustin Kerbow     // include the bundled MI directly after, only add a maximum of
2918a3d3a9aSAustin Kerbow     // (MaxLookAhead - 1) noops to EmittedInstrs.
2928a3d3a9aSAustin Kerbow     for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
2938a3d3a9aSAustin Kerbow       EmittedInstrs.push_front(nullptr);
2948a3d3a9aSAustin Kerbow 
2958a3d3a9aSAustin Kerbow     EmittedInstrs.push_front(CurrCycleInstr);
2968a3d3a9aSAustin Kerbow     EmittedInstrs.resize(MaxLookAhead);
2978a3d3a9aSAustin Kerbow   }
2988a3d3a9aSAustin Kerbow   CurrCycleInstr = nullptr;
2998a3d3a9aSAustin Kerbow }
3008a3d3a9aSAustin Kerbow 
PreEmitNoops(MachineInstr * MI)301cb6ba62dSTom Stellard unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
302f92ed696SStanislav Mekhanoshin   IsHazardRecognizerMode = true;
303f92ed696SStanislav Mekhanoshin   CurrCycleInstr = MI;
304f92ed696SStanislav Mekhanoshin   unsigned W = PreEmitNoopsCommon(MI);
3058a3d3a9aSAustin Kerbow   fixHazards(MI);
306f92ed696SStanislav Mekhanoshin   CurrCycleInstr = nullptr;
307f92ed696SStanislav Mekhanoshin   return W;
308f92ed696SStanislav Mekhanoshin }
309f92ed696SStanislav Mekhanoshin 
PreEmitNoopsCommon(MachineInstr * MI)310f92ed696SStanislav Mekhanoshin unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
3118a3d3a9aSAustin Kerbow   if (MI->isBundle())
3128a3d3a9aSAustin Kerbow     return 0;
3138a3d3a9aSAustin Kerbow 
3142e87acacSDmitry Preobrazhensky   int WaitStates = 0;
315e823d92fSMatt Arsenault 
3165c190d05SAaron Ballman   if (SIInstrInfo::isSMRD(*MI))
317e823d92fSMatt Arsenault     return std::max(WaitStates, checkSMRDHazards(MI));
318cb6ba62dSTom Stellard 
31951d1415aSStanislav Mekhanoshin   if (ST.hasNSAtoVMEMBug())
32051d1415aSStanislav Mekhanoshin     WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
32151d1415aSStanislav Mekhanoshin 
322bdf7f81bSStanislav Mekhanoshin   WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
323bdf7f81bSStanislav Mekhanoshin 
32451d1415aSStanislav Mekhanoshin   if (ST.hasNoDataDepHazard())
32551d1415aSStanislav Mekhanoshin     return WaitStates;
32651d1415aSStanislav Mekhanoshin 
327ed745839SJay Foad   if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
328ed745839SJay Foad     WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
329ed745839SJay Foad 
33051d1415aSStanislav Mekhanoshin   if (SIInstrInfo::isVALU(*MI))
33151d1415aSStanislav Mekhanoshin     WaitStates = std::max(WaitStates, checkVALUHazards(MI));
33251d1415aSStanislav Mekhanoshin 
333a27007ebSTom Stellard   if (SIInstrInfo::isDPP(*MI))
334b133fbb9STom Stellard     WaitStates = std::max(WaitStates, checkDPPHazards(MI));
335a27007ebSTom Stellard 
3365ab6154dSTom Stellard   if (isDivFMas(MI->getOpcode()))
337b133fbb9STom Stellard     WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
338b133fbb9STom Stellard 
33904051b5fSTom Stellard   if (isRWLane(MI->getOpcode()))
34004051b5fSTom Stellard     WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
34104051b5fSTom Stellard 
342a8d9d507SStanislav Mekhanoshin   if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
343a8d9d507SStanislav Mekhanoshin        SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
344a8d9d507SStanislav Mekhanoshin        SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
345a8d9d507SStanislav Mekhanoshin     WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));
346a8d9d507SStanislav Mekhanoshin 
347d29f24acSMark Searles   if (MI->isInlineAsm())
348d29f24acSMark Searles     return std::max(WaitStates, checkInlineAsmHazards(MI));
349d29f24acSMark Searles 
350961811c9STom Stellard   if (isSGetReg(MI->getOpcode()))
351e823d92fSMatt Arsenault     return std::max(WaitStates, checkGetRegHazards(MI));
352961811c9STom Stellard 
35330d30824STom Stellard   if (isSSetReg(MI->getOpcode()))
354e823d92fSMatt Arsenault     return std::max(WaitStates, checkSetRegHazards(MI));
35530d30824STom Stellard 
356aea899e2STom Stellard   if (isRFE(MI->getOpcode()))
357e823d92fSMatt Arsenault     return std::max(WaitStates, checkRFEHazards(MI));
358aea899e2STom Stellard 
35963f21f4cSStanislav Mekhanoshin   if ((ST.hasReadM0MovRelInterpHazard() &&
36063f21f4cSStanislav Mekhanoshin        (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
36163f21f4cSStanislav Mekhanoshin       (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
36263f21f4cSStanislav Mekhanoshin       (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
36363f21f4cSStanislav Mekhanoshin       (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)))
364e823d92fSMatt Arsenault     return std::max(WaitStates, checkReadM0Hazards(MI));
365e823d92fSMatt Arsenault 
3667d2019bbSStanislav Mekhanoshin   if (SIInstrInfo::isMAI(*MI))
3677d2019bbSStanislav Mekhanoshin     return std::max(WaitStates, checkMAIHazards(MI));
3687d2019bbSStanislav Mekhanoshin 
36943a38dc2SStanislav Mekhanoshin   if (SIInstrInfo::isVMEM(*MI) ||
37043a38dc2SStanislav Mekhanoshin       SIInstrInfo::isFLAT(*MI) ||
37143a38dc2SStanislav Mekhanoshin       SIInstrInfo::isDS(*MI))
3727d2019bbSStanislav Mekhanoshin     return std::max(WaitStates, checkMAILdStHazards(MI));
3737d2019bbSStanislav Mekhanoshin 
374e823d92fSMatt Arsenault   return WaitStates;
375cb6ba62dSTom Stellard }
376cb6ba62dSTom Stellard 
EmitNoop()377cb6ba62dSTom Stellard void GCNHazardRecognizer::EmitNoop() {
378cb6ba62dSTom Stellard   EmittedInstrs.push_front(nullptr);
379cb6ba62dSTom Stellard }
380cb6ba62dSTom Stellard 
AdvanceCycle()381cb6ba62dSTom Stellard void GCNHazardRecognizer::AdvanceCycle() {
382cb6ba62dSTom Stellard   // When the scheduler detects a stall, it will call AdvanceCycle() without
383cb6ba62dSTom Stellard   // emitting any instructions.
384ebdcef20SAustin Kerbow   if (!CurrCycleInstr) {
385ebdcef20SAustin Kerbow     EmittedInstrs.push_front(nullptr);
386cb6ba62dSTom Stellard     return;
387ebdcef20SAustin Kerbow   }
388cb6ba62dSTom Stellard 
3898a3d3a9aSAustin Kerbow   if (CurrCycleInstr->isBundle()) {
3908a3d3a9aSAustin Kerbow     processBundle();
3918a3d3a9aSAustin Kerbow     return;
3928a3d3a9aSAustin Kerbow   }
3938a3d3a9aSAustin Kerbow 
39459ece95fSMatt Arsenault   unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
39568660767SChristudasan Devadasan   if (!NumWaitStates) {
39668660767SChristudasan Devadasan     CurrCycleInstr = nullptr;
39768660767SChristudasan Devadasan     return;
39868660767SChristudasan Devadasan   }
399cb6ba62dSTom Stellard 
400cb6ba62dSTom Stellard   // Keep track of emitted instructions
401cb6ba62dSTom Stellard   EmittedInstrs.push_front(CurrCycleInstr);
402cb6ba62dSTom Stellard 
403cb6ba62dSTom Stellard   // Add a nullptr for each additional wait state after the first.  Make sure
404cb6ba62dSTom Stellard   // not to add more than getMaxLookAhead() items to the list, since we
405cb6ba62dSTom Stellard   // truncate the list to that size right after this loop.
406cb6ba62dSTom Stellard   for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
407cb6ba62dSTom Stellard        i < e; ++i) {
408cb6ba62dSTom Stellard     EmittedInstrs.push_front(nullptr);
409cb6ba62dSTom Stellard   }
410cb6ba62dSTom Stellard 
411cb6ba62dSTom Stellard   // getMaxLookahead() is the largest number of wait states we will ever need
412cb6ba62dSTom Stellard   // to insert, so there is no point in keeping track of more than that many
413cb6ba62dSTom Stellard   // wait states.
414cb6ba62dSTom Stellard   EmittedInstrs.resize(getMaxLookAhead());
415cb6ba62dSTom Stellard 
416cb6ba62dSTom Stellard   CurrCycleInstr = nullptr;
417cb6ba62dSTom Stellard }
418cb6ba62dSTom Stellard 
RecedeCycle()419cb6ba62dSTom Stellard void GCNHazardRecognizer::RecedeCycle() {
420cb6ba62dSTom Stellard   llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
421cb6ba62dSTom Stellard }
422cb6ba62dSTom Stellard 
423cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
424cb6ba62dSTom Stellard // Helper Functions
425cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
426cb6ba62dSTom Stellard 
4279dff14beSJay Foad typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult;
4289dff14beSJay Foad 
429424f1f6fSCarl Ritson typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
43013107c27SJay Foad typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
431f92ed696SStanislav Mekhanoshin 
4329dff14beSJay Foad // Search for a hazard in a block and its predecessors.
4339dff14beSJay Foad template <typename StateT>
4349dff14beSJay Foad static bool
hasHazard(StateT State,function_ref<HazardFnResult (StateT &,const MachineInstr &)> IsHazard,function_ref<void (StateT &,const MachineInstr &)> UpdateState,const MachineBasicBlock * MBB,MachineBasicBlock::const_reverse_instr_iterator I,DenseSet<const MachineBasicBlock * > & Visited)4359dff14beSJay Foad hasHazard(StateT State,
4369dff14beSJay Foad           function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
4379dff14beSJay Foad           function_ref<void(StateT &, const MachineInstr &)> UpdateState,
4389dff14beSJay Foad           const MachineBasicBlock *MBB,
4399dff14beSJay Foad           MachineBasicBlock::const_reverse_instr_iterator I,
4409dff14beSJay Foad           DenseSet<const MachineBasicBlock *> &Visited) {
4419dff14beSJay Foad   for (auto E = MBB->instr_rend(); I != E; ++I) {
4429dff14beSJay Foad     // No need to look at parent BUNDLE instructions.
4439dff14beSJay Foad     if (I->isBundle())
4449dff14beSJay Foad       continue;
4459dff14beSJay Foad 
4469dff14beSJay Foad     switch (IsHazard(State, *I)) {
4479dff14beSJay Foad     case HazardFound:
4489dff14beSJay Foad       return true;
4499dff14beSJay Foad     case HazardExpired:
4509dff14beSJay Foad       return false;
4519dff14beSJay Foad     default:
4529dff14beSJay Foad       // Continue search
4539dff14beSJay Foad       break;
4549dff14beSJay Foad     }
4559dff14beSJay Foad 
4569dff14beSJay Foad     if (I->isInlineAsm() || I->isMetaInstruction())
4579dff14beSJay Foad       continue;
4589dff14beSJay Foad 
4599dff14beSJay Foad     UpdateState(State, *I);
4609dff14beSJay Foad   }
4619dff14beSJay Foad 
4629dff14beSJay Foad   for (MachineBasicBlock *Pred : MBB->predecessors()) {
4639dff14beSJay Foad     if (!Visited.insert(Pred).second)
4649dff14beSJay Foad       continue;
4659dff14beSJay Foad 
4669dff14beSJay Foad     if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
4679dff14beSJay Foad                   Visited))
4689dff14beSJay Foad       return true;
4699dff14beSJay Foad   }
4709dff14beSJay Foad 
4719dff14beSJay Foad   return false;
4729dff14beSJay Foad }
4739dff14beSJay Foad 
474f92ed696SStanislav Mekhanoshin // Returns a minimum wait states since \p I walking all predecessors.
475f92ed696SStanislav Mekhanoshin // Only scans until \p IsExpired does not return true.
476f92ed696SStanislav Mekhanoshin // Can only be run in a hazard recognizer mode.
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,const MachineBasicBlock * MBB,MachineBasicBlock::const_reverse_instr_iterator I,int WaitStates,IsExpiredFn IsExpired,DenseSet<const MachineBasicBlock * > & Visited,GetNumWaitStatesFn GetNumWaitStates=SIInstrInfo::getNumWaitStates)47713107c27SJay Foad static int getWaitStatesSince(
47813107c27SJay Foad     GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
47913107c27SJay Foad     MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
48013107c27SJay Foad     IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
48113107c27SJay Foad     GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
4828a3d3a9aSAustin Kerbow   for (auto E = MBB->instr_rend(); I != E; ++I) {
4838a3d3a9aSAustin Kerbow     // Don't add WaitStates for parent BUNDLE instructions.
4848a3d3a9aSAustin Kerbow     if (I->isBundle())
4858a3d3a9aSAustin Kerbow       continue;
486f92ed696SStanislav Mekhanoshin 
487424f1f6fSCarl Ritson     if (IsHazard(*I))
488f92ed696SStanislav Mekhanoshin       return WaitStates;
489f92ed696SStanislav Mekhanoshin 
4904f5ba46eSChristudasan Devadasan     if (I->isInlineAsm())
491f92ed696SStanislav Mekhanoshin       continue;
492f92ed696SStanislav Mekhanoshin 
49313107c27SJay Foad     WaitStates += GetNumWaitStates(*I);
494f92ed696SStanislav Mekhanoshin 
495424f1f6fSCarl Ritson     if (IsExpired(*I, WaitStates))
496f92ed696SStanislav Mekhanoshin       return std::numeric_limits<int>::max();
497f92ed696SStanislav Mekhanoshin   }
498f92ed696SStanislav Mekhanoshin 
499f251379aSJay Foad   int MinWaitStates = std::numeric_limits<int>::max();
500f92ed696SStanislav Mekhanoshin   for (MachineBasicBlock *Pred : MBB->predecessors()) {
501f92ed696SStanislav Mekhanoshin     if (!Visited.insert(Pred).second)
502f92ed696SStanislav Mekhanoshin       continue;
503f92ed696SStanislav Mekhanoshin 
50413107c27SJay Foad     int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
50513107c27SJay Foad                                IsExpired, Visited, GetNumWaitStates);
506f92ed696SStanislav Mekhanoshin 
507f251379aSJay Foad     MinWaitStates = std::min(MinWaitStates, W);
508f92ed696SStanislav Mekhanoshin   }
509f92ed696SStanislav Mekhanoshin 
510f92ed696SStanislav Mekhanoshin   return MinWaitStates;
511f92ed696SStanislav Mekhanoshin }
512f92ed696SStanislav Mekhanoshin 
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,const MachineInstr * MI,IsExpiredFn IsExpired)513f92ed696SStanislav Mekhanoshin static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
514424f1f6fSCarl Ritson                               const MachineInstr *MI, IsExpiredFn IsExpired) {
515f92ed696SStanislav Mekhanoshin   DenseSet<const MachineBasicBlock *> Visited;
516f92ed696SStanislav Mekhanoshin   return getWaitStatesSince(IsHazard, MI->getParent(),
517f92ed696SStanislav Mekhanoshin                             std::next(MI->getReverseIterator()),
518f92ed696SStanislav Mekhanoshin                             0, IsExpired, Visited);
519f92ed696SStanislav Mekhanoshin }
520f92ed696SStanislav Mekhanoshin 
getWaitStatesSince(IsHazardFn IsHazard,int Limit)521f92ed696SStanislav Mekhanoshin int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
522f92ed696SStanislav Mekhanoshin   if (IsHazardRecognizerMode) {
523424f1f6fSCarl Ritson     auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
524f92ed696SStanislav Mekhanoshin       return WaitStates >= Limit;
525f92ed696SStanislav Mekhanoshin     };
526f92ed696SStanislav Mekhanoshin     return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
527f92ed696SStanislav Mekhanoshin   }
528f92ed696SStanislav Mekhanoshin 
52975c98c36SNicolai Haehnle   int WaitStates = 0;
530961811c9STom Stellard   for (MachineInstr *MI : EmittedInstrs) {
53175c98c36SNicolai Haehnle     if (MI) {
532424f1f6fSCarl Ritson       if (IsHazard(*MI))
533961811c9STom Stellard         return WaitStates;
53475c98c36SNicolai Haehnle 
535f92ed696SStanislav Mekhanoshin       if (MI->isInlineAsm())
53675c98c36SNicolai Haehnle         continue;
53775c98c36SNicolai Haehnle     }
53875c98c36SNicolai Haehnle     ++WaitStates;
539f92ed696SStanislav Mekhanoshin 
540f92ed696SStanislav Mekhanoshin     if (WaitStates >= Limit)
541f92ed696SStanislav Mekhanoshin       break;
542961811c9STom Stellard   }
543961811c9STom Stellard   return std::numeric_limits<int>::max();
544961811c9STom Stellard }
545961811c9STom Stellard 
getWaitStatesSinceDef(unsigned Reg,IsHazardFn IsHazardDef,int Limit)546f92ed696SStanislav Mekhanoshin int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
547f92ed696SStanislav Mekhanoshin                                                IsHazardFn IsHazardDef,
548f92ed696SStanislav Mekhanoshin                                                int Limit) {
549b133fbb9STom Stellard   const SIRegisterInfo *TRI = ST.getRegisterInfo();
550b133fbb9STom Stellard 
551424f1f6fSCarl Ritson   auto IsHazardFn = [IsHazardDef, TRI, Reg](const MachineInstr &MI) {
552424f1f6fSCarl Ritson     return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI);
553b133fbb9STom Stellard   };
554b133fbb9STom Stellard 
555f92ed696SStanislav Mekhanoshin   return getWaitStatesSince(IsHazardFn, Limit);
556b133fbb9STom Stellard }
557b133fbb9STom Stellard 
getWaitStatesSinceSetReg(IsHazardFn IsHazard,int Limit)558f92ed696SStanislav Mekhanoshin int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
559f92ed696SStanislav Mekhanoshin                                                   int Limit) {
560424f1f6fSCarl Ritson   auto IsHazardFn = [IsHazard](const MachineInstr &MI) {
561424f1f6fSCarl Ritson     return isSSetReg(MI.getOpcode()) && IsHazard(MI);
562b133fbb9STom Stellard   };
563b133fbb9STom Stellard 
564f92ed696SStanislav Mekhanoshin   return getWaitStatesSince(IsHazardFn, Limit);
565b133fbb9STom Stellard }
566b133fbb9STom Stellard 
567cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
568cb6ba62dSTom Stellard // No-op Hazard Detection
569cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
570cb6ba62dSTom Stellard 
addRegUnits(const SIRegisterInfo & TRI,BitVector & BV,MCRegister Reg)5715dc47541SMircea Trofin static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV,
5725dc47541SMircea Trofin                         MCRegister Reg) {
57303c67d1eSMatt Arsenault   for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
57403c67d1eSMatt Arsenault     BV.set(*RUI);
57503c67d1eSMatt Arsenault }
57603c67d1eSMatt Arsenault 
addRegsToSet(const SIRegisterInfo & TRI,iterator_range<MachineInstr::const_mop_iterator> Ops,BitVector & Set)57703c67d1eSMatt Arsenault static void addRegsToSet(const SIRegisterInfo &TRI,
57803c67d1eSMatt Arsenault                          iterator_range<MachineInstr::const_mop_iterator> Ops,
57903c67d1eSMatt Arsenault                          BitVector &Set) {
5801f520e5cSTom Stellard   for (const MachineOperand &Op : Ops) {
5811f520e5cSTom Stellard     if (Op.isReg())
5825dc47541SMircea Trofin       addRegUnits(TRI, Set, Op.getReg().asMCReg());
5831f520e5cSTom Stellard   }
5841f520e5cSTom Stellard }
5851f520e5cSTom Stellard 
addClauseInst(const MachineInstr & MI)58603c67d1eSMatt Arsenault void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
58703c67d1eSMatt Arsenault   // XXX: Do we need to worry about implicit operands
58803c67d1eSMatt Arsenault   addRegsToSet(TRI, MI.defs(), ClauseDefs);
58903c67d1eSMatt Arsenault   addRegsToSet(TRI, MI.uses(), ClauseUses);
59003c67d1eSMatt Arsenault }
59103c67d1eSMatt Arsenault 
breaksSMEMSoftClause(MachineInstr * MI)5923d76824bSJay Foad static bool breaksSMEMSoftClause(MachineInstr *MI) {
5933d76824bSJay Foad   return !SIInstrInfo::isSMRD(*MI);
5943d76824bSJay Foad }
5953d76824bSJay Foad 
breaksVMEMSoftClause(MachineInstr * MI)5963d76824bSJay Foad static bool breaksVMEMSoftClause(MachineInstr *MI) {
5973d76824bSJay Foad   return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
5983d76824bSJay Foad }
5993d76824bSJay Foad 
checkSoftClauseHazards(MachineInstr * MEM)600a41351e3SMatt Arsenault int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
60103c67d1eSMatt Arsenault   // SMEM soft clause are only present on VI+, and only matter if xnack is
60203c67d1eSMatt Arsenault   // enabled.
60303c67d1eSMatt Arsenault   if (!ST.isXNACKEnabled())
6041f520e5cSTom Stellard     return 0;
6051f520e5cSTom Stellard 
606a41351e3SMatt Arsenault   bool IsSMRD = TII.isSMRD(*MEM);
607a41351e3SMatt Arsenault 
60803c67d1eSMatt Arsenault   resetClause();
60903c67d1eSMatt Arsenault 
6101f520e5cSTom Stellard   // A soft-clause is any group of consecutive SMEM instructions.  The
6111f520e5cSTom Stellard   // instructions in this group may return out of order and/or may be
6121f520e5cSTom Stellard   // replayed (i.e. the same instruction issued more than once).
6131f520e5cSTom Stellard   //
6148a3d3a9aSAustin Kerbow   // In order to handle these situations correctly we need to make sure that
6158a3d3a9aSAustin Kerbow   // when a clause has more than one instruction, no instruction in the clause
6168a3d3a9aSAustin Kerbow   // writes to a register that is read by another instruction in the clause
6176527b2a4SSebastian Neubauer   // (including itself). If we encounter this situation, we need to break the
6181f520e5cSTom Stellard   // clause by inserting a non SMEM instruction.
6191f520e5cSTom Stellard 
6201f520e5cSTom Stellard   for (MachineInstr *MI : EmittedInstrs) {
6211f520e5cSTom Stellard     // When we hit a non-SMEM instruction then we have passed the start of the
6221f520e5cSTom Stellard     // clause and we can stop.
623a41351e3SMatt Arsenault     if (!MI)
624a41351e3SMatt Arsenault       break;
625a41351e3SMatt Arsenault 
6263d76824bSJay Foad     if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI))
6271f520e5cSTom Stellard       break;
6281f520e5cSTom Stellard 
62903c67d1eSMatt Arsenault     addClauseInst(*MI);
6301f520e5cSTom Stellard   }
6311f520e5cSTom Stellard 
63203c67d1eSMatt Arsenault   if (ClauseDefs.none())
6331f520e5cSTom Stellard     return 0;
6341f520e5cSTom Stellard 
635a41351e3SMatt Arsenault   // We need to make sure not to put loads and stores in the same clause if they
636a41351e3SMatt Arsenault   // use the same address. For now, just start a new clause whenever we see a
637a41351e3SMatt Arsenault   // store.
638a41351e3SMatt Arsenault   if (MEM->mayStore())
6391f520e5cSTom Stellard     return 1;
6401f520e5cSTom Stellard 
641a41351e3SMatt Arsenault   addClauseInst(*MEM);
6421f520e5cSTom Stellard 
6431f520e5cSTom Stellard   // If the set of defs and uses intersect then we cannot add this instruction
6441f520e5cSTom Stellard   // to the clause, so we have a hazard.
64503c67d1eSMatt Arsenault   return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
6461f520e5cSTom Stellard }
6471f520e5cSTom Stellard 
checkSMRDHazards(MachineInstr * SMRD)648cb6ba62dSTom Stellard int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
6491f520e5cSTom Stellard   int WaitStatesNeeded = 0;
6501f520e5cSTom Stellard 
651a41351e3SMatt Arsenault   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
652cb6ba62dSTom Stellard 
653cb6ba62dSTom Stellard   // This SMRD hazard only affects SI.
654e4c2e9b0SMatt Arsenault   if (!ST.hasSMRDReadVALUDefHazard())
6551f520e5cSTom Stellard     return WaitStatesNeeded;
656cb6ba62dSTom Stellard 
657cb6ba62dSTom Stellard   // A read of an SGPR by SMRD instruction requires 4 wait states when the
658cb6ba62dSTom Stellard   // SGPR was written by a VALU instruction.
659cb6ba62dSTom Stellard   int SmrdSgprWaitStates = 4;
660424f1f6fSCarl Ritson   auto IsHazardDefFn = [this](const MachineInstr &MI) {
661424f1f6fSCarl Ritson     return TII.isVALU(MI);
662424f1f6fSCarl Ritson   };
663424f1f6fSCarl Ritson   auto IsBufferHazardDefFn = [this](const MachineInstr &MI) {
664424f1f6fSCarl Ritson     return TII.isSALU(MI);
665424f1f6fSCarl Ritson   };
66622322438SMarek Olsak 
6674512d0a6SMatt Arsenault   bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
668cb6ba62dSTom Stellard 
669cb6ba62dSTom Stellard   for (const MachineOperand &Use : SMRD->uses()) {
670cb6ba62dSTom Stellard     if (!Use.isReg())
671cb6ba62dSTom Stellard       continue;
672cb6ba62dSTom Stellard     int WaitStatesNeededForUse =
673f92ed696SStanislav Mekhanoshin         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
674f92ed696SStanislav Mekhanoshin                                                    SmrdSgprWaitStates);
675cb6ba62dSTom Stellard     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
67622322438SMarek Olsak 
67722322438SMarek Olsak     // This fixes what appears to be undocumented hardware behavior in SI where
67822322438SMarek Olsak     // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
67922322438SMarek Olsak     // needs some number of nops in between. We don't know how many we need, but
68022322438SMarek Olsak     // let's use 4. This wasn't discovered before probably because the only
68122322438SMarek Olsak     // case when this happens is when we expand a 64-bit pointer into a full
68222322438SMarek Olsak     // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
68322322438SMarek Olsak     // probably never encountered in the closed-source land.
68422322438SMarek Olsak     if (IsBufferSMRD) {
68522322438SMarek Olsak       int WaitStatesNeededForUse =
68622322438SMarek Olsak         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
687f92ed696SStanislav Mekhanoshin                                                    IsBufferHazardDefFn,
688f92ed696SStanislav Mekhanoshin                                                    SmrdSgprWaitStates);
68922322438SMarek Olsak       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
690cb6ba62dSTom Stellard     }
69122322438SMarek Olsak   }
69222322438SMarek Olsak 
693cb6ba62dSTom Stellard   return WaitStatesNeeded;
694cb6ba62dSTom Stellard }
695cb6ba62dSTom Stellard 
checkVMEMHazards(MachineInstr * VMEM)696cb6ba62dSTom Stellard int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
697e4c2e9b0SMatt Arsenault   if (!ST.hasVMEMReadSGPRVALUDefHazard())
698cb6ba62dSTom Stellard     return 0;
699cb6ba62dSTom Stellard 
700a41351e3SMatt Arsenault   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
701cb6ba62dSTom Stellard 
702cb6ba62dSTom Stellard   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
703cb6ba62dSTom Stellard   // SGPR was written by a VALU Instruction.
704a41351e3SMatt Arsenault   const int VmemSgprWaitStates = 5;
705424f1f6fSCarl Ritson   auto IsHazardDefFn = [this](const MachineInstr &MI) {
706424f1f6fSCarl Ritson     return TII.isVALU(MI);
707424f1f6fSCarl Ritson   };
708cb6ba62dSTom Stellard   for (const MachineOperand &Use : VMEM->uses()) {
709a8d9d507SStanislav Mekhanoshin     if (!Use.isReg() || TRI.isVectorRegister(MF.getRegInfo(), Use.getReg()))
710cb6ba62dSTom Stellard       continue;
711cb6ba62dSTom Stellard 
712cb6ba62dSTom Stellard     int WaitStatesNeededForUse =
713f92ed696SStanislav Mekhanoshin         VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
714f92ed696SStanislav Mekhanoshin                                                    VmemSgprWaitStates);
715cb6ba62dSTom Stellard     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
716cb6ba62dSTom Stellard   }
717cb6ba62dSTom Stellard   return WaitStatesNeeded;
718cb6ba62dSTom Stellard }
719a27007ebSTom Stellard 
checkDPPHazards(MachineInstr * DPP)720a27007ebSTom Stellard int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
72143e92fe3SMatt Arsenault   const SIRegisterInfo *TRI = ST.getRegisterInfo();
72200755362SConnor Abbott   const SIInstrInfo *TII = ST.getInstrInfo();
723a27007ebSTom Stellard 
72400755362SConnor Abbott   // Check for DPP VGPR read after VALU VGPR write and EXEC write.
725a27007ebSTom Stellard   int DppVgprWaitStates = 2;
72600755362SConnor Abbott   int DppExecWaitStates = 5;
727a27007ebSTom Stellard   int WaitStatesNeeded = 0;
728424f1f6fSCarl Ritson   auto IsHazardDefFn = [TII](const MachineInstr &MI) {
729424f1f6fSCarl Ritson     return TII->isVALU(MI);
730424f1f6fSCarl Ritson   };
731a27007ebSTom Stellard 
732a27007ebSTom Stellard   for (const MachineOperand &Use : DPP->uses()) {
733a27007ebSTom Stellard     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
734a27007ebSTom Stellard       continue;
735a27007ebSTom Stellard     int WaitStatesNeededForUse =
736424f1f6fSCarl Ritson         DppVgprWaitStates - getWaitStatesSinceDef(
737424f1f6fSCarl Ritson                                 Use.getReg(),
738424f1f6fSCarl Ritson                                 [](const MachineInstr &) { return true; },
739f92ed696SStanislav Mekhanoshin                                 DppVgprWaitStates);
740a27007ebSTom Stellard     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
741a27007ebSTom Stellard   }
742a27007ebSTom Stellard 
74300755362SConnor Abbott   WaitStatesNeeded = std::max(
74400755362SConnor Abbott       WaitStatesNeeded,
745f92ed696SStanislav Mekhanoshin       DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
746f92ed696SStanislav Mekhanoshin                                                 DppExecWaitStates));
74700755362SConnor Abbott 
748a27007ebSTom Stellard   return WaitStatesNeeded;
749a27007ebSTom Stellard }
7505ab6154dSTom Stellard 
checkDivFMasHazards(MachineInstr * DivFMas)7515ab6154dSTom Stellard int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
7525ab6154dSTom Stellard   const SIInstrInfo *TII = ST.getInstrInfo();
7535ab6154dSTom Stellard 
7545ab6154dSTom Stellard   // v_div_fmas requires 4 wait states after a write to vcc from a VALU
7555ab6154dSTom Stellard   // instruction.
7565ab6154dSTom Stellard   const int DivFMasWaitStates = 4;
757424f1f6fSCarl Ritson   auto IsHazardDefFn = [TII](const MachineInstr &MI) {
758424f1f6fSCarl Ritson     return TII->isVALU(MI);
759424f1f6fSCarl Ritson   };
760f92ed696SStanislav Mekhanoshin   int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
761f92ed696SStanislav Mekhanoshin                                                DivFMasWaitStates);
7625ab6154dSTom Stellard 
7635ab6154dSTom Stellard   return DivFMasWaitStates - WaitStatesNeeded;
7645ab6154dSTom Stellard }
765961811c9STom Stellard 
checkGetRegHazards(MachineInstr * GetRegInstr)766961811c9STom Stellard int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
767961811c9STom Stellard   const SIInstrInfo *TII = ST.getInstrInfo();
768961811c9STom Stellard   unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
769961811c9STom Stellard 
770961811c9STom Stellard   const int GetRegWaitStates = 2;
771424f1f6fSCarl Ritson   auto IsHazardFn = [TII, GetRegHWReg](const MachineInstr &MI) {
772424f1f6fSCarl Ritson     return GetRegHWReg == getHWReg(TII, MI);
773961811c9STom Stellard   };
774f92ed696SStanislav Mekhanoshin   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
775961811c9STom Stellard 
776961811c9STom Stellard   return GetRegWaitStates - WaitStatesNeeded;
777961811c9STom Stellard }
77830d30824STom Stellard 
checkSetRegHazards(MachineInstr * SetRegInstr)77930d30824STom Stellard int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
78030d30824STom Stellard   const SIInstrInfo *TII = ST.getInstrInfo();
78130d30824STom Stellard   unsigned HWReg = getHWReg(TII, *SetRegInstr);
78230d30824STom Stellard 
783e4c2e9b0SMatt Arsenault   const int SetRegWaitStates = ST.getSetRegWaitStates();
784424f1f6fSCarl Ritson   auto IsHazardFn = [TII, HWReg](const MachineInstr &MI) {
785424f1f6fSCarl Ritson     return HWReg == getHWReg(TII, MI);
78630d30824STom Stellard   };
787f92ed696SStanislav Mekhanoshin   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
78830d30824STom Stellard   return SetRegWaitStates - WaitStatesNeeded;
78930d30824STom Stellard }
790b133fbb9STom Stellard 
createsVALUHazard(const MachineInstr & MI)791b133fbb9STom Stellard int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
792b133fbb9STom Stellard   if (!MI.mayStore())
793b133fbb9STom Stellard     return -1;
794b133fbb9STom Stellard 
795b133fbb9STom Stellard   const SIInstrInfo *TII = ST.getInstrInfo();
796b133fbb9STom Stellard   unsigned Opcode = MI.getOpcode();
797b133fbb9STom Stellard   const MCInstrDesc &Desc = MI.getDesc();
798b133fbb9STom Stellard 
799b133fbb9STom Stellard   int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
800b133fbb9STom Stellard   int VDataRCID = -1;
801b133fbb9STom Stellard   if (VDataIdx != -1)
802b133fbb9STom Stellard     VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
803b133fbb9STom Stellard 
804b133fbb9STom Stellard   if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
805e8cc395eSJan Vesely     // There is no hazard if the instruction does not use vector regs
806e8cc395eSJan Vesely     // (like wbinvl1)
807e8cc395eSJan Vesely     if (VDataIdx == -1)
808e8cc395eSJan Vesely       return -1;
809b133fbb9STom Stellard     // For MUBUF/MTBUF instructions this hazard only exists if the
810b133fbb9STom Stellard     // instruction is not using a register in the soffset field.
811b133fbb9STom Stellard     const MachineOperand *SOffset =
812b133fbb9STom Stellard         TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
813b133fbb9STom Stellard     // If we have no soffset operand, then assume this field has been
814b133fbb9STom Stellard     // hardcoded to zero.
815b133fbb9STom Stellard     if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
816b133fbb9STom Stellard         (!SOffset || !SOffset->isReg()))
817b133fbb9STom Stellard       return VDataIdx;
818b133fbb9STom Stellard   }
819b133fbb9STom Stellard 
820b133fbb9STom Stellard   // MIMG instructions create a hazard if they don't use a 256-bit T# and
821b133fbb9STom Stellard   // the store size is greater than 8 bytes and they have more than two bits
822b133fbb9STom Stellard   // of their dmask set.
823b133fbb9STom Stellard   // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
824b133fbb9STom Stellard   if (TII->isMIMG(MI)) {
825b133fbb9STom Stellard     int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
826b133fbb9STom Stellard     assert(SRsrcIdx != -1 &&
827b133fbb9STom Stellard            AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
8286b9c1be4STom Stellard     (void)SRsrcIdx;
829b133fbb9STom Stellard   }
830b133fbb9STom Stellard 
831b133fbb9STom Stellard   if (TII->isFLAT(MI)) {
83297279a8cSMatt Arsenault     int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
833b133fbb9STom Stellard     if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
834b133fbb9STom Stellard       return DataIdx;
835b133fbb9STom Stellard   }
836b133fbb9STom Stellard 
837b133fbb9STom Stellard   return -1;
838b133fbb9STom Stellard }
839b133fbb9STom Stellard 
840decfdb8cSStanislav Mekhanoshin int
checkVALUHazardsHelper(const MachineOperand & Def,const MachineRegisterInfo & MRI)841decfdb8cSStanislav Mekhanoshin GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
842d29f24acSMark Searles                                             const MachineRegisterInfo &MRI) {
843d29f24acSMark Searles   // Helper to check for the hazard where VMEM instructions that store more than
844b133fbb9STom Stellard   // 8 bytes can have there store data over written by the next instruction.
845b133fbb9STom Stellard   const SIRegisterInfo *TRI = ST.getRegisterInfo();
846b133fbb9STom Stellard 
847d951d937SStanislav Mekhanoshin   const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1;
848b133fbb9STom Stellard   int WaitStatesNeeded = 0;
849b133fbb9STom Stellard 
850a8d9d507SStanislav Mekhanoshin   if (!TRI->isVectorRegister(MRI, Def.getReg()))
851d29f24acSMark Searles     return WaitStatesNeeded;
8520c476111SDaniel Sanders   Register Reg = Def.getReg();
853424f1f6fSCarl Ritson   auto IsHazardFn = [this, Reg, TRI](const MachineInstr &MI) {
854424f1f6fSCarl Ritson     int DataIdx = createsVALUHazard(MI);
855b133fbb9STom Stellard     return DataIdx >= 0 &&
856424f1f6fSCarl Ritson            TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
857b133fbb9STom Stellard   };
858b133fbb9STom Stellard   int WaitStatesNeededForDef =
859f92ed696SStanislav Mekhanoshin     VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
860b133fbb9STom Stellard   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
861d29f24acSMark Searles 
862d29f24acSMark Searles   return WaitStatesNeeded;
863b133fbb9STom Stellard }
864d29f24acSMark Searles 
checkVALUHazards(MachineInstr * VALU)865d29f24acSMark Searles int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
866f311f934SStanislav Mekhanoshin   int WaitStatesNeeded = 0;
867f311f934SStanislav Mekhanoshin 
868f311f934SStanislav Mekhanoshin   if (ST.hasTransForwardingHazard() && !SIInstrInfo::isTRANS(*VALU)) {
869f311f934SStanislav Mekhanoshin     const int TransDefWaitstates = 1;
870f311f934SStanislav Mekhanoshin 
871f311f934SStanislav Mekhanoshin     auto IsTransDefFn = [this, VALU](const MachineInstr &MI) {
872f311f934SStanislav Mekhanoshin       if (!SIInstrInfo::isTRANS(MI))
873f311f934SStanislav Mekhanoshin         return false;
874f311f934SStanislav Mekhanoshin       const SIRegisterInfo *TRI = ST.getRegisterInfo();
875f311f934SStanislav Mekhanoshin       const SIInstrInfo *TII = ST.getInstrInfo();
876f311f934SStanislav Mekhanoshin       Register Def = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)->getReg();
877f311f934SStanislav Mekhanoshin 
878f311f934SStanislav Mekhanoshin       for (const MachineOperand &Use : VALU->explicit_uses()) {
879f311f934SStanislav Mekhanoshin         if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
880f311f934SStanislav Mekhanoshin           return true;
881f311f934SStanislav Mekhanoshin       }
882f311f934SStanislav Mekhanoshin 
883f311f934SStanislav Mekhanoshin       return false;
884f311f934SStanislav Mekhanoshin     };
885f311f934SStanislav Mekhanoshin 
886f311f934SStanislav Mekhanoshin     int WaitStatesNeededForDef =
887f311f934SStanislav Mekhanoshin         TransDefWaitstates -
888f311f934SStanislav Mekhanoshin         getWaitStatesSince(IsTransDefFn, TransDefWaitstates);
889f311f934SStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
890f311f934SStanislav Mekhanoshin   }
891f311f934SStanislav Mekhanoshin 
892f311f934SStanislav Mekhanoshin   if (ST.hasDstSelForwardingHazard()) {
893f311f934SStanislav Mekhanoshin     const int Shift16DefWaitstates = 1;
894f311f934SStanislav Mekhanoshin 
895f311f934SStanislav Mekhanoshin     auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) {
896f311f934SStanislav Mekhanoshin       if (!SIInstrInfo::isVALU(MI))
897f311f934SStanislav Mekhanoshin         return false;
898f311f934SStanislav Mekhanoshin       const SIInstrInfo *TII = ST.getInstrInfo();
899f311f934SStanislav Mekhanoshin       if (SIInstrInfo::isSDWA(MI)) {
900f311f934SStanislav Mekhanoshin         if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
901f311f934SStanislav Mekhanoshin           if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
902f311f934SStanislav Mekhanoshin             return false;
903f311f934SStanislav Mekhanoshin       } else {
904f311f934SStanislav Mekhanoshin         if ((AMDGPU::getNamedOperandIdx(MI.getOpcode(),
905f311f934SStanislav Mekhanoshin                                         AMDGPU::OpName::op_sel) == -1) ||
906f311f934SStanislav Mekhanoshin             !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
907f311f934SStanislav Mekhanoshin                   ->getImm() &
908f311f934SStanislav Mekhanoshin               SISrcMods::DST_OP_SEL))
909f311f934SStanislav Mekhanoshin           return false;
910f311f934SStanislav Mekhanoshin       }
911f311f934SStanislav Mekhanoshin       const SIRegisterInfo *TRI = ST.getRegisterInfo();
912f311f934SStanislav Mekhanoshin       if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
913f311f934SStanislav Mekhanoshin         Register Def = Dst->getReg();
914f311f934SStanislav Mekhanoshin 
915f311f934SStanislav Mekhanoshin         for (const MachineOperand &Use : VALU->explicit_uses()) {
916f311f934SStanislav Mekhanoshin           if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
917f311f934SStanislav Mekhanoshin             return true;
918f311f934SStanislav Mekhanoshin         }
919f311f934SStanislav Mekhanoshin       }
920f311f934SStanislav Mekhanoshin 
921f311f934SStanislav Mekhanoshin       return false;
922f311f934SStanislav Mekhanoshin     };
923f311f934SStanislav Mekhanoshin 
924f311f934SStanislav Mekhanoshin     int WaitStatesNeededForDef =
925f311f934SStanislav Mekhanoshin         Shift16DefWaitstates -
926f311f934SStanislav Mekhanoshin         getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates);
927f311f934SStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
928f311f934SStanislav Mekhanoshin   }
929f311f934SStanislav Mekhanoshin 
930f311f934SStanislav Mekhanoshin   if (ST.hasVDecCoExecHazard()) {
931f311f934SStanislav Mekhanoshin     const int VALUWriteSGPRVALUReadWaitstates = 2;
932f311f934SStanislav Mekhanoshin     const int VALUWriteEXECRWLane = 4;
933f311f934SStanislav Mekhanoshin     const int VALUWriteVGPRReadlaneRead = 1;
934f311f934SStanislav Mekhanoshin 
935f311f934SStanislav Mekhanoshin     const SIRegisterInfo *TRI = ST.getRegisterInfo();
936f311f934SStanislav Mekhanoshin     const MachineRegisterInfo &MRI = MF.getRegInfo();
937f311f934SStanislav Mekhanoshin     Register UseReg;
938f311f934SStanislav Mekhanoshin     auto IsVALUDefSGPRFn = [&UseReg, TRI](const MachineInstr &MI) {
939f311f934SStanislav Mekhanoshin       if (!SIInstrInfo::isVALU(MI))
940f311f934SStanislav Mekhanoshin         return false;
941f311f934SStanislav Mekhanoshin       return MI.modifiesRegister(UseReg, TRI);
942f311f934SStanislav Mekhanoshin     };
943f311f934SStanislav Mekhanoshin 
944f311f934SStanislav Mekhanoshin     for (const MachineOperand &Use : VALU->explicit_uses()) {
945f311f934SStanislav Mekhanoshin       if (!Use.isReg())
946f311f934SStanislav Mekhanoshin         continue;
947f311f934SStanislav Mekhanoshin 
948f311f934SStanislav Mekhanoshin       UseReg = Use.getReg();
949f311f934SStanislav Mekhanoshin       if (TRI->isSGPRReg(MRI, UseReg)) {
950f311f934SStanislav Mekhanoshin         int WaitStatesNeededForDef =
951f311f934SStanislav Mekhanoshin             VALUWriteSGPRVALUReadWaitstates -
952f311f934SStanislav Mekhanoshin             getWaitStatesSince(IsVALUDefSGPRFn,
953f311f934SStanislav Mekhanoshin                                VALUWriteSGPRVALUReadWaitstates);
954f311f934SStanislav Mekhanoshin         WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
955f311f934SStanislav Mekhanoshin       }
956f311f934SStanislav Mekhanoshin     }
957f311f934SStanislav Mekhanoshin 
958f311f934SStanislav Mekhanoshin     if (VALU->readsRegister(AMDGPU::VCC, TRI)) {
959f311f934SStanislav Mekhanoshin       UseReg = AMDGPU::VCC;
960f311f934SStanislav Mekhanoshin       int WaitStatesNeededForDef =
961f311f934SStanislav Mekhanoshin           VALUWriteSGPRVALUReadWaitstates -
962f311f934SStanislav Mekhanoshin           getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteSGPRVALUReadWaitstates);
963f311f934SStanislav Mekhanoshin       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
964f311f934SStanislav Mekhanoshin     }
965f311f934SStanislav Mekhanoshin 
966f311f934SStanislav Mekhanoshin     switch (VALU->getOpcode()) {
967f311f934SStanislav Mekhanoshin     case AMDGPU::V_READLANE_B32:
968f311f934SStanislav Mekhanoshin     case AMDGPU::V_READFIRSTLANE_B32: {
969f311f934SStanislav Mekhanoshin       MachineOperand *Src = TII.getNamedOperand(*VALU, AMDGPU::OpName::src0);
970f311f934SStanislav Mekhanoshin       UseReg = Src->getReg();
971f311f934SStanislav Mekhanoshin       int WaitStatesNeededForDef =
972f311f934SStanislav Mekhanoshin           VALUWriteVGPRReadlaneRead -
973f311f934SStanislav Mekhanoshin           getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteVGPRReadlaneRead);
974f311f934SStanislav Mekhanoshin       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
975f311f934SStanislav Mekhanoshin     }
976f311f934SStanislav Mekhanoshin       LLVM_FALLTHROUGH;
977f311f934SStanislav Mekhanoshin     case AMDGPU::V_WRITELANE_B32: {
978f311f934SStanislav Mekhanoshin       UseReg = AMDGPU::EXEC;
979f311f934SStanislav Mekhanoshin       int WaitStatesNeededForDef =
980f311f934SStanislav Mekhanoshin           VALUWriteEXECRWLane -
981f311f934SStanislav Mekhanoshin           getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteEXECRWLane);
982f311f934SStanislav Mekhanoshin       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
983f311f934SStanislav Mekhanoshin       break;
984f311f934SStanislav Mekhanoshin     }
985f311f934SStanislav Mekhanoshin     default:
986f311f934SStanislav Mekhanoshin       break;
987f311f934SStanislav Mekhanoshin     }
988f311f934SStanislav Mekhanoshin   }
989f311f934SStanislav Mekhanoshin 
990d29f24acSMark Searles   // This checks for the hazard where VMEM instructions that store more than
991d29f24acSMark Searles   // 8 bytes can have there store data over written by the next instruction.
992d29f24acSMark Searles   if (!ST.has12DWordStoreHazard())
993f311f934SStanislav Mekhanoshin     return WaitStatesNeeded;
994d29f24acSMark Searles 
995d29f24acSMark Searles   const MachineRegisterInfo &MRI = MF.getRegInfo();
996d29f24acSMark Searles 
997d29f24acSMark Searles   for (const MachineOperand &Def : VALU->defs()) {
998d29f24acSMark Searles     WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
999d29f24acSMark Searles   }
1000d29f24acSMark Searles 
1001d29f24acSMark Searles   return WaitStatesNeeded;
1002d29f24acSMark Searles }
1003d29f24acSMark Searles 
checkInlineAsmHazards(MachineInstr * IA)1004d29f24acSMark Searles int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
1005d29f24acSMark Searles   // This checks for hazards associated with inline asm statements.
1006d29f24acSMark Searles   // Since inline asms can contain just about anything, we use this
1007d29f24acSMark Searles   // to call/leverage other check*Hazard routines. Note that
1008d29f24acSMark Searles   // this function doesn't attempt to address all possible inline asm
1009d29f24acSMark Searles   // hazards (good luck), but is a collection of what has been
1010d29f24acSMark Searles   // problematic thus far.
1011d29f24acSMark Searles 
1012d29f24acSMark Searles   // see checkVALUHazards()
1013d29f24acSMark Searles   if (!ST.has12DWordStoreHazard())
1014d29f24acSMark Searles     return 0;
1015d29f24acSMark Searles 
1016d29f24acSMark Searles   const MachineRegisterInfo &MRI = MF.getRegInfo();
1017d29f24acSMark Searles   int WaitStatesNeeded = 0;
1018d29f24acSMark Searles 
1019d29f24acSMark Searles   for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
1020d29f24acSMark Searles        I != E; ++I) {
1021d29f24acSMark Searles     const MachineOperand &Op = IA->getOperand(I);
1022d29f24acSMark Searles     if (Op.isReg() && Op.isDef()) {
1023d29f24acSMark Searles       WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
1024d29f24acSMark Searles     }
1025d29f24acSMark Searles   }
1026d29f24acSMark Searles 
1027b133fbb9STom Stellard   return WaitStatesNeeded;
1028b133fbb9STom Stellard }
102904051b5fSTom Stellard 
checkRWLaneHazards(MachineInstr * RWLane)103004051b5fSTom Stellard int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
103104051b5fSTom Stellard   const SIInstrInfo *TII = ST.getInstrInfo();
103204051b5fSTom Stellard   const SIRegisterInfo *TRI = ST.getRegisterInfo();
1033d29f24acSMark Searles   const MachineRegisterInfo &MRI = MF.getRegInfo();
103404051b5fSTom Stellard 
103504051b5fSTom Stellard   const MachineOperand *LaneSelectOp =
103604051b5fSTom Stellard       TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
103704051b5fSTom Stellard 
103804051b5fSTom Stellard   if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
103904051b5fSTom Stellard     return 0;
104004051b5fSTom Stellard 
10410c476111SDaniel Sanders   Register LaneSelectReg = LaneSelectOp->getReg();
1042424f1f6fSCarl Ritson   auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); };
104304051b5fSTom Stellard 
104404051b5fSTom Stellard   const int RWLaneWaitStates = 4;
1045f92ed696SStanislav Mekhanoshin   int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
1046f92ed696SStanislav Mekhanoshin                                               RWLaneWaitStates);
104704051b5fSTom Stellard   return RWLaneWaitStates - WaitStatesSince;
104804051b5fSTom Stellard }
1049aea899e2STom Stellard 
checkRFEHazards(MachineInstr * RFE)1050aea899e2STom Stellard int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
1051e4c2e9b0SMatt Arsenault   if (!ST.hasRFEHazards())
1052aea899e2STom Stellard     return 0;
1053aea899e2STom Stellard 
1054aea899e2STom Stellard   const SIInstrInfo *TII = ST.getInstrInfo();
1055aea899e2STom Stellard 
1056aea899e2STom Stellard   const int RFEWaitStates = 1;
1057aea899e2STom Stellard 
1058424f1f6fSCarl Ritson   auto IsHazardFn = [TII](const MachineInstr &MI) {
1059424f1f6fSCarl Ritson     return getHWReg(TII, MI) == AMDGPU::Hwreg::ID_TRAPSTS;
1060aea899e2STom Stellard   };
1061f92ed696SStanislav Mekhanoshin   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
1062aea899e2STom Stellard   return RFEWaitStates - WaitStatesNeeded;
1063aea899e2STom Stellard }
1064e823d92fSMatt Arsenault 
checkReadM0Hazards(MachineInstr * MI)1065e823d92fSMatt Arsenault int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
1066e823d92fSMatt Arsenault   const SIInstrInfo *TII = ST.getInstrInfo();
106763f21f4cSStanislav Mekhanoshin   const int ReadM0WaitStates = 1;
1068424f1f6fSCarl Ritson   auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
106963f21f4cSStanislav Mekhanoshin   return ReadM0WaitStates -
107063f21f4cSStanislav Mekhanoshin          getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
1071e823d92fSMatt Arsenault }
107251d1415aSStanislav Mekhanoshin 
fixHazards(MachineInstr * MI)10738a3d3a9aSAustin Kerbow void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
10748a3d3a9aSAustin Kerbow   fixVMEMtoScalarWriteHazards(MI);
10755f581c9fSStanislav Mekhanoshin   fixVcmpxPermlaneHazards(MI);
10768a3d3a9aSAustin Kerbow   fixSMEMtoVectorWriteHazards(MI);
10778a3d3a9aSAustin Kerbow   fixVcmpxExecWARHazard(MI);
10788a3d3a9aSAustin Kerbow   fixLdsBranchVmemWARHazard(MI);
107913107c27SJay Foad   if (ST.hasLdsDirect()) {
108013107c27SJay Foad     fixLdsDirectVALUHazard(MI);
108113107c27SJay Foad     fixLdsDirectVMEMHazard(MI);
108213107c27SJay Foad   }
10839dff14beSJay Foad   fixVALUPartialForwardingHazard(MI);
10849dff14beSJay Foad   fixVALUTransUseHazard(MI);
1085*4874838aSPiotr Sobczak   fixWMMAHazards(MI);
10868a3d3a9aSAustin Kerbow }
10878a3d3a9aSAustin Kerbow 
fixVcmpxPermlaneHazards(MachineInstr * MI)10885f581c9fSStanislav Mekhanoshin bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
10895f581c9fSStanislav Mekhanoshin   if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
10905f581c9fSStanislav Mekhanoshin     return false;
10915f581c9fSStanislav Mekhanoshin 
10925f581c9fSStanislav Mekhanoshin   const SIInstrInfo *TII = ST.getInstrInfo();
10935c974d08SStanislav Mekhanoshin   const SIRegisterInfo *TRI = ST.getRegisterInfo();
10945c974d08SStanislav Mekhanoshin   auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
10955c974d08SStanislav Mekhanoshin     return (TII->isVOPC(MI) ||
10965c974d08SStanislav Mekhanoshin             ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
10975c974d08SStanislav Mekhanoshin            MI.modifiesRegister(AMDGPU::EXEC, TRI);
10985c974d08SStanislav Mekhanoshin   };
10995f581c9fSStanislav Mekhanoshin 
1100424f1f6fSCarl Ritson   auto IsExpiredFn = [](const MachineInstr &MI, int) {
1101424f1f6fSCarl Ritson     unsigned Opc = MI.getOpcode();
1102424f1f6fSCarl Ritson     return SIInstrInfo::isVALU(MI) && Opc != AMDGPU::V_NOP_e32 &&
1103424f1f6fSCarl Ritson            Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa;
11045f581c9fSStanislav Mekhanoshin   };
11055f581c9fSStanislav Mekhanoshin 
11065f581c9fSStanislav Mekhanoshin   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
11075f581c9fSStanislav Mekhanoshin       std::numeric_limits<int>::max())
11085f581c9fSStanislav Mekhanoshin     return false;
11095f581c9fSStanislav Mekhanoshin 
11105f581c9fSStanislav Mekhanoshin   // V_NOP will be discarded by SQ.
1111380ff31dSThomas Symalla   // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
11125f581c9fSStanislav Mekhanoshin   // which is always a VGPR and available.
11135f581c9fSStanislav Mekhanoshin   auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
11140c476111SDaniel Sanders   Register Reg = Src0->getReg();
11155f581c9fSStanislav Mekhanoshin   bool IsUndef = Src0->isUndef();
11165f581c9fSStanislav Mekhanoshin   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
11175f581c9fSStanislav Mekhanoshin           TII->get(AMDGPU::V_MOV_B32_e32))
11185f581c9fSStanislav Mekhanoshin     .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
11195f581c9fSStanislav Mekhanoshin     .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);
11205f581c9fSStanislav Mekhanoshin 
11215f581c9fSStanislav Mekhanoshin   return true;
11225f581c9fSStanislav Mekhanoshin }
11235f581c9fSStanislav Mekhanoshin 
fixVMEMtoScalarWriteHazards(MachineInstr * MI)112451d1415aSStanislav Mekhanoshin bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
112551d1415aSStanislav Mekhanoshin   if (!ST.hasVMEMtoScalarWriteHazard())
112651d1415aSStanislav Mekhanoshin     return false;
112751d1415aSStanislav Mekhanoshin 
112851d1415aSStanislav Mekhanoshin   if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
112951d1415aSStanislav Mekhanoshin     return false;
113051d1415aSStanislav Mekhanoshin 
113151d1415aSStanislav Mekhanoshin   if (MI->getNumDefs() == 0)
113251d1415aSStanislav Mekhanoshin     return false;
113351d1415aSStanislav Mekhanoshin 
113451d1415aSStanislav Mekhanoshin   const SIRegisterInfo *TRI = ST.getRegisterInfo();
113551d1415aSStanislav Mekhanoshin 
1136424f1f6fSCarl Ritson   auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
1137424f1f6fSCarl Ritson     if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
1138424f1f6fSCarl Ritson         !SIInstrInfo::isFLAT(I))
113951d1415aSStanislav Mekhanoshin       return false;
114051d1415aSStanislav Mekhanoshin 
114151d1415aSStanislav Mekhanoshin     for (const MachineOperand &Def : MI->defs()) {
1142424f1f6fSCarl Ritson       const MachineOperand *Op =
1143424f1f6fSCarl Ritson           I.findRegisterUseOperand(Def.getReg(), false, TRI);
11448b7041a5SNicolai Haehnle       if (!Op)
114551d1415aSStanislav Mekhanoshin         continue;
114651d1415aSStanislav Mekhanoshin       return true;
114751d1415aSStanislav Mekhanoshin     }
114851d1415aSStanislav Mekhanoshin     return false;
114951d1415aSStanislav Mekhanoshin   };
115051d1415aSStanislav Mekhanoshin 
1151424f1f6fSCarl Ritson   auto IsExpiredFn = [](const MachineInstr &MI, int) {
1152424f1f6fSCarl Ritson     return SIInstrInfo::isVALU(MI) ||
1153424f1f6fSCarl Ritson            (MI.getOpcode() == AMDGPU::S_WAITCNT &&
1154424f1f6fSCarl Ritson             !MI.getOperand(0).getImm()) ||
1155424f1f6fSCarl Ritson            (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1156424f1f6fSCarl Ritson             MI.getOperand(0).getImm() == 0xffe3);
115751d1415aSStanislav Mekhanoshin   };
115851d1415aSStanislav Mekhanoshin 
115951d1415aSStanislav Mekhanoshin   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
116051d1415aSStanislav Mekhanoshin       std::numeric_limits<int>::max())
116151d1415aSStanislav Mekhanoshin     return false;
116251d1415aSStanislav Mekhanoshin 
116351d1415aSStanislav Mekhanoshin   const SIInstrInfo *TII = ST.getInstrInfo();
11645bf2a9ddSCarl Ritson   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
11655bf2a9ddSCarl Ritson           TII->get(AMDGPU::S_WAITCNT_DEPCTR))
11665bf2a9ddSCarl Ritson       .addImm(0xffe3);
116751d1415aSStanislav Mekhanoshin   return true;
116851d1415aSStanislav Mekhanoshin }
116951d1415aSStanislav Mekhanoshin 
fixSMEMtoVectorWriteHazards(MachineInstr * MI)117051d1415aSStanislav Mekhanoshin bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
117151d1415aSStanislav Mekhanoshin   if (!ST.hasSMEMtoVectorWriteHazard())
117251d1415aSStanislav Mekhanoshin     return false;
117351d1415aSStanislav Mekhanoshin 
117451d1415aSStanislav Mekhanoshin   if (!SIInstrInfo::isVALU(*MI))
117551d1415aSStanislav Mekhanoshin     return false;
117651d1415aSStanislav Mekhanoshin 
117751d1415aSStanislav Mekhanoshin   unsigned SDSTName;
117851d1415aSStanislav Mekhanoshin   switch (MI->getOpcode()) {
117951d1415aSStanislav Mekhanoshin   case AMDGPU::V_READLANE_B32:
118051d1415aSStanislav Mekhanoshin   case AMDGPU::V_READFIRSTLANE_B32:
118151d1415aSStanislav Mekhanoshin     SDSTName = AMDGPU::OpName::vdst;
118251d1415aSStanislav Mekhanoshin     break;
118351d1415aSStanislav Mekhanoshin   default:
118451d1415aSStanislav Mekhanoshin     SDSTName = AMDGPU::OpName::sdst;
118551d1415aSStanislav Mekhanoshin     break;
118651d1415aSStanislav Mekhanoshin   }
118751d1415aSStanislav Mekhanoshin 
118851d1415aSStanislav Mekhanoshin   const SIInstrInfo *TII = ST.getInstrInfo();
118951d1415aSStanislav Mekhanoshin   const SIRegisterInfo *TRI = ST.getRegisterInfo();
119034e95ce2SCarl Ritson   const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
119151d1415aSStanislav Mekhanoshin   const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
119251d1415aSStanislav Mekhanoshin   if (!SDST) {
11935ddd564eSStanislav Mekhanoshin     for (const auto &MO : MI->implicit_operands()) {
119451d1415aSStanislav Mekhanoshin       if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
119551d1415aSStanislav Mekhanoshin         SDST = &MO;
119651d1415aSStanislav Mekhanoshin         break;
119751d1415aSStanislav Mekhanoshin       }
119851d1415aSStanislav Mekhanoshin     }
119951d1415aSStanislav Mekhanoshin   }
120051d1415aSStanislav Mekhanoshin 
120151d1415aSStanislav Mekhanoshin   if (!SDST)
120251d1415aSStanislav Mekhanoshin     return false;
120351d1415aSStanislav Mekhanoshin 
12040c476111SDaniel Sanders   const Register SDSTReg = SDST->getReg();
1205424f1f6fSCarl Ritson   auto IsHazardFn = [SDSTReg, TRI](const MachineInstr &I) {
1206424f1f6fSCarl Ritson     return SIInstrInfo::isSMRD(I) && I.readsRegister(SDSTReg, TRI);
120751d1415aSStanislav Mekhanoshin   };
120851d1415aSStanislav Mekhanoshin 
1209424f1f6fSCarl Ritson   auto IsExpiredFn = [TII, IV](const MachineInstr &MI, int) {
1210424f1f6fSCarl Ritson     if (TII->isSALU(MI)) {
1211424f1f6fSCarl Ritson       switch (MI.getOpcode()) {
121251d1415aSStanislav Mekhanoshin       case AMDGPU::S_SETVSKIP:
121351d1415aSStanislav Mekhanoshin       case AMDGPU::S_VERSION:
121451d1415aSStanislav Mekhanoshin       case AMDGPU::S_WAITCNT_VSCNT:
121551d1415aSStanislav Mekhanoshin       case AMDGPU::S_WAITCNT_VMCNT:
121651d1415aSStanislav Mekhanoshin       case AMDGPU::S_WAITCNT_EXPCNT:
121734e95ce2SCarl Ritson         // These instructions cannot not mitigate the hazard.
121851d1415aSStanislav Mekhanoshin         return false;
121934e95ce2SCarl Ritson       case AMDGPU::S_WAITCNT_LGKMCNT:
122034e95ce2SCarl Ritson         // Reducing lgkmcnt count to 0 always mitigates the hazard.
1221424f1f6fSCarl Ritson         return (MI.getOperand(1).getImm() == 0) &&
1222424f1f6fSCarl Ritson                (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
122334e95ce2SCarl Ritson       case AMDGPU::S_WAITCNT: {
1224424f1f6fSCarl Ritson         const int64_t Imm = MI.getOperand(0).getImm();
122534e95ce2SCarl Ritson         AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
122634e95ce2SCarl Ritson         return (Decoded.LgkmCnt == 0);
122734e95ce2SCarl Ritson       }
122851d1415aSStanislav Mekhanoshin       default:
122934e95ce2SCarl Ritson         // SOPP instructions cannot mitigate the hazard.
1230424f1f6fSCarl Ritson         if (TII->isSOPP(MI))
123134e95ce2SCarl Ritson           return false;
123234e95ce2SCarl Ritson         // At this point the SALU can be assumed to mitigate the hazard
123334e95ce2SCarl Ritson         // because either:
123434e95ce2SCarl Ritson         // (a) it is independent of the at risk SMEM (breaking chain),
123534e95ce2SCarl Ritson         // or
123634e95ce2SCarl Ritson         // (b) it is dependent on the SMEM, in which case an appropriate
123734e95ce2SCarl Ritson         //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
123834e95ce2SCarl Ritson         //     SMEM instruction.
123951d1415aSStanislav Mekhanoshin         return true;
124051d1415aSStanislav Mekhanoshin       }
124151d1415aSStanislav Mekhanoshin     }
124251d1415aSStanislav Mekhanoshin     return false;
124351d1415aSStanislav Mekhanoshin   };
124451d1415aSStanislav Mekhanoshin 
124551d1415aSStanislav Mekhanoshin   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
124651d1415aSStanislav Mekhanoshin       std::numeric_limits<int>::max())
124751d1415aSStanislav Mekhanoshin     return false;
124851d1415aSStanislav Mekhanoshin 
124951d1415aSStanislav Mekhanoshin   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
125051d1415aSStanislav Mekhanoshin           TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
125151d1415aSStanislav Mekhanoshin       .addImm(0);
125251d1415aSStanislav Mekhanoshin   return true;
125351d1415aSStanislav Mekhanoshin }
125451d1415aSStanislav Mekhanoshin 
fixVcmpxExecWARHazard(MachineInstr * MI)125551d1415aSStanislav Mekhanoshin bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
125651d1415aSStanislav Mekhanoshin   if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
125751d1415aSStanislav Mekhanoshin     return false;
125851d1415aSStanislav Mekhanoshin 
125951d1415aSStanislav Mekhanoshin   const SIRegisterInfo *TRI = ST.getRegisterInfo();
126051d1415aSStanislav Mekhanoshin   if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
126151d1415aSStanislav Mekhanoshin     return false;
126251d1415aSStanislav Mekhanoshin 
1263424f1f6fSCarl Ritson   auto IsHazardFn = [TRI](const MachineInstr &I) {
1264424f1f6fSCarl Ritson     if (SIInstrInfo::isVALU(I))
126551d1415aSStanislav Mekhanoshin       return false;
1266424f1f6fSCarl Ritson     return I.readsRegister(AMDGPU::EXEC, TRI);
126751d1415aSStanislav Mekhanoshin   };
126851d1415aSStanislav Mekhanoshin 
126951d1415aSStanislav Mekhanoshin   const SIInstrInfo *TII = ST.getInstrInfo();
1270424f1f6fSCarl Ritson   auto IsExpiredFn = [TII, TRI](const MachineInstr &MI, int) {
1271424f1f6fSCarl Ritson     if (SIInstrInfo::isVALU(MI)) {
1272424f1f6fSCarl Ritson       if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
127351d1415aSStanislav Mekhanoshin         return true;
1274424f1f6fSCarl Ritson       for (auto MO : MI.implicit_operands())
127551d1415aSStanislav Mekhanoshin         if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
127651d1415aSStanislav Mekhanoshin           return true;
127751d1415aSStanislav Mekhanoshin     }
1278424f1f6fSCarl Ritson     if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1279424f1f6fSCarl Ritson         (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
128051d1415aSStanislav Mekhanoshin       return true;
128151d1415aSStanislav Mekhanoshin     return false;
128251d1415aSStanislav Mekhanoshin   };
128351d1415aSStanislav Mekhanoshin 
128451d1415aSStanislav Mekhanoshin   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
128551d1415aSStanislav Mekhanoshin       std::numeric_limits<int>::max())
128651d1415aSStanislav Mekhanoshin     return false;
128751d1415aSStanislav Mekhanoshin 
128851d1415aSStanislav Mekhanoshin   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
128951d1415aSStanislav Mekhanoshin           TII->get(AMDGPU::S_WAITCNT_DEPCTR))
129051d1415aSStanislav Mekhanoshin     .addImm(0xfffe);
129151d1415aSStanislav Mekhanoshin   return true;
129251d1415aSStanislav Mekhanoshin }
129351d1415aSStanislav Mekhanoshin 
shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction & MF,const GCNSubtarget & ST)1294e0c382a9SPiotr Sobczak static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
1295e0c382a9SPiotr Sobczak                                                  const GCNSubtarget &ST) {
129651d1415aSStanislav Mekhanoshin   if (!ST.hasLdsBranchVmemWARHazard())
129751d1415aSStanislav Mekhanoshin     return false;
129851d1415aSStanislav Mekhanoshin 
1299e0c382a9SPiotr Sobczak   // Check if the necessary condition for the hazard is met: both LDS and VMEM
1300e0c382a9SPiotr Sobczak   // instructions need to appear in the same function.
1301e0c382a9SPiotr Sobczak   bool HasLds = false;
1302e0c382a9SPiotr Sobczak   bool HasVmem = false;
1303e0c382a9SPiotr Sobczak   for (auto &MBB : MF) {
1304e0c382a9SPiotr Sobczak     for (auto &MI : MBB) {
1305e0c382a9SPiotr Sobczak       HasLds |= SIInstrInfo::isDS(MI);
1306e0c382a9SPiotr Sobczak       HasVmem |=
1307e0c382a9SPiotr Sobczak           SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
1308e0c382a9SPiotr Sobczak       if (HasLds && HasVmem)
1309e0c382a9SPiotr Sobczak         return true;
1310e0c382a9SPiotr Sobczak     }
1311e0c382a9SPiotr Sobczak   }
1312e0c382a9SPiotr Sobczak   return false;
1313e0c382a9SPiotr Sobczak }
1314e0c382a9SPiotr Sobczak 
fixLdsBranchVmemWARHazard(MachineInstr * MI)1315e0c382a9SPiotr Sobczak bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
1316e0c382a9SPiotr Sobczak   if (!RunLdsBranchVmemWARHazardFixup)
1317e0c382a9SPiotr Sobczak     return false;
1318e0c382a9SPiotr Sobczak 
1319e0c382a9SPiotr Sobczak   assert(ST.hasLdsBranchVmemWARHazard());
1320e0c382a9SPiotr Sobczak 
1321424f1f6fSCarl Ritson   auto IsHazardInst = [](const MachineInstr &MI) {
1322424f1f6fSCarl Ritson     if (SIInstrInfo::isDS(MI))
132351d1415aSStanislav Mekhanoshin       return 1;
1324424f1f6fSCarl Ritson     if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
132551d1415aSStanislav Mekhanoshin       return 2;
132651d1415aSStanislav Mekhanoshin     return 0;
132751d1415aSStanislav Mekhanoshin   };
132851d1415aSStanislav Mekhanoshin 
1329424f1f6fSCarl Ritson   auto InstType = IsHazardInst(*MI);
133051d1415aSStanislav Mekhanoshin   if (!InstType)
133151d1415aSStanislav Mekhanoshin     return false;
133251d1415aSStanislav Mekhanoshin 
1333424f1f6fSCarl Ritson   auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) {
1334424f1f6fSCarl Ritson     return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1335424f1f6fSCarl Ritson                                I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1336424f1f6fSCarl Ritson                                !I.getOperand(1).getImm());
133751d1415aSStanislav Mekhanoshin   };
133851d1415aSStanislav Mekhanoshin 
1339424f1f6fSCarl Ritson   auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) {
1340424f1f6fSCarl Ritson     if (!I.isBranch())
134151d1415aSStanislav Mekhanoshin       return false;
134251d1415aSStanislav Mekhanoshin 
1343424f1f6fSCarl Ritson     auto IsHazardFn = [InstType, IsHazardInst](const MachineInstr &I) {
134451d1415aSStanislav Mekhanoshin       auto InstType2 = IsHazardInst(I);
134551d1415aSStanislav Mekhanoshin       return InstType2 && InstType != InstType2;
134651d1415aSStanislav Mekhanoshin     };
134751d1415aSStanislav Mekhanoshin 
1348424f1f6fSCarl Ritson     auto IsExpiredFn = [InstType, &IsHazardInst](const MachineInstr &I, int) {
134951d1415aSStanislav Mekhanoshin       auto InstType2 = IsHazardInst(I);
135051d1415aSStanislav Mekhanoshin       if (InstType == InstType2)
135151d1415aSStanislav Mekhanoshin         return true;
135251d1415aSStanislav Mekhanoshin 
1353424f1f6fSCarl Ritson       return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1354424f1f6fSCarl Ritson              I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1355424f1f6fSCarl Ritson              !I.getOperand(1).getImm();
135651d1415aSStanislav Mekhanoshin     };
135751d1415aSStanislav Mekhanoshin 
1358424f1f6fSCarl Ritson     return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) !=
135951d1415aSStanislav Mekhanoshin            std::numeric_limits<int>::max();
136051d1415aSStanislav Mekhanoshin   };
136151d1415aSStanislav Mekhanoshin 
136251d1415aSStanislav Mekhanoshin   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
136351d1415aSStanislav Mekhanoshin       std::numeric_limits<int>::max())
136451d1415aSStanislav Mekhanoshin     return false;
136551d1415aSStanislav Mekhanoshin 
136651d1415aSStanislav Mekhanoshin   const SIInstrInfo *TII = ST.getInstrInfo();
136751d1415aSStanislav Mekhanoshin   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
136851d1415aSStanislav Mekhanoshin           TII->get(AMDGPU::S_WAITCNT_VSCNT))
136951d1415aSStanislav Mekhanoshin     .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
137051d1415aSStanislav Mekhanoshin     .addImm(0);
137151d1415aSStanislav Mekhanoshin 
137251d1415aSStanislav Mekhanoshin   return true;
137351d1415aSStanislav Mekhanoshin }
137451d1415aSStanislav Mekhanoshin 
fixLdsDirectVALUHazard(MachineInstr * MI)137513107c27SJay Foad bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
137613107c27SJay Foad   if (!SIInstrInfo::isLDSDIR(*MI))
137713107c27SJay Foad     return false;
137813107c27SJay Foad 
137913107c27SJay Foad   const int NoHazardWaitStates = 15;
138013107c27SJay Foad   const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
138113107c27SJay Foad   const Register VDSTReg = VDST->getReg();
138213107c27SJay Foad 
138313107c27SJay Foad   bool VisitedTrans = false;
138413107c27SJay Foad   auto IsHazardFn = [this, VDSTReg, &VisitedTrans](const MachineInstr &I) {
138513107c27SJay Foad     if (!SIInstrInfo::isVALU(I))
138613107c27SJay Foad       return false;
138713107c27SJay Foad     VisitedTrans = VisitedTrans || SIInstrInfo::isTRANS(I);
138813107c27SJay Foad     // Cover both WAR and WAW
138913107c27SJay Foad     return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
139013107c27SJay Foad   };
139113107c27SJay Foad   auto IsExpiredFn = [&](const MachineInstr &I, int WaitStates) {
139213107c27SJay Foad     if (WaitStates >= NoHazardWaitStates)
139313107c27SJay Foad       return true;
139413107c27SJay Foad     // Instructions which cause va_vdst==0 expire hazard
139513107c27SJay Foad     return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
139613107c27SJay Foad            SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
139713107c27SJay Foad   };
139813107c27SJay Foad   auto GetWaitStatesFn = [](const MachineInstr &MI) {
139913107c27SJay Foad     return SIInstrInfo::isVALU(MI) ? 1 : 0;
140013107c27SJay Foad   };
140113107c27SJay Foad 
140213107c27SJay Foad   DenseSet<const MachineBasicBlock *> Visited;
140313107c27SJay Foad   auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(),
140413107c27SJay Foad                                     std::next(MI->getReverseIterator()), 0,
140513107c27SJay Foad                                     IsExpiredFn, Visited, GetWaitStatesFn);
140613107c27SJay Foad 
140713107c27SJay Foad   // Transcendentals can execute in parallel to other VALUs.
140813107c27SJay Foad   // This makes va_vdst count unusable with a mixture of VALU and TRANS.
140913107c27SJay Foad   if (VisitedTrans)
141013107c27SJay Foad     Count = 0;
141113107c27SJay Foad 
141213107c27SJay Foad   MachineOperand *WaitVdstOp =
141313107c27SJay Foad       TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst);
141413107c27SJay Foad   WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates));
141513107c27SJay Foad 
141613107c27SJay Foad   return true;
141713107c27SJay Foad }
141813107c27SJay Foad 
fixLdsDirectVMEMHazard(MachineInstr * MI)141913107c27SJay Foad bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
142013107c27SJay Foad   if (!SIInstrInfo::isLDSDIR(*MI))
142113107c27SJay Foad     return false;
142213107c27SJay Foad 
142313107c27SJay Foad   const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
142413107c27SJay Foad   const Register VDSTReg = VDST->getReg();
142513107c27SJay Foad 
142613107c27SJay Foad   auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
142713107c27SJay Foad     if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
142813107c27SJay Foad         !SIInstrInfo::isDS(I))
142913107c27SJay Foad       return false;
143013107c27SJay Foad     return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
143113107c27SJay Foad   };
143213107c27SJay Foad   auto IsExpiredFn = [](const MachineInstr &I, int) {
143313107c27SJay Foad     return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
143413107c27SJay Foad            (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
143513107c27SJay Foad            (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
143613107c27SJay Foad             I.getOperand(0).getImm() == 0xffe3);
143713107c27SJay Foad   };
143813107c27SJay Foad 
143913107c27SJay Foad   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
144013107c27SJay Foad       std::numeric_limits<int>::max())
144113107c27SJay Foad     return false;
144213107c27SJay Foad 
144313107c27SJay Foad   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
144413107c27SJay Foad           TII.get(AMDGPU::S_WAITCNT_DEPCTR))
144513107c27SJay Foad       .addImm(0xffe3);
144613107c27SJay Foad 
144713107c27SJay Foad   return true;
144813107c27SJay Foad }
144913107c27SJay Foad 
fixVALUPartialForwardingHazard(MachineInstr * MI)14509dff14beSJay Foad bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
14519dff14beSJay Foad   if (!ST.isWave64())
14529dff14beSJay Foad     return false;
14539dff14beSJay Foad   if (!ST.hasVALUPartialForwardingHazard())
14549dff14beSJay Foad     return false;
14559dff14beSJay Foad   if (!SIInstrInfo::isVALU(*MI))
14569dff14beSJay Foad     return false;
14579dff14beSJay Foad 
14589dff14beSJay Foad   SmallSetVector<Register, 4> SrcVGPRs;
14599dff14beSJay Foad 
14609dff14beSJay Foad   for (const MachineOperand &Use : MI->explicit_uses()) {
14619dff14beSJay Foad     if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
14629dff14beSJay Foad       SrcVGPRs.insert(Use.getReg());
14639dff14beSJay Foad   }
14649dff14beSJay Foad 
14659dff14beSJay Foad   // Only applies with >= 2 unique VGPR sources
14669dff14beSJay Foad   if (SrcVGPRs.size() <= 1)
14679dff14beSJay Foad     return false;
14689dff14beSJay Foad 
14699dff14beSJay Foad   // Look for the following pattern:
14709dff14beSJay Foad   //   Va <- VALU [PreExecPos]
14719dff14beSJay Foad   //   intv1
14729dff14beSJay Foad   //   Exec <- SALU [ExecPos]
14739dff14beSJay Foad   //   intv2
14749dff14beSJay Foad   //   Vb <- VALU [PostExecPos]
14759dff14beSJay Foad   //   intv3
14769dff14beSJay Foad   //   MI Va, Vb (WaitState = 0)
14779dff14beSJay Foad   //
14789dff14beSJay Foad   // Where:
14799dff14beSJay Foad   // intv1 + intv2 <= 2 VALUs
14809dff14beSJay Foad   // intv3 <= 4 VALUs
14819dff14beSJay Foad   //
14829dff14beSJay Foad   // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
14839dff14beSJay Foad 
14849dff14beSJay Foad   const int Intv1plus2MaxVALUs = 2;
14859dff14beSJay Foad   const int Intv3MaxVALUs = 4;
14869dff14beSJay Foad   const int IntvMaxVALUs = 6;
14879dff14beSJay Foad   const int NoHazardVALUWaitStates = IntvMaxVALUs + 2;
14889dff14beSJay Foad 
14899dff14beSJay Foad   struct StateType {
14909dff14beSJay Foad     SmallDenseMap<Register, int, 4> DefPos;
14919dff14beSJay Foad     int ExecPos = std::numeric_limits<int>::max();
14929dff14beSJay Foad     int VALUs = 0;
14939dff14beSJay Foad   };
14949dff14beSJay Foad 
14959dff14beSJay Foad   StateType State;
14969dff14beSJay Foad 
14979dff14beSJay Foad   // This overloads expiry testing with all the hazard detection
14989dff14beSJay Foad   auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
14999dff14beSJay Foad     // Too many VALU states have passed
15009dff14beSJay Foad     if (State.VALUs > NoHazardVALUWaitStates)
15019dff14beSJay Foad       return HazardExpired;
15029dff14beSJay Foad 
15039dff14beSJay Foad     // Instructions which cause va_vdst==0 expire hazard
15049dff14beSJay Foad     if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
15059dff14beSJay Foad         SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
15069dff14beSJay Foad         (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
15079dff14beSJay Foad          I.getOperand(0).getImm() == 0x0fff))
15089dff14beSJay Foad       return HazardExpired;
15099dff14beSJay Foad 
15109dff14beSJay Foad     // Track registers writes
15119dff14beSJay Foad     bool Changed = false;
15129dff14beSJay Foad     if (SIInstrInfo::isVALU(I)) {
15139dff14beSJay Foad       for (Register Src : SrcVGPRs) {
15149dff14beSJay Foad         if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) {
15159dff14beSJay Foad           State.DefPos[Src] = State.VALUs;
15169dff14beSJay Foad           Changed = true;
15179dff14beSJay Foad         }
15189dff14beSJay Foad       }
15199dff14beSJay Foad     } else if (SIInstrInfo::isSALU(I)) {
15209dff14beSJay Foad       if (State.ExecPos == std::numeric_limits<int>::max()) {
15219dff14beSJay Foad         if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) {
15229dff14beSJay Foad           State.ExecPos = State.VALUs;
15239dff14beSJay Foad           Changed = true;
15249dff14beSJay Foad         }
15259dff14beSJay Foad       }
15269dff14beSJay Foad     }
15279dff14beSJay Foad 
15289dff14beSJay Foad     // Early expiration: too many VALUs in intv3
15299dff14beSJay Foad     if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty())
15309dff14beSJay Foad       return HazardExpired;
15319dff14beSJay Foad 
15329dff14beSJay Foad     // Only evaluate state if something changed
15339dff14beSJay Foad     if (!Changed)
15349dff14beSJay Foad       return NoHazardFound;
15359dff14beSJay Foad 
15369dff14beSJay Foad     // Determine positions of VALUs pre/post exec change
15379dff14beSJay Foad     if (State.ExecPos == std::numeric_limits<int>::max())
15389dff14beSJay Foad       return NoHazardFound;
15399dff14beSJay Foad 
15409dff14beSJay Foad     int PreExecPos = std::numeric_limits<int>::max();
15419dff14beSJay Foad     int PostExecPos = std::numeric_limits<int>::max();
15429dff14beSJay Foad 
15439dff14beSJay Foad     for (auto Entry : State.DefPos) {
15449dff14beSJay Foad       int DefVALUs = Entry.second;
15459dff14beSJay Foad       if (DefVALUs != std::numeric_limits<int>::max()) {
15469dff14beSJay Foad         if (DefVALUs >= State.ExecPos)
15479dff14beSJay Foad           PreExecPos = std::min(PreExecPos, DefVALUs);
15489dff14beSJay Foad         else if (DefVALUs < State.ExecPos)
15499dff14beSJay Foad           PostExecPos = std::min(PostExecPos, DefVALUs);
15509dff14beSJay Foad       }
15519dff14beSJay Foad     }
15529dff14beSJay Foad 
15539dff14beSJay Foad     // Need a VALUs post exec change
15549dff14beSJay Foad     if (PostExecPos == std::numeric_limits<int>::max())
15559dff14beSJay Foad       return NoHazardFound;
15569dff14beSJay Foad 
15579dff14beSJay Foad     // Too many VALUs in intv3?
15589dff14beSJay Foad     int Intv3VALUs = PostExecPos;
15599dff14beSJay Foad     if (Intv3VALUs > Intv3MaxVALUs)
15609dff14beSJay Foad       return HazardExpired;
15619dff14beSJay Foad 
15629dff14beSJay Foad     // Too many VALUs in intv2?
15639dff14beSJay Foad     int Intv2VALUs = (State.ExecPos - PostExecPos) - 1;
15649dff14beSJay Foad     if (Intv2VALUs > Intv1plus2MaxVALUs)
15659dff14beSJay Foad       return HazardExpired;
15669dff14beSJay Foad 
15679dff14beSJay Foad     // Need a VALUs pre exec change
15689dff14beSJay Foad     if (PreExecPos == std::numeric_limits<int>::max())
15699dff14beSJay Foad       return NoHazardFound;
15709dff14beSJay Foad 
15719dff14beSJay Foad     // Too many VALUs in intv1?
15729dff14beSJay Foad     int Intv1VALUs = PreExecPos - State.ExecPos;
15739dff14beSJay Foad     if (Intv1VALUs > Intv1plus2MaxVALUs)
15749dff14beSJay Foad       return HazardExpired;
15759dff14beSJay Foad 
15769dff14beSJay Foad     // Too many VALUs in intv1 + intv2
15779dff14beSJay Foad     if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs)
15789dff14beSJay Foad       return HazardExpired;
15799dff14beSJay Foad 
15809dff14beSJay Foad     return HazardFound;
15819dff14beSJay Foad   };
15829dff14beSJay Foad   auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
15839dff14beSJay Foad     if (SIInstrInfo::isVALU(MI))
15849dff14beSJay Foad       State.VALUs += 1;
15859dff14beSJay Foad   };
15869dff14beSJay Foad 
15879dff14beSJay Foad   DenseSet<const MachineBasicBlock *> Visited;
15889dff14beSJay Foad   if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
15899dff14beSJay Foad                             std::next(MI->getReverseIterator()), Visited))
15909dff14beSJay Foad     return false;
15919dff14beSJay Foad 
15929dff14beSJay Foad   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
15939dff14beSJay Foad           TII.get(AMDGPU::S_WAITCNT_DEPCTR))
15949dff14beSJay Foad       .addImm(0x0fff);
15959dff14beSJay Foad 
15969dff14beSJay Foad   return true;
15979dff14beSJay Foad }
15989dff14beSJay Foad 
fixVALUTransUseHazard(MachineInstr * MI)15999dff14beSJay Foad bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
16009dff14beSJay Foad   if (!ST.hasVALUTransUseHazard())
16019dff14beSJay Foad     return false;
16029dff14beSJay Foad   if (!SIInstrInfo::isVALU(*MI))
16039dff14beSJay Foad     return false;
16049dff14beSJay Foad 
16059dff14beSJay Foad   SmallSet<Register, 4> SrcVGPRs;
16069dff14beSJay Foad 
16079dff14beSJay Foad   for (const MachineOperand &Use : MI->explicit_uses()) {
16089dff14beSJay Foad     if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
16099dff14beSJay Foad       SrcVGPRs.insert(Use.getReg());
16109dff14beSJay Foad   }
16119dff14beSJay Foad 
16129dff14beSJay Foad   // Look for the following pattern:
16139dff14beSJay Foad   //   Va <- TRANS VALU
16149dff14beSJay Foad   //   intv
16159dff14beSJay Foad   //   MI Va (WaitState = 0)
16169dff14beSJay Foad   //
16179dff14beSJay Foad   // Where:
16189dff14beSJay Foad   // intv <= 5 VALUs / 1 TRANS
16199dff14beSJay Foad   //
16209dff14beSJay Foad   // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
16219dff14beSJay Foad 
16229dff14beSJay Foad   const int IntvMaxVALUs = 5;
16239dff14beSJay Foad   const int IntvMaxTRANS = 1;
16249dff14beSJay Foad 
16259dff14beSJay Foad   struct StateType {
16269dff14beSJay Foad     int VALUs = 0;
16279dff14beSJay Foad     int TRANS = 0;
16289dff14beSJay Foad   };
16299dff14beSJay Foad 
16309dff14beSJay Foad   StateType State;
16319dff14beSJay Foad 
16329dff14beSJay Foad   // This overloads expiry testing with all the hazard detection
16339dff14beSJay Foad   auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
16349dff14beSJay Foad     // Too many VALU states have passed
16359dff14beSJay Foad     if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS)
16369dff14beSJay Foad       return HazardExpired;
16379dff14beSJay Foad 
16389dff14beSJay Foad     // Instructions which cause va_vdst==0 expire hazard
16399dff14beSJay Foad     if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
16409dff14beSJay Foad         SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
16419dff14beSJay Foad         (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
16429dff14beSJay Foad          I.getOperand(0).getImm() == 0x0fff))
16439dff14beSJay Foad       return HazardExpired;
16449dff14beSJay Foad 
16459dff14beSJay Foad     // Track registers writes
16469dff14beSJay Foad     if (SIInstrInfo::isTRANS(I)) {
16479dff14beSJay Foad       for (Register Src : SrcVGPRs) {
16489dff14beSJay Foad         if (I.modifiesRegister(Src, &TRI)) {
16499dff14beSJay Foad           return HazardFound;
16509dff14beSJay Foad         }
16519dff14beSJay Foad       }
16529dff14beSJay Foad     }
16539dff14beSJay Foad 
16549dff14beSJay Foad     return NoHazardFound;
16559dff14beSJay Foad   };
16569dff14beSJay Foad   auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
16579dff14beSJay Foad     if (SIInstrInfo::isVALU(MI))
16589dff14beSJay Foad       State.VALUs += 1;
16599dff14beSJay Foad     if (SIInstrInfo::isTRANS(MI))
16609dff14beSJay Foad       State.TRANS += 1;
16619dff14beSJay Foad   };
16629dff14beSJay Foad 
16639dff14beSJay Foad   DenseSet<const MachineBasicBlock *> Visited;
16649dff14beSJay Foad   if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
16659dff14beSJay Foad                             std::next(MI->getReverseIterator()), Visited))
16669dff14beSJay Foad     return false;
16679dff14beSJay Foad 
16689dff14beSJay Foad   // Hazard is observed - insert a wait on va_dst counter to ensure hazard is
16699dff14beSJay Foad   // avoided (mask 0x0fff achieves this).
16709dff14beSJay Foad   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
16719dff14beSJay Foad           TII.get(AMDGPU::S_WAITCNT_DEPCTR))
16729dff14beSJay Foad       .addImm(0x0fff);
16739dff14beSJay Foad 
16749dff14beSJay Foad   return true;
16759dff14beSJay Foad }
16769dff14beSJay Foad 
fixWMMAHazards(MachineInstr * MI)1677*4874838aSPiotr Sobczak bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
1678*4874838aSPiotr Sobczak   if (!SIInstrInfo::isWMMA(*MI))
1679*4874838aSPiotr Sobczak     return false;
1680*4874838aSPiotr Sobczak 
1681*4874838aSPiotr Sobczak   const SIInstrInfo *TII = ST.getInstrInfo();
1682*4874838aSPiotr Sobczak   const SIRegisterInfo *TRI = ST.getRegisterInfo();
1683*4874838aSPiotr Sobczak 
1684*4874838aSPiotr Sobczak   auto IsHazardFn = [MI, TII, TRI](const MachineInstr &I) {
1685*4874838aSPiotr Sobczak     if (!SIInstrInfo::isWMMA(I))
1686*4874838aSPiotr Sobczak       return false;
1687*4874838aSPiotr Sobczak 
1688*4874838aSPiotr Sobczak     // Src0 or Src1 of the current wmma instruction overlaps with the dest of
1689*4874838aSPiotr Sobczak     // the previous wmma.
1690*4874838aSPiotr Sobczak     const Register CurSrc0Reg =
1691*4874838aSPiotr Sobczak         TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg();
1692*4874838aSPiotr Sobczak     const Register CurSrc1Reg =
1693*4874838aSPiotr Sobczak         TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
1694*4874838aSPiotr Sobczak 
1695*4874838aSPiotr Sobczak     const Register PrevDstReg =
1696*4874838aSPiotr Sobczak         TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg();
1697*4874838aSPiotr Sobczak 
1698*4874838aSPiotr Sobczak     if (TRI->regsOverlap(PrevDstReg, CurSrc0Reg) ||
1699*4874838aSPiotr Sobczak         TRI->regsOverlap(PrevDstReg, CurSrc1Reg)) {
1700*4874838aSPiotr Sobczak       return true;
1701*4874838aSPiotr Sobczak     }
1702*4874838aSPiotr Sobczak 
1703*4874838aSPiotr Sobczak     // Src2 of the current wmma instruction overlaps with the dest of the
1704*4874838aSPiotr Sobczak     // previous wmma.
1705*4874838aSPiotr Sobczak     const MachineOperand *Src2 =
1706*4874838aSPiotr Sobczak         TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
1707*4874838aSPiotr Sobczak     const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register();
1708*4874838aSPiotr Sobczak 
1709*4874838aSPiotr Sobczak     if (CurSrc2Reg != AMDGPU::NoRegister &&
1710*4874838aSPiotr Sobczak         TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) {
1711*4874838aSPiotr Sobczak 
1712*4874838aSPiotr Sobczak       const MachineOperand *Src2Mods =
1713*4874838aSPiotr Sobczak           TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
1714*4874838aSPiotr Sobczak       const bool NoSrc2Mods =
1715*4874838aSPiotr Sobczak           (Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0;
1716*4874838aSPiotr Sobczak       // Exception: there is no hazard if the wmma instructions are of the same
1717*4874838aSPiotr Sobczak       // type and there is no input modifier on src2 of the current instruction.
1718*4874838aSPiotr Sobczak       return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) ==
1719*4874838aSPiotr Sobczak                               TII->pseudoToMCOpcode(MI->getOpcode())));
1720*4874838aSPiotr Sobczak     }
1721*4874838aSPiotr Sobczak 
1722*4874838aSPiotr Sobczak     return false;
1723*4874838aSPiotr Sobczak   };
1724*4874838aSPiotr Sobczak 
1725*4874838aSPiotr Sobczak   auto IsExpiredFn = [](const MachineInstr &I, int) {
1726*4874838aSPiotr Sobczak     return SIInstrInfo::isVALU(I);
1727*4874838aSPiotr Sobczak   };
1728*4874838aSPiotr Sobczak 
1729*4874838aSPiotr Sobczak   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1730*4874838aSPiotr Sobczak       std::numeric_limits<int>::max())
1731*4874838aSPiotr Sobczak     return false;
1732*4874838aSPiotr Sobczak 
1733*4874838aSPiotr Sobczak   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
1734*4874838aSPiotr Sobczak 
1735*4874838aSPiotr Sobczak   return true;
1736*4874838aSPiotr Sobczak }
1737*4874838aSPiotr Sobczak 
checkNSAtoVMEMHazard(MachineInstr * MI)173851d1415aSStanislav Mekhanoshin int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
173951d1415aSStanislav Mekhanoshin   int NSAtoVMEMWaitStates = 1;
174051d1415aSStanislav Mekhanoshin 
174151d1415aSStanislav Mekhanoshin   if (!ST.hasNSAtoVMEMBug())
174251d1415aSStanislav Mekhanoshin     return 0;
174351d1415aSStanislav Mekhanoshin 
174451d1415aSStanislav Mekhanoshin   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
174551d1415aSStanislav Mekhanoshin     return 0;
174651d1415aSStanislav Mekhanoshin 
174751d1415aSStanislav Mekhanoshin   const SIInstrInfo *TII = ST.getInstrInfo();
174851d1415aSStanislav Mekhanoshin   const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
174951d1415aSStanislav Mekhanoshin   if (!Offset || (Offset->getImm() & 6) == 0)
175051d1415aSStanislav Mekhanoshin     return 0;
175151d1415aSStanislav Mekhanoshin 
1752424f1f6fSCarl Ritson   auto IsHazardFn = [TII](const MachineInstr &I) {
1753424f1f6fSCarl Ritson     if (!SIInstrInfo::isMIMG(I))
175451d1415aSStanislav Mekhanoshin       return false;
1755424f1f6fSCarl Ritson     const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I.getOpcode());
175651d1415aSStanislav Mekhanoshin     return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
1757424f1f6fSCarl Ritson            TII->getInstSizeInBytes(I) >= 16;
175851d1415aSStanislav Mekhanoshin   };
175951d1415aSStanislav Mekhanoshin 
176051d1415aSStanislav Mekhanoshin   return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
176151d1415aSStanislav Mekhanoshin }
1762bdf7f81bSStanislav Mekhanoshin 
checkFPAtomicToDenormModeHazard(MachineInstr * MI)1763bdf7f81bSStanislav Mekhanoshin int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
1764bdf7f81bSStanislav Mekhanoshin   int FPAtomicToDenormModeWaitStates = 3;
1765bdf7f81bSStanislav Mekhanoshin 
1766bdf7f81bSStanislav Mekhanoshin   if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
1767bdf7f81bSStanislav Mekhanoshin     return 0;
1768bdf7f81bSStanislav Mekhanoshin 
1769424f1f6fSCarl Ritson   auto IsHazardFn = [](const MachineInstr &I) {
1770424f1f6fSCarl Ritson     if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
1771bdf7f81bSStanislav Mekhanoshin       return false;
1772424f1f6fSCarl Ritson     return SIInstrInfo::isFPAtomic(I);
1773bdf7f81bSStanislav Mekhanoshin   };
1774bdf7f81bSStanislav Mekhanoshin 
1775424f1f6fSCarl Ritson   auto IsExpiredFn = [](const MachineInstr &MI, int WaitStates) {
1776424f1f6fSCarl Ritson     if (WaitStates >= 3 || SIInstrInfo::isVALU(MI))
1777bdf7f81bSStanislav Mekhanoshin       return true;
1778bdf7f81bSStanislav Mekhanoshin 
1779424f1f6fSCarl Ritson     switch (MI.getOpcode()) {
1780bdf7f81bSStanislav Mekhanoshin     case AMDGPU::S_WAITCNT:
1781bdf7f81bSStanislav Mekhanoshin     case AMDGPU::S_WAITCNT_VSCNT:
1782bdf7f81bSStanislav Mekhanoshin     case AMDGPU::S_WAITCNT_VMCNT:
1783bdf7f81bSStanislav Mekhanoshin     case AMDGPU::S_WAITCNT_EXPCNT:
1784bdf7f81bSStanislav Mekhanoshin     case AMDGPU::S_WAITCNT_LGKMCNT:
17859f69c1bcSJay Foad     case AMDGPU::S_WAIT_IDLE:
1786bdf7f81bSStanislav Mekhanoshin       return true;
1787bdf7f81bSStanislav Mekhanoshin     default:
1788bdf7f81bSStanislav Mekhanoshin       break;
1789bdf7f81bSStanislav Mekhanoshin     }
1790bdf7f81bSStanislav Mekhanoshin 
1791bdf7f81bSStanislav Mekhanoshin     return false;
1792bdf7f81bSStanislav Mekhanoshin   };
1793bdf7f81bSStanislav Mekhanoshin 
1794bdf7f81bSStanislav Mekhanoshin   return FPAtomicToDenormModeWaitStates -
1795bdf7f81bSStanislav Mekhanoshin          ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
1796bdf7f81bSStanislav Mekhanoshin }
17977d2019bbSStanislav Mekhanoshin 
checkMAIHazards(MachineInstr * MI)17987d2019bbSStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
17997d2019bbSStanislav Mekhanoshin   assert(SIInstrInfo::isMAI(*MI));
18007d2019bbSStanislav Mekhanoshin 
1801a8d9d507SStanislav Mekhanoshin   return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI);
1802a8d9d507SStanislav Mekhanoshin }
1803a8d9d507SStanislav Mekhanoshin 
checkMFMAPadding(MachineInstr * MI)18041e15adbaSAustin Kerbow int GCNHazardRecognizer::checkMFMAPadding(MachineInstr *MI) {
18051e15adbaSAustin Kerbow   // Early exit if no padding is requested.
18061e15adbaSAustin Kerbow   if (MFMAPaddingRatio == 0)
18071e15adbaSAustin Kerbow     return 0;
18081e15adbaSAustin Kerbow 
18091e15adbaSAustin Kerbow   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1810bd9eed3aSAustin Kerbow   if (!SIInstrInfo::isMFMA(*MI) || MFI->getOccupancy() < 2)
18111e15adbaSAustin Kerbow     return 0;
18121e15adbaSAustin Kerbow 
18131e15adbaSAustin Kerbow   int NeighborMFMALatency = 0;
1814bd9eed3aSAustin Kerbow   auto IsNeighboringMFMA = [&NeighborMFMALatency,
18151e15adbaSAustin Kerbow                             this](const MachineInstr &MI) {
1816bd9eed3aSAustin Kerbow     if (!SIInstrInfo::isMFMA(MI))
18171e15adbaSAustin Kerbow       return false;
18181e15adbaSAustin Kerbow 
18191e15adbaSAustin Kerbow     NeighborMFMALatency = this->getMFMAPipelineWaitStates(MI);
18201e15adbaSAustin Kerbow     return true;
18211e15adbaSAustin Kerbow   };
18221e15adbaSAustin Kerbow 
18231e15adbaSAustin Kerbow   const int MaxMFMAPipelineWaitStates = 16;
18241e15adbaSAustin Kerbow   int WaitStatesSinceNeighborMFMA =
18251e15adbaSAustin Kerbow       getWaitStatesSince(IsNeighboringMFMA, MaxMFMAPipelineWaitStates);
18261e15adbaSAustin Kerbow 
18271e15adbaSAustin Kerbow   int NeighborMFMAPaddingNeeded =
18281e15adbaSAustin Kerbow       (NeighborMFMALatency * MFMAPaddingRatio / 100) -
18291e15adbaSAustin Kerbow       WaitStatesSinceNeighborMFMA;
18301e15adbaSAustin Kerbow 
18311e15adbaSAustin Kerbow   return std::max(0, NeighborMFMAPaddingNeeded);
18321e15adbaSAustin Kerbow }
18331e15adbaSAustin Kerbow 
checkMAIHazards908(MachineInstr * MI)1834a8d9d507SStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
18357d2019bbSStanislav Mekhanoshin   int WaitStatesNeeded = 0;
18367d2019bbSStanislav Mekhanoshin   unsigned Opc = MI->getOpcode();
18377d2019bbSStanislav Mekhanoshin 
1838424f1f6fSCarl Ritson   auto IsVALUFn = [](const MachineInstr &MI) {
1839424f1f6fSCarl Ritson     return SIInstrInfo::isVALU(MI);
18407d2019bbSStanislav Mekhanoshin   };
18417d2019bbSStanislav Mekhanoshin 
1842314e29edSJoe Nash   if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write
18437d2019bbSStanislav Mekhanoshin     const int LegacyVALUWritesVGPRWaitStates = 2;
18447d2019bbSStanislav Mekhanoshin     const int VALUWritesExecWaitStates = 4;
18457d2019bbSStanislav Mekhanoshin     const int MaxWaitStates = 4;
18467d2019bbSStanislav Mekhanoshin 
18477d2019bbSStanislav Mekhanoshin     int WaitStatesNeededForUse = VALUWritesExecWaitStates -
18487d2019bbSStanislav Mekhanoshin       getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
18497d2019bbSStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
18507d2019bbSStanislav Mekhanoshin 
18517d2019bbSStanislav Mekhanoshin     if (WaitStatesNeeded < MaxWaitStates) {
18527d2019bbSStanislav Mekhanoshin       for (const MachineOperand &Use : MI->explicit_uses()) {
18537d2019bbSStanislav Mekhanoshin         const int MaxWaitStates = 2;
18547d2019bbSStanislav Mekhanoshin 
18557d2019bbSStanislav Mekhanoshin         if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
18567d2019bbSStanislav Mekhanoshin           continue;
18577d2019bbSStanislav Mekhanoshin 
18587d2019bbSStanislav Mekhanoshin         int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
18597d2019bbSStanislav Mekhanoshin           getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
18607d2019bbSStanislav Mekhanoshin         WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
18617d2019bbSStanislav Mekhanoshin 
18627d2019bbSStanislav Mekhanoshin         if (WaitStatesNeeded == MaxWaitStates)
18637d2019bbSStanislav Mekhanoshin           break;
18647d2019bbSStanislav Mekhanoshin       }
18657d2019bbSStanislav Mekhanoshin     }
18667d2019bbSStanislav Mekhanoshin   }
18677d2019bbSStanislav Mekhanoshin 
18687d2019bbSStanislav Mekhanoshin   for (const MachineOperand &Op : MI->explicit_operands()) {
18697d2019bbSStanislav Mekhanoshin     if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
18707d2019bbSStanislav Mekhanoshin       continue;
18717d2019bbSStanislav Mekhanoshin 
1872314e29edSJoe Nash     if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
18737d2019bbSStanislav Mekhanoshin       continue;
18747d2019bbSStanislav Mekhanoshin 
18757d2019bbSStanislav Mekhanoshin     const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
18767d2019bbSStanislav Mekhanoshin     const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
18777d2019bbSStanislav Mekhanoshin     const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
18787d2019bbSStanislav Mekhanoshin     const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
18797d2019bbSStanislav Mekhanoshin     const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
18807d2019bbSStanislav Mekhanoshin     const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
18817d2019bbSStanislav Mekhanoshin     const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
18827d2019bbSStanislav Mekhanoshin     const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
18837d2019bbSStanislav Mekhanoshin     const int MaxWaitStates = 18;
18840c476111SDaniel Sanders     Register Reg = Op.getReg();
18857d2019bbSStanislav Mekhanoshin     unsigned HazardDefLatency = 0;
18867d2019bbSStanislav Mekhanoshin 
1887bd9eed3aSAustin Kerbow     auto IsOverlappedMFMAFn = [Reg, &HazardDefLatency,
1888424f1f6fSCarl Ritson                                this](const MachineInstr &MI) {
1889bd9eed3aSAustin Kerbow       if (!SIInstrInfo::isMFMA(MI))
18907d2019bbSStanislav Mekhanoshin         return false;
1891424f1f6fSCarl Ritson       Register DstReg = MI.getOperand(0).getReg();
18927d2019bbSStanislav Mekhanoshin       if (DstReg == Reg)
18937d2019bbSStanislav Mekhanoshin         return false;
1894424f1f6fSCarl Ritson       HazardDefLatency =
1895424f1f6fSCarl Ritson           std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
18967d2019bbSStanislav Mekhanoshin       return TRI.regsOverlap(DstReg, Reg);
18977d2019bbSStanislav Mekhanoshin     };
18987d2019bbSStanislav Mekhanoshin 
18997d2019bbSStanislav Mekhanoshin     int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
19007d2019bbSStanislav Mekhanoshin                                                    MaxWaitStates);
19017d2019bbSStanislav Mekhanoshin     int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
19027d2019bbSStanislav Mekhanoshin     int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
19037d2019bbSStanislav Mekhanoshin     int OpNo = MI->getOperandNo(&Op);
19047d2019bbSStanislav Mekhanoshin     if (OpNo == SrcCIdx) {
19057d2019bbSStanislav Mekhanoshin       NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
1906314e29edSJoe Nash     } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
19077d2019bbSStanislav Mekhanoshin       switch (HazardDefLatency) {
19087d2019bbSStanislav Mekhanoshin       case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
19097d2019bbSStanislav Mekhanoshin                break;
19107d2019bbSStanislav Mekhanoshin       case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
19117d2019bbSStanislav Mekhanoshin                break;
19127d2019bbSStanislav Mekhanoshin       case 16: LLVM_FALLTHROUGH;
19137d2019bbSStanislav Mekhanoshin       default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
19147d2019bbSStanislav Mekhanoshin                break;
19157d2019bbSStanislav Mekhanoshin       }
1916314e29edSJoe Nash     } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
19177d2019bbSStanislav Mekhanoshin       switch (HazardDefLatency) {
19187d2019bbSStanislav Mekhanoshin       case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
19197d2019bbSStanislav Mekhanoshin                break;
19207d2019bbSStanislav Mekhanoshin       case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
19217d2019bbSStanislav Mekhanoshin                break;
19227d2019bbSStanislav Mekhanoshin       case 16: LLVM_FALLTHROUGH;
19237d2019bbSStanislav Mekhanoshin       default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
19247d2019bbSStanislav Mekhanoshin                break;
19257d2019bbSStanislav Mekhanoshin       }
19267d2019bbSStanislav Mekhanoshin     }
19277d2019bbSStanislav Mekhanoshin 
19287d2019bbSStanislav Mekhanoshin     int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
19297d2019bbSStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
19307d2019bbSStanislav Mekhanoshin 
19317d2019bbSStanislav Mekhanoshin     if (WaitStatesNeeded == MaxWaitStates)
19327d2019bbSStanislav Mekhanoshin       return WaitStatesNeeded; // Early exit.
19337d2019bbSStanislav Mekhanoshin 
1934424f1f6fSCarl Ritson     auto IsAccVgprWriteFn = [Reg, this](const MachineInstr &MI) {
1935424f1f6fSCarl Ritson       if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
19367d2019bbSStanislav Mekhanoshin         return false;
1937424f1f6fSCarl Ritson       Register DstReg = MI.getOperand(0).getReg();
19387d2019bbSStanislav Mekhanoshin       return TRI.regsOverlap(Reg, DstReg);
19397d2019bbSStanislav Mekhanoshin     };
19407d2019bbSStanislav Mekhanoshin 
19417d2019bbSStanislav Mekhanoshin     const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
19427d2019bbSStanislav Mekhanoshin     const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
19437d2019bbSStanislav Mekhanoshin     const int AccVGPRWriteAccVgprReadWaitStates = 3;
19447d2019bbSStanislav Mekhanoshin     NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
19457d2019bbSStanislav Mekhanoshin     if (OpNo == SrcCIdx)
19467d2019bbSStanislav Mekhanoshin       NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
1947314e29edSJoe Nash     else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64)
19487d2019bbSStanislav Mekhanoshin       NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
19497d2019bbSStanislav Mekhanoshin 
19507d2019bbSStanislav Mekhanoshin     WaitStatesNeededForUse = NeedWaitStates -
19517d2019bbSStanislav Mekhanoshin       getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
19527d2019bbSStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
19537d2019bbSStanislav Mekhanoshin 
19547d2019bbSStanislav Mekhanoshin     if (WaitStatesNeeded == MaxWaitStates)
19557d2019bbSStanislav Mekhanoshin       return WaitStatesNeeded; // Early exit.
19567d2019bbSStanislav Mekhanoshin   }
19577d2019bbSStanislav Mekhanoshin 
1958314e29edSJoe Nash   if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
19597d2019bbSStanislav Mekhanoshin     const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
19607d2019bbSStanislav Mekhanoshin     const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
19617d2019bbSStanislav Mekhanoshin     const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
19627d2019bbSStanislav Mekhanoshin     const int MaxWaitStates = 13;
19630c476111SDaniel Sanders     Register DstReg = MI->getOperand(0).getReg();
19647d2019bbSStanislav Mekhanoshin     unsigned HazardDefLatency = 0;
19657d2019bbSStanislav Mekhanoshin 
1966bd9eed3aSAustin Kerbow     auto IsSrcCMFMAFn = [DstReg, &HazardDefLatency,
1967424f1f6fSCarl Ritson                          this](const MachineInstr &MI) {
1968bd9eed3aSAustin Kerbow       if (!SIInstrInfo::isMFMA(MI))
19697d2019bbSStanislav Mekhanoshin         return false;
1970424f1f6fSCarl Ritson       Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg();
1971424f1f6fSCarl Ritson       HazardDefLatency =
1972424f1f6fSCarl Ritson           std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
19737d2019bbSStanislav Mekhanoshin       return TRI.regsOverlap(Reg, DstReg);
19747d2019bbSStanislav Mekhanoshin     };
19757d2019bbSStanislav Mekhanoshin 
19767d2019bbSStanislav Mekhanoshin     int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
19777d2019bbSStanislav Mekhanoshin     int NeedWaitStates;
19787d2019bbSStanislav Mekhanoshin     switch (HazardDefLatency) {
19797d2019bbSStanislav Mekhanoshin     case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
19807d2019bbSStanislav Mekhanoshin              break;
19817d2019bbSStanislav Mekhanoshin     case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
19827d2019bbSStanislav Mekhanoshin              break;
19837d2019bbSStanislav Mekhanoshin     case 16: LLVM_FALLTHROUGH;
19847d2019bbSStanislav Mekhanoshin     default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
19857d2019bbSStanislav Mekhanoshin              break;
19867d2019bbSStanislav Mekhanoshin     }
19877d2019bbSStanislav Mekhanoshin 
19887d2019bbSStanislav Mekhanoshin     int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
19897d2019bbSStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
19907d2019bbSStanislav Mekhanoshin   }
19917d2019bbSStanislav Mekhanoshin 
19921e15adbaSAustin Kerbow   // Pad neighboring MFMA with noops for better inter-wave performance.
19931e15adbaSAustin Kerbow   WaitStatesNeeded = std::max(WaitStatesNeeded, checkMFMAPadding(MI));
19941e15adbaSAustin Kerbow 
19957d2019bbSStanislav Mekhanoshin   return WaitStatesNeeded;
19967d2019bbSStanislav Mekhanoshin }
19977d2019bbSStanislav Mekhanoshin 
checkMAIHazards90A(MachineInstr * MI)1998a8d9d507SStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1999a8d9d507SStanislav Mekhanoshin   int WaitStatesNeeded = 0;
2000a8d9d507SStanislav Mekhanoshin   unsigned Opc = MI->getOpcode();
2001a8d9d507SStanislav Mekhanoshin 
2002bd9eed3aSAustin Kerbow   auto IsLegacyVALUFn = [](const MachineInstr &MI) {
2003bd9eed3aSAustin Kerbow     return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI);
2004a8d9d507SStanislav Mekhanoshin   };
2005a8d9d507SStanislav Mekhanoshin 
2006bd9eed3aSAustin Kerbow   auto IsLegacyVALUNotDotFn = [](const MachineInstr &MI) {
2007bd9eed3aSAustin Kerbow     return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI) &&
2008bd9eed3aSAustin Kerbow            !SIInstrInfo::isDOT(MI);
2009a8d9d507SStanislav Mekhanoshin   };
2010a8d9d507SStanislav Mekhanoshin 
2011bd9eed3aSAustin Kerbow   if (!SIInstrInfo::isMFMA(*MI))
2012a8d9d507SStanislav Mekhanoshin     return WaitStatesNeeded;
2013a8d9d507SStanislav Mekhanoshin 
2014a8d9d507SStanislav Mekhanoshin   const int VALUWritesExecWaitStates = 4;
2015a8d9d507SStanislav Mekhanoshin   int WaitStatesNeededForUse = VALUWritesExecWaitStates -
2016a8d9d507SStanislav Mekhanoshin     getWaitStatesSinceDef(AMDGPU::EXEC, IsLegacyVALUFn,
2017a8d9d507SStanislav Mekhanoshin                           VALUWritesExecWaitStates);
2018a8d9d507SStanislav Mekhanoshin   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2019a8d9d507SStanislav Mekhanoshin 
2020a8d9d507SStanislav Mekhanoshin   int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
2021a8d9d507SStanislav Mekhanoshin 
2022a8d9d507SStanislav Mekhanoshin   // Loop for both DGEMM and S/HGEMM 2nd instruction.
2023a8d9d507SStanislav Mekhanoshin   for (const MachineOperand &Use : MI->explicit_uses()) {
2024a8d9d507SStanislav Mekhanoshin     const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
2025a8d9d507SStanislav Mekhanoshin     const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
2026cad9de71SStanislav Mekhanoshin     const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3;
2027cad9de71SStanislav Mekhanoshin     const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5;
2028cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4;
2029cad9de71SStanislav Mekhanoshin     const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9;
2030cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8;
2031cad9de71SStanislav Mekhanoshin     const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17;
2032cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16;
2033a8d9d507SStanislav Mekhanoshin     const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
2034a8d9d507SStanislav Mekhanoshin     const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
2035a8d9d507SStanislav Mekhanoshin     const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
2036a8d9d507SStanislav Mekhanoshin     const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
2037a8d9d507SStanislav Mekhanoshin     const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
2038a8d9d507SStanislav Mekhanoshin     const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
2039a8d9d507SStanislav Mekhanoshin     const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
2040a8d9d507SStanislav Mekhanoshin     const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
2041a8d9d507SStanislav Mekhanoshin     const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
2042a8d9d507SStanislav Mekhanoshin     const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
2043cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4;
2044cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6;
2045cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10;
2046cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18;
2047cad9de71SStanislav Mekhanoshin     const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5;
2048cad9de71SStanislav Mekhanoshin     const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7;
2049cad9de71SStanislav Mekhanoshin     const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11;
2050cad9de71SStanislav Mekhanoshin     const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19;
2051a8d9d507SStanislav Mekhanoshin     const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
2052a8d9d507SStanislav Mekhanoshin     const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
2053a8d9d507SStanislav Mekhanoshin     const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
2054cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
2055a8d9d507SStanislav Mekhanoshin     const int MaxWaitStates = 19;
2056a8d9d507SStanislav Mekhanoshin 
2057a8d9d507SStanislav Mekhanoshin     if (!Use.isReg())
2058a8d9d507SStanislav Mekhanoshin       continue;
2059d6b07348SJim Lin     Register Reg = Use.getReg();
2060a8d9d507SStanislav Mekhanoshin     bool FullReg;
2061424f1f6fSCarl Ritson     const MachineInstr *MI1;
2062a8d9d507SStanislav Mekhanoshin 
2063bd9eed3aSAustin Kerbow     auto IsOverlappedMFMAFn = [Reg, &FullReg, &MI1,
2064424f1f6fSCarl Ritson                                this](const MachineInstr &MI) {
2065bd9eed3aSAustin Kerbow       if (!SIInstrInfo::isMFMA(MI))
2066a8d9d507SStanislav Mekhanoshin         return false;
2067424f1f6fSCarl Ritson       Register DstReg = MI.getOperand(0).getReg();
2068a8d9d507SStanislav Mekhanoshin       FullReg = (DstReg == Reg);
2069424f1f6fSCarl Ritson       MI1 = &MI;
2070a8d9d507SStanislav Mekhanoshin       return TRI.regsOverlap(DstReg, Reg);
2071a8d9d507SStanislav Mekhanoshin     };
2072a8d9d507SStanislav Mekhanoshin 
2073a8d9d507SStanislav Mekhanoshin     WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates -
2074a8d9d507SStanislav Mekhanoshin       getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
2075a8d9d507SStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2076a8d9d507SStanislav Mekhanoshin 
2077661a232eSStanislav Mekhanoshin     int NumWaitStates =
2078661a232eSStanislav Mekhanoshin         getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);
2079a8d9d507SStanislav Mekhanoshin     if (NumWaitStates == std::numeric_limits<int>::max())
2080a8d9d507SStanislav Mekhanoshin       continue;
2081a8d9d507SStanislav Mekhanoshin 
2082a8d9d507SStanislav Mekhanoshin     int OpNo = MI->getOperandNo(&Use);
2083a8d9d507SStanislav Mekhanoshin     unsigned Opc1 = MI1->getOpcode();
2084a8d9d507SStanislav Mekhanoshin     int NeedWaitStates = 0;
2085a8d9d507SStanislav Mekhanoshin     if (OpNo == SrcCIdx) {
2086cad9de71SStanislav Mekhanoshin       if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) {
2087a8d9d507SStanislav Mekhanoshin         NeedWaitStates = 0;
2088a8d9d507SStanislav Mekhanoshin       } else if (FullReg) {
2089a8d9d507SStanislav Mekhanoshin         if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
2090a8d9d507SStanislav Mekhanoshin              Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
2091a8d9d507SStanislav Mekhanoshin             (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
2092a8d9d507SStanislav Mekhanoshin              Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
2093a8d9d507SStanislav Mekhanoshin           NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
2094cad9de71SStanislav Mekhanoshin         else if (ST.hasGFX940Insts() &&
2095cad9de71SStanislav Mekhanoshin                  TSchedModel.computeInstrLatency(MI1) == 2)
2096cad9de71SStanislav Mekhanoshin           NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
2097a8d9d507SStanislav Mekhanoshin       } else {
2098a8d9d507SStanislav Mekhanoshin         switch (Opc1) {
2099a8d9d507SStanislav Mekhanoshin         case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
2100a8d9d507SStanislav Mekhanoshin         case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
2101dbf278b9SStanislav Mekhanoshin         case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
2102dbf278b9SStanislav Mekhanoshin         case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
2103a8d9d507SStanislav Mekhanoshin           if (!isXDL(ST, *MI))
2104a8d9d507SStanislav Mekhanoshin             NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
2105a8d9d507SStanislav Mekhanoshin           break;
2106a8d9d507SStanislav Mekhanoshin         case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
2107a8d9d507SStanislav Mekhanoshin         case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
2108a8d9d507SStanislav Mekhanoshin           if (!isXDL(ST, *MI))
2109a8d9d507SStanislav Mekhanoshin             NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
2110a8d9d507SStanislav Mekhanoshin           break;
2111a8d9d507SStanislav Mekhanoshin         default:
2112cad9de71SStanislav Mekhanoshin           if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1))
2113cad9de71SStanislav Mekhanoshin             break;
2114a8d9d507SStanislav Mekhanoshin           switch (TSchedModel.computeInstrLatency(MI1)) {
2115a8d9d507SStanislav Mekhanoshin           case 2:
2116cad9de71SStanislav Mekhanoshin             NeedWaitStates = ST.hasGFX940Insts()
2117cad9de71SStanislav Mekhanoshin               ? isXDL(ST, *MI1)
2118cad9de71SStanislav Mekhanoshin                 ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates
2119cad9de71SStanislav Mekhanoshin                 : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates
2120cad9de71SStanislav Mekhanoshin               : isDGEMM(Opc)
2121a8d9d507SStanislav Mekhanoshin                 ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
2122a8d9d507SStanislav Mekhanoshin                 : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
2123a8d9d507SStanislav Mekhanoshin             break;
2124cad9de71SStanislav Mekhanoshin           case 4:
2125cad9de71SStanislav Mekhanoshin             assert(ST.hasGFX940Insts());
2126cad9de71SStanislav Mekhanoshin             NeedWaitStates = isXDL(ST, *MI1)
2127cad9de71SStanislav Mekhanoshin               ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates
2128cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates;
2129cad9de71SStanislav Mekhanoshin             break;
2130a8d9d507SStanislav Mekhanoshin           case 8:
2131cad9de71SStanislav Mekhanoshin             NeedWaitStates = ST.hasGFX940Insts()
2132cad9de71SStanislav Mekhanoshin               ? isXDL(ST, *MI1)
2133cad9de71SStanislav Mekhanoshin                 ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates
2134cad9de71SStanislav Mekhanoshin                 : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates
2135cad9de71SStanislav Mekhanoshin               : isDGEMM(Opc)
2136a8d9d507SStanislav Mekhanoshin                 ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
2137a8d9d507SStanislav Mekhanoshin                 : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
2138a8d9d507SStanislav Mekhanoshin             break;
2139a8d9d507SStanislav Mekhanoshin           case 16: LLVM_FALLTHROUGH;
2140a8d9d507SStanislav Mekhanoshin           default:
2141cad9de71SStanislav Mekhanoshin             NeedWaitStates = ST.hasGFX940Insts()
2142cad9de71SStanislav Mekhanoshin               ? isXDL(ST, *MI1)
2143cad9de71SStanislav Mekhanoshin                 ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates
2144cad9de71SStanislav Mekhanoshin                 : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates
2145cad9de71SStanislav Mekhanoshin               : isDGEMM(Opc)
2146a8d9d507SStanislav Mekhanoshin                 ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
2147a8d9d507SStanislav Mekhanoshin                 : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
2148a8d9d507SStanislav Mekhanoshin           }
2149a8d9d507SStanislav Mekhanoshin         }
2150a8d9d507SStanislav Mekhanoshin       }
2151a8d9d507SStanislav Mekhanoshin     } else {
2152a8d9d507SStanislav Mekhanoshin       switch (Opc1) {
2153a8d9d507SStanislav Mekhanoshin       case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
2154a8d9d507SStanislav Mekhanoshin       case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
2155dbf278b9SStanislav Mekhanoshin       case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
2156dbf278b9SStanislav Mekhanoshin       case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
2157a8d9d507SStanislav Mekhanoshin         NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
2158a8d9d507SStanislav Mekhanoshin         break;
2159a8d9d507SStanislav Mekhanoshin       case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
2160a8d9d507SStanislav Mekhanoshin       case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
2161a8d9d507SStanislav Mekhanoshin         NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates;
2162a8d9d507SStanislav Mekhanoshin         break;
2163a8d9d507SStanislav Mekhanoshin       default:
2164a8d9d507SStanislav Mekhanoshin         switch (TSchedModel.computeInstrLatency(MI1)) {
2165a8d9d507SStanislav Mekhanoshin         case 2:
2166cad9de71SStanislav Mekhanoshin           NeedWaitStates = ST.hasGFX940Insts()
2167cad9de71SStanislav Mekhanoshin             ? isXDL(ST, *MI1)
2168cad9de71SStanislav Mekhanoshin               ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates
2169cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates
2170cad9de71SStanislav Mekhanoshin             : SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
2171cad9de71SStanislav Mekhanoshin           break;
2172cad9de71SStanislav Mekhanoshin         case 4:
2173cad9de71SStanislav Mekhanoshin           assert(ST.hasGFX940Insts());
2174cad9de71SStanislav Mekhanoshin           NeedWaitStates = isXDL(ST, *MI1)
2175cad9de71SStanislav Mekhanoshin             ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates
2176cad9de71SStanislav Mekhanoshin             : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates;
2177a8d9d507SStanislav Mekhanoshin           break;
2178a8d9d507SStanislav Mekhanoshin         case 8:
2179cad9de71SStanislav Mekhanoshin           NeedWaitStates = ST.hasGFX940Insts()
2180cad9de71SStanislav Mekhanoshin             ? isXDL(ST, *MI1)
2181cad9de71SStanislav Mekhanoshin               ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates
2182cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates
2183cad9de71SStanislav Mekhanoshin             : SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
2184a8d9d507SStanislav Mekhanoshin           break;
2185a8d9d507SStanislav Mekhanoshin         case 16: LLVM_FALLTHROUGH;
2186a8d9d507SStanislav Mekhanoshin         default:
2187cad9de71SStanislav Mekhanoshin           NeedWaitStates = ST.hasGFX940Insts()
2188cad9de71SStanislav Mekhanoshin             ? isXDL(ST, *MI1)
2189cad9de71SStanislav Mekhanoshin               ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates
2190cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates
2191cad9de71SStanislav Mekhanoshin             : SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
2192a8d9d507SStanislav Mekhanoshin         }
2193a8d9d507SStanislav Mekhanoshin       }
2194a8d9d507SStanislav Mekhanoshin     }
2195a8d9d507SStanislav Mekhanoshin     if (WaitStatesNeeded >= NeedWaitStates)
2196a8d9d507SStanislav Mekhanoshin       continue;
2197a8d9d507SStanislav Mekhanoshin 
2198a8d9d507SStanislav Mekhanoshin     WaitStatesNeededForUse = NeedWaitStates - NumWaitStates;
2199a8d9d507SStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2200a8d9d507SStanislav Mekhanoshin 
2201a8d9d507SStanislav Mekhanoshin     if (WaitStatesNeeded == MaxWaitStates)
2202a8d9d507SStanislav Mekhanoshin       break;
2203a8d9d507SStanislav Mekhanoshin   }
2204a8d9d507SStanislav Mekhanoshin 
2205a8d9d507SStanislav Mekhanoshin   return WaitStatesNeeded;
2206a8d9d507SStanislav Mekhanoshin }
2207a8d9d507SStanislav Mekhanoshin 
checkMAILdStHazards(MachineInstr * MI)22087d2019bbSStanislav Mekhanoshin int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
2209d1f45ed5SNeubauer, Sebastian   // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards()
2210a8d9d507SStanislav Mekhanoshin   if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
22117d2019bbSStanislav Mekhanoshin     return 0;
22127d2019bbSStanislav Mekhanoshin 
22137d2019bbSStanislav Mekhanoshin   int WaitStatesNeeded = 0;
22147d2019bbSStanislav Mekhanoshin 
2215424f1f6fSCarl Ritson   auto IsAccVgprReadFn = [](const MachineInstr &MI) {
2216424f1f6fSCarl Ritson     return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
22177d2019bbSStanislav Mekhanoshin   };
22187d2019bbSStanislav Mekhanoshin 
22197d2019bbSStanislav Mekhanoshin   for (const MachineOperand &Op : MI->explicit_uses()) {
22207d2019bbSStanislav Mekhanoshin     if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
22217d2019bbSStanislav Mekhanoshin       continue;
22227d2019bbSStanislav Mekhanoshin 
22230c476111SDaniel Sanders     Register Reg = Op.getReg();
22247d2019bbSStanislav Mekhanoshin 
22257d2019bbSStanislav Mekhanoshin     const int AccVgprReadLdStWaitStates = 2;
2226a4f35ab2SAustin Kerbow     const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
22277d2019bbSStanislav Mekhanoshin     const int MaxWaitStates = 2;
22287d2019bbSStanislav Mekhanoshin 
22297d2019bbSStanislav Mekhanoshin     int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
22307d2019bbSStanislav Mekhanoshin       getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
22317d2019bbSStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
22327d2019bbSStanislav Mekhanoshin 
22337d2019bbSStanislav Mekhanoshin     if (WaitStatesNeeded == MaxWaitStates)
22347d2019bbSStanislav Mekhanoshin       return WaitStatesNeeded; // Early exit.
22357d2019bbSStanislav Mekhanoshin 
2236424f1f6fSCarl Ritson     auto IsVALUAccVgprRdWrCheckFn = [Reg, this](const MachineInstr &MI) {
2237424f1f6fSCarl Ritson       if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
2238424f1f6fSCarl Ritson           MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
22397d2019bbSStanislav Mekhanoshin         return false;
2240424f1f6fSCarl Ritson       auto IsVALUFn = [](const MachineInstr &MI) {
2241424f1f6fSCarl Ritson         return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMAI(MI);
22427d2019bbSStanislav Mekhanoshin       };
22437d2019bbSStanislav Mekhanoshin       return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
22447d2019bbSStanislav Mekhanoshin              std::numeric_limits<int>::max();
22457d2019bbSStanislav Mekhanoshin     };
22467d2019bbSStanislav Mekhanoshin 
2247a4f35ab2SAustin Kerbow     WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
2248a4f35ab2SAustin Kerbow       getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
22497d2019bbSStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
22507d2019bbSStanislav Mekhanoshin   }
22517d2019bbSStanislav Mekhanoshin 
22527d2019bbSStanislav Mekhanoshin   return WaitStatesNeeded;
22537d2019bbSStanislav Mekhanoshin }
225413b63be4SStanislav Mekhanoshin 
checkMAIVALUHazards(MachineInstr * MI)2255a8d9d507SStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
2256a8d9d507SStanislav Mekhanoshin   if (!ST.hasGFX90AInsts())
2257a8d9d507SStanislav Mekhanoshin     return 0;
2258a8d9d507SStanislav Mekhanoshin 
2259424f1f6fSCarl Ritson   auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
2260424f1f6fSCarl Ritson     return isDGEMM(MI.getOpcode());
2261a8d9d507SStanislav Mekhanoshin   };
2262a8d9d507SStanislav Mekhanoshin 
2263a8d9d507SStanislav Mekhanoshin   // This is checked in checkMAIHazards90A()
2264bd9eed3aSAustin Kerbow   if (SIInstrInfo::isMFMA(*MI))
2265a8d9d507SStanislav Mekhanoshin     return 0;
2266a8d9d507SStanislav Mekhanoshin 
2267a8d9d507SStanislav Mekhanoshin   int WaitStatesNeeded = 0;
2268a8d9d507SStanislav Mekhanoshin 
2269a8d9d507SStanislav Mekhanoshin   bool IsMemOrExport = SIInstrInfo::isVMEM(*MI) ||
2270a8d9d507SStanislav Mekhanoshin                        SIInstrInfo::isFLAT(*MI) ||
2271a8d9d507SStanislav Mekhanoshin                        SIInstrInfo::isDS(*MI) ||
2272a8d9d507SStanislav Mekhanoshin                        SIInstrInfo::isEXP(*MI);
2273a8d9d507SStanislav Mekhanoshin   bool IsVALU = SIInstrInfo::isVALU(*MI);
2274a8d9d507SStanislav Mekhanoshin 
2275424f1f6fSCarl Ritson   const MachineInstr *MFMA = nullptr;
2276a8d9d507SStanislav Mekhanoshin   unsigned Reg;
2277bd9eed3aSAustin Kerbow   auto IsMFMAWriteFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
2278bd9eed3aSAustin Kerbow     if (!SIInstrInfo::isMFMA(MI) ||
2279bd9eed3aSAustin Kerbow         !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
2280a8d9d507SStanislav Mekhanoshin       return false;
2281424f1f6fSCarl Ritson     MFMA = &MI;
2282a8d9d507SStanislav Mekhanoshin     return true;
2283a8d9d507SStanislav Mekhanoshin   };
2284a8d9d507SStanislav Mekhanoshin 
2285424f1f6fSCarl Ritson   const MachineInstr *DOT = nullptr;
2286424f1f6fSCarl Ritson   auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {
2287424f1f6fSCarl Ritson     if (!SIInstrInfo::isDOT(MI) ||
2288424f1f6fSCarl Ritson         !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
2289a8d9d507SStanislav Mekhanoshin       return false;
2290424f1f6fSCarl Ritson     DOT = &MI;
2291a8d9d507SStanislav Mekhanoshin     return true;
2292a8d9d507SStanislav Mekhanoshin   };
2293a8d9d507SStanislav Mekhanoshin 
2294a8d9d507SStanislav Mekhanoshin   int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2295a8d9d507SStanislav Mekhanoshin                                            AMDGPU::OpName::src2);
2296a8d9d507SStanislav Mekhanoshin 
2297a8d9d507SStanislav Mekhanoshin   if (IsMemOrExport || IsVALU) {
2298a8d9d507SStanislav Mekhanoshin     const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
2299a8d9d507SStanislav Mekhanoshin     const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
2300a8d9d507SStanislav Mekhanoshin     const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
2301cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4;
2302cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6;
2303cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10;
2304cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18;
2305cad9de71SStanislav Mekhanoshin     const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5;
2306cad9de71SStanislav Mekhanoshin     const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7;
2307cad9de71SStanislav Mekhanoshin     const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11;
2308cad9de71SStanislav Mekhanoshin     const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19;
2309a8d9d507SStanislav Mekhanoshin     const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
2310a8d9d507SStanislav Mekhanoshin     const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
2311a8d9d507SStanislav Mekhanoshin     const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
2312a8d9d507SStanislav Mekhanoshin     const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
2313a8d9d507SStanislav Mekhanoshin     const int DotWriteSameDotReadSrcAB = 3;
2314a8d9d507SStanislav Mekhanoshin     const int DotWriteDifferentVALURead = 3;
2315a8d9d507SStanislav Mekhanoshin     const int MaxWaitStates = 19;
2316a8d9d507SStanislav Mekhanoshin 
2317a8d9d507SStanislav Mekhanoshin     for (const MachineOperand &Use : MI->explicit_uses()) {
2318a8d9d507SStanislav Mekhanoshin       if (!Use.isReg())
2319a8d9d507SStanislav Mekhanoshin         continue;
2320a8d9d507SStanislav Mekhanoshin       Reg = Use.getReg();
2321a8d9d507SStanislav Mekhanoshin 
2322a8d9d507SStanislav Mekhanoshin       DOT = nullptr;
2323a8d9d507SStanislav Mekhanoshin       int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
2324a8d9d507SStanislav Mekhanoshin                                                      MaxWaitStates);
2325a8d9d507SStanislav Mekhanoshin       if (DOT) {
2326a8d9d507SStanislav Mekhanoshin         int NeedWaitStates = 0;
2327a8d9d507SStanislav Mekhanoshin         if (DOT->getOpcode() == MI->getOpcode()) {
2328a8d9d507SStanislav Mekhanoshin           if (&Use - &MI->getOperand(0) != SrcCIdx)
2329a8d9d507SStanislav Mekhanoshin             NeedWaitStates = DotWriteSameDotReadSrcAB;
2330a8d9d507SStanislav Mekhanoshin         } else {
2331a8d9d507SStanislav Mekhanoshin           NeedWaitStates = DotWriteDifferentVALURead;
2332a8d9d507SStanislav Mekhanoshin         }
2333a8d9d507SStanislav Mekhanoshin 
2334a8d9d507SStanislav Mekhanoshin         int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
2335a8d9d507SStanislav Mekhanoshin         WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2336a8d9d507SStanislav Mekhanoshin       }
2337a8d9d507SStanislav Mekhanoshin 
2338a8d9d507SStanislav Mekhanoshin       MFMA = nullptr;
2339661a232eSStanislav Mekhanoshin       WaitStatesSinceDef =
2340661a232eSStanislav Mekhanoshin           getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
2341a8d9d507SStanislav Mekhanoshin       if (!MFMA)
2342a8d9d507SStanislav Mekhanoshin         continue;
2343a8d9d507SStanislav Mekhanoshin 
2344a8d9d507SStanislav Mekhanoshin       unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
2345a8d9d507SStanislav Mekhanoshin       int NeedWaitStates = MaxWaitStates;
2346a8d9d507SStanislav Mekhanoshin       switch (HazardDefLatency) {
2347a8d9d507SStanislav Mekhanoshin       case 2:
2348cad9de71SStanislav Mekhanoshin         NeedWaitStates =
2349cad9de71SStanislav Mekhanoshin           ST.hasGFX940Insts()
2350cad9de71SStanislav Mekhanoshin             ? isXDL(ST, *MFMA)
2351cad9de71SStanislav Mekhanoshin               ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates
2352cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates
2353cad9de71SStanislav Mekhanoshin             : SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
2354a8d9d507SStanislav Mekhanoshin         break;
2355a8d9d507SStanislav Mekhanoshin       case 4:
2356e9a49c64SStanislav Mekhanoshin         assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
2357a8d9d507SStanislav Mekhanoshin         NeedWaitStates =
2358cad9de71SStanislav Mekhanoshin           isDGEMM(MFMA->getOpcode())
2359cad9de71SStanislav Mekhanoshin             ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
2360cad9de71SStanislav Mekhanoshin                             : DMFMA4x4WriteVgprVALUReadWaitStates
2361cad9de71SStanislav Mekhanoshin             : isXDL(ST, *MFMA)
2362cad9de71SStanislav Mekhanoshin               ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates
2363cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates;
2364a8d9d507SStanislav Mekhanoshin         break;
2365a8d9d507SStanislav Mekhanoshin       case 8:
2366cad9de71SStanislav Mekhanoshin         NeedWaitStates =
2367cad9de71SStanislav Mekhanoshin           ST.hasGFX940Insts()
2368cad9de71SStanislav Mekhanoshin             ? isXDL(ST, *MFMA)
2369cad9de71SStanislav Mekhanoshin               ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates
2370cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates
2371cad9de71SStanislav Mekhanoshin             : SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
2372a8d9d507SStanislav Mekhanoshin         break;
2373a8d9d507SStanislav Mekhanoshin       case 16: LLVM_FALLTHROUGH;
2374a8d9d507SStanislav Mekhanoshin       default:
2375a8d9d507SStanislav Mekhanoshin         NeedWaitStates =
2376a8d9d507SStanislav Mekhanoshin           isDGEMM(MFMA->getOpcode())
2377a8d9d507SStanislav Mekhanoshin             ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
2378a8d9d507SStanislav Mekhanoshin                             : DMFMA16x16WriteVgprVALUReadWaitStates
2379cad9de71SStanislav Mekhanoshin             : ST.hasGFX940Insts()
2380cad9de71SStanislav Mekhanoshin               ? isXDL(ST, *MFMA)
2381cad9de71SStanislav Mekhanoshin                 ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates
2382cad9de71SStanislav Mekhanoshin                 : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates
2383a8d9d507SStanislav Mekhanoshin               : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
2384a8d9d507SStanislav Mekhanoshin         break;
2385a8d9d507SStanislav Mekhanoshin       }
2386a8d9d507SStanislav Mekhanoshin 
2387a8d9d507SStanislav Mekhanoshin       int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
2388a8d9d507SStanislav Mekhanoshin       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2389a8d9d507SStanislav Mekhanoshin 
2390a8d9d507SStanislav Mekhanoshin       if (WaitStatesNeeded == MaxWaitStates)
2391a8d9d507SStanislav Mekhanoshin         break;
2392a8d9d507SStanislav Mekhanoshin     }
2393a8d9d507SStanislav Mekhanoshin   }
2394a8d9d507SStanislav Mekhanoshin 
2395a8d9d507SStanislav Mekhanoshin   unsigned Opc = MI->getOpcode();
2396a8d9d507SStanislav Mekhanoshin   const int DMFMAToFMA64WaitStates = 2;
2397a8d9d507SStanislav Mekhanoshin   if ((Opc == AMDGPU::V_FMA_F64_e64 ||
2398a8d9d507SStanislav Mekhanoshin        Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64 ||
2399a8d9d507SStanislav Mekhanoshin        Opc == AMDGPU::V_FMAC_F64_dpp) &&
2400a8d9d507SStanislav Mekhanoshin       WaitStatesNeeded < DMFMAToFMA64WaitStates) {
2401a8d9d507SStanislav Mekhanoshin     int WaitStatesNeededForUse = DMFMAToFMA64WaitStates -
2402a8d9d507SStanislav Mekhanoshin       getWaitStatesSince(IsDGEMMFn, DMFMAToFMA64WaitStates);
2403a8d9d507SStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2404a8d9d507SStanislav Mekhanoshin   }
2405a8d9d507SStanislav Mekhanoshin 
2406a8d9d507SStanislav Mekhanoshin   if (!IsVALU && !IsMemOrExport)
2407a8d9d507SStanislav Mekhanoshin     return WaitStatesNeeded;
2408a8d9d507SStanislav Mekhanoshin 
2409a8d9d507SStanislav Mekhanoshin   for (const MachineOperand &Def : MI->defs()) {
2410a8d9d507SStanislav Mekhanoshin     const int SMFMA4x4WriteVgprVALUWawWaitStates = 5;
2411a8d9d507SStanislav Mekhanoshin     const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
2412a8d9d507SStanislav Mekhanoshin     const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
2413cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4;
2414cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6;
2415cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10;
2416cad9de71SStanislav Mekhanoshin     const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18;
2417cad9de71SStanislav Mekhanoshin     const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5;
2418cad9de71SStanislav Mekhanoshin     const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7;
2419cad9de71SStanislav Mekhanoshin     const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11;
2420cad9de71SStanislav Mekhanoshin     const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19;
2421a8d9d507SStanislav Mekhanoshin     const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
2422cad9de71SStanislav Mekhanoshin     const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3;
2423a8d9d507SStanislav Mekhanoshin     const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
2424a8d9d507SStanislav Mekhanoshin     const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
2425a8d9d507SStanislav Mekhanoshin     const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
2426a8d9d507SStanislav Mekhanoshin     const int DMFMA16x16WriteVgprVALUWriteWaitStates = 11;
2427a8d9d507SStanislav Mekhanoshin     const int DotWriteDifferentVALUWrite = 3;
2428a8d9d507SStanislav Mekhanoshin     const int MaxWaitStates = 19;
2429a8d9d507SStanislav Mekhanoshin     const int MaxWarWaitStates = 15;
2430a8d9d507SStanislav Mekhanoshin 
2431a8d9d507SStanislav Mekhanoshin     Reg = Def.getReg();
2432a8d9d507SStanislav Mekhanoshin 
2433a8d9d507SStanislav Mekhanoshin     DOT = nullptr;
2434a8d9d507SStanislav Mekhanoshin     int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
2435a8d9d507SStanislav Mekhanoshin                                                    MaxWaitStates);
2436a8d9d507SStanislav Mekhanoshin     if (DOT && DOT->getOpcode() != MI->getOpcode())
2437a8d9d507SStanislav Mekhanoshin       WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite -
2438a8d9d507SStanislav Mekhanoshin                                                     WaitStatesSinceDef);
2439a8d9d507SStanislav Mekhanoshin 
2440a8d9d507SStanislav Mekhanoshin     MFMA = nullptr;
2441661a232eSStanislav Mekhanoshin     WaitStatesSinceDef =
2442661a232eSStanislav Mekhanoshin         getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
2443a8d9d507SStanislav Mekhanoshin     if (MFMA) {
2444a8d9d507SStanislav Mekhanoshin       int NeedWaitStates = MaxWaitStates;
2445a8d9d507SStanislav Mekhanoshin       switch (TSchedModel.computeInstrLatency(MFMA)) {
2446a8d9d507SStanislav Mekhanoshin       case 2:
2447cad9de71SStanislav Mekhanoshin         NeedWaitStates = ST.hasGFX940Insts()
2448cad9de71SStanislav Mekhanoshin           ? isXDL(ST, *MFMA)
2449cad9de71SStanislav Mekhanoshin             ? GFX940_XDL2PassWriteVgprVALUWawWaitStates
2450cad9de71SStanislav Mekhanoshin             : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates
2451cad9de71SStanislav Mekhanoshin           : SMFMA4x4WriteVgprVALUWawWaitStates;
2452a8d9d507SStanislav Mekhanoshin         break;
2453a8d9d507SStanislav Mekhanoshin       case 4:
2454cad9de71SStanislav Mekhanoshin         assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
2455cad9de71SStanislav Mekhanoshin         NeedWaitStates = isDGEMM(MFMA->getOpcode())
2456cad9de71SStanislav Mekhanoshin             ? DMFMA4x4WriteVgprVALUWriteWaitStates
2457cad9de71SStanislav Mekhanoshin             : isXDL(ST, *MFMA)
2458cad9de71SStanislav Mekhanoshin               ? GFX940_XDL4PassWriteVgprVALUWawWaitStates
2459cad9de71SStanislav Mekhanoshin               : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates;
2460a8d9d507SStanislav Mekhanoshin         break;
2461a8d9d507SStanislav Mekhanoshin       case 8:
2462cad9de71SStanislav Mekhanoshin         NeedWaitStates = ST.hasGFX940Insts()
2463cad9de71SStanislav Mekhanoshin           ? isXDL(ST, *MFMA)
2464cad9de71SStanislav Mekhanoshin             ? GFX940_XDL8PassWriteVgprVALUWawWaitStates
2465cad9de71SStanislav Mekhanoshin             : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates
2466cad9de71SStanislav Mekhanoshin           : SMFMA16x16WriteVgprVALUWawWaitStates;
2467a8d9d507SStanislav Mekhanoshin         break;
2468a8d9d507SStanislav Mekhanoshin       case 16: LLVM_FALLTHROUGH;
2469a8d9d507SStanislav Mekhanoshin       default:
2470a8d9d507SStanislav Mekhanoshin         NeedWaitStates = isDGEMM(MFMA->getOpcode())
2471a8d9d507SStanislav Mekhanoshin                    ? DMFMA16x16WriteVgprVALUWriteWaitStates
2472cad9de71SStanislav Mekhanoshin                    : ST.hasGFX940Insts()
2473cad9de71SStanislav Mekhanoshin                      ? isXDL(ST, *MFMA)
2474cad9de71SStanislav Mekhanoshin                        ? GFX940_XDL16PassWriteVgprVALUWawWaitStates
2475cad9de71SStanislav Mekhanoshin                        : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates
2476a8d9d507SStanislav Mekhanoshin                    : SMFMA32x32WriteVgprVALUWawWaitStates;
2477a8d9d507SStanislav Mekhanoshin         break;
2478a8d9d507SStanislav Mekhanoshin       }
2479a8d9d507SStanislav Mekhanoshin 
2480a8d9d507SStanislav Mekhanoshin       int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
2481a8d9d507SStanislav Mekhanoshin       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2482a8d9d507SStanislav Mekhanoshin 
2483a8d9d507SStanislav Mekhanoshin       if (WaitStatesNeeded == MaxWaitStates)
2484a8d9d507SStanislav Mekhanoshin         break;
2485a8d9d507SStanislav Mekhanoshin     }
2486a8d9d507SStanislav Mekhanoshin 
2487bd9eed3aSAustin Kerbow     auto IsSMFMAReadAsCFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
2488bd9eed3aSAustin Kerbow       if (!SIInstrInfo::isMFMA(MI) || isDGEMM(MI.getOpcode()) ||
2489424f1f6fSCarl Ritson           !MI.readsRegister(Reg, &TRI))
2490a8d9d507SStanislav Mekhanoshin         return false;
2491a8d9d507SStanislav Mekhanoshin 
2492cad9de71SStanislav Mekhanoshin       if (ST.hasGFX940Insts() && !isXDL(ST, MI))
2493cad9de71SStanislav Mekhanoshin         return false;
2494cad9de71SStanislav Mekhanoshin 
2495424f1f6fSCarl Ritson       const MachineOperand *SrcC =
2496424f1f6fSCarl Ritson           TII.getNamedOperand(MI, AMDGPU::OpName::src2);
2497a8d9d507SStanislav Mekhanoshin       assert(SrcC);
2498a8d9d507SStanislav Mekhanoshin       if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg))
2499a8d9d507SStanislav Mekhanoshin         return false;
2500a8d9d507SStanislav Mekhanoshin 
2501424f1f6fSCarl Ritson       MFMA = &MI;
2502a8d9d507SStanislav Mekhanoshin       return true;
2503a8d9d507SStanislav Mekhanoshin     };
2504a8d9d507SStanislav Mekhanoshin 
2505a8d9d507SStanislav Mekhanoshin     MFMA = nullptr;
2506a8d9d507SStanislav Mekhanoshin     int WaitStatesSinceUse = getWaitStatesSince(IsSMFMAReadAsCFn,
2507a8d9d507SStanislav Mekhanoshin                                                 MaxWarWaitStates);
2508a8d9d507SStanislav Mekhanoshin     if (!MFMA)
2509a8d9d507SStanislav Mekhanoshin       continue;
2510a8d9d507SStanislav Mekhanoshin 
2511a8d9d507SStanislav Mekhanoshin     unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
2512a8d9d507SStanislav Mekhanoshin     int NeedWaitStates = MaxWaitStates;
2513a8d9d507SStanislav Mekhanoshin     switch (HazardDefLatency) {
2514a8d9d507SStanislav Mekhanoshin     case 2:  NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
2515a8d9d507SStanislav Mekhanoshin              break;
2516cad9de71SStanislav Mekhanoshin     case 4:  assert(ST.hasGFX940Insts());
2517cad9de71SStanislav Mekhanoshin              NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
2518cad9de71SStanislav Mekhanoshin              break;
2519a8d9d507SStanislav Mekhanoshin     case 8:  NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
2520a8d9d507SStanislav Mekhanoshin              break;
2521a8d9d507SStanislav Mekhanoshin     case 16: LLVM_FALLTHROUGH;
2522a8d9d507SStanislav Mekhanoshin     default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
2523a8d9d507SStanislav Mekhanoshin              break;
2524a8d9d507SStanislav Mekhanoshin     }
2525a8d9d507SStanislav Mekhanoshin 
2526a8d9d507SStanislav Mekhanoshin     int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse;
2527a8d9d507SStanislav Mekhanoshin     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2528a8d9d507SStanislav Mekhanoshin   }
2529a8d9d507SStanislav Mekhanoshin 
2530a8d9d507SStanislav Mekhanoshin   return WaitStatesNeeded;
2531a8d9d507SStanislav Mekhanoshin }
2532a8d9d507SStanislav Mekhanoshin 
ShouldPreferAnother(SUnit * SU)253313b63be4SStanislav Mekhanoshin bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
253413b63be4SStanislav Mekhanoshin   if (!SU->isInstr())
253513b63be4SStanislav Mekhanoshin     return false;
253613b63be4SStanislav Mekhanoshin 
2537424f1f6fSCarl Ritson   const MachineInstr *MAI = nullptr;
2538bd9eed3aSAustin Kerbow 
2539424f1f6fSCarl Ritson   auto IsMFMAFn = [&MAI](const MachineInstr &MI) {
254013b63be4SStanislav Mekhanoshin     MAI = nullptr;
2541bd9eed3aSAustin Kerbow     if (SIInstrInfo::isMFMA(MI))
2542424f1f6fSCarl Ritson       MAI = &MI;
254313b63be4SStanislav Mekhanoshin     return MAI != nullptr;
254413b63be4SStanislav Mekhanoshin   };
254513b63be4SStanislav Mekhanoshin 
254613b63be4SStanislav Mekhanoshin   MachineInstr *MI = SU->getInstr();
2547424f1f6fSCarl Ritson   if (IsMFMAFn(*MI)) {
254813b63be4SStanislav Mekhanoshin     int W = getWaitStatesSince(IsMFMAFn, 16);
254913b63be4SStanislav Mekhanoshin     if (MAI)
255013b63be4SStanislav Mekhanoshin       return W < (int)TSchedModel.computeInstrLatency(MAI);
255113b63be4SStanislav Mekhanoshin   }
255213b63be4SStanislav Mekhanoshin 
255313b63be4SStanislav Mekhanoshin   return false;
255413b63be4SStanislav Mekhanoshin }
2555