1cb6ba62dSTom Stellard //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2cb6ba62dSTom Stellard //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cb6ba62dSTom Stellard //
7cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
8cb6ba62dSTom Stellard //
9cb6ba62dSTom Stellard // This file implements hazard recognizers for scheduling on GCN processors.
10cb6ba62dSTom Stellard //
11cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
12cb6ba62dSTom Stellard
13734bb7bbSEugene Zelenko #include "GCNHazardRecognizer.h"
14560d7e04Sdfukalov #include "GCNSubtarget.h"
15560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
161e15adbaSAustin Kerbow #include "SIMachineFunctionInfo.h"
17734bb7bbSEugene Zelenko #include "llvm/CodeGen/MachineFunction.h"
18cb6ba62dSTom Stellard #include "llvm/CodeGen/ScheduleDAG.h"
196a87e9b0Sdfukalov #include "llvm/Support/TargetParser.h"
20cb6ba62dSTom Stellard
21cb6ba62dSTom Stellard using namespace llvm;
22cb6ba62dSTom Stellard
231e15adbaSAustin Kerbow namespace {
241e15adbaSAustin Kerbow
251e15adbaSAustin Kerbow struct MFMAPaddingRatioParser : public cl::parser<unsigned> {
MFMAPaddingRatioParser__anon1717562d0111::MFMAPaddingRatioParser261e15adbaSAustin Kerbow MFMAPaddingRatioParser(cl::Option &O) : cl::parser<unsigned>(O) {}
271e15adbaSAustin Kerbow
parse__anon1717562d0111::MFMAPaddingRatioParser281e15adbaSAustin Kerbow bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
291e15adbaSAustin Kerbow if (Arg.getAsInteger(0, Value))
301e15adbaSAustin Kerbow return O.error("'" + Arg + "' value invalid for uint argument!");
311e15adbaSAustin Kerbow
321e15adbaSAustin Kerbow if (Value > 100)
331e15adbaSAustin Kerbow return O.error("'" + Arg + "' value must be in the range [0, 100]!");
341e15adbaSAustin Kerbow
351e15adbaSAustin Kerbow return false;
361e15adbaSAustin Kerbow }
371e15adbaSAustin Kerbow };
381e15adbaSAustin Kerbow
391e15adbaSAustin Kerbow } // end anonymous namespace
401e15adbaSAustin Kerbow
411e15adbaSAustin Kerbow static cl::opt<unsigned, false, MFMAPaddingRatioParser>
421e15adbaSAustin Kerbow MFMAPaddingRatio("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden,
431e15adbaSAustin Kerbow cl::desc("Fill a percentage of the latency between "
441e15adbaSAustin Kerbow "neighboring MFMA with s_nops."));
451e15adbaSAustin Kerbow
46cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
476527b2a4SSebastian Neubauer // Hazard Recognizer Implementation
48cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
49cb6ba62dSTom Stellard
50e0c382a9SPiotr Sobczak static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
51e0c382a9SPiotr Sobczak const GCNSubtarget &ST);
52e0c382a9SPiotr Sobczak
GCNHazardRecognizer(const MachineFunction & MF)53cb6ba62dSTom Stellard GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
54f92ed696SStanislav Mekhanoshin IsHazardRecognizerMode(false),
55cb6ba62dSTom Stellard CurrCycleInstr(nullptr),
5643e92fe3SMatt Arsenault MF(MF),
575bfbae5cSTom Stellard ST(MF.getSubtarget<GCNSubtarget>()),
5803c67d1eSMatt Arsenault TII(*ST.getInstrInfo()),
5903c67d1eSMatt Arsenault TRI(TII.getRegisterInfo()),
6003c67d1eSMatt Arsenault ClauseUses(TRI.getNumRegUnits()),
6103c67d1eSMatt Arsenault ClauseDefs(TRI.getNumRegUnits()) {
62a8d9d507SStanislav Mekhanoshin MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 19 : 5;
637d2019bbSStanislav Mekhanoshin TSchedModel.init(&ST);
64e0c382a9SPiotr Sobczak RunLdsBranchVmemWARHazardFixup = shouldRunLdsBranchVmemWARHazardFixup(MF, ST);
65cb6ba62dSTom Stellard }
66cb6ba62dSTom Stellard
Reset()67de518673SAustin Kerbow void GCNHazardRecognizer::Reset() {
68de518673SAustin Kerbow EmittedInstrs.clear();
69de518673SAustin Kerbow }
70de518673SAustin Kerbow
EmitInstruction(SUnit * SU)71cb6ba62dSTom Stellard void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
72cb6ba62dSTom Stellard EmitInstruction(SU->getInstr());
73cb6ba62dSTom Stellard }
74cb6ba62dSTom Stellard
EmitInstruction(MachineInstr * MI)75cb6ba62dSTom Stellard void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
76cb6ba62dSTom Stellard CurrCycleInstr = MI;
77cb6ba62dSTom Stellard }
78cb6ba62dSTom Stellard
isDivFMas(unsigned Opcode)795ab6154dSTom Stellard static bool isDivFMas(unsigned Opcode) {
80314e29edSJoe Nash return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64;
815ab6154dSTom Stellard }
825ab6154dSTom Stellard
isSGetReg(unsigned Opcode)83961811c9STom Stellard static bool isSGetReg(unsigned Opcode) {
84961811c9STom Stellard return Opcode == AMDGPU::S_GETREG_B32;
85961811c9STom Stellard }
86961811c9STom Stellard
isSSetReg(unsigned Opcode)87961811c9STom Stellard static bool isSSetReg(unsigned Opcode) {
8890777e29SJay Foad switch (Opcode) {
8990777e29SJay Foad case AMDGPU::S_SETREG_B32:
9090777e29SJay Foad case AMDGPU::S_SETREG_B32_mode:
9190777e29SJay Foad case AMDGPU::S_SETREG_IMM32_B32:
9290777e29SJay Foad case AMDGPU::S_SETREG_IMM32_B32_mode:
9390777e29SJay Foad return true;
9490777e29SJay Foad }
9590777e29SJay Foad return false;
96961811c9STom Stellard }
97961811c9STom Stellard
isRWLane(unsigned Opcode)9804051b5fSTom Stellard static bool isRWLane(unsigned Opcode) {
9904051b5fSTom Stellard return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
10004051b5fSTom Stellard }
10104051b5fSTom Stellard
isRFE(unsigned Opcode)102aea899e2STom Stellard static bool isRFE(unsigned Opcode) {
103aea899e2STom Stellard return Opcode == AMDGPU::S_RFE_B64;
104aea899e2STom Stellard }
105aea899e2STom Stellard
isSMovRel(unsigned Opcode)106e823d92fSMatt Arsenault static bool isSMovRel(unsigned Opcode) {
10759ece95fSMatt Arsenault switch (Opcode) {
10859ece95fSMatt Arsenault case AMDGPU::S_MOVRELS_B32:
10959ece95fSMatt Arsenault case AMDGPU::S_MOVRELS_B64:
11059ece95fSMatt Arsenault case AMDGPU::S_MOVRELD_B32:
11159ece95fSMatt Arsenault case AMDGPU::S_MOVRELD_B64:
11259ece95fSMatt Arsenault return true;
11359ece95fSMatt Arsenault default:
11459ece95fSMatt Arsenault return false;
115e823d92fSMatt Arsenault }
116e823d92fSMatt Arsenault }
117e823d92fSMatt Arsenault
isDGEMM(unsigned Opcode)118a8d9d507SStanislav Mekhanoshin static bool isDGEMM(unsigned Opcode) {
11964838ba3SStanislav Mekhanoshin return AMDGPU::getMAIIsDGEMM(Opcode);
120a8d9d507SStanislav Mekhanoshin }
121a8d9d507SStanislav Mekhanoshin
isXDL(const GCNSubtarget & ST,const MachineInstr & MI)122a8d9d507SStanislav Mekhanoshin static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
123a8d9d507SStanislav Mekhanoshin unsigned Opcode = MI.getOpcode();
124a8d9d507SStanislav Mekhanoshin
125a8d9d507SStanislav Mekhanoshin if (!SIInstrInfo::isMAI(MI) ||
126a8d9d507SStanislav Mekhanoshin isDGEMM(Opcode) ||
127a8d9d507SStanislav Mekhanoshin Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
128a8d9d507SStanislav Mekhanoshin Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
129a8d9d507SStanislav Mekhanoshin return false;
130a8d9d507SStanislav Mekhanoshin
131cad9de71SStanislav Mekhanoshin if (!ST.hasGFX940Insts())
132a8d9d507SStanislav Mekhanoshin return true;
133cad9de71SStanislav Mekhanoshin
134cad9de71SStanislav Mekhanoshin return AMDGPU::getMAIIsGFX940XDL(Opcode);
135a8d9d507SStanislav Mekhanoshin }
136a8d9d507SStanislav Mekhanoshin
isSendMsgTraceDataOrGDS(const SIInstrInfo & TII,const MachineInstr & MI)137c5cec5e1SMarek Olsak static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
138c5cec5e1SMarek Olsak const MachineInstr &MI) {
139c5cec5e1SMarek Olsak if (TII.isAlwaysGDS(MI.getOpcode()))
140c5cec5e1SMarek Olsak return true;
141c5cec5e1SMarek Olsak
142a41351e3SMatt Arsenault switch (MI.getOpcode()) {
143a41351e3SMatt Arsenault case AMDGPU::S_SENDMSG:
144a41351e3SMatt Arsenault case AMDGPU::S_SENDMSGHALT:
145a41351e3SMatt Arsenault case AMDGPU::S_TTRACEDATA:
146a41351e3SMatt Arsenault return true;
147c5cec5e1SMarek Olsak // These DS opcodes don't support GDS.
148c5cec5e1SMarek Olsak case AMDGPU::DS_NOP:
149c5cec5e1SMarek Olsak case AMDGPU::DS_PERMUTE_B32:
150c5cec5e1SMarek Olsak case AMDGPU::DS_BPERMUTE_B32:
151c5cec5e1SMarek Olsak return false;
152a41351e3SMatt Arsenault default:
153c5cec5e1SMarek Olsak if (TII.isDS(MI.getOpcode())) {
154c5cec5e1SMarek Olsak int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
155c5cec5e1SMarek Olsak AMDGPU::OpName::gds);
156c5cec5e1SMarek Olsak if (MI.getOperand(GDS).getImm())
157c5cec5e1SMarek Olsak return true;
158c5cec5e1SMarek Olsak }
159a41351e3SMatt Arsenault return false;
160a41351e3SMatt Arsenault }
161a41351e3SMatt Arsenault }
162a41351e3SMatt Arsenault
isPermlane(const MachineInstr & MI)1635f581c9fSStanislav Mekhanoshin static bool isPermlane(const MachineInstr &MI) {
1645f581c9fSStanislav Mekhanoshin unsigned Opcode = MI.getOpcode();
165314e29edSJoe Nash return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
166314e29edSJoe Nash Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
1675f581c9fSStanislav Mekhanoshin }
1685f581c9fSStanislav Mekhanoshin
isLdsDma(const MachineInstr & MI)16963f21f4cSStanislav Mekhanoshin static bool isLdsDma(const MachineInstr &MI) {
17063f21f4cSStanislav Mekhanoshin return SIInstrInfo::isVALU(MI) &&
17163f21f4cSStanislav Mekhanoshin (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI));
17263f21f4cSStanislav Mekhanoshin }
17363f21f4cSStanislav Mekhanoshin
getHWReg(const SIInstrInfo * TII,const MachineInstr & RegInstr)174aea899e2STom Stellard static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
175961811c9STom Stellard const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
176961811c9STom Stellard AMDGPU::OpName::simm16);
177961811c9STom Stellard return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
178961811c9STom Stellard }
179961811c9STom Stellard
180cb6ba62dSTom Stellard ScheduleHazardRecognizer::HazardType
getHazardType(SUnit * SU,int Stalls)181cb6ba62dSTom Stellard GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
182cb6ba62dSTom Stellard MachineInstr *MI = SU->getInstr();
183ebdcef20SAustin Kerbow // If we are not in "HazardRecognizerMode" and therefore not being run from
184ebdcef20SAustin Kerbow // the scheduler, track possible stalls from hazards but don't insert noops.
185ebdcef20SAustin Kerbow auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
186ebdcef20SAustin Kerbow
1878a3d3a9aSAustin Kerbow if (MI->isBundle())
1888a3d3a9aSAustin Kerbow return NoHazard;
189cb6ba62dSTom Stellard
1905c190d05SAaron Ballman if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
191ebdcef20SAustin Kerbow return HazardType;
192cb6ba62dSTom Stellard
19351d1415aSStanislav Mekhanoshin if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
194ebdcef20SAustin Kerbow return HazardType;
19551d1415aSStanislav Mekhanoshin
196bdf7f81bSStanislav Mekhanoshin if (checkFPAtomicToDenormModeHazard(MI) > 0)
197ebdcef20SAustin Kerbow return HazardType;
198bdf7f81bSStanislav Mekhanoshin
19951d1415aSStanislav Mekhanoshin if (ST.hasNoDataDepHazard())
20051d1415aSStanislav Mekhanoshin return NoHazard;
20151d1415aSStanislav Mekhanoshin
202ed745839SJay Foad // FIXME: Should flat be considered vmem?
203ed745839SJay Foad if ((SIInstrInfo::isVMEM(*MI) ||
204ed745839SJay Foad SIInstrInfo::isFLAT(*MI))
205ed745839SJay Foad && checkVMEMHazards(MI) > 0)
206ed745839SJay Foad return HazardType;
207ed745839SJay Foad
208b133fbb9STom Stellard if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
209ebdcef20SAustin Kerbow return HazardType;
210b133fbb9STom Stellard
211a27007ebSTom Stellard if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
212ebdcef20SAustin Kerbow return HazardType;
213a27007ebSTom Stellard
2145ab6154dSTom Stellard if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
215ebdcef20SAustin Kerbow return HazardType;
2165ab6154dSTom Stellard
21704051b5fSTom Stellard if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
218ebdcef20SAustin Kerbow return HazardType;
21904051b5fSTom Stellard
220a8d9d507SStanislav Mekhanoshin if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
221a8d9d507SStanislav Mekhanoshin SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
222a8d9d507SStanislav Mekhanoshin SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
223a8d9d507SStanislav Mekhanoshin return HazardType;
224a8d9d507SStanislav Mekhanoshin
225961811c9STom Stellard if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
226ebdcef20SAustin Kerbow return HazardType;
227961811c9STom Stellard
22830d30824STom Stellard if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
229ebdcef20SAustin Kerbow return HazardType;
23030d30824STom Stellard
231aea899e2STom Stellard if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
232ebdcef20SAustin Kerbow return HazardType;
233aea899e2STom Stellard
23463f21f4cSStanislav Mekhanoshin if (((ST.hasReadM0MovRelInterpHazard() &&
23563f21f4cSStanislav Mekhanoshin (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
23663f21f4cSStanislav Mekhanoshin (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
23763f21f4cSStanislav Mekhanoshin (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
23863f21f4cSStanislav Mekhanoshin (ST.hasReadM0LdsDirectHazard() &&
23963f21f4cSStanislav Mekhanoshin MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
240e823d92fSMatt Arsenault checkReadM0Hazards(MI) > 0)
241ebdcef20SAustin Kerbow return HazardType;
242e823d92fSMatt Arsenault
2437d2019bbSStanislav Mekhanoshin if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
244ebdcef20SAustin Kerbow return HazardType;
2457d2019bbSStanislav Mekhanoshin
24643a38dc2SStanislav Mekhanoshin if ((SIInstrInfo::isVMEM(*MI) ||
24743a38dc2SStanislav Mekhanoshin SIInstrInfo::isFLAT(*MI) ||
24843a38dc2SStanislav Mekhanoshin SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
249ebdcef20SAustin Kerbow return HazardType;
2507d2019bbSStanislav Mekhanoshin
251d29f24acSMark Searles if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
252ebdcef20SAustin Kerbow return HazardType;
253d29f24acSMark Searles
254cb6ba62dSTom Stellard return NoHazard;
255cb6ba62dSTom Stellard }
256cb6ba62dSTom Stellard
insertNoopsInBundle(MachineInstr * MI,const SIInstrInfo & TII,unsigned Quantity)2578b127a86SAustin Kerbow static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
2588b127a86SAustin Kerbow unsigned Quantity) {
2598b127a86SAustin Kerbow while (Quantity > 0) {
26069f5105fSJay Foad unsigned Arg = std::min(Quantity, 8u);
26169f5105fSJay Foad Quantity -= Arg;
2628a3d3a9aSAustin Kerbow BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
26369f5105fSJay Foad .addImm(Arg - 1);
2648b127a86SAustin Kerbow }
2658a3d3a9aSAustin Kerbow }
2668a3d3a9aSAustin Kerbow
2671e15adbaSAustin Kerbow unsigned
getMFMAPipelineWaitStates(const MachineInstr & MI) const2681e15adbaSAustin Kerbow GCNHazardRecognizer::getMFMAPipelineWaitStates(const MachineInstr &MI) const {
2691e15adbaSAustin Kerbow const MCSchedClassDesc *SC = TSchedModel.resolveSchedClass(&MI);
2701e15adbaSAustin Kerbow assert(TSchedModel.getWriteProcResBegin(SC) !=
2711e15adbaSAustin Kerbow TSchedModel.getWriteProcResEnd(SC));
2721e15adbaSAustin Kerbow return TSchedModel.getWriteProcResBegin(SC)->Cycles;
2731e15adbaSAustin Kerbow }
2741e15adbaSAustin Kerbow
processBundle()2758a3d3a9aSAustin Kerbow void GCNHazardRecognizer::processBundle() {
2768a3d3a9aSAustin Kerbow MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
2778a3d3a9aSAustin Kerbow MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
2788a3d3a9aSAustin Kerbow // Check bundled MachineInstr's for hazards.
2798a3d3a9aSAustin Kerbow for (; MI != E && MI->isInsideBundle(); ++MI) {
2808a3d3a9aSAustin Kerbow CurrCycleInstr = &*MI;
2818a3d3a9aSAustin Kerbow unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
2828a3d3a9aSAustin Kerbow
2838b127a86SAustin Kerbow if (IsHazardRecognizerMode) {
2848a3d3a9aSAustin Kerbow fixHazards(CurrCycleInstr);
2858a3d3a9aSAustin Kerbow
2868b127a86SAustin Kerbow insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
2878b127a86SAustin Kerbow }
2888a3d3a9aSAustin Kerbow
2898a3d3a9aSAustin Kerbow // It’s unnecessary to track more than MaxLookAhead instructions. Since we
2908a3d3a9aSAustin Kerbow // include the bundled MI directly after, only add a maximum of
2918a3d3a9aSAustin Kerbow // (MaxLookAhead - 1) noops to EmittedInstrs.
2928a3d3a9aSAustin Kerbow for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
2938a3d3a9aSAustin Kerbow EmittedInstrs.push_front(nullptr);
2948a3d3a9aSAustin Kerbow
2958a3d3a9aSAustin Kerbow EmittedInstrs.push_front(CurrCycleInstr);
2968a3d3a9aSAustin Kerbow EmittedInstrs.resize(MaxLookAhead);
2978a3d3a9aSAustin Kerbow }
2988a3d3a9aSAustin Kerbow CurrCycleInstr = nullptr;
2998a3d3a9aSAustin Kerbow }
3008a3d3a9aSAustin Kerbow
PreEmitNoops(MachineInstr * MI)301cb6ba62dSTom Stellard unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
302f92ed696SStanislav Mekhanoshin IsHazardRecognizerMode = true;
303f92ed696SStanislav Mekhanoshin CurrCycleInstr = MI;
304f92ed696SStanislav Mekhanoshin unsigned W = PreEmitNoopsCommon(MI);
3058a3d3a9aSAustin Kerbow fixHazards(MI);
306f92ed696SStanislav Mekhanoshin CurrCycleInstr = nullptr;
307f92ed696SStanislav Mekhanoshin return W;
308f92ed696SStanislav Mekhanoshin }
309f92ed696SStanislav Mekhanoshin
PreEmitNoopsCommon(MachineInstr * MI)310f92ed696SStanislav Mekhanoshin unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
3118a3d3a9aSAustin Kerbow if (MI->isBundle())
3128a3d3a9aSAustin Kerbow return 0;
3138a3d3a9aSAustin Kerbow
3142e87acacSDmitry Preobrazhensky int WaitStates = 0;
315e823d92fSMatt Arsenault
3165c190d05SAaron Ballman if (SIInstrInfo::isSMRD(*MI))
317e823d92fSMatt Arsenault return std::max(WaitStates, checkSMRDHazards(MI));
318cb6ba62dSTom Stellard
31951d1415aSStanislav Mekhanoshin if (ST.hasNSAtoVMEMBug())
32051d1415aSStanislav Mekhanoshin WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
32151d1415aSStanislav Mekhanoshin
322bdf7f81bSStanislav Mekhanoshin WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
323bdf7f81bSStanislav Mekhanoshin
32451d1415aSStanislav Mekhanoshin if (ST.hasNoDataDepHazard())
32551d1415aSStanislav Mekhanoshin return WaitStates;
32651d1415aSStanislav Mekhanoshin
327ed745839SJay Foad if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
328ed745839SJay Foad WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
329ed745839SJay Foad
33051d1415aSStanislav Mekhanoshin if (SIInstrInfo::isVALU(*MI))
33151d1415aSStanislav Mekhanoshin WaitStates = std::max(WaitStates, checkVALUHazards(MI));
33251d1415aSStanislav Mekhanoshin
333a27007ebSTom Stellard if (SIInstrInfo::isDPP(*MI))
334b133fbb9STom Stellard WaitStates = std::max(WaitStates, checkDPPHazards(MI));
335a27007ebSTom Stellard
3365ab6154dSTom Stellard if (isDivFMas(MI->getOpcode()))
337b133fbb9STom Stellard WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
338b133fbb9STom Stellard
33904051b5fSTom Stellard if (isRWLane(MI->getOpcode()))
34004051b5fSTom Stellard WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
34104051b5fSTom Stellard
342a8d9d507SStanislav Mekhanoshin if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
343a8d9d507SStanislav Mekhanoshin SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
344a8d9d507SStanislav Mekhanoshin SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
345a8d9d507SStanislav Mekhanoshin WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));
346a8d9d507SStanislav Mekhanoshin
347d29f24acSMark Searles if (MI->isInlineAsm())
348d29f24acSMark Searles return std::max(WaitStates, checkInlineAsmHazards(MI));
349d29f24acSMark Searles
350961811c9STom Stellard if (isSGetReg(MI->getOpcode()))
351e823d92fSMatt Arsenault return std::max(WaitStates, checkGetRegHazards(MI));
352961811c9STom Stellard
35330d30824STom Stellard if (isSSetReg(MI->getOpcode()))
354e823d92fSMatt Arsenault return std::max(WaitStates, checkSetRegHazards(MI));
35530d30824STom Stellard
356aea899e2STom Stellard if (isRFE(MI->getOpcode()))
357e823d92fSMatt Arsenault return std::max(WaitStates, checkRFEHazards(MI));
358aea899e2STom Stellard
35963f21f4cSStanislav Mekhanoshin if ((ST.hasReadM0MovRelInterpHazard() &&
36063f21f4cSStanislav Mekhanoshin (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
36163f21f4cSStanislav Mekhanoshin (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
36263f21f4cSStanislav Mekhanoshin (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
36363f21f4cSStanislav Mekhanoshin (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)))
364e823d92fSMatt Arsenault return std::max(WaitStates, checkReadM0Hazards(MI));
365e823d92fSMatt Arsenault
3667d2019bbSStanislav Mekhanoshin if (SIInstrInfo::isMAI(*MI))
3677d2019bbSStanislav Mekhanoshin return std::max(WaitStates, checkMAIHazards(MI));
3687d2019bbSStanislav Mekhanoshin
36943a38dc2SStanislav Mekhanoshin if (SIInstrInfo::isVMEM(*MI) ||
37043a38dc2SStanislav Mekhanoshin SIInstrInfo::isFLAT(*MI) ||
37143a38dc2SStanislav Mekhanoshin SIInstrInfo::isDS(*MI))
3727d2019bbSStanislav Mekhanoshin return std::max(WaitStates, checkMAILdStHazards(MI));
3737d2019bbSStanislav Mekhanoshin
374e823d92fSMatt Arsenault return WaitStates;
375cb6ba62dSTom Stellard }
376cb6ba62dSTom Stellard
EmitNoop()377cb6ba62dSTom Stellard void GCNHazardRecognizer::EmitNoop() {
378cb6ba62dSTom Stellard EmittedInstrs.push_front(nullptr);
379cb6ba62dSTom Stellard }
380cb6ba62dSTom Stellard
AdvanceCycle()381cb6ba62dSTom Stellard void GCNHazardRecognizer::AdvanceCycle() {
382cb6ba62dSTom Stellard // When the scheduler detects a stall, it will call AdvanceCycle() without
383cb6ba62dSTom Stellard // emitting any instructions.
384ebdcef20SAustin Kerbow if (!CurrCycleInstr) {
385ebdcef20SAustin Kerbow EmittedInstrs.push_front(nullptr);
386cb6ba62dSTom Stellard return;
387ebdcef20SAustin Kerbow }
388cb6ba62dSTom Stellard
3898a3d3a9aSAustin Kerbow if (CurrCycleInstr->isBundle()) {
3908a3d3a9aSAustin Kerbow processBundle();
3918a3d3a9aSAustin Kerbow return;
3928a3d3a9aSAustin Kerbow }
3938a3d3a9aSAustin Kerbow
39459ece95fSMatt Arsenault unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
39568660767SChristudasan Devadasan if (!NumWaitStates) {
39668660767SChristudasan Devadasan CurrCycleInstr = nullptr;
39768660767SChristudasan Devadasan return;
39868660767SChristudasan Devadasan }
399cb6ba62dSTom Stellard
400cb6ba62dSTom Stellard // Keep track of emitted instructions
401cb6ba62dSTom Stellard EmittedInstrs.push_front(CurrCycleInstr);
402cb6ba62dSTom Stellard
403cb6ba62dSTom Stellard // Add a nullptr for each additional wait state after the first. Make sure
404cb6ba62dSTom Stellard // not to add more than getMaxLookAhead() items to the list, since we
405cb6ba62dSTom Stellard // truncate the list to that size right after this loop.
406cb6ba62dSTom Stellard for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
407cb6ba62dSTom Stellard i < e; ++i) {
408cb6ba62dSTom Stellard EmittedInstrs.push_front(nullptr);
409cb6ba62dSTom Stellard }
410cb6ba62dSTom Stellard
411cb6ba62dSTom Stellard // getMaxLookahead() is the largest number of wait states we will ever need
412cb6ba62dSTom Stellard // to insert, so there is no point in keeping track of more than that many
413cb6ba62dSTom Stellard // wait states.
414cb6ba62dSTom Stellard EmittedInstrs.resize(getMaxLookAhead());
415cb6ba62dSTom Stellard
416cb6ba62dSTom Stellard CurrCycleInstr = nullptr;
417cb6ba62dSTom Stellard }
418cb6ba62dSTom Stellard
RecedeCycle()419cb6ba62dSTom Stellard void GCNHazardRecognizer::RecedeCycle() {
420cb6ba62dSTom Stellard llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
421cb6ba62dSTom Stellard }
422cb6ba62dSTom Stellard
423cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
424cb6ba62dSTom Stellard // Helper Functions
425cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
426cb6ba62dSTom Stellard
4279dff14beSJay Foad typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult;
4289dff14beSJay Foad
429424f1f6fSCarl Ritson typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
43013107c27SJay Foad typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
431f92ed696SStanislav Mekhanoshin
4329dff14beSJay Foad // Search for a hazard in a block and its predecessors.
4339dff14beSJay Foad template <typename StateT>
4349dff14beSJay Foad static bool
hasHazard(StateT State,function_ref<HazardFnResult (StateT &,const MachineInstr &)> IsHazard,function_ref<void (StateT &,const MachineInstr &)> UpdateState,const MachineBasicBlock * MBB,MachineBasicBlock::const_reverse_instr_iterator I,DenseSet<const MachineBasicBlock * > & Visited)4359dff14beSJay Foad hasHazard(StateT State,
4369dff14beSJay Foad function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
4379dff14beSJay Foad function_ref<void(StateT &, const MachineInstr &)> UpdateState,
4389dff14beSJay Foad const MachineBasicBlock *MBB,
4399dff14beSJay Foad MachineBasicBlock::const_reverse_instr_iterator I,
4409dff14beSJay Foad DenseSet<const MachineBasicBlock *> &Visited) {
4419dff14beSJay Foad for (auto E = MBB->instr_rend(); I != E; ++I) {
4429dff14beSJay Foad // No need to look at parent BUNDLE instructions.
4439dff14beSJay Foad if (I->isBundle())
4449dff14beSJay Foad continue;
4459dff14beSJay Foad
4469dff14beSJay Foad switch (IsHazard(State, *I)) {
4479dff14beSJay Foad case HazardFound:
4489dff14beSJay Foad return true;
4499dff14beSJay Foad case HazardExpired:
4509dff14beSJay Foad return false;
4519dff14beSJay Foad default:
4529dff14beSJay Foad // Continue search
4539dff14beSJay Foad break;
4549dff14beSJay Foad }
4559dff14beSJay Foad
4569dff14beSJay Foad if (I->isInlineAsm() || I->isMetaInstruction())
4579dff14beSJay Foad continue;
4589dff14beSJay Foad
4599dff14beSJay Foad UpdateState(State, *I);
4609dff14beSJay Foad }
4619dff14beSJay Foad
4629dff14beSJay Foad for (MachineBasicBlock *Pred : MBB->predecessors()) {
4639dff14beSJay Foad if (!Visited.insert(Pred).second)
4649dff14beSJay Foad continue;
4659dff14beSJay Foad
4669dff14beSJay Foad if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
4679dff14beSJay Foad Visited))
4689dff14beSJay Foad return true;
4699dff14beSJay Foad }
4709dff14beSJay Foad
4719dff14beSJay Foad return false;
4729dff14beSJay Foad }
4739dff14beSJay Foad
474f92ed696SStanislav Mekhanoshin // Returns a minimum wait states since \p I walking all predecessors.
475f92ed696SStanislav Mekhanoshin // Only scans until \p IsExpired does not return true.
476f92ed696SStanislav Mekhanoshin // Can only be run in a hazard recognizer mode.
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,const MachineBasicBlock * MBB,MachineBasicBlock::const_reverse_instr_iterator I,int WaitStates,IsExpiredFn IsExpired,DenseSet<const MachineBasicBlock * > & Visited,GetNumWaitStatesFn GetNumWaitStates=SIInstrInfo::getNumWaitStates)47713107c27SJay Foad static int getWaitStatesSince(
47813107c27SJay Foad GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
47913107c27SJay Foad MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
48013107c27SJay Foad IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
48113107c27SJay Foad GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
4828a3d3a9aSAustin Kerbow for (auto E = MBB->instr_rend(); I != E; ++I) {
4838a3d3a9aSAustin Kerbow // Don't add WaitStates for parent BUNDLE instructions.
4848a3d3a9aSAustin Kerbow if (I->isBundle())
4858a3d3a9aSAustin Kerbow continue;
486f92ed696SStanislav Mekhanoshin
487424f1f6fSCarl Ritson if (IsHazard(*I))
488f92ed696SStanislav Mekhanoshin return WaitStates;
489f92ed696SStanislav Mekhanoshin
4904f5ba46eSChristudasan Devadasan if (I->isInlineAsm())
491f92ed696SStanislav Mekhanoshin continue;
492f92ed696SStanislav Mekhanoshin
49313107c27SJay Foad WaitStates += GetNumWaitStates(*I);
494f92ed696SStanislav Mekhanoshin
495424f1f6fSCarl Ritson if (IsExpired(*I, WaitStates))
496f92ed696SStanislav Mekhanoshin return std::numeric_limits<int>::max();
497f92ed696SStanislav Mekhanoshin }
498f92ed696SStanislav Mekhanoshin
499f251379aSJay Foad int MinWaitStates = std::numeric_limits<int>::max();
500f92ed696SStanislav Mekhanoshin for (MachineBasicBlock *Pred : MBB->predecessors()) {
501f92ed696SStanislav Mekhanoshin if (!Visited.insert(Pred).second)
502f92ed696SStanislav Mekhanoshin continue;
503f92ed696SStanislav Mekhanoshin
50413107c27SJay Foad int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
50513107c27SJay Foad IsExpired, Visited, GetNumWaitStates);
506f92ed696SStanislav Mekhanoshin
507f251379aSJay Foad MinWaitStates = std::min(MinWaitStates, W);
508f92ed696SStanislav Mekhanoshin }
509f92ed696SStanislav Mekhanoshin
510f92ed696SStanislav Mekhanoshin return MinWaitStates;
511f92ed696SStanislav Mekhanoshin }
512f92ed696SStanislav Mekhanoshin
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,const MachineInstr * MI,IsExpiredFn IsExpired)513f92ed696SStanislav Mekhanoshin static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
514424f1f6fSCarl Ritson const MachineInstr *MI, IsExpiredFn IsExpired) {
515f92ed696SStanislav Mekhanoshin DenseSet<const MachineBasicBlock *> Visited;
516f92ed696SStanislav Mekhanoshin return getWaitStatesSince(IsHazard, MI->getParent(),
517f92ed696SStanislav Mekhanoshin std::next(MI->getReverseIterator()),
518f92ed696SStanislav Mekhanoshin 0, IsExpired, Visited);
519f92ed696SStanislav Mekhanoshin }
520f92ed696SStanislav Mekhanoshin
getWaitStatesSince(IsHazardFn IsHazard,int Limit)521f92ed696SStanislav Mekhanoshin int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
522f92ed696SStanislav Mekhanoshin if (IsHazardRecognizerMode) {
523424f1f6fSCarl Ritson auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
524f92ed696SStanislav Mekhanoshin return WaitStates >= Limit;
525f92ed696SStanislav Mekhanoshin };
526f92ed696SStanislav Mekhanoshin return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
527f92ed696SStanislav Mekhanoshin }
528f92ed696SStanislav Mekhanoshin
52975c98c36SNicolai Haehnle int WaitStates = 0;
530961811c9STom Stellard for (MachineInstr *MI : EmittedInstrs) {
53175c98c36SNicolai Haehnle if (MI) {
532424f1f6fSCarl Ritson if (IsHazard(*MI))
533961811c9STom Stellard return WaitStates;
53475c98c36SNicolai Haehnle
535f92ed696SStanislav Mekhanoshin if (MI->isInlineAsm())
53675c98c36SNicolai Haehnle continue;
53775c98c36SNicolai Haehnle }
53875c98c36SNicolai Haehnle ++WaitStates;
539f92ed696SStanislav Mekhanoshin
540f92ed696SStanislav Mekhanoshin if (WaitStates >= Limit)
541f92ed696SStanislav Mekhanoshin break;
542961811c9STom Stellard }
543961811c9STom Stellard return std::numeric_limits<int>::max();
544961811c9STom Stellard }
545961811c9STom Stellard
getWaitStatesSinceDef(unsigned Reg,IsHazardFn IsHazardDef,int Limit)546f92ed696SStanislav Mekhanoshin int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
547f92ed696SStanislav Mekhanoshin IsHazardFn IsHazardDef,
548f92ed696SStanislav Mekhanoshin int Limit) {
549b133fbb9STom Stellard const SIRegisterInfo *TRI = ST.getRegisterInfo();
550b133fbb9STom Stellard
551424f1f6fSCarl Ritson auto IsHazardFn = [IsHazardDef, TRI, Reg](const MachineInstr &MI) {
552424f1f6fSCarl Ritson return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI);
553b133fbb9STom Stellard };
554b133fbb9STom Stellard
555f92ed696SStanislav Mekhanoshin return getWaitStatesSince(IsHazardFn, Limit);
556b133fbb9STom Stellard }
557b133fbb9STom Stellard
getWaitStatesSinceSetReg(IsHazardFn IsHazard,int Limit)558f92ed696SStanislav Mekhanoshin int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
559f92ed696SStanislav Mekhanoshin int Limit) {
560424f1f6fSCarl Ritson auto IsHazardFn = [IsHazard](const MachineInstr &MI) {
561424f1f6fSCarl Ritson return isSSetReg(MI.getOpcode()) && IsHazard(MI);
562b133fbb9STom Stellard };
563b133fbb9STom Stellard
564f92ed696SStanislav Mekhanoshin return getWaitStatesSince(IsHazardFn, Limit);
565b133fbb9STom Stellard }
566b133fbb9STom Stellard
567cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
568cb6ba62dSTom Stellard // No-op Hazard Detection
569cb6ba62dSTom Stellard //===----------------------------------------------------------------------===//
570cb6ba62dSTom Stellard
addRegUnits(const SIRegisterInfo & TRI,BitVector & BV,MCRegister Reg)5715dc47541SMircea Trofin static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV,
5725dc47541SMircea Trofin MCRegister Reg) {
57303c67d1eSMatt Arsenault for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
57403c67d1eSMatt Arsenault BV.set(*RUI);
57503c67d1eSMatt Arsenault }
57603c67d1eSMatt Arsenault
addRegsToSet(const SIRegisterInfo & TRI,iterator_range<MachineInstr::const_mop_iterator> Ops,BitVector & Set)57703c67d1eSMatt Arsenault static void addRegsToSet(const SIRegisterInfo &TRI,
57803c67d1eSMatt Arsenault iterator_range<MachineInstr::const_mop_iterator> Ops,
57903c67d1eSMatt Arsenault BitVector &Set) {
5801f520e5cSTom Stellard for (const MachineOperand &Op : Ops) {
5811f520e5cSTom Stellard if (Op.isReg())
5825dc47541SMircea Trofin addRegUnits(TRI, Set, Op.getReg().asMCReg());
5831f520e5cSTom Stellard }
5841f520e5cSTom Stellard }
5851f520e5cSTom Stellard
addClauseInst(const MachineInstr & MI)58603c67d1eSMatt Arsenault void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
58703c67d1eSMatt Arsenault // XXX: Do we need to worry about implicit operands
58803c67d1eSMatt Arsenault addRegsToSet(TRI, MI.defs(), ClauseDefs);
58903c67d1eSMatt Arsenault addRegsToSet(TRI, MI.uses(), ClauseUses);
59003c67d1eSMatt Arsenault }
59103c67d1eSMatt Arsenault
breaksSMEMSoftClause(MachineInstr * MI)5923d76824bSJay Foad static bool breaksSMEMSoftClause(MachineInstr *MI) {
5933d76824bSJay Foad return !SIInstrInfo::isSMRD(*MI);
5943d76824bSJay Foad }
5953d76824bSJay Foad
breaksVMEMSoftClause(MachineInstr * MI)5963d76824bSJay Foad static bool breaksVMEMSoftClause(MachineInstr *MI) {
5973d76824bSJay Foad return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
5983d76824bSJay Foad }
5993d76824bSJay Foad
checkSoftClauseHazards(MachineInstr * MEM)600a41351e3SMatt Arsenault int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
60103c67d1eSMatt Arsenault // SMEM soft clause are only present on VI+, and only matter if xnack is
60203c67d1eSMatt Arsenault // enabled.
60303c67d1eSMatt Arsenault if (!ST.isXNACKEnabled())
6041f520e5cSTom Stellard return 0;
6051f520e5cSTom Stellard
606a41351e3SMatt Arsenault bool IsSMRD = TII.isSMRD(*MEM);
607a41351e3SMatt Arsenault
60803c67d1eSMatt Arsenault resetClause();
60903c67d1eSMatt Arsenault
6101f520e5cSTom Stellard // A soft-clause is any group of consecutive SMEM instructions. The
6111f520e5cSTom Stellard // instructions in this group may return out of order and/or may be
6121f520e5cSTom Stellard // replayed (i.e. the same instruction issued more than once).
6131f520e5cSTom Stellard //
6148a3d3a9aSAustin Kerbow // In order to handle these situations correctly we need to make sure that
6158a3d3a9aSAustin Kerbow // when a clause has more than one instruction, no instruction in the clause
6168a3d3a9aSAustin Kerbow // writes to a register that is read by another instruction in the clause
6176527b2a4SSebastian Neubauer // (including itself). If we encounter this situation, we need to break the
6181f520e5cSTom Stellard // clause by inserting a non SMEM instruction.
6191f520e5cSTom Stellard
6201f520e5cSTom Stellard for (MachineInstr *MI : EmittedInstrs) {
6211f520e5cSTom Stellard // When we hit a non-SMEM instruction then we have passed the start of the
6221f520e5cSTom Stellard // clause and we can stop.
623a41351e3SMatt Arsenault if (!MI)
624a41351e3SMatt Arsenault break;
625a41351e3SMatt Arsenault
6263d76824bSJay Foad if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI))
6271f520e5cSTom Stellard break;
6281f520e5cSTom Stellard
62903c67d1eSMatt Arsenault addClauseInst(*MI);
6301f520e5cSTom Stellard }
6311f520e5cSTom Stellard
63203c67d1eSMatt Arsenault if (ClauseDefs.none())
6331f520e5cSTom Stellard return 0;
6341f520e5cSTom Stellard
635a41351e3SMatt Arsenault // We need to make sure not to put loads and stores in the same clause if they
636a41351e3SMatt Arsenault // use the same address. For now, just start a new clause whenever we see a
637a41351e3SMatt Arsenault // store.
638a41351e3SMatt Arsenault if (MEM->mayStore())
6391f520e5cSTom Stellard return 1;
6401f520e5cSTom Stellard
641a41351e3SMatt Arsenault addClauseInst(*MEM);
6421f520e5cSTom Stellard
6431f520e5cSTom Stellard // If the set of defs and uses intersect then we cannot add this instruction
6441f520e5cSTom Stellard // to the clause, so we have a hazard.
64503c67d1eSMatt Arsenault return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
6461f520e5cSTom Stellard }
6471f520e5cSTom Stellard
checkSMRDHazards(MachineInstr * SMRD)648cb6ba62dSTom Stellard int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
6491f520e5cSTom Stellard int WaitStatesNeeded = 0;
6501f520e5cSTom Stellard
651a41351e3SMatt Arsenault WaitStatesNeeded = checkSoftClauseHazards(SMRD);
652cb6ba62dSTom Stellard
653cb6ba62dSTom Stellard // This SMRD hazard only affects SI.
654e4c2e9b0SMatt Arsenault if (!ST.hasSMRDReadVALUDefHazard())
6551f520e5cSTom Stellard return WaitStatesNeeded;
656cb6ba62dSTom Stellard
657cb6ba62dSTom Stellard // A read of an SGPR by SMRD instruction requires 4 wait states when the
658cb6ba62dSTom Stellard // SGPR was written by a VALU instruction.
659cb6ba62dSTom Stellard int SmrdSgprWaitStates = 4;
660424f1f6fSCarl Ritson auto IsHazardDefFn = [this](const MachineInstr &MI) {
661424f1f6fSCarl Ritson return TII.isVALU(MI);
662424f1f6fSCarl Ritson };
663424f1f6fSCarl Ritson auto IsBufferHazardDefFn = [this](const MachineInstr &MI) {
664424f1f6fSCarl Ritson return TII.isSALU(MI);
665424f1f6fSCarl Ritson };
66622322438SMarek Olsak
6674512d0a6SMatt Arsenault bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
668cb6ba62dSTom Stellard
669cb6ba62dSTom Stellard for (const MachineOperand &Use : SMRD->uses()) {
670cb6ba62dSTom Stellard if (!Use.isReg())
671cb6ba62dSTom Stellard continue;
672cb6ba62dSTom Stellard int WaitStatesNeededForUse =
673f92ed696SStanislav Mekhanoshin SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
674f92ed696SStanislav Mekhanoshin SmrdSgprWaitStates);
675cb6ba62dSTom Stellard WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
67622322438SMarek Olsak
67722322438SMarek Olsak // This fixes what appears to be undocumented hardware behavior in SI where
67822322438SMarek Olsak // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
67922322438SMarek Olsak // needs some number of nops in between. We don't know how many we need, but
68022322438SMarek Olsak // let's use 4. This wasn't discovered before probably because the only
68122322438SMarek Olsak // case when this happens is when we expand a 64-bit pointer into a full
68222322438SMarek Olsak // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
68322322438SMarek Olsak // probably never encountered in the closed-source land.
68422322438SMarek Olsak if (IsBufferSMRD) {
68522322438SMarek Olsak int WaitStatesNeededForUse =
68622322438SMarek Olsak SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
687f92ed696SStanislav Mekhanoshin IsBufferHazardDefFn,
688f92ed696SStanislav Mekhanoshin SmrdSgprWaitStates);
68922322438SMarek Olsak WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
690cb6ba62dSTom Stellard }
69122322438SMarek Olsak }
69222322438SMarek Olsak
693cb6ba62dSTom Stellard return WaitStatesNeeded;
694cb6ba62dSTom Stellard }
695cb6ba62dSTom Stellard
checkVMEMHazards(MachineInstr * VMEM)696cb6ba62dSTom Stellard int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
697e4c2e9b0SMatt Arsenault if (!ST.hasVMEMReadSGPRVALUDefHazard())
698cb6ba62dSTom Stellard return 0;
699cb6ba62dSTom Stellard
700a41351e3SMatt Arsenault int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
701cb6ba62dSTom Stellard
702cb6ba62dSTom Stellard // A read of an SGPR by a VMEM instruction requires 5 wait states when the
703cb6ba62dSTom Stellard // SGPR was written by a VALU Instruction.
704a41351e3SMatt Arsenault const int VmemSgprWaitStates = 5;
705424f1f6fSCarl Ritson auto IsHazardDefFn = [this](const MachineInstr &MI) {
706424f1f6fSCarl Ritson return TII.isVALU(MI);
707424f1f6fSCarl Ritson };
708cb6ba62dSTom Stellard for (const MachineOperand &Use : VMEM->uses()) {
709a8d9d507SStanislav Mekhanoshin if (!Use.isReg() || TRI.isVectorRegister(MF.getRegInfo(), Use.getReg()))
710cb6ba62dSTom Stellard continue;
711cb6ba62dSTom Stellard
712cb6ba62dSTom Stellard int WaitStatesNeededForUse =
713f92ed696SStanislav Mekhanoshin VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
714f92ed696SStanislav Mekhanoshin VmemSgprWaitStates);
715cb6ba62dSTom Stellard WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
716cb6ba62dSTom Stellard }
717cb6ba62dSTom Stellard return WaitStatesNeeded;
718cb6ba62dSTom Stellard }
719a27007ebSTom Stellard
checkDPPHazards(MachineInstr * DPP)720a27007ebSTom Stellard int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
72143e92fe3SMatt Arsenault const SIRegisterInfo *TRI = ST.getRegisterInfo();
72200755362SConnor Abbott const SIInstrInfo *TII = ST.getInstrInfo();
723a27007ebSTom Stellard
72400755362SConnor Abbott // Check for DPP VGPR read after VALU VGPR write and EXEC write.
725a27007ebSTom Stellard int DppVgprWaitStates = 2;
72600755362SConnor Abbott int DppExecWaitStates = 5;
727a27007ebSTom Stellard int WaitStatesNeeded = 0;
728424f1f6fSCarl Ritson auto IsHazardDefFn = [TII](const MachineInstr &MI) {
729424f1f6fSCarl Ritson return TII->isVALU(MI);
730424f1f6fSCarl Ritson };
731a27007ebSTom Stellard
732a27007ebSTom Stellard for (const MachineOperand &Use : DPP->uses()) {
733a27007ebSTom Stellard if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
734a27007ebSTom Stellard continue;
735a27007ebSTom Stellard int WaitStatesNeededForUse =
736424f1f6fSCarl Ritson DppVgprWaitStates - getWaitStatesSinceDef(
737424f1f6fSCarl Ritson Use.getReg(),
738424f1f6fSCarl Ritson [](const MachineInstr &) { return true; },
739f92ed696SStanislav Mekhanoshin DppVgprWaitStates);
740a27007ebSTom Stellard WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
741a27007ebSTom Stellard }
742a27007ebSTom Stellard
74300755362SConnor Abbott WaitStatesNeeded = std::max(
74400755362SConnor Abbott WaitStatesNeeded,
745f92ed696SStanislav Mekhanoshin DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
746f92ed696SStanislav Mekhanoshin DppExecWaitStates));
74700755362SConnor Abbott
748a27007ebSTom Stellard return WaitStatesNeeded;
749a27007ebSTom Stellard }
7505ab6154dSTom Stellard
checkDivFMasHazards(MachineInstr * DivFMas)7515ab6154dSTom Stellard int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
7525ab6154dSTom Stellard const SIInstrInfo *TII = ST.getInstrInfo();
7535ab6154dSTom Stellard
7545ab6154dSTom Stellard // v_div_fmas requires 4 wait states after a write to vcc from a VALU
7555ab6154dSTom Stellard // instruction.
7565ab6154dSTom Stellard const int DivFMasWaitStates = 4;
757424f1f6fSCarl Ritson auto IsHazardDefFn = [TII](const MachineInstr &MI) {
758424f1f6fSCarl Ritson return TII->isVALU(MI);
759424f1f6fSCarl Ritson };
760f92ed696SStanislav Mekhanoshin int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
761f92ed696SStanislav Mekhanoshin DivFMasWaitStates);
7625ab6154dSTom Stellard
7635ab6154dSTom Stellard return DivFMasWaitStates - WaitStatesNeeded;
7645ab6154dSTom Stellard }
765961811c9STom Stellard
checkGetRegHazards(MachineInstr * GetRegInstr)766961811c9STom Stellard int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
767961811c9STom Stellard const SIInstrInfo *TII = ST.getInstrInfo();
768961811c9STom Stellard unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
769961811c9STom Stellard
770961811c9STom Stellard const int GetRegWaitStates = 2;
771424f1f6fSCarl Ritson auto IsHazardFn = [TII, GetRegHWReg](const MachineInstr &MI) {
772424f1f6fSCarl Ritson return GetRegHWReg == getHWReg(TII, MI);
773961811c9STom Stellard };
774f92ed696SStanislav Mekhanoshin int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
775961811c9STom Stellard
776961811c9STom Stellard return GetRegWaitStates - WaitStatesNeeded;
777961811c9STom Stellard }
77830d30824STom Stellard
checkSetRegHazards(MachineInstr * SetRegInstr)77930d30824STom Stellard int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
78030d30824STom Stellard const SIInstrInfo *TII = ST.getInstrInfo();
78130d30824STom Stellard unsigned HWReg = getHWReg(TII, *SetRegInstr);
78230d30824STom Stellard
783e4c2e9b0SMatt Arsenault const int SetRegWaitStates = ST.getSetRegWaitStates();
784424f1f6fSCarl Ritson auto IsHazardFn = [TII, HWReg](const MachineInstr &MI) {
785424f1f6fSCarl Ritson return HWReg == getHWReg(TII, MI);
78630d30824STom Stellard };
787f92ed696SStanislav Mekhanoshin int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
78830d30824STom Stellard return SetRegWaitStates - WaitStatesNeeded;
78930d30824STom Stellard }
790b133fbb9STom Stellard
createsVALUHazard(const MachineInstr & MI)791b133fbb9STom Stellard int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
792b133fbb9STom Stellard if (!MI.mayStore())
793b133fbb9STom Stellard return -1;
794b133fbb9STom Stellard
795b133fbb9STom Stellard const SIInstrInfo *TII = ST.getInstrInfo();
796b133fbb9STom Stellard unsigned Opcode = MI.getOpcode();
797b133fbb9STom Stellard const MCInstrDesc &Desc = MI.getDesc();
798b133fbb9STom Stellard
799b133fbb9STom Stellard int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
800b133fbb9STom Stellard int VDataRCID = -1;
801b133fbb9STom Stellard if (VDataIdx != -1)
802b133fbb9STom Stellard VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
803b133fbb9STom Stellard
804b133fbb9STom Stellard if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
805e8cc395eSJan Vesely // There is no hazard if the instruction does not use vector regs
806e8cc395eSJan Vesely // (like wbinvl1)
807e8cc395eSJan Vesely if (VDataIdx == -1)
808e8cc395eSJan Vesely return -1;
809b133fbb9STom Stellard // For MUBUF/MTBUF instructions this hazard only exists if the
810b133fbb9STom Stellard // instruction is not using a register in the soffset field.
811b133fbb9STom Stellard const MachineOperand *SOffset =
812b133fbb9STom Stellard TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
813b133fbb9STom Stellard // If we have no soffset operand, then assume this field has been
814b133fbb9STom Stellard // hardcoded to zero.
815b133fbb9STom Stellard if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
816b133fbb9STom Stellard (!SOffset || !SOffset->isReg()))
817b133fbb9STom Stellard return VDataIdx;
818b133fbb9STom Stellard }
819b133fbb9STom Stellard
820b133fbb9STom Stellard // MIMG instructions create a hazard if they don't use a 256-bit T# and
821b133fbb9STom Stellard // the store size is greater than 8 bytes and they have more than two bits
822b133fbb9STom Stellard // of their dmask set.
823b133fbb9STom Stellard // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
824b133fbb9STom Stellard if (TII->isMIMG(MI)) {
825b133fbb9STom Stellard int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
826b133fbb9STom Stellard assert(SRsrcIdx != -1 &&
827b133fbb9STom Stellard AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
8286b9c1be4STom Stellard (void)SRsrcIdx;
829b133fbb9STom Stellard }
830b133fbb9STom Stellard
831b133fbb9STom Stellard if (TII->isFLAT(MI)) {
83297279a8cSMatt Arsenault int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
833b133fbb9STom Stellard if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
834b133fbb9STom Stellard return DataIdx;
835b133fbb9STom Stellard }
836b133fbb9STom Stellard
837b133fbb9STom Stellard return -1;
838b133fbb9STom Stellard }
839b133fbb9STom Stellard
840decfdb8cSStanislav Mekhanoshin int
checkVALUHazardsHelper(const MachineOperand & Def,const MachineRegisterInfo & MRI)841decfdb8cSStanislav Mekhanoshin GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
842d29f24acSMark Searles const MachineRegisterInfo &MRI) {
843d29f24acSMark Searles // Helper to check for the hazard where VMEM instructions that store more than
844b133fbb9STom Stellard // 8 bytes can have there store data over written by the next instruction.
845b133fbb9STom Stellard const SIRegisterInfo *TRI = ST.getRegisterInfo();
846b133fbb9STom Stellard
847d951d937SStanislav Mekhanoshin const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1;
848b133fbb9STom Stellard int WaitStatesNeeded = 0;
849b133fbb9STom Stellard
850a8d9d507SStanislav Mekhanoshin if (!TRI->isVectorRegister(MRI, Def.getReg()))
851d29f24acSMark Searles return WaitStatesNeeded;
8520c476111SDaniel Sanders Register Reg = Def.getReg();
853424f1f6fSCarl Ritson auto IsHazardFn = [this, Reg, TRI](const MachineInstr &MI) {
854424f1f6fSCarl Ritson int DataIdx = createsVALUHazard(MI);
855b133fbb9STom Stellard return DataIdx >= 0 &&
856424f1f6fSCarl Ritson TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
857b133fbb9STom Stellard };
858b133fbb9STom Stellard int WaitStatesNeededForDef =
859f92ed696SStanislav Mekhanoshin VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
860b133fbb9STom Stellard WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
861d29f24acSMark Searles
862d29f24acSMark Searles return WaitStatesNeeded;
863b133fbb9STom Stellard }
864d29f24acSMark Searles
checkVALUHazards(MachineInstr * VALU)865d29f24acSMark Searles int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
866f311f934SStanislav Mekhanoshin int WaitStatesNeeded = 0;
867f311f934SStanislav Mekhanoshin
868f311f934SStanislav Mekhanoshin if (ST.hasTransForwardingHazard() && !SIInstrInfo::isTRANS(*VALU)) {
869f311f934SStanislav Mekhanoshin const int TransDefWaitstates = 1;
870f311f934SStanislav Mekhanoshin
871f311f934SStanislav Mekhanoshin auto IsTransDefFn = [this, VALU](const MachineInstr &MI) {
872f311f934SStanislav Mekhanoshin if (!SIInstrInfo::isTRANS(MI))
873f311f934SStanislav Mekhanoshin return false;
874f311f934SStanislav Mekhanoshin const SIRegisterInfo *TRI = ST.getRegisterInfo();
875f311f934SStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
876f311f934SStanislav Mekhanoshin Register Def = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)->getReg();
877f311f934SStanislav Mekhanoshin
878f311f934SStanislav Mekhanoshin for (const MachineOperand &Use : VALU->explicit_uses()) {
879f311f934SStanislav Mekhanoshin if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
880f311f934SStanislav Mekhanoshin return true;
881f311f934SStanislav Mekhanoshin }
882f311f934SStanislav Mekhanoshin
883f311f934SStanislav Mekhanoshin return false;
884f311f934SStanislav Mekhanoshin };
885f311f934SStanislav Mekhanoshin
886f311f934SStanislav Mekhanoshin int WaitStatesNeededForDef =
887f311f934SStanislav Mekhanoshin TransDefWaitstates -
888f311f934SStanislav Mekhanoshin getWaitStatesSince(IsTransDefFn, TransDefWaitstates);
889f311f934SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
890f311f934SStanislav Mekhanoshin }
891f311f934SStanislav Mekhanoshin
892f311f934SStanislav Mekhanoshin if (ST.hasDstSelForwardingHazard()) {
893f311f934SStanislav Mekhanoshin const int Shift16DefWaitstates = 1;
894f311f934SStanislav Mekhanoshin
895f311f934SStanislav Mekhanoshin auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) {
896f311f934SStanislav Mekhanoshin if (!SIInstrInfo::isVALU(MI))
897f311f934SStanislav Mekhanoshin return false;
898f311f934SStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
899f311f934SStanislav Mekhanoshin if (SIInstrInfo::isSDWA(MI)) {
900f311f934SStanislav Mekhanoshin if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
901f311f934SStanislav Mekhanoshin if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
902f311f934SStanislav Mekhanoshin return false;
903f311f934SStanislav Mekhanoshin } else {
904f311f934SStanislav Mekhanoshin if ((AMDGPU::getNamedOperandIdx(MI.getOpcode(),
905f311f934SStanislav Mekhanoshin AMDGPU::OpName::op_sel) == -1) ||
906f311f934SStanislav Mekhanoshin !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
907f311f934SStanislav Mekhanoshin ->getImm() &
908f311f934SStanislav Mekhanoshin SISrcMods::DST_OP_SEL))
909f311f934SStanislav Mekhanoshin return false;
910f311f934SStanislav Mekhanoshin }
911f311f934SStanislav Mekhanoshin const SIRegisterInfo *TRI = ST.getRegisterInfo();
912f311f934SStanislav Mekhanoshin if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
913f311f934SStanislav Mekhanoshin Register Def = Dst->getReg();
914f311f934SStanislav Mekhanoshin
915f311f934SStanislav Mekhanoshin for (const MachineOperand &Use : VALU->explicit_uses()) {
916f311f934SStanislav Mekhanoshin if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
917f311f934SStanislav Mekhanoshin return true;
918f311f934SStanislav Mekhanoshin }
919f311f934SStanislav Mekhanoshin }
920f311f934SStanislav Mekhanoshin
921f311f934SStanislav Mekhanoshin return false;
922f311f934SStanislav Mekhanoshin };
923f311f934SStanislav Mekhanoshin
924f311f934SStanislav Mekhanoshin int WaitStatesNeededForDef =
925f311f934SStanislav Mekhanoshin Shift16DefWaitstates -
926f311f934SStanislav Mekhanoshin getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates);
927f311f934SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
928f311f934SStanislav Mekhanoshin }
929f311f934SStanislav Mekhanoshin
930f311f934SStanislav Mekhanoshin if (ST.hasVDecCoExecHazard()) {
931f311f934SStanislav Mekhanoshin const int VALUWriteSGPRVALUReadWaitstates = 2;
932f311f934SStanislav Mekhanoshin const int VALUWriteEXECRWLane = 4;
933f311f934SStanislav Mekhanoshin const int VALUWriteVGPRReadlaneRead = 1;
934f311f934SStanislav Mekhanoshin
935f311f934SStanislav Mekhanoshin const SIRegisterInfo *TRI = ST.getRegisterInfo();
936f311f934SStanislav Mekhanoshin const MachineRegisterInfo &MRI = MF.getRegInfo();
937f311f934SStanislav Mekhanoshin Register UseReg;
938f311f934SStanislav Mekhanoshin auto IsVALUDefSGPRFn = [&UseReg, TRI](const MachineInstr &MI) {
939f311f934SStanislav Mekhanoshin if (!SIInstrInfo::isVALU(MI))
940f311f934SStanislav Mekhanoshin return false;
941f311f934SStanislav Mekhanoshin return MI.modifiesRegister(UseReg, TRI);
942f311f934SStanislav Mekhanoshin };
943f311f934SStanislav Mekhanoshin
944f311f934SStanislav Mekhanoshin for (const MachineOperand &Use : VALU->explicit_uses()) {
945f311f934SStanislav Mekhanoshin if (!Use.isReg())
946f311f934SStanislav Mekhanoshin continue;
947f311f934SStanislav Mekhanoshin
948f311f934SStanislav Mekhanoshin UseReg = Use.getReg();
949f311f934SStanislav Mekhanoshin if (TRI->isSGPRReg(MRI, UseReg)) {
950f311f934SStanislav Mekhanoshin int WaitStatesNeededForDef =
951f311f934SStanislav Mekhanoshin VALUWriteSGPRVALUReadWaitstates -
952f311f934SStanislav Mekhanoshin getWaitStatesSince(IsVALUDefSGPRFn,
953f311f934SStanislav Mekhanoshin VALUWriteSGPRVALUReadWaitstates);
954f311f934SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
955f311f934SStanislav Mekhanoshin }
956f311f934SStanislav Mekhanoshin }
957f311f934SStanislav Mekhanoshin
958f311f934SStanislav Mekhanoshin if (VALU->readsRegister(AMDGPU::VCC, TRI)) {
959f311f934SStanislav Mekhanoshin UseReg = AMDGPU::VCC;
960f311f934SStanislav Mekhanoshin int WaitStatesNeededForDef =
961f311f934SStanislav Mekhanoshin VALUWriteSGPRVALUReadWaitstates -
962f311f934SStanislav Mekhanoshin getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteSGPRVALUReadWaitstates);
963f311f934SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
964f311f934SStanislav Mekhanoshin }
965f311f934SStanislav Mekhanoshin
966f311f934SStanislav Mekhanoshin switch (VALU->getOpcode()) {
967f311f934SStanislav Mekhanoshin case AMDGPU::V_READLANE_B32:
968f311f934SStanislav Mekhanoshin case AMDGPU::V_READFIRSTLANE_B32: {
969f311f934SStanislav Mekhanoshin MachineOperand *Src = TII.getNamedOperand(*VALU, AMDGPU::OpName::src0);
970f311f934SStanislav Mekhanoshin UseReg = Src->getReg();
971f311f934SStanislav Mekhanoshin int WaitStatesNeededForDef =
972f311f934SStanislav Mekhanoshin VALUWriteVGPRReadlaneRead -
973f311f934SStanislav Mekhanoshin getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteVGPRReadlaneRead);
974f311f934SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
975f311f934SStanislav Mekhanoshin }
976f311f934SStanislav Mekhanoshin LLVM_FALLTHROUGH;
977f311f934SStanislav Mekhanoshin case AMDGPU::V_WRITELANE_B32: {
978f311f934SStanislav Mekhanoshin UseReg = AMDGPU::EXEC;
979f311f934SStanislav Mekhanoshin int WaitStatesNeededForDef =
980f311f934SStanislav Mekhanoshin VALUWriteEXECRWLane -
981f311f934SStanislav Mekhanoshin getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteEXECRWLane);
982f311f934SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
983f311f934SStanislav Mekhanoshin break;
984f311f934SStanislav Mekhanoshin }
985f311f934SStanislav Mekhanoshin default:
986f311f934SStanislav Mekhanoshin break;
987f311f934SStanislav Mekhanoshin }
988f311f934SStanislav Mekhanoshin }
989f311f934SStanislav Mekhanoshin
990d29f24acSMark Searles // This checks for the hazard where VMEM instructions that store more than
991d29f24acSMark Searles // 8 bytes can have there store data over written by the next instruction.
992d29f24acSMark Searles if (!ST.has12DWordStoreHazard())
993f311f934SStanislav Mekhanoshin return WaitStatesNeeded;
994d29f24acSMark Searles
995d29f24acSMark Searles const MachineRegisterInfo &MRI = MF.getRegInfo();
996d29f24acSMark Searles
997d29f24acSMark Searles for (const MachineOperand &Def : VALU->defs()) {
998d29f24acSMark Searles WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
999d29f24acSMark Searles }
1000d29f24acSMark Searles
1001d29f24acSMark Searles return WaitStatesNeeded;
1002d29f24acSMark Searles }
1003d29f24acSMark Searles
checkInlineAsmHazards(MachineInstr * IA)1004d29f24acSMark Searles int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
1005d29f24acSMark Searles // This checks for hazards associated with inline asm statements.
1006d29f24acSMark Searles // Since inline asms can contain just about anything, we use this
1007d29f24acSMark Searles // to call/leverage other check*Hazard routines. Note that
1008d29f24acSMark Searles // this function doesn't attempt to address all possible inline asm
1009d29f24acSMark Searles // hazards (good luck), but is a collection of what has been
1010d29f24acSMark Searles // problematic thus far.
1011d29f24acSMark Searles
1012d29f24acSMark Searles // see checkVALUHazards()
1013d29f24acSMark Searles if (!ST.has12DWordStoreHazard())
1014d29f24acSMark Searles return 0;
1015d29f24acSMark Searles
1016d29f24acSMark Searles const MachineRegisterInfo &MRI = MF.getRegInfo();
1017d29f24acSMark Searles int WaitStatesNeeded = 0;
1018d29f24acSMark Searles
1019d29f24acSMark Searles for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
1020d29f24acSMark Searles I != E; ++I) {
1021d29f24acSMark Searles const MachineOperand &Op = IA->getOperand(I);
1022d29f24acSMark Searles if (Op.isReg() && Op.isDef()) {
1023d29f24acSMark Searles WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
1024d29f24acSMark Searles }
1025d29f24acSMark Searles }
1026d29f24acSMark Searles
1027b133fbb9STom Stellard return WaitStatesNeeded;
1028b133fbb9STom Stellard }
102904051b5fSTom Stellard
checkRWLaneHazards(MachineInstr * RWLane)103004051b5fSTom Stellard int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
103104051b5fSTom Stellard const SIInstrInfo *TII = ST.getInstrInfo();
103204051b5fSTom Stellard const SIRegisterInfo *TRI = ST.getRegisterInfo();
1033d29f24acSMark Searles const MachineRegisterInfo &MRI = MF.getRegInfo();
103404051b5fSTom Stellard
103504051b5fSTom Stellard const MachineOperand *LaneSelectOp =
103604051b5fSTom Stellard TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
103704051b5fSTom Stellard
103804051b5fSTom Stellard if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
103904051b5fSTom Stellard return 0;
104004051b5fSTom Stellard
10410c476111SDaniel Sanders Register LaneSelectReg = LaneSelectOp->getReg();
1042424f1f6fSCarl Ritson auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); };
104304051b5fSTom Stellard
104404051b5fSTom Stellard const int RWLaneWaitStates = 4;
1045f92ed696SStanislav Mekhanoshin int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
1046f92ed696SStanislav Mekhanoshin RWLaneWaitStates);
104704051b5fSTom Stellard return RWLaneWaitStates - WaitStatesSince;
104804051b5fSTom Stellard }
1049aea899e2STom Stellard
checkRFEHazards(MachineInstr * RFE)1050aea899e2STom Stellard int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
1051e4c2e9b0SMatt Arsenault if (!ST.hasRFEHazards())
1052aea899e2STom Stellard return 0;
1053aea899e2STom Stellard
1054aea899e2STom Stellard const SIInstrInfo *TII = ST.getInstrInfo();
1055aea899e2STom Stellard
1056aea899e2STom Stellard const int RFEWaitStates = 1;
1057aea899e2STom Stellard
1058424f1f6fSCarl Ritson auto IsHazardFn = [TII](const MachineInstr &MI) {
1059424f1f6fSCarl Ritson return getHWReg(TII, MI) == AMDGPU::Hwreg::ID_TRAPSTS;
1060aea899e2STom Stellard };
1061f92ed696SStanislav Mekhanoshin int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
1062aea899e2STom Stellard return RFEWaitStates - WaitStatesNeeded;
1063aea899e2STom Stellard }
1064e823d92fSMatt Arsenault
checkReadM0Hazards(MachineInstr * MI)1065e823d92fSMatt Arsenault int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
1066e823d92fSMatt Arsenault const SIInstrInfo *TII = ST.getInstrInfo();
106763f21f4cSStanislav Mekhanoshin const int ReadM0WaitStates = 1;
1068424f1f6fSCarl Ritson auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
106963f21f4cSStanislav Mekhanoshin return ReadM0WaitStates -
107063f21f4cSStanislav Mekhanoshin getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
1071e823d92fSMatt Arsenault }
107251d1415aSStanislav Mekhanoshin
fixHazards(MachineInstr * MI)10738a3d3a9aSAustin Kerbow void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
10748a3d3a9aSAustin Kerbow fixVMEMtoScalarWriteHazards(MI);
10755f581c9fSStanislav Mekhanoshin fixVcmpxPermlaneHazards(MI);
10768a3d3a9aSAustin Kerbow fixSMEMtoVectorWriteHazards(MI);
10778a3d3a9aSAustin Kerbow fixVcmpxExecWARHazard(MI);
10788a3d3a9aSAustin Kerbow fixLdsBranchVmemWARHazard(MI);
107913107c27SJay Foad if (ST.hasLdsDirect()) {
108013107c27SJay Foad fixLdsDirectVALUHazard(MI);
108113107c27SJay Foad fixLdsDirectVMEMHazard(MI);
108213107c27SJay Foad }
10839dff14beSJay Foad fixVALUPartialForwardingHazard(MI);
10849dff14beSJay Foad fixVALUTransUseHazard(MI);
1085*4874838aSPiotr Sobczak fixWMMAHazards(MI);
10868a3d3a9aSAustin Kerbow }
10878a3d3a9aSAustin Kerbow
fixVcmpxPermlaneHazards(MachineInstr * MI)10885f581c9fSStanislav Mekhanoshin bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
10895f581c9fSStanislav Mekhanoshin if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
10905f581c9fSStanislav Mekhanoshin return false;
10915f581c9fSStanislav Mekhanoshin
10925f581c9fSStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
10935c974d08SStanislav Mekhanoshin const SIRegisterInfo *TRI = ST.getRegisterInfo();
10945c974d08SStanislav Mekhanoshin auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
10955c974d08SStanislav Mekhanoshin return (TII->isVOPC(MI) ||
10965c974d08SStanislav Mekhanoshin ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
10975c974d08SStanislav Mekhanoshin MI.modifiesRegister(AMDGPU::EXEC, TRI);
10985c974d08SStanislav Mekhanoshin };
10995f581c9fSStanislav Mekhanoshin
1100424f1f6fSCarl Ritson auto IsExpiredFn = [](const MachineInstr &MI, int) {
1101424f1f6fSCarl Ritson unsigned Opc = MI.getOpcode();
1102424f1f6fSCarl Ritson return SIInstrInfo::isVALU(MI) && Opc != AMDGPU::V_NOP_e32 &&
1103424f1f6fSCarl Ritson Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa;
11045f581c9fSStanislav Mekhanoshin };
11055f581c9fSStanislav Mekhanoshin
11065f581c9fSStanislav Mekhanoshin if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
11075f581c9fSStanislav Mekhanoshin std::numeric_limits<int>::max())
11085f581c9fSStanislav Mekhanoshin return false;
11095f581c9fSStanislav Mekhanoshin
11105f581c9fSStanislav Mekhanoshin // V_NOP will be discarded by SQ.
1111380ff31dSThomas Symalla // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
11125f581c9fSStanislav Mekhanoshin // which is always a VGPR and available.
11135f581c9fSStanislav Mekhanoshin auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
11140c476111SDaniel Sanders Register Reg = Src0->getReg();
11155f581c9fSStanislav Mekhanoshin bool IsUndef = Src0->isUndef();
11165f581c9fSStanislav Mekhanoshin BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
11175f581c9fSStanislav Mekhanoshin TII->get(AMDGPU::V_MOV_B32_e32))
11185f581c9fSStanislav Mekhanoshin .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
11195f581c9fSStanislav Mekhanoshin .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);
11205f581c9fSStanislav Mekhanoshin
11215f581c9fSStanislav Mekhanoshin return true;
11225f581c9fSStanislav Mekhanoshin }
11235f581c9fSStanislav Mekhanoshin
fixVMEMtoScalarWriteHazards(MachineInstr * MI)112451d1415aSStanislav Mekhanoshin bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
112551d1415aSStanislav Mekhanoshin if (!ST.hasVMEMtoScalarWriteHazard())
112651d1415aSStanislav Mekhanoshin return false;
112751d1415aSStanislav Mekhanoshin
112851d1415aSStanislav Mekhanoshin if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
112951d1415aSStanislav Mekhanoshin return false;
113051d1415aSStanislav Mekhanoshin
113151d1415aSStanislav Mekhanoshin if (MI->getNumDefs() == 0)
113251d1415aSStanislav Mekhanoshin return false;
113351d1415aSStanislav Mekhanoshin
113451d1415aSStanislav Mekhanoshin const SIRegisterInfo *TRI = ST.getRegisterInfo();
113551d1415aSStanislav Mekhanoshin
1136424f1f6fSCarl Ritson auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
1137424f1f6fSCarl Ritson if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
1138424f1f6fSCarl Ritson !SIInstrInfo::isFLAT(I))
113951d1415aSStanislav Mekhanoshin return false;
114051d1415aSStanislav Mekhanoshin
114151d1415aSStanislav Mekhanoshin for (const MachineOperand &Def : MI->defs()) {
1142424f1f6fSCarl Ritson const MachineOperand *Op =
1143424f1f6fSCarl Ritson I.findRegisterUseOperand(Def.getReg(), false, TRI);
11448b7041a5SNicolai Haehnle if (!Op)
114551d1415aSStanislav Mekhanoshin continue;
114651d1415aSStanislav Mekhanoshin return true;
114751d1415aSStanislav Mekhanoshin }
114851d1415aSStanislav Mekhanoshin return false;
114951d1415aSStanislav Mekhanoshin };
115051d1415aSStanislav Mekhanoshin
1151424f1f6fSCarl Ritson auto IsExpiredFn = [](const MachineInstr &MI, int) {
1152424f1f6fSCarl Ritson return SIInstrInfo::isVALU(MI) ||
1153424f1f6fSCarl Ritson (MI.getOpcode() == AMDGPU::S_WAITCNT &&
1154424f1f6fSCarl Ritson !MI.getOperand(0).getImm()) ||
1155424f1f6fSCarl Ritson (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1156424f1f6fSCarl Ritson MI.getOperand(0).getImm() == 0xffe3);
115751d1415aSStanislav Mekhanoshin };
115851d1415aSStanislav Mekhanoshin
115951d1415aSStanislav Mekhanoshin if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
116051d1415aSStanislav Mekhanoshin std::numeric_limits<int>::max())
116151d1415aSStanislav Mekhanoshin return false;
116251d1415aSStanislav Mekhanoshin
116351d1415aSStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
11645bf2a9ddSCarl Ritson BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
11655bf2a9ddSCarl Ritson TII->get(AMDGPU::S_WAITCNT_DEPCTR))
11665bf2a9ddSCarl Ritson .addImm(0xffe3);
116751d1415aSStanislav Mekhanoshin return true;
116851d1415aSStanislav Mekhanoshin }
116951d1415aSStanislav Mekhanoshin
fixSMEMtoVectorWriteHazards(MachineInstr * MI)117051d1415aSStanislav Mekhanoshin bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
117151d1415aSStanislav Mekhanoshin if (!ST.hasSMEMtoVectorWriteHazard())
117251d1415aSStanislav Mekhanoshin return false;
117351d1415aSStanislav Mekhanoshin
117451d1415aSStanislav Mekhanoshin if (!SIInstrInfo::isVALU(*MI))
117551d1415aSStanislav Mekhanoshin return false;
117651d1415aSStanislav Mekhanoshin
117751d1415aSStanislav Mekhanoshin unsigned SDSTName;
117851d1415aSStanislav Mekhanoshin switch (MI->getOpcode()) {
117951d1415aSStanislav Mekhanoshin case AMDGPU::V_READLANE_B32:
118051d1415aSStanislav Mekhanoshin case AMDGPU::V_READFIRSTLANE_B32:
118151d1415aSStanislav Mekhanoshin SDSTName = AMDGPU::OpName::vdst;
118251d1415aSStanislav Mekhanoshin break;
118351d1415aSStanislav Mekhanoshin default:
118451d1415aSStanislav Mekhanoshin SDSTName = AMDGPU::OpName::sdst;
118551d1415aSStanislav Mekhanoshin break;
118651d1415aSStanislav Mekhanoshin }
118751d1415aSStanislav Mekhanoshin
118851d1415aSStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
118951d1415aSStanislav Mekhanoshin const SIRegisterInfo *TRI = ST.getRegisterInfo();
119034e95ce2SCarl Ritson const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
119151d1415aSStanislav Mekhanoshin const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
119251d1415aSStanislav Mekhanoshin if (!SDST) {
11935ddd564eSStanislav Mekhanoshin for (const auto &MO : MI->implicit_operands()) {
119451d1415aSStanislav Mekhanoshin if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
119551d1415aSStanislav Mekhanoshin SDST = &MO;
119651d1415aSStanislav Mekhanoshin break;
119751d1415aSStanislav Mekhanoshin }
119851d1415aSStanislav Mekhanoshin }
119951d1415aSStanislav Mekhanoshin }
120051d1415aSStanislav Mekhanoshin
120151d1415aSStanislav Mekhanoshin if (!SDST)
120251d1415aSStanislav Mekhanoshin return false;
120351d1415aSStanislav Mekhanoshin
12040c476111SDaniel Sanders const Register SDSTReg = SDST->getReg();
1205424f1f6fSCarl Ritson auto IsHazardFn = [SDSTReg, TRI](const MachineInstr &I) {
1206424f1f6fSCarl Ritson return SIInstrInfo::isSMRD(I) && I.readsRegister(SDSTReg, TRI);
120751d1415aSStanislav Mekhanoshin };
120851d1415aSStanislav Mekhanoshin
1209424f1f6fSCarl Ritson auto IsExpiredFn = [TII, IV](const MachineInstr &MI, int) {
1210424f1f6fSCarl Ritson if (TII->isSALU(MI)) {
1211424f1f6fSCarl Ritson switch (MI.getOpcode()) {
121251d1415aSStanislav Mekhanoshin case AMDGPU::S_SETVSKIP:
121351d1415aSStanislav Mekhanoshin case AMDGPU::S_VERSION:
121451d1415aSStanislav Mekhanoshin case AMDGPU::S_WAITCNT_VSCNT:
121551d1415aSStanislav Mekhanoshin case AMDGPU::S_WAITCNT_VMCNT:
121651d1415aSStanislav Mekhanoshin case AMDGPU::S_WAITCNT_EXPCNT:
121734e95ce2SCarl Ritson // These instructions cannot not mitigate the hazard.
121851d1415aSStanislav Mekhanoshin return false;
121934e95ce2SCarl Ritson case AMDGPU::S_WAITCNT_LGKMCNT:
122034e95ce2SCarl Ritson // Reducing lgkmcnt count to 0 always mitigates the hazard.
1221424f1f6fSCarl Ritson return (MI.getOperand(1).getImm() == 0) &&
1222424f1f6fSCarl Ritson (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
122334e95ce2SCarl Ritson case AMDGPU::S_WAITCNT: {
1224424f1f6fSCarl Ritson const int64_t Imm = MI.getOperand(0).getImm();
122534e95ce2SCarl Ritson AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
122634e95ce2SCarl Ritson return (Decoded.LgkmCnt == 0);
122734e95ce2SCarl Ritson }
122851d1415aSStanislav Mekhanoshin default:
122934e95ce2SCarl Ritson // SOPP instructions cannot mitigate the hazard.
1230424f1f6fSCarl Ritson if (TII->isSOPP(MI))
123134e95ce2SCarl Ritson return false;
123234e95ce2SCarl Ritson // At this point the SALU can be assumed to mitigate the hazard
123334e95ce2SCarl Ritson // because either:
123434e95ce2SCarl Ritson // (a) it is independent of the at risk SMEM (breaking chain),
123534e95ce2SCarl Ritson // or
123634e95ce2SCarl Ritson // (b) it is dependent on the SMEM, in which case an appropriate
123734e95ce2SCarl Ritson // s_waitcnt lgkmcnt _must_ exist between it and the at risk
123834e95ce2SCarl Ritson // SMEM instruction.
123951d1415aSStanislav Mekhanoshin return true;
124051d1415aSStanislav Mekhanoshin }
124151d1415aSStanislav Mekhanoshin }
124251d1415aSStanislav Mekhanoshin return false;
124351d1415aSStanislav Mekhanoshin };
124451d1415aSStanislav Mekhanoshin
124551d1415aSStanislav Mekhanoshin if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
124651d1415aSStanislav Mekhanoshin std::numeric_limits<int>::max())
124751d1415aSStanislav Mekhanoshin return false;
124851d1415aSStanislav Mekhanoshin
124951d1415aSStanislav Mekhanoshin BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
125051d1415aSStanislav Mekhanoshin TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
125151d1415aSStanislav Mekhanoshin .addImm(0);
125251d1415aSStanislav Mekhanoshin return true;
125351d1415aSStanislav Mekhanoshin }
125451d1415aSStanislav Mekhanoshin
fixVcmpxExecWARHazard(MachineInstr * MI)125551d1415aSStanislav Mekhanoshin bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
125651d1415aSStanislav Mekhanoshin if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
125751d1415aSStanislav Mekhanoshin return false;
125851d1415aSStanislav Mekhanoshin
125951d1415aSStanislav Mekhanoshin const SIRegisterInfo *TRI = ST.getRegisterInfo();
126051d1415aSStanislav Mekhanoshin if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
126151d1415aSStanislav Mekhanoshin return false;
126251d1415aSStanislav Mekhanoshin
1263424f1f6fSCarl Ritson auto IsHazardFn = [TRI](const MachineInstr &I) {
1264424f1f6fSCarl Ritson if (SIInstrInfo::isVALU(I))
126551d1415aSStanislav Mekhanoshin return false;
1266424f1f6fSCarl Ritson return I.readsRegister(AMDGPU::EXEC, TRI);
126751d1415aSStanislav Mekhanoshin };
126851d1415aSStanislav Mekhanoshin
126951d1415aSStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
1270424f1f6fSCarl Ritson auto IsExpiredFn = [TII, TRI](const MachineInstr &MI, int) {
1271424f1f6fSCarl Ritson if (SIInstrInfo::isVALU(MI)) {
1272424f1f6fSCarl Ritson if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
127351d1415aSStanislav Mekhanoshin return true;
1274424f1f6fSCarl Ritson for (auto MO : MI.implicit_operands())
127551d1415aSStanislav Mekhanoshin if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
127651d1415aSStanislav Mekhanoshin return true;
127751d1415aSStanislav Mekhanoshin }
1278424f1f6fSCarl Ritson if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1279424f1f6fSCarl Ritson (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
128051d1415aSStanislav Mekhanoshin return true;
128151d1415aSStanislav Mekhanoshin return false;
128251d1415aSStanislav Mekhanoshin };
128351d1415aSStanislav Mekhanoshin
128451d1415aSStanislav Mekhanoshin if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
128551d1415aSStanislav Mekhanoshin std::numeric_limits<int>::max())
128651d1415aSStanislav Mekhanoshin return false;
128751d1415aSStanislav Mekhanoshin
128851d1415aSStanislav Mekhanoshin BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
128951d1415aSStanislav Mekhanoshin TII->get(AMDGPU::S_WAITCNT_DEPCTR))
129051d1415aSStanislav Mekhanoshin .addImm(0xfffe);
129151d1415aSStanislav Mekhanoshin return true;
129251d1415aSStanislav Mekhanoshin }
129351d1415aSStanislav Mekhanoshin
shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction & MF,const GCNSubtarget & ST)1294e0c382a9SPiotr Sobczak static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
1295e0c382a9SPiotr Sobczak const GCNSubtarget &ST) {
129651d1415aSStanislav Mekhanoshin if (!ST.hasLdsBranchVmemWARHazard())
129751d1415aSStanislav Mekhanoshin return false;
129851d1415aSStanislav Mekhanoshin
1299e0c382a9SPiotr Sobczak // Check if the necessary condition for the hazard is met: both LDS and VMEM
1300e0c382a9SPiotr Sobczak // instructions need to appear in the same function.
1301e0c382a9SPiotr Sobczak bool HasLds = false;
1302e0c382a9SPiotr Sobczak bool HasVmem = false;
1303e0c382a9SPiotr Sobczak for (auto &MBB : MF) {
1304e0c382a9SPiotr Sobczak for (auto &MI : MBB) {
1305e0c382a9SPiotr Sobczak HasLds |= SIInstrInfo::isDS(MI);
1306e0c382a9SPiotr Sobczak HasVmem |=
1307e0c382a9SPiotr Sobczak SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
1308e0c382a9SPiotr Sobczak if (HasLds && HasVmem)
1309e0c382a9SPiotr Sobczak return true;
1310e0c382a9SPiotr Sobczak }
1311e0c382a9SPiotr Sobczak }
1312e0c382a9SPiotr Sobczak return false;
1313e0c382a9SPiotr Sobczak }
1314e0c382a9SPiotr Sobczak
fixLdsBranchVmemWARHazard(MachineInstr * MI)1315e0c382a9SPiotr Sobczak bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
1316e0c382a9SPiotr Sobczak if (!RunLdsBranchVmemWARHazardFixup)
1317e0c382a9SPiotr Sobczak return false;
1318e0c382a9SPiotr Sobczak
1319e0c382a9SPiotr Sobczak assert(ST.hasLdsBranchVmemWARHazard());
1320e0c382a9SPiotr Sobczak
1321424f1f6fSCarl Ritson auto IsHazardInst = [](const MachineInstr &MI) {
1322424f1f6fSCarl Ritson if (SIInstrInfo::isDS(MI))
132351d1415aSStanislav Mekhanoshin return 1;
1324424f1f6fSCarl Ritson if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
132551d1415aSStanislav Mekhanoshin return 2;
132651d1415aSStanislav Mekhanoshin return 0;
132751d1415aSStanislav Mekhanoshin };
132851d1415aSStanislav Mekhanoshin
1329424f1f6fSCarl Ritson auto InstType = IsHazardInst(*MI);
133051d1415aSStanislav Mekhanoshin if (!InstType)
133151d1415aSStanislav Mekhanoshin return false;
133251d1415aSStanislav Mekhanoshin
1333424f1f6fSCarl Ritson auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) {
1334424f1f6fSCarl Ritson return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1335424f1f6fSCarl Ritson I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1336424f1f6fSCarl Ritson !I.getOperand(1).getImm());
133751d1415aSStanislav Mekhanoshin };
133851d1415aSStanislav Mekhanoshin
1339424f1f6fSCarl Ritson auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) {
1340424f1f6fSCarl Ritson if (!I.isBranch())
134151d1415aSStanislav Mekhanoshin return false;
134251d1415aSStanislav Mekhanoshin
1343424f1f6fSCarl Ritson auto IsHazardFn = [InstType, IsHazardInst](const MachineInstr &I) {
134451d1415aSStanislav Mekhanoshin auto InstType2 = IsHazardInst(I);
134551d1415aSStanislav Mekhanoshin return InstType2 && InstType != InstType2;
134651d1415aSStanislav Mekhanoshin };
134751d1415aSStanislav Mekhanoshin
1348424f1f6fSCarl Ritson auto IsExpiredFn = [InstType, &IsHazardInst](const MachineInstr &I, int) {
134951d1415aSStanislav Mekhanoshin auto InstType2 = IsHazardInst(I);
135051d1415aSStanislav Mekhanoshin if (InstType == InstType2)
135151d1415aSStanislav Mekhanoshin return true;
135251d1415aSStanislav Mekhanoshin
1353424f1f6fSCarl Ritson return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1354424f1f6fSCarl Ritson I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1355424f1f6fSCarl Ritson !I.getOperand(1).getImm();
135651d1415aSStanislav Mekhanoshin };
135751d1415aSStanislav Mekhanoshin
1358424f1f6fSCarl Ritson return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) !=
135951d1415aSStanislav Mekhanoshin std::numeric_limits<int>::max();
136051d1415aSStanislav Mekhanoshin };
136151d1415aSStanislav Mekhanoshin
136251d1415aSStanislav Mekhanoshin if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
136351d1415aSStanislav Mekhanoshin std::numeric_limits<int>::max())
136451d1415aSStanislav Mekhanoshin return false;
136551d1415aSStanislav Mekhanoshin
136651d1415aSStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
136751d1415aSStanislav Mekhanoshin BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
136851d1415aSStanislav Mekhanoshin TII->get(AMDGPU::S_WAITCNT_VSCNT))
136951d1415aSStanislav Mekhanoshin .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
137051d1415aSStanislav Mekhanoshin .addImm(0);
137151d1415aSStanislav Mekhanoshin
137251d1415aSStanislav Mekhanoshin return true;
137351d1415aSStanislav Mekhanoshin }
137451d1415aSStanislav Mekhanoshin
fixLdsDirectVALUHazard(MachineInstr * MI)137513107c27SJay Foad bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
137613107c27SJay Foad if (!SIInstrInfo::isLDSDIR(*MI))
137713107c27SJay Foad return false;
137813107c27SJay Foad
137913107c27SJay Foad const int NoHazardWaitStates = 15;
138013107c27SJay Foad const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
138113107c27SJay Foad const Register VDSTReg = VDST->getReg();
138213107c27SJay Foad
138313107c27SJay Foad bool VisitedTrans = false;
138413107c27SJay Foad auto IsHazardFn = [this, VDSTReg, &VisitedTrans](const MachineInstr &I) {
138513107c27SJay Foad if (!SIInstrInfo::isVALU(I))
138613107c27SJay Foad return false;
138713107c27SJay Foad VisitedTrans = VisitedTrans || SIInstrInfo::isTRANS(I);
138813107c27SJay Foad // Cover both WAR and WAW
138913107c27SJay Foad return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
139013107c27SJay Foad };
139113107c27SJay Foad auto IsExpiredFn = [&](const MachineInstr &I, int WaitStates) {
139213107c27SJay Foad if (WaitStates >= NoHazardWaitStates)
139313107c27SJay Foad return true;
139413107c27SJay Foad // Instructions which cause va_vdst==0 expire hazard
139513107c27SJay Foad return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
139613107c27SJay Foad SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
139713107c27SJay Foad };
139813107c27SJay Foad auto GetWaitStatesFn = [](const MachineInstr &MI) {
139913107c27SJay Foad return SIInstrInfo::isVALU(MI) ? 1 : 0;
140013107c27SJay Foad };
140113107c27SJay Foad
140213107c27SJay Foad DenseSet<const MachineBasicBlock *> Visited;
140313107c27SJay Foad auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(),
140413107c27SJay Foad std::next(MI->getReverseIterator()), 0,
140513107c27SJay Foad IsExpiredFn, Visited, GetWaitStatesFn);
140613107c27SJay Foad
140713107c27SJay Foad // Transcendentals can execute in parallel to other VALUs.
140813107c27SJay Foad // This makes va_vdst count unusable with a mixture of VALU and TRANS.
140913107c27SJay Foad if (VisitedTrans)
141013107c27SJay Foad Count = 0;
141113107c27SJay Foad
141213107c27SJay Foad MachineOperand *WaitVdstOp =
141313107c27SJay Foad TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst);
141413107c27SJay Foad WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates));
141513107c27SJay Foad
141613107c27SJay Foad return true;
141713107c27SJay Foad }
141813107c27SJay Foad
fixLdsDirectVMEMHazard(MachineInstr * MI)141913107c27SJay Foad bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
142013107c27SJay Foad if (!SIInstrInfo::isLDSDIR(*MI))
142113107c27SJay Foad return false;
142213107c27SJay Foad
142313107c27SJay Foad const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
142413107c27SJay Foad const Register VDSTReg = VDST->getReg();
142513107c27SJay Foad
142613107c27SJay Foad auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
142713107c27SJay Foad if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
142813107c27SJay Foad !SIInstrInfo::isDS(I))
142913107c27SJay Foad return false;
143013107c27SJay Foad return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
143113107c27SJay Foad };
143213107c27SJay Foad auto IsExpiredFn = [](const MachineInstr &I, int) {
143313107c27SJay Foad return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
143413107c27SJay Foad (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
143513107c27SJay Foad (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
143613107c27SJay Foad I.getOperand(0).getImm() == 0xffe3);
143713107c27SJay Foad };
143813107c27SJay Foad
143913107c27SJay Foad if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
144013107c27SJay Foad std::numeric_limits<int>::max())
144113107c27SJay Foad return false;
144213107c27SJay Foad
144313107c27SJay Foad BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
144413107c27SJay Foad TII.get(AMDGPU::S_WAITCNT_DEPCTR))
144513107c27SJay Foad .addImm(0xffe3);
144613107c27SJay Foad
144713107c27SJay Foad return true;
144813107c27SJay Foad }
144913107c27SJay Foad
fixVALUPartialForwardingHazard(MachineInstr * MI)14509dff14beSJay Foad bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
14519dff14beSJay Foad if (!ST.isWave64())
14529dff14beSJay Foad return false;
14539dff14beSJay Foad if (!ST.hasVALUPartialForwardingHazard())
14549dff14beSJay Foad return false;
14559dff14beSJay Foad if (!SIInstrInfo::isVALU(*MI))
14569dff14beSJay Foad return false;
14579dff14beSJay Foad
14589dff14beSJay Foad SmallSetVector<Register, 4> SrcVGPRs;
14599dff14beSJay Foad
14609dff14beSJay Foad for (const MachineOperand &Use : MI->explicit_uses()) {
14619dff14beSJay Foad if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
14629dff14beSJay Foad SrcVGPRs.insert(Use.getReg());
14639dff14beSJay Foad }
14649dff14beSJay Foad
14659dff14beSJay Foad // Only applies with >= 2 unique VGPR sources
14669dff14beSJay Foad if (SrcVGPRs.size() <= 1)
14679dff14beSJay Foad return false;
14689dff14beSJay Foad
14699dff14beSJay Foad // Look for the following pattern:
14709dff14beSJay Foad // Va <- VALU [PreExecPos]
14719dff14beSJay Foad // intv1
14729dff14beSJay Foad // Exec <- SALU [ExecPos]
14739dff14beSJay Foad // intv2
14749dff14beSJay Foad // Vb <- VALU [PostExecPos]
14759dff14beSJay Foad // intv3
14769dff14beSJay Foad // MI Va, Vb (WaitState = 0)
14779dff14beSJay Foad //
14789dff14beSJay Foad // Where:
14799dff14beSJay Foad // intv1 + intv2 <= 2 VALUs
14809dff14beSJay Foad // intv3 <= 4 VALUs
14819dff14beSJay Foad //
14829dff14beSJay Foad // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
14839dff14beSJay Foad
14849dff14beSJay Foad const int Intv1plus2MaxVALUs = 2;
14859dff14beSJay Foad const int Intv3MaxVALUs = 4;
14869dff14beSJay Foad const int IntvMaxVALUs = 6;
14879dff14beSJay Foad const int NoHazardVALUWaitStates = IntvMaxVALUs + 2;
14889dff14beSJay Foad
14899dff14beSJay Foad struct StateType {
14909dff14beSJay Foad SmallDenseMap<Register, int, 4> DefPos;
14919dff14beSJay Foad int ExecPos = std::numeric_limits<int>::max();
14929dff14beSJay Foad int VALUs = 0;
14939dff14beSJay Foad };
14949dff14beSJay Foad
14959dff14beSJay Foad StateType State;
14969dff14beSJay Foad
14979dff14beSJay Foad // This overloads expiry testing with all the hazard detection
14989dff14beSJay Foad auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
14999dff14beSJay Foad // Too many VALU states have passed
15009dff14beSJay Foad if (State.VALUs > NoHazardVALUWaitStates)
15019dff14beSJay Foad return HazardExpired;
15029dff14beSJay Foad
15039dff14beSJay Foad // Instructions which cause va_vdst==0 expire hazard
15049dff14beSJay Foad if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
15059dff14beSJay Foad SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
15069dff14beSJay Foad (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
15079dff14beSJay Foad I.getOperand(0).getImm() == 0x0fff))
15089dff14beSJay Foad return HazardExpired;
15099dff14beSJay Foad
15109dff14beSJay Foad // Track registers writes
15119dff14beSJay Foad bool Changed = false;
15129dff14beSJay Foad if (SIInstrInfo::isVALU(I)) {
15139dff14beSJay Foad for (Register Src : SrcVGPRs) {
15149dff14beSJay Foad if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) {
15159dff14beSJay Foad State.DefPos[Src] = State.VALUs;
15169dff14beSJay Foad Changed = true;
15179dff14beSJay Foad }
15189dff14beSJay Foad }
15199dff14beSJay Foad } else if (SIInstrInfo::isSALU(I)) {
15209dff14beSJay Foad if (State.ExecPos == std::numeric_limits<int>::max()) {
15219dff14beSJay Foad if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) {
15229dff14beSJay Foad State.ExecPos = State.VALUs;
15239dff14beSJay Foad Changed = true;
15249dff14beSJay Foad }
15259dff14beSJay Foad }
15269dff14beSJay Foad }
15279dff14beSJay Foad
15289dff14beSJay Foad // Early expiration: too many VALUs in intv3
15299dff14beSJay Foad if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty())
15309dff14beSJay Foad return HazardExpired;
15319dff14beSJay Foad
15329dff14beSJay Foad // Only evaluate state if something changed
15339dff14beSJay Foad if (!Changed)
15349dff14beSJay Foad return NoHazardFound;
15359dff14beSJay Foad
15369dff14beSJay Foad // Determine positions of VALUs pre/post exec change
15379dff14beSJay Foad if (State.ExecPos == std::numeric_limits<int>::max())
15389dff14beSJay Foad return NoHazardFound;
15399dff14beSJay Foad
15409dff14beSJay Foad int PreExecPos = std::numeric_limits<int>::max();
15419dff14beSJay Foad int PostExecPos = std::numeric_limits<int>::max();
15429dff14beSJay Foad
15439dff14beSJay Foad for (auto Entry : State.DefPos) {
15449dff14beSJay Foad int DefVALUs = Entry.second;
15459dff14beSJay Foad if (DefVALUs != std::numeric_limits<int>::max()) {
15469dff14beSJay Foad if (DefVALUs >= State.ExecPos)
15479dff14beSJay Foad PreExecPos = std::min(PreExecPos, DefVALUs);
15489dff14beSJay Foad else if (DefVALUs < State.ExecPos)
15499dff14beSJay Foad PostExecPos = std::min(PostExecPos, DefVALUs);
15509dff14beSJay Foad }
15519dff14beSJay Foad }
15529dff14beSJay Foad
15539dff14beSJay Foad // Need a VALUs post exec change
15549dff14beSJay Foad if (PostExecPos == std::numeric_limits<int>::max())
15559dff14beSJay Foad return NoHazardFound;
15569dff14beSJay Foad
15579dff14beSJay Foad // Too many VALUs in intv3?
15589dff14beSJay Foad int Intv3VALUs = PostExecPos;
15599dff14beSJay Foad if (Intv3VALUs > Intv3MaxVALUs)
15609dff14beSJay Foad return HazardExpired;
15619dff14beSJay Foad
15629dff14beSJay Foad // Too many VALUs in intv2?
15639dff14beSJay Foad int Intv2VALUs = (State.ExecPos - PostExecPos) - 1;
15649dff14beSJay Foad if (Intv2VALUs > Intv1plus2MaxVALUs)
15659dff14beSJay Foad return HazardExpired;
15669dff14beSJay Foad
15679dff14beSJay Foad // Need a VALUs pre exec change
15689dff14beSJay Foad if (PreExecPos == std::numeric_limits<int>::max())
15699dff14beSJay Foad return NoHazardFound;
15709dff14beSJay Foad
15719dff14beSJay Foad // Too many VALUs in intv1?
15729dff14beSJay Foad int Intv1VALUs = PreExecPos - State.ExecPos;
15739dff14beSJay Foad if (Intv1VALUs > Intv1plus2MaxVALUs)
15749dff14beSJay Foad return HazardExpired;
15759dff14beSJay Foad
15769dff14beSJay Foad // Too many VALUs in intv1 + intv2
15779dff14beSJay Foad if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs)
15789dff14beSJay Foad return HazardExpired;
15799dff14beSJay Foad
15809dff14beSJay Foad return HazardFound;
15819dff14beSJay Foad };
15829dff14beSJay Foad auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
15839dff14beSJay Foad if (SIInstrInfo::isVALU(MI))
15849dff14beSJay Foad State.VALUs += 1;
15859dff14beSJay Foad };
15869dff14beSJay Foad
15879dff14beSJay Foad DenseSet<const MachineBasicBlock *> Visited;
15889dff14beSJay Foad if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
15899dff14beSJay Foad std::next(MI->getReverseIterator()), Visited))
15909dff14beSJay Foad return false;
15919dff14beSJay Foad
15929dff14beSJay Foad BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
15939dff14beSJay Foad TII.get(AMDGPU::S_WAITCNT_DEPCTR))
15949dff14beSJay Foad .addImm(0x0fff);
15959dff14beSJay Foad
15969dff14beSJay Foad return true;
15979dff14beSJay Foad }
15989dff14beSJay Foad
fixVALUTransUseHazard(MachineInstr * MI)15999dff14beSJay Foad bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
16009dff14beSJay Foad if (!ST.hasVALUTransUseHazard())
16019dff14beSJay Foad return false;
16029dff14beSJay Foad if (!SIInstrInfo::isVALU(*MI))
16039dff14beSJay Foad return false;
16049dff14beSJay Foad
16059dff14beSJay Foad SmallSet<Register, 4> SrcVGPRs;
16069dff14beSJay Foad
16079dff14beSJay Foad for (const MachineOperand &Use : MI->explicit_uses()) {
16089dff14beSJay Foad if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
16099dff14beSJay Foad SrcVGPRs.insert(Use.getReg());
16109dff14beSJay Foad }
16119dff14beSJay Foad
16129dff14beSJay Foad // Look for the following pattern:
16139dff14beSJay Foad // Va <- TRANS VALU
16149dff14beSJay Foad // intv
16159dff14beSJay Foad // MI Va (WaitState = 0)
16169dff14beSJay Foad //
16179dff14beSJay Foad // Where:
16189dff14beSJay Foad // intv <= 5 VALUs / 1 TRANS
16199dff14beSJay Foad //
16209dff14beSJay Foad // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
16219dff14beSJay Foad
16229dff14beSJay Foad const int IntvMaxVALUs = 5;
16239dff14beSJay Foad const int IntvMaxTRANS = 1;
16249dff14beSJay Foad
16259dff14beSJay Foad struct StateType {
16269dff14beSJay Foad int VALUs = 0;
16279dff14beSJay Foad int TRANS = 0;
16289dff14beSJay Foad };
16299dff14beSJay Foad
16309dff14beSJay Foad StateType State;
16319dff14beSJay Foad
16329dff14beSJay Foad // This overloads expiry testing with all the hazard detection
16339dff14beSJay Foad auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
16349dff14beSJay Foad // Too many VALU states have passed
16359dff14beSJay Foad if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS)
16369dff14beSJay Foad return HazardExpired;
16379dff14beSJay Foad
16389dff14beSJay Foad // Instructions which cause va_vdst==0 expire hazard
16399dff14beSJay Foad if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
16409dff14beSJay Foad SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
16419dff14beSJay Foad (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
16429dff14beSJay Foad I.getOperand(0).getImm() == 0x0fff))
16439dff14beSJay Foad return HazardExpired;
16449dff14beSJay Foad
16459dff14beSJay Foad // Track registers writes
16469dff14beSJay Foad if (SIInstrInfo::isTRANS(I)) {
16479dff14beSJay Foad for (Register Src : SrcVGPRs) {
16489dff14beSJay Foad if (I.modifiesRegister(Src, &TRI)) {
16499dff14beSJay Foad return HazardFound;
16509dff14beSJay Foad }
16519dff14beSJay Foad }
16529dff14beSJay Foad }
16539dff14beSJay Foad
16549dff14beSJay Foad return NoHazardFound;
16559dff14beSJay Foad };
16569dff14beSJay Foad auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
16579dff14beSJay Foad if (SIInstrInfo::isVALU(MI))
16589dff14beSJay Foad State.VALUs += 1;
16599dff14beSJay Foad if (SIInstrInfo::isTRANS(MI))
16609dff14beSJay Foad State.TRANS += 1;
16619dff14beSJay Foad };
16629dff14beSJay Foad
16639dff14beSJay Foad DenseSet<const MachineBasicBlock *> Visited;
16649dff14beSJay Foad if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
16659dff14beSJay Foad std::next(MI->getReverseIterator()), Visited))
16669dff14beSJay Foad return false;
16679dff14beSJay Foad
16689dff14beSJay Foad // Hazard is observed - insert a wait on va_dst counter to ensure hazard is
16699dff14beSJay Foad // avoided (mask 0x0fff achieves this).
16709dff14beSJay Foad BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
16719dff14beSJay Foad TII.get(AMDGPU::S_WAITCNT_DEPCTR))
16729dff14beSJay Foad .addImm(0x0fff);
16739dff14beSJay Foad
16749dff14beSJay Foad return true;
16759dff14beSJay Foad }
16769dff14beSJay Foad
fixWMMAHazards(MachineInstr * MI)1677*4874838aSPiotr Sobczak bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
1678*4874838aSPiotr Sobczak if (!SIInstrInfo::isWMMA(*MI))
1679*4874838aSPiotr Sobczak return false;
1680*4874838aSPiotr Sobczak
1681*4874838aSPiotr Sobczak const SIInstrInfo *TII = ST.getInstrInfo();
1682*4874838aSPiotr Sobczak const SIRegisterInfo *TRI = ST.getRegisterInfo();
1683*4874838aSPiotr Sobczak
1684*4874838aSPiotr Sobczak auto IsHazardFn = [MI, TII, TRI](const MachineInstr &I) {
1685*4874838aSPiotr Sobczak if (!SIInstrInfo::isWMMA(I))
1686*4874838aSPiotr Sobczak return false;
1687*4874838aSPiotr Sobczak
1688*4874838aSPiotr Sobczak // Src0 or Src1 of the current wmma instruction overlaps with the dest of
1689*4874838aSPiotr Sobczak // the previous wmma.
1690*4874838aSPiotr Sobczak const Register CurSrc0Reg =
1691*4874838aSPiotr Sobczak TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg();
1692*4874838aSPiotr Sobczak const Register CurSrc1Reg =
1693*4874838aSPiotr Sobczak TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
1694*4874838aSPiotr Sobczak
1695*4874838aSPiotr Sobczak const Register PrevDstReg =
1696*4874838aSPiotr Sobczak TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg();
1697*4874838aSPiotr Sobczak
1698*4874838aSPiotr Sobczak if (TRI->regsOverlap(PrevDstReg, CurSrc0Reg) ||
1699*4874838aSPiotr Sobczak TRI->regsOverlap(PrevDstReg, CurSrc1Reg)) {
1700*4874838aSPiotr Sobczak return true;
1701*4874838aSPiotr Sobczak }
1702*4874838aSPiotr Sobczak
1703*4874838aSPiotr Sobczak // Src2 of the current wmma instruction overlaps with the dest of the
1704*4874838aSPiotr Sobczak // previous wmma.
1705*4874838aSPiotr Sobczak const MachineOperand *Src2 =
1706*4874838aSPiotr Sobczak TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
1707*4874838aSPiotr Sobczak const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register();
1708*4874838aSPiotr Sobczak
1709*4874838aSPiotr Sobczak if (CurSrc2Reg != AMDGPU::NoRegister &&
1710*4874838aSPiotr Sobczak TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) {
1711*4874838aSPiotr Sobczak
1712*4874838aSPiotr Sobczak const MachineOperand *Src2Mods =
1713*4874838aSPiotr Sobczak TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
1714*4874838aSPiotr Sobczak const bool NoSrc2Mods =
1715*4874838aSPiotr Sobczak (Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0;
1716*4874838aSPiotr Sobczak // Exception: there is no hazard if the wmma instructions are of the same
1717*4874838aSPiotr Sobczak // type and there is no input modifier on src2 of the current instruction.
1718*4874838aSPiotr Sobczak return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) ==
1719*4874838aSPiotr Sobczak TII->pseudoToMCOpcode(MI->getOpcode())));
1720*4874838aSPiotr Sobczak }
1721*4874838aSPiotr Sobczak
1722*4874838aSPiotr Sobczak return false;
1723*4874838aSPiotr Sobczak };
1724*4874838aSPiotr Sobczak
1725*4874838aSPiotr Sobczak auto IsExpiredFn = [](const MachineInstr &I, int) {
1726*4874838aSPiotr Sobczak return SIInstrInfo::isVALU(I);
1727*4874838aSPiotr Sobczak };
1728*4874838aSPiotr Sobczak
1729*4874838aSPiotr Sobczak if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1730*4874838aSPiotr Sobczak std::numeric_limits<int>::max())
1731*4874838aSPiotr Sobczak return false;
1732*4874838aSPiotr Sobczak
1733*4874838aSPiotr Sobczak BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
1734*4874838aSPiotr Sobczak
1735*4874838aSPiotr Sobczak return true;
1736*4874838aSPiotr Sobczak }
1737*4874838aSPiotr Sobczak
checkNSAtoVMEMHazard(MachineInstr * MI)173851d1415aSStanislav Mekhanoshin int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
173951d1415aSStanislav Mekhanoshin int NSAtoVMEMWaitStates = 1;
174051d1415aSStanislav Mekhanoshin
174151d1415aSStanislav Mekhanoshin if (!ST.hasNSAtoVMEMBug())
174251d1415aSStanislav Mekhanoshin return 0;
174351d1415aSStanislav Mekhanoshin
174451d1415aSStanislav Mekhanoshin if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
174551d1415aSStanislav Mekhanoshin return 0;
174651d1415aSStanislav Mekhanoshin
174751d1415aSStanislav Mekhanoshin const SIInstrInfo *TII = ST.getInstrInfo();
174851d1415aSStanislav Mekhanoshin const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
174951d1415aSStanislav Mekhanoshin if (!Offset || (Offset->getImm() & 6) == 0)
175051d1415aSStanislav Mekhanoshin return 0;
175151d1415aSStanislav Mekhanoshin
1752424f1f6fSCarl Ritson auto IsHazardFn = [TII](const MachineInstr &I) {
1753424f1f6fSCarl Ritson if (!SIInstrInfo::isMIMG(I))
175451d1415aSStanislav Mekhanoshin return false;
1755424f1f6fSCarl Ritson const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I.getOpcode());
175651d1415aSStanislav Mekhanoshin return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
1757424f1f6fSCarl Ritson TII->getInstSizeInBytes(I) >= 16;
175851d1415aSStanislav Mekhanoshin };
175951d1415aSStanislav Mekhanoshin
176051d1415aSStanislav Mekhanoshin return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
176151d1415aSStanislav Mekhanoshin }
1762bdf7f81bSStanislav Mekhanoshin
checkFPAtomicToDenormModeHazard(MachineInstr * MI)1763bdf7f81bSStanislav Mekhanoshin int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
1764bdf7f81bSStanislav Mekhanoshin int FPAtomicToDenormModeWaitStates = 3;
1765bdf7f81bSStanislav Mekhanoshin
1766bdf7f81bSStanislav Mekhanoshin if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
1767bdf7f81bSStanislav Mekhanoshin return 0;
1768bdf7f81bSStanislav Mekhanoshin
1769424f1f6fSCarl Ritson auto IsHazardFn = [](const MachineInstr &I) {
1770424f1f6fSCarl Ritson if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
1771bdf7f81bSStanislav Mekhanoshin return false;
1772424f1f6fSCarl Ritson return SIInstrInfo::isFPAtomic(I);
1773bdf7f81bSStanislav Mekhanoshin };
1774bdf7f81bSStanislav Mekhanoshin
1775424f1f6fSCarl Ritson auto IsExpiredFn = [](const MachineInstr &MI, int WaitStates) {
1776424f1f6fSCarl Ritson if (WaitStates >= 3 || SIInstrInfo::isVALU(MI))
1777bdf7f81bSStanislav Mekhanoshin return true;
1778bdf7f81bSStanislav Mekhanoshin
1779424f1f6fSCarl Ritson switch (MI.getOpcode()) {
1780bdf7f81bSStanislav Mekhanoshin case AMDGPU::S_WAITCNT:
1781bdf7f81bSStanislav Mekhanoshin case AMDGPU::S_WAITCNT_VSCNT:
1782bdf7f81bSStanislav Mekhanoshin case AMDGPU::S_WAITCNT_VMCNT:
1783bdf7f81bSStanislav Mekhanoshin case AMDGPU::S_WAITCNT_EXPCNT:
1784bdf7f81bSStanislav Mekhanoshin case AMDGPU::S_WAITCNT_LGKMCNT:
17859f69c1bcSJay Foad case AMDGPU::S_WAIT_IDLE:
1786bdf7f81bSStanislav Mekhanoshin return true;
1787bdf7f81bSStanislav Mekhanoshin default:
1788bdf7f81bSStanislav Mekhanoshin break;
1789bdf7f81bSStanislav Mekhanoshin }
1790bdf7f81bSStanislav Mekhanoshin
1791bdf7f81bSStanislav Mekhanoshin return false;
1792bdf7f81bSStanislav Mekhanoshin };
1793bdf7f81bSStanislav Mekhanoshin
1794bdf7f81bSStanislav Mekhanoshin return FPAtomicToDenormModeWaitStates -
1795bdf7f81bSStanislav Mekhanoshin ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
1796bdf7f81bSStanislav Mekhanoshin }
17977d2019bbSStanislav Mekhanoshin
checkMAIHazards(MachineInstr * MI)17987d2019bbSStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
17997d2019bbSStanislav Mekhanoshin assert(SIInstrInfo::isMAI(*MI));
18007d2019bbSStanislav Mekhanoshin
1801a8d9d507SStanislav Mekhanoshin return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI);
1802a8d9d507SStanislav Mekhanoshin }
1803a8d9d507SStanislav Mekhanoshin
checkMFMAPadding(MachineInstr * MI)18041e15adbaSAustin Kerbow int GCNHazardRecognizer::checkMFMAPadding(MachineInstr *MI) {
18051e15adbaSAustin Kerbow // Early exit if no padding is requested.
18061e15adbaSAustin Kerbow if (MFMAPaddingRatio == 0)
18071e15adbaSAustin Kerbow return 0;
18081e15adbaSAustin Kerbow
18091e15adbaSAustin Kerbow const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1810bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(*MI) || MFI->getOccupancy() < 2)
18111e15adbaSAustin Kerbow return 0;
18121e15adbaSAustin Kerbow
18131e15adbaSAustin Kerbow int NeighborMFMALatency = 0;
1814bd9eed3aSAustin Kerbow auto IsNeighboringMFMA = [&NeighborMFMALatency,
18151e15adbaSAustin Kerbow this](const MachineInstr &MI) {
1816bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(MI))
18171e15adbaSAustin Kerbow return false;
18181e15adbaSAustin Kerbow
18191e15adbaSAustin Kerbow NeighborMFMALatency = this->getMFMAPipelineWaitStates(MI);
18201e15adbaSAustin Kerbow return true;
18211e15adbaSAustin Kerbow };
18221e15adbaSAustin Kerbow
18231e15adbaSAustin Kerbow const int MaxMFMAPipelineWaitStates = 16;
18241e15adbaSAustin Kerbow int WaitStatesSinceNeighborMFMA =
18251e15adbaSAustin Kerbow getWaitStatesSince(IsNeighboringMFMA, MaxMFMAPipelineWaitStates);
18261e15adbaSAustin Kerbow
18271e15adbaSAustin Kerbow int NeighborMFMAPaddingNeeded =
18281e15adbaSAustin Kerbow (NeighborMFMALatency * MFMAPaddingRatio / 100) -
18291e15adbaSAustin Kerbow WaitStatesSinceNeighborMFMA;
18301e15adbaSAustin Kerbow
18311e15adbaSAustin Kerbow return std::max(0, NeighborMFMAPaddingNeeded);
18321e15adbaSAustin Kerbow }
18331e15adbaSAustin Kerbow
checkMAIHazards908(MachineInstr * MI)1834a8d9d507SStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
18357d2019bbSStanislav Mekhanoshin int WaitStatesNeeded = 0;
18367d2019bbSStanislav Mekhanoshin unsigned Opc = MI->getOpcode();
18377d2019bbSStanislav Mekhanoshin
1838424f1f6fSCarl Ritson auto IsVALUFn = [](const MachineInstr &MI) {
1839424f1f6fSCarl Ritson return SIInstrInfo::isVALU(MI);
18407d2019bbSStanislav Mekhanoshin };
18417d2019bbSStanislav Mekhanoshin
1842314e29edSJoe Nash if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write
18437d2019bbSStanislav Mekhanoshin const int LegacyVALUWritesVGPRWaitStates = 2;
18447d2019bbSStanislav Mekhanoshin const int VALUWritesExecWaitStates = 4;
18457d2019bbSStanislav Mekhanoshin const int MaxWaitStates = 4;
18467d2019bbSStanislav Mekhanoshin
18477d2019bbSStanislav Mekhanoshin int WaitStatesNeededForUse = VALUWritesExecWaitStates -
18487d2019bbSStanislav Mekhanoshin getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
18497d2019bbSStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
18507d2019bbSStanislav Mekhanoshin
18517d2019bbSStanislav Mekhanoshin if (WaitStatesNeeded < MaxWaitStates) {
18527d2019bbSStanislav Mekhanoshin for (const MachineOperand &Use : MI->explicit_uses()) {
18537d2019bbSStanislav Mekhanoshin const int MaxWaitStates = 2;
18547d2019bbSStanislav Mekhanoshin
18557d2019bbSStanislav Mekhanoshin if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
18567d2019bbSStanislav Mekhanoshin continue;
18577d2019bbSStanislav Mekhanoshin
18587d2019bbSStanislav Mekhanoshin int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
18597d2019bbSStanislav Mekhanoshin getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
18607d2019bbSStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
18617d2019bbSStanislav Mekhanoshin
18627d2019bbSStanislav Mekhanoshin if (WaitStatesNeeded == MaxWaitStates)
18637d2019bbSStanislav Mekhanoshin break;
18647d2019bbSStanislav Mekhanoshin }
18657d2019bbSStanislav Mekhanoshin }
18667d2019bbSStanislav Mekhanoshin }
18677d2019bbSStanislav Mekhanoshin
18687d2019bbSStanislav Mekhanoshin for (const MachineOperand &Op : MI->explicit_operands()) {
18697d2019bbSStanislav Mekhanoshin if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
18707d2019bbSStanislav Mekhanoshin continue;
18717d2019bbSStanislav Mekhanoshin
1872314e29edSJoe Nash if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
18737d2019bbSStanislav Mekhanoshin continue;
18747d2019bbSStanislav Mekhanoshin
18757d2019bbSStanislav Mekhanoshin const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
18767d2019bbSStanislav Mekhanoshin const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
18777d2019bbSStanislav Mekhanoshin const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
18787d2019bbSStanislav Mekhanoshin const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
18797d2019bbSStanislav Mekhanoshin const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
18807d2019bbSStanislav Mekhanoshin const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
18817d2019bbSStanislav Mekhanoshin const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
18827d2019bbSStanislav Mekhanoshin const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
18837d2019bbSStanislav Mekhanoshin const int MaxWaitStates = 18;
18840c476111SDaniel Sanders Register Reg = Op.getReg();
18857d2019bbSStanislav Mekhanoshin unsigned HazardDefLatency = 0;
18867d2019bbSStanislav Mekhanoshin
1887bd9eed3aSAustin Kerbow auto IsOverlappedMFMAFn = [Reg, &HazardDefLatency,
1888424f1f6fSCarl Ritson this](const MachineInstr &MI) {
1889bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(MI))
18907d2019bbSStanislav Mekhanoshin return false;
1891424f1f6fSCarl Ritson Register DstReg = MI.getOperand(0).getReg();
18927d2019bbSStanislav Mekhanoshin if (DstReg == Reg)
18937d2019bbSStanislav Mekhanoshin return false;
1894424f1f6fSCarl Ritson HazardDefLatency =
1895424f1f6fSCarl Ritson std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
18967d2019bbSStanislav Mekhanoshin return TRI.regsOverlap(DstReg, Reg);
18977d2019bbSStanislav Mekhanoshin };
18987d2019bbSStanislav Mekhanoshin
18997d2019bbSStanislav Mekhanoshin int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
19007d2019bbSStanislav Mekhanoshin MaxWaitStates);
19017d2019bbSStanislav Mekhanoshin int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
19027d2019bbSStanislav Mekhanoshin int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
19037d2019bbSStanislav Mekhanoshin int OpNo = MI->getOperandNo(&Op);
19047d2019bbSStanislav Mekhanoshin if (OpNo == SrcCIdx) {
19057d2019bbSStanislav Mekhanoshin NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
1906314e29edSJoe Nash } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
19077d2019bbSStanislav Mekhanoshin switch (HazardDefLatency) {
19087d2019bbSStanislav Mekhanoshin case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
19097d2019bbSStanislav Mekhanoshin break;
19107d2019bbSStanislav Mekhanoshin case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
19117d2019bbSStanislav Mekhanoshin break;
19127d2019bbSStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
19137d2019bbSStanislav Mekhanoshin default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
19147d2019bbSStanislav Mekhanoshin break;
19157d2019bbSStanislav Mekhanoshin }
1916314e29edSJoe Nash } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
19177d2019bbSStanislav Mekhanoshin switch (HazardDefLatency) {
19187d2019bbSStanislav Mekhanoshin case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
19197d2019bbSStanislav Mekhanoshin break;
19207d2019bbSStanislav Mekhanoshin case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
19217d2019bbSStanislav Mekhanoshin break;
19227d2019bbSStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
19237d2019bbSStanislav Mekhanoshin default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
19247d2019bbSStanislav Mekhanoshin break;
19257d2019bbSStanislav Mekhanoshin }
19267d2019bbSStanislav Mekhanoshin }
19277d2019bbSStanislav Mekhanoshin
19287d2019bbSStanislav Mekhanoshin int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
19297d2019bbSStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
19307d2019bbSStanislav Mekhanoshin
19317d2019bbSStanislav Mekhanoshin if (WaitStatesNeeded == MaxWaitStates)
19327d2019bbSStanislav Mekhanoshin return WaitStatesNeeded; // Early exit.
19337d2019bbSStanislav Mekhanoshin
1934424f1f6fSCarl Ritson auto IsAccVgprWriteFn = [Reg, this](const MachineInstr &MI) {
1935424f1f6fSCarl Ritson if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
19367d2019bbSStanislav Mekhanoshin return false;
1937424f1f6fSCarl Ritson Register DstReg = MI.getOperand(0).getReg();
19387d2019bbSStanislav Mekhanoshin return TRI.regsOverlap(Reg, DstReg);
19397d2019bbSStanislav Mekhanoshin };
19407d2019bbSStanislav Mekhanoshin
19417d2019bbSStanislav Mekhanoshin const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
19427d2019bbSStanislav Mekhanoshin const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
19437d2019bbSStanislav Mekhanoshin const int AccVGPRWriteAccVgprReadWaitStates = 3;
19447d2019bbSStanislav Mekhanoshin NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
19457d2019bbSStanislav Mekhanoshin if (OpNo == SrcCIdx)
19467d2019bbSStanislav Mekhanoshin NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
1947314e29edSJoe Nash else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64)
19487d2019bbSStanislav Mekhanoshin NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
19497d2019bbSStanislav Mekhanoshin
19507d2019bbSStanislav Mekhanoshin WaitStatesNeededForUse = NeedWaitStates -
19517d2019bbSStanislav Mekhanoshin getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
19527d2019bbSStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
19537d2019bbSStanislav Mekhanoshin
19547d2019bbSStanislav Mekhanoshin if (WaitStatesNeeded == MaxWaitStates)
19557d2019bbSStanislav Mekhanoshin return WaitStatesNeeded; // Early exit.
19567d2019bbSStanislav Mekhanoshin }
19577d2019bbSStanislav Mekhanoshin
1958314e29edSJoe Nash if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
19597d2019bbSStanislav Mekhanoshin const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
19607d2019bbSStanislav Mekhanoshin const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
19617d2019bbSStanislav Mekhanoshin const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
19627d2019bbSStanislav Mekhanoshin const int MaxWaitStates = 13;
19630c476111SDaniel Sanders Register DstReg = MI->getOperand(0).getReg();
19647d2019bbSStanislav Mekhanoshin unsigned HazardDefLatency = 0;
19657d2019bbSStanislav Mekhanoshin
1966bd9eed3aSAustin Kerbow auto IsSrcCMFMAFn = [DstReg, &HazardDefLatency,
1967424f1f6fSCarl Ritson this](const MachineInstr &MI) {
1968bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(MI))
19697d2019bbSStanislav Mekhanoshin return false;
1970424f1f6fSCarl Ritson Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg();
1971424f1f6fSCarl Ritson HazardDefLatency =
1972424f1f6fSCarl Ritson std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
19737d2019bbSStanislav Mekhanoshin return TRI.regsOverlap(Reg, DstReg);
19747d2019bbSStanislav Mekhanoshin };
19757d2019bbSStanislav Mekhanoshin
19767d2019bbSStanislav Mekhanoshin int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
19777d2019bbSStanislav Mekhanoshin int NeedWaitStates;
19787d2019bbSStanislav Mekhanoshin switch (HazardDefLatency) {
19797d2019bbSStanislav Mekhanoshin case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
19807d2019bbSStanislav Mekhanoshin break;
19817d2019bbSStanislav Mekhanoshin case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
19827d2019bbSStanislav Mekhanoshin break;
19837d2019bbSStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
19847d2019bbSStanislav Mekhanoshin default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
19857d2019bbSStanislav Mekhanoshin break;
19867d2019bbSStanislav Mekhanoshin }
19877d2019bbSStanislav Mekhanoshin
19887d2019bbSStanislav Mekhanoshin int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
19897d2019bbSStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
19907d2019bbSStanislav Mekhanoshin }
19917d2019bbSStanislav Mekhanoshin
19921e15adbaSAustin Kerbow // Pad neighboring MFMA with noops for better inter-wave performance.
19931e15adbaSAustin Kerbow WaitStatesNeeded = std::max(WaitStatesNeeded, checkMFMAPadding(MI));
19941e15adbaSAustin Kerbow
19957d2019bbSStanislav Mekhanoshin return WaitStatesNeeded;
19967d2019bbSStanislav Mekhanoshin }
19977d2019bbSStanislav Mekhanoshin
checkMAIHazards90A(MachineInstr * MI)1998a8d9d507SStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1999a8d9d507SStanislav Mekhanoshin int WaitStatesNeeded = 0;
2000a8d9d507SStanislav Mekhanoshin unsigned Opc = MI->getOpcode();
2001a8d9d507SStanislav Mekhanoshin
2002bd9eed3aSAustin Kerbow auto IsLegacyVALUFn = [](const MachineInstr &MI) {
2003bd9eed3aSAustin Kerbow return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI);
2004a8d9d507SStanislav Mekhanoshin };
2005a8d9d507SStanislav Mekhanoshin
2006bd9eed3aSAustin Kerbow auto IsLegacyVALUNotDotFn = [](const MachineInstr &MI) {
2007bd9eed3aSAustin Kerbow return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI) &&
2008bd9eed3aSAustin Kerbow !SIInstrInfo::isDOT(MI);
2009a8d9d507SStanislav Mekhanoshin };
2010a8d9d507SStanislav Mekhanoshin
2011bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(*MI))
2012a8d9d507SStanislav Mekhanoshin return WaitStatesNeeded;
2013a8d9d507SStanislav Mekhanoshin
2014a8d9d507SStanislav Mekhanoshin const int VALUWritesExecWaitStates = 4;
2015a8d9d507SStanislav Mekhanoshin int WaitStatesNeededForUse = VALUWritesExecWaitStates -
2016a8d9d507SStanislav Mekhanoshin getWaitStatesSinceDef(AMDGPU::EXEC, IsLegacyVALUFn,
2017a8d9d507SStanislav Mekhanoshin VALUWritesExecWaitStates);
2018a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2019a8d9d507SStanislav Mekhanoshin
2020a8d9d507SStanislav Mekhanoshin int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
2021a8d9d507SStanislav Mekhanoshin
2022a8d9d507SStanislav Mekhanoshin // Loop for both DGEMM and S/HGEMM 2nd instruction.
2023a8d9d507SStanislav Mekhanoshin for (const MachineOperand &Use : MI->explicit_uses()) {
2024a8d9d507SStanislav Mekhanoshin const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
2025a8d9d507SStanislav Mekhanoshin const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
2026cad9de71SStanislav Mekhanoshin const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3;
2027cad9de71SStanislav Mekhanoshin const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5;
2028cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4;
2029cad9de71SStanislav Mekhanoshin const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9;
2030cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8;
2031cad9de71SStanislav Mekhanoshin const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17;
2032cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16;
2033a8d9d507SStanislav Mekhanoshin const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
2034a8d9d507SStanislav Mekhanoshin const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
2035a8d9d507SStanislav Mekhanoshin const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
2036a8d9d507SStanislav Mekhanoshin const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
2037a8d9d507SStanislav Mekhanoshin const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
2038a8d9d507SStanislav Mekhanoshin const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
2039a8d9d507SStanislav Mekhanoshin const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
2040a8d9d507SStanislav Mekhanoshin const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
2041a8d9d507SStanislav Mekhanoshin const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
2042a8d9d507SStanislav Mekhanoshin const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
2043cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4;
2044cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6;
2045cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10;
2046cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18;
2047cad9de71SStanislav Mekhanoshin const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5;
2048cad9de71SStanislav Mekhanoshin const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7;
2049cad9de71SStanislav Mekhanoshin const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11;
2050cad9de71SStanislav Mekhanoshin const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19;
2051a8d9d507SStanislav Mekhanoshin const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
2052a8d9d507SStanislav Mekhanoshin const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
2053a8d9d507SStanislav Mekhanoshin const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
2054cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
2055a8d9d507SStanislav Mekhanoshin const int MaxWaitStates = 19;
2056a8d9d507SStanislav Mekhanoshin
2057a8d9d507SStanislav Mekhanoshin if (!Use.isReg())
2058a8d9d507SStanislav Mekhanoshin continue;
2059d6b07348SJim Lin Register Reg = Use.getReg();
2060a8d9d507SStanislav Mekhanoshin bool FullReg;
2061424f1f6fSCarl Ritson const MachineInstr *MI1;
2062a8d9d507SStanislav Mekhanoshin
2063bd9eed3aSAustin Kerbow auto IsOverlappedMFMAFn = [Reg, &FullReg, &MI1,
2064424f1f6fSCarl Ritson this](const MachineInstr &MI) {
2065bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(MI))
2066a8d9d507SStanislav Mekhanoshin return false;
2067424f1f6fSCarl Ritson Register DstReg = MI.getOperand(0).getReg();
2068a8d9d507SStanislav Mekhanoshin FullReg = (DstReg == Reg);
2069424f1f6fSCarl Ritson MI1 = &MI;
2070a8d9d507SStanislav Mekhanoshin return TRI.regsOverlap(DstReg, Reg);
2071a8d9d507SStanislav Mekhanoshin };
2072a8d9d507SStanislav Mekhanoshin
2073a8d9d507SStanislav Mekhanoshin WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates -
2074a8d9d507SStanislav Mekhanoshin getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
2075a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2076a8d9d507SStanislav Mekhanoshin
2077661a232eSStanislav Mekhanoshin int NumWaitStates =
2078661a232eSStanislav Mekhanoshin getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);
2079a8d9d507SStanislav Mekhanoshin if (NumWaitStates == std::numeric_limits<int>::max())
2080a8d9d507SStanislav Mekhanoshin continue;
2081a8d9d507SStanislav Mekhanoshin
2082a8d9d507SStanislav Mekhanoshin int OpNo = MI->getOperandNo(&Use);
2083a8d9d507SStanislav Mekhanoshin unsigned Opc1 = MI1->getOpcode();
2084a8d9d507SStanislav Mekhanoshin int NeedWaitStates = 0;
2085a8d9d507SStanislav Mekhanoshin if (OpNo == SrcCIdx) {
2086cad9de71SStanislav Mekhanoshin if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) {
2087a8d9d507SStanislav Mekhanoshin NeedWaitStates = 0;
2088a8d9d507SStanislav Mekhanoshin } else if (FullReg) {
2089a8d9d507SStanislav Mekhanoshin if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
2090a8d9d507SStanislav Mekhanoshin Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
2091a8d9d507SStanislav Mekhanoshin (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
2092a8d9d507SStanislav Mekhanoshin Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
2093a8d9d507SStanislav Mekhanoshin NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
2094cad9de71SStanislav Mekhanoshin else if (ST.hasGFX940Insts() &&
2095cad9de71SStanislav Mekhanoshin TSchedModel.computeInstrLatency(MI1) == 2)
2096cad9de71SStanislav Mekhanoshin NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
2097a8d9d507SStanislav Mekhanoshin } else {
2098a8d9d507SStanislav Mekhanoshin switch (Opc1) {
2099a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
2100a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
2101dbf278b9SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
2102dbf278b9SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
2103a8d9d507SStanislav Mekhanoshin if (!isXDL(ST, *MI))
2104a8d9d507SStanislav Mekhanoshin NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
2105a8d9d507SStanislav Mekhanoshin break;
2106a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
2107a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
2108a8d9d507SStanislav Mekhanoshin if (!isXDL(ST, *MI))
2109a8d9d507SStanislav Mekhanoshin NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
2110a8d9d507SStanislav Mekhanoshin break;
2111a8d9d507SStanislav Mekhanoshin default:
2112cad9de71SStanislav Mekhanoshin if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1))
2113cad9de71SStanislav Mekhanoshin break;
2114a8d9d507SStanislav Mekhanoshin switch (TSchedModel.computeInstrLatency(MI1)) {
2115a8d9d507SStanislav Mekhanoshin case 2:
2116cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2117cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MI1)
2118cad9de71SStanislav Mekhanoshin ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates
2119cad9de71SStanislav Mekhanoshin : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates
2120cad9de71SStanislav Mekhanoshin : isDGEMM(Opc)
2121a8d9d507SStanislav Mekhanoshin ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
2122a8d9d507SStanislav Mekhanoshin : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
2123a8d9d507SStanislav Mekhanoshin break;
2124cad9de71SStanislav Mekhanoshin case 4:
2125cad9de71SStanislav Mekhanoshin assert(ST.hasGFX940Insts());
2126cad9de71SStanislav Mekhanoshin NeedWaitStates = isXDL(ST, *MI1)
2127cad9de71SStanislav Mekhanoshin ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates
2128cad9de71SStanislav Mekhanoshin : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates;
2129cad9de71SStanislav Mekhanoshin break;
2130a8d9d507SStanislav Mekhanoshin case 8:
2131cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2132cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MI1)
2133cad9de71SStanislav Mekhanoshin ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates
2134cad9de71SStanislav Mekhanoshin : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates
2135cad9de71SStanislav Mekhanoshin : isDGEMM(Opc)
2136a8d9d507SStanislav Mekhanoshin ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
2137a8d9d507SStanislav Mekhanoshin : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
2138a8d9d507SStanislav Mekhanoshin break;
2139a8d9d507SStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
2140a8d9d507SStanislav Mekhanoshin default:
2141cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2142cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MI1)
2143cad9de71SStanislav Mekhanoshin ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates
2144cad9de71SStanislav Mekhanoshin : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates
2145cad9de71SStanislav Mekhanoshin : isDGEMM(Opc)
2146a8d9d507SStanislav Mekhanoshin ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
2147a8d9d507SStanislav Mekhanoshin : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
2148a8d9d507SStanislav Mekhanoshin }
2149a8d9d507SStanislav Mekhanoshin }
2150a8d9d507SStanislav Mekhanoshin }
2151a8d9d507SStanislav Mekhanoshin } else {
2152a8d9d507SStanislav Mekhanoshin switch (Opc1) {
2153a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
2154a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
2155dbf278b9SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
2156dbf278b9SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
2157a8d9d507SStanislav Mekhanoshin NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
2158a8d9d507SStanislav Mekhanoshin break;
2159a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
2160a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
2161a8d9d507SStanislav Mekhanoshin NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates;
2162a8d9d507SStanislav Mekhanoshin break;
2163a8d9d507SStanislav Mekhanoshin default:
2164a8d9d507SStanislav Mekhanoshin switch (TSchedModel.computeInstrLatency(MI1)) {
2165a8d9d507SStanislav Mekhanoshin case 2:
2166cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2167cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MI1)
2168cad9de71SStanislav Mekhanoshin ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates
2169cad9de71SStanislav Mekhanoshin : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates
2170cad9de71SStanislav Mekhanoshin : SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
2171cad9de71SStanislav Mekhanoshin break;
2172cad9de71SStanislav Mekhanoshin case 4:
2173cad9de71SStanislav Mekhanoshin assert(ST.hasGFX940Insts());
2174cad9de71SStanislav Mekhanoshin NeedWaitStates = isXDL(ST, *MI1)
2175cad9de71SStanislav Mekhanoshin ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates
2176cad9de71SStanislav Mekhanoshin : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates;
2177a8d9d507SStanislav Mekhanoshin break;
2178a8d9d507SStanislav Mekhanoshin case 8:
2179cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2180cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MI1)
2181cad9de71SStanislav Mekhanoshin ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates
2182cad9de71SStanislav Mekhanoshin : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates
2183cad9de71SStanislav Mekhanoshin : SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
2184a8d9d507SStanislav Mekhanoshin break;
2185a8d9d507SStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
2186a8d9d507SStanislav Mekhanoshin default:
2187cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2188cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MI1)
2189cad9de71SStanislav Mekhanoshin ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates
2190cad9de71SStanislav Mekhanoshin : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates
2191cad9de71SStanislav Mekhanoshin : SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
2192a8d9d507SStanislav Mekhanoshin }
2193a8d9d507SStanislav Mekhanoshin }
2194a8d9d507SStanislav Mekhanoshin }
2195a8d9d507SStanislav Mekhanoshin if (WaitStatesNeeded >= NeedWaitStates)
2196a8d9d507SStanislav Mekhanoshin continue;
2197a8d9d507SStanislav Mekhanoshin
2198a8d9d507SStanislav Mekhanoshin WaitStatesNeededForUse = NeedWaitStates - NumWaitStates;
2199a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2200a8d9d507SStanislav Mekhanoshin
2201a8d9d507SStanislav Mekhanoshin if (WaitStatesNeeded == MaxWaitStates)
2202a8d9d507SStanislav Mekhanoshin break;
2203a8d9d507SStanislav Mekhanoshin }
2204a8d9d507SStanislav Mekhanoshin
2205a8d9d507SStanislav Mekhanoshin return WaitStatesNeeded;
2206a8d9d507SStanislav Mekhanoshin }
2207a8d9d507SStanislav Mekhanoshin
checkMAILdStHazards(MachineInstr * MI)22087d2019bbSStanislav Mekhanoshin int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
2209d1f45ed5SNeubauer, Sebastian // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards()
2210a8d9d507SStanislav Mekhanoshin if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
22117d2019bbSStanislav Mekhanoshin return 0;
22127d2019bbSStanislav Mekhanoshin
22137d2019bbSStanislav Mekhanoshin int WaitStatesNeeded = 0;
22147d2019bbSStanislav Mekhanoshin
2215424f1f6fSCarl Ritson auto IsAccVgprReadFn = [](const MachineInstr &MI) {
2216424f1f6fSCarl Ritson return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
22177d2019bbSStanislav Mekhanoshin };
22187d2019bbSStanislav Mekhanoshin
22197d2019bbSStanislav Mekhanoshin for (const MachineOperand &Op : MI->explicit_uses()) {
22207d2019bbSStanislav Mekhanoshin if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
22217d2019bbSStanislav Mekhanoshin continue;
22227d2019bbSStanislav Mekhanoshin
22230c476111SDaniel Sanders Register Reg = Op.getReg();
22247d2019bbSStanislav Mekhanoshin
22257d2019bbSStanislav Mekhanoshin const int AccVgprReadLdStWaitStates = 2;
2226a4f35ab2SAustin Kerbow const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
22277d2019bbSStanislav Mekhanoshin const int MaxWaitStates = 2;
22287d2019bbSStanislav Mekhanoshin
22297d2019bbSStanislav Mekhanoshin int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
22307d2019bbSStanislav Mekhanoshin getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
22317d2019bbSStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
22327d2019bbSStanislav Mekhanoshin
22337d2019bbSStanislav Mekhanoshin if (WaitStatesNeeded == MaxWaitStates)
22347d2019bbSStanislav Mekhanoshin return WaitStatesNeeded; // Early exit.
22357d2019bbSStanislav Mekhanoshin
2236424f1f6fSCarl Ritson auto IsVALUAccVgprRdWrCheckFn = [Reg, this](const MachineInstr &MI) {
2237424f1f6fSCarl Ritson if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
2238424f1f6fSCarl Ritson MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
22397d2019bbSStanislav Mekhanoshin return false;
2240424f1f6fSCarl Ritson auto IsVALUFn = [](const MachineInstr &MI) {
2241424f1f6fSCarl Ritson return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMAI(MI);
22427d2019bbSStanislav Mekhanoshin };
22437d2019bbSStanislav Mekhanoshin return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
22447d2019bbSStanislav Mekhanoshin std::numeric_limits<int>::max();
22457d2019bbSStanislav Mekhanoshin };
22467d2019bbSStanislav Mekhanoshin
2247a4f35ab2SAustin Kerbow WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
2248a4f35ab2SAustin Kerbow getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
22497d2019bbSStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
22507d2019bbSStanislav Mekhanoshin }
22517d2019bbSStanislav Mekhanoshin
22527d2019bbSStanislav Mekhanoshin return WaitStatesNeeded;
22537d2019bbSStanislav Mekhanoshin }
225413b63be4SStanislav Mekhanoshin
checkMAIVALUHazards(MachineInstr * MI)2255a8d9d507SStanislav Mekhanoshin int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
2256a8d9d507SStanislav Mekhanoshin if (!ST.hasGFX90AInsts())
2257a8d9d507SStanislav Mekhanoshin return 0;
2258a8d9d507SStanislav Mekhanoshin
2259424f1f6fSCarl Ritson auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
2260424f1f6fSCarl Ritson return isDGEMM(MI.getOpcode());
2261a8d9d507SStanislav Mekhanoshin };
2262a8d9d507SStanislav Mekhanoshin
2263a8d9d507SStanislav Mekhanoshin // This is checked in checkMAIHazards90A()
2264bd9eed3aSAustin Kerbow if (SIInstrInfo::isMFMA(*MI))
2265a8d9d507SStanislav Mekhanoshin return 0;
2266a8d9d507SStanislav Mekhanoshin
2267a8d9d507SStanislav Mekhanoshin int WaitStatesNeeded = 0;
2268a8d9d507SStanislav Mekhanoshin
2269a8d9d507SStanislav Mekhanoshin bool IsMemOrExport = SIInstrInfo::isVMEM(*MI) ||
2270a8d9d507SStanislav Mekhanoshin SIInstrInfo::isFLAT(*MI) ||
2271a8d9d507SStanislav Mekhanoshin SIInstrInfo::isDS(*MI) ||
2272a8d9d507SStanislav Mekhanoshin SIInstrInfo::isEXP(*MI);
2273a8d9d507SStanislav Mekhanoshin bool IsVALU = SIInstrInfo::isVALU(*MI);
2274a8d9d507SStanislav Mekhanoshin
2275424f1f6fSCarl Ritson const MachineInstr *MFMA = nullptr;
2276a8d9d507SStanislav Mekhanoshin unsigned Reg;
2277bd9eed3aSAustin Kerbow auto IsMFMAWriteFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
2278bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(MI) ||
2279bd9eed3aSAustin Kerbow !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
2280a8d9d507SStanislav Mekhanoshin return false;
2281424f1f6fSCarl Ritson MFMA = &MI;
2282a8d9d507SStanislav Mekhanoshin return true;
2283a8d9d507SStanislav Mekhanoshin };
2284a8d9d507SStanislav Mekhanoshin
2285424f1f6fSCarl Ritson const MachineInstr *DOT = nullptr;
2286424f1f6fSCarl Ritson auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {
2287424f1f6fSCarl Ritson if (!SIInstrInfo::isDOT(MI) ||
2288424f1f6fSCarl Ritson !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
2289a8d9d507SStanislav Mekhanoshin return false;
2290424f1f6fSCarl Ritson DOT = &MI;
2291a8d9d507SStanislav Mekhanoshin return true;
2292a8d9d507SStanislav Mekhanoshin };
2293a8d9d507SStanislav Mekhanoshin
2294a8d9d507SStanislav Mekhanoshin int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2295a8d9d507SStanislav Mekhanoshin AMDGPU::OpName::src2);
2296a8d9d507SStanislav Mekhanoshin
2297a8d9d507SStanislav Mekhanoshin if (IsMemOrExport || IsVALU) {
2298a8d9d507SStanislav Mekhanoshin const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
2299a8d9d507SStanislav Mekhanoshin const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
2300a8d9d507SStanislav Mekhanoshin const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
2301cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4;
2302cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6;
2303cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10;
2304cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18;
2305cad9de71SStanislav Mekhanoshin const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5;
2306cad9de71SStanislav Mekhanoshin const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7;
2307cad9de71SStanislav Mekhanoshin const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11;
2308cad9de71SStanislav Mekhanoshin const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19;
2309a8d9d507SStanislav Mekhanoshin const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
2310a8d9d507SStanislav Mekhanoshin const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
2311a8d9d507SStanislav Mekhanoshin const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
2312a8d9d507SStanislav Mekhanoshin const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
2313a8d9d507SStanislav Mekhanoshin const int DotWriteSameDotReadSrcAB = 3;
2314a8d9d507SStanislav Mekhanoshin const int DotWriteDifferentVALURead = 3;
2315a8d9d507SStanislav Mekhanoshin const int MaxWaitStates = 19;
2316a8d9d507SStanislav Mekhanoshin
2317a8d9d507SStanislav Mekhanoshin for (const MachineOperand &Use : MI->explicit_uses()) {
2318a8d9d507SStanislav Mekhanoshin if (!Use.isReg())
2319a8d9d507SStanislav Mekhanoshin continue;
2320a8d9d507SStanislav Mekhanoshin Reg = Use.getReg();
2321a8d9d507SStanislav Mekhanoshin
2322a8d9d507SStanislav Mekhanoshin DOT = nullptr;
2323a8d9d507SStanislav Mekhanoshin int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
2324a8d9d507SStanislav Mekhanoshin MaxWaitStates);
2325a8d9d507SStanislav Mekhanoshin if (DOT) {
2326a8d9d507SStanislav Mekhanoshin int NeedWaitStates = 0;
2327a8d9d507SStanislav Mekhanoshin if (DOT->getOpcode() == MI->getOpcode()) {
2328a8d9d507SStanislav Mekhanoshin if (&Use - &MI->getOperand(0) != SrcCIdx)
2329a8d9d507SStanislav Mekhanoshin NeedWaitStates = DotWriteSameDotReadSrcAB;
2330a8d9d507SStanislav Mekhanoshin } else {
2331a8d9d507SStanislav Mekhanoshin NeedWaitStates = DotWriteDifferentVALURead;
2332a8d9d507SStanislav Mekhanoshin }
2333a8d9d507SStanislav Mekhanoshin
2334a8d9d507SStanislav Mekhanoshin int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
2335a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2336a8d9d507SStanislav Mekhanoshin }
2337a8d9d507SStanislav Mekhanoshin
2338a8d9d507SStanislav Mekhanoshin MFMA = nullptr;
2339661a232eSStanislav Mekhanoshin WaitStatesSinceDef =
2340661a232eSStanislav Mekhanoshin getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
2341a8d9d507SStanislav Mekhanoshin if (!MFMA)
2342a8d9d507SStanislav Mekhanoshin continue;
2343a8d9d507SStanislav Mekhanoshin
2344a8d9d507SStanislav Mekhanoshin unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
2345a8d9d507SStanislav Mekhanoshin int NeedWaitStates = MaxWaitStates;
2346a8d9d507SStanislav Mekhanoshin switch (HazardDefLatency) {
2347a8d9d507SStanislav Mekhanoshin case 2:
2348cad9de71SStanislav Mekhanoshin NeedWaitStates =
2349cad9de71SStanislav Mekhanoshin ST.hasGFX940Insts()
2350cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MFMA)
2351cad9de71SStanislav Mekhanoshin ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates
2352cad9de71SStanislav Mekhanoshin : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates
2353cad9de71SStanislav Mekhanoshin : SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
2354a8d9d507SStanislav Mekhanoshin break;
2355a8d9d507SStanislav Mekhanoshin case 4:
2356e9a49c64SStanislav Mekhanoshin assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
2357a8d9d507SStanislav Mekhanoshin NeedWaitStates =
2358cad9de71SStanislav Mekhanoshin isDGEMM(MFMA->getOpcode())
2359cad9de71SStanislav Mekhanoshin ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
2360cad9de71SStanislav Mekhanoshin : DMFMA4x4WriteVgprVALUReadWaitStates
2361cad9de71SStanislav Mekhanoshin : isXDL(ST, *MFMA)
2362cad9de71SStanislav Mekhanoshin ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates
2363cad9de71SStanislav Mekhanoshin : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates;
2364a8d9d507SStanislav Mekhanoshin break;
2365a8d9d507SStanislav Mekhanoshin case 8:
2366cad9de71SStanislav Mekhanoshin NeedWaitStates =
2367cad9de71SStanislav Mekhanoshin ST.hasGFX940Insts()
2368cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MFMA)
2369cad9de71SStanislav Mekhanoshin ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates
2370cad9de71SStanislav Mekhanoshin : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates
2371cad9de71SStanislav Mekhanoshin : SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
2372a8d9d507SStanislav Mekhanoshin break;
2373a8d9d507SStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
2374a8d9d507SStanislav Mekhanoshin default:
2375a8d9d507SStanislav Mekhanoshin NeedWaitStates =
2376a8d9d507SStanislav Mekhanoshin isDGEMM(MFMA->getOpcode())
2377a8d9d507SStanislav Mekhanoshin ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
2378a8d9d507SStanislav Mekhanoshin : DMFMA16x16WriteVgprVALUReadWaitStates
2379cad9de71SStanislav Mekhanoshin : ST.hasGFX940Insts()
2380cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MFMA)
2381cad9de71SStanislav Mekhanoshin ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates
2382cad9de71SStanislav Mekhanoshin : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates
2383a8d9d507SStanislav Mekhanoshin : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
2384a8d9d507SStanislav Mekhanoshin break;
2385a8d9d507SStanislav Mekhanoshin }
2386a8d9d507SStanislav Mekhanoshin
2387a8d9d507SStanislav Mekhanoshin int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
2388a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2389a8d9d507SStanislav Mekhanoshin
2390a8d9d507SStanislav Mekhanoshin if (WaitStatesNeeded == MaxWaitStates)
2391a8d9d507SStanislav Mekhanoshin break;
2392a8d9d507SStanislav Mekhanoshin }
2393a8d9d507SStanislav Mekhanoshin }
2394a8d9d507SStanislav Mekhanoshin
2395a8d9d507SStanislav Mekhanoshin unsigned Opc = MI->getOpcode();
2396a8d9d507SStanislav Mekhanoshin const int DMFMAToFMA64WaitStates = 2;
2397a8d9d507SStanislav Mekhanoshin if ((Opc == AMDGPU::V_FMA_F64_e64 ||
2398a8d9d507SStanislav Mekhanoshin Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64 ||
2399a8d9d507SStanislav Mekhanoshin Opc == AMDGPU::V_FMAC_F64_dpp) &&
2400a8d9d507SStanislav Mekhanoshin WaitStatesNeeded < DMFMAToFMA64WaitStates) {
2401a8d9d507SStanislav Mekhanoshin int WaitStatesNeededForUse = DMFMAToFMA64WaitStates -
2402a8d9d507SStanislav Mekhanoshin getWaitStatesSince(IsDGEMMFn, DMFMAToFMA64WaitStates);
2403a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2404a8d9d507SStanislav Mekhanoshin }
2405a8d9d507SStanislav Mekhanoshin
2406a8d9d507SStanislav Mekhanoshin if (!IsVALU && !IsMemOrExport)
2407a8d9d507SStanislav Mekhanoshin return WaitStatesNeeded;
2408a8d9d507SStanislav Mekhanoshin
2409a8d9d507SStanislav Mekhanoshin for (const MachineOperand &Def : MI->defs()) {
2410a8d9d507SStanislav Mekhanoshin const int SMFMA4x4WriteVgprVALUWawWaitStates = 5;
2411a8d9d507SStanislav Mekhanoshin const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
2412a8d9d507SStanislav Mekhanoshin const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
2413cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4;
2414cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6;
2415cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10;
2416cad9de71SStanislav Mekhanoshin const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18;
2417cad9de71SStanislav Mekhanoshin const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5;
2418cad9de71SStanislav Mekhanoshin const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7;
2419cad9de71SStanislav Mekhanoshin const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11;
2420cad9de71SStanislav Mekhanoshin const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19;
2421a8d9d507SStanislav Mekhanoshin const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
2422cad9de71SStanislav Mekhanoshin const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3;
2423a8d9d507SStanislav Mekhanoshin const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
2424a8d9d507SStanislav Mekhanoshin const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
2425a8d9d507SStanislav Mekhanoshin const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
2426a8d9d507SStanislav Mekhanoshin const int DMFMA16x16WriteVgprVALUWriteWaitStates = 11;
2427a8d9d507SStanislav Mekhanoshin const int DotWriteDifferentVALUWrite = 3;
2428a8d9d507SStanislav Mekhanoshin const int MaxWaitStates = 19;
2429a8d9d507SStanislav Mekhanoshin const int MaxWarWaitStates = 15;
2430a8d9d507SStanislav Mekhanoshin
2431a8d9d507SStanislav Mekhanoshin Reg = Def.getReg();
2432a8d9d507SStanislav Mekhanoshin
2433a8d9d507SStanislav Mekhanoshin DOT = nullptr;
2434a8d9d507SStanislav Mekhanoshin int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
2435a8d9d507SStanislav Mekhanoshin MaxWaitStates);
2436a8d9d507SStanislav Mekhanoshin if (DOT && DOT->getOpcode() != MI->getOpcode())
2437a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite -
2438a8d9d507SStanislav Mekhanoshin WaitStatesSinceDef);
2439a8d9d507SStanislav Mekhanoshin
2440a8d9d507SStanislav Mekhanoshin MFMA = nullptr;
2441661a232eSStanislav Mekhanoshin WaitStatesSinceDef =
2442661a232eSStanislav Mekhanoshin getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
2443a8d9d507SStanislav Mekhanoshin if (MFMA) {
2444a8d9d507SStanislav Mekhanoshin int NeedWaitStates = MaxWaitStates;
2445a8d9d507SStanislav Mekhanoshin switch (TSchedModel.computeInstrLatency(MFMA)) {
2446a8d9d507SStanislav Mekhanoshin case 2:
2447cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2448cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MFMA)
2449cad9de71SStanislav Mekhanoshin ? GFX940_XDL2PassWriteVgprVALUWawWaitStates
2450cad9de71SStanislav Mekhanoshin : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates
2451cad9de71SStanislav Mekhanoshin : SMFMA4x4WriteVgprVALUWawWaitStates;
2452a8d9d507SStanislav Mekhanoshin break;
2453a8d9d507SStanislav Mekhanoshin case 4:
2454cad9de71SStanislav Mekhanoshin assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
2455cad9de71SStanislav Mekhanoshin NeedWaitStates = isDGEMM(MFMA->getOpcode())
2456cad9de71SStanislav Mekhanoshin ? DMFMA4x4WriteVgprVALUWriteWaitStates
2457cad9de71SStanislav Mekhanoshin : isXDL(ST, *MFMA)
2458cad9de71SStanislav Mekhanoshin ? GFX940_XDL4PassWriteVgprVALUWawWaitStates
2459cad9de71SStanislav Mekhanoshin : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates;
2460a8d9d507SStanislav Mekhanoshin break;
2461a8d9d507SStanislav Mekhanoshin case 8:
2462cad9de71SStanislav Mekhanoshin NeedWaitStates = ST.hasGFX940Insts()
2463cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MFMA)
2464cad9de71SStanislav Mekhanoshin ? GFX940_XDL8PassWriteVgprVALUWawWaitStates
2465cad9de71SStanislav Mekhanoshin : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates
2466cad9de71SStanislav Mekhanoshin : SMFMA16x16WriteVgprVALUWawWaitStates;
2467a8d9d507SStanislav Mekhanoshin break;
2468a8d9d507SStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
2469a8d9d507SStanislav Mekhanoshin default:
2470a8d9d507SStanislav Mekhanoshin NeedWaitStates = isDGEMM(MFMA->getOpcode())
2471a8d9d507SStanislav Mekhanoshin ? DMFMA16x16WriteVgprVALUWriteWaitStates
2472cad9de71SStanislav Mekhanoshin : ST.hasGFX940Insts()
2473cad9de71SStanislav Mekhanoshin ? isXDL(ST, *MFMA)
2474cad9de71SStanislav Mekhanoshin ? GFX940_XDL16PassWriteVgprVALUWawWaitStates
2475cad9de71SStanislav Mekhanoshin : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates
2476a8d9d507SStanislav Mekhanoshin : SMFMA32x32WriteVgprVALUWawWaitStates;
2477a8d9d507SStanislav Mekhanoshin break;
2478a8d9d507SStanislav Mekhanoshin }
2479a8d9d507SStanislav Mekhanoshin
2480a8d9d507SStanislav Mekhanoshin int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
2481a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2482a8d9d507SStanislav Mekhanoshin
2483a8d9d507SStanislav Mekhanoshin if (WaitStatesNeeded == MaxWaitStates)
2484a8d9d507SStanislav Mekhanoshin break;
2485a8d9d507SStanislav Mekhanoshin }
2486a8d9d507SStanislav Mekhanoshin
2487bd9eed3aSAustin Kerbow auto IsSMFMAReadAsCFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
2488bd9eed3aSAustin Kerbow if (!SIInstrInfo::isMFMA(MI) || isDGEMM(MI.getOpcode()) ||
2489424f1f6fSCarl Ritson !MI.readsRegister(Reg, &TRI))
2490a8d9d507SStanislav Mekhanoshin return false;
2491a8d9d507SStanislav Mekhanoshin
2492cad9de71SStanislav Mekhanoshin if (ST.hasGFX940Insts() && !isXDL(ST, MI))
2493cad9de71SStanislav Mekhanoshin return false;
2494cad9de71SStanislav Mekhanoshin
2495424f1f6fSCarl Ritson const MachineOperand *SrcC =
2496424f1f6fSCarl Ritson TII.getNamedOperand(MI, AMDGPU::OpName::src2);
2497a8d9d507SStanislav Mekhanoshin assert(SrcC);
2498a8d9d507SStanislav Mekhanoshin if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg))
2499a8d9d507SStanislav Mekhanoshin return false;
2500a8d9d507SStanislav Mekhanoshin
2501424f1f6fSCarl Ritson MFMA = &MI;
2502a8d9d507SStanislav Mekhanoshin return true;
2503a8d9d507SStanislav Mekhanoshin };
2504a8d9d507SStanislav Mekhanoshin
2505a8d9d507SStanislav Mekhanoshin MFMA = nullptr;
2506a8d9d507SStanislav Mekhanoshin int WaitStatesSinceUse = getWaitStatesSince(IsSMFMAReadAsCFn,
2507a8d9d507SStanislav Mekhanoshin MaxWarWaitStates);
2508a8d9d507SStanislav Mekhanoshin if (!MFMA)
2509a8d9d507SStanislav Mekhanoshin continue;
2510a8d9d507SStanislav Mekhanoshin
2511a8d9d507SStanislav Mekhanoshin unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
2512a8d9d507SStanislav Mekhanoshin int NeedWaitStates = MaxWaitStates;
2513a8d9d507SStanislav Mekhanoshin switch (HazardDefLatency) {
2514a8d9d507SStanislav Mekhanoshin case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
2515a8d9d507SStanislav Mekhanoshin break;
2516cad9de71SStanislav Mekhanoshin case 4: assert(ST.hasGFX940Insts());
2517cad9de71SStanislav Mekhanoshin NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
2518cad9de71SStanislav Mekhanoshin break;
2519a8d9d507SStanislav Mekhanoshin case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
2520a8d9d507SStanislav Mekhanoshin break;
2521a8d9d507SStanislav Mekhanoshin case 16: LLVM_FALLTHROUGH;
2522a8d9d507SStanislav Mekhanoshin default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
2523a8d9d507SStanislav Mekhanoshin break;
2524a8d9d507SStanislav Mekhanoshin }
2525a8d9d507SStanislav Mekhanoshin
2526a8d9d507SStanislav Mekhanoshin int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse;
2527a8d9d507SStanislav Mekhanoshin WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
2528a8d9d507SStanislav Mekhanoshin }
2529a8d9d507SStanislav Mekhanoshin
2530a8d9d507SStanislav Mekhanoshin return WaitStatesNeeded;
2531a8d9d507SStanislav Mekhanoshin }
2532a8d9d507SStanislav Mekhanoshin
ShouldPreferAnother(SUnit * SU)253313b63be4SStanislav Mekhanoshin bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
253413b63be4SStanislav Mekhanoshin if (!SU->isInstr())
253513b63be4SStanislav Mekhanoshin return false;
253613b63be4SStanislav Mekhanoshin
2537424f1f6fSCarl Ritson const MachineInstr *MAI = nullptr;
2538bd9eed3aSAustin Kerbow
2539424f1f6fSCarl Ritson auto IsMFMAFn = [&MAI](const MachineInstr &MI) {
254013b63be4SStanislav Mekhanoshin MAI = nullptr;
2541bd9eed3aSAustin Kerbow if (SIInstrInfo::isMFMA(MI))
2542424f1f6fSCarl Ritson MAI = &MI;
254313b63be4SStanislav Mekhanoshin return MAI != nullptr;
254413b63be4SStanislav Mekhanoshin };
254513b63be4SStanislav Mekhanoshin
254613b63be4SStanislav Mekhanoshin MachineInstr *MI = SU->getInstr();
2547424f1f6fSCarl Ritson if (IsMFMAFn(*MI)) {
254813b63be4SStanislav Mekhanoshin int W = getWaitStatesSince(IsMFMAFn, 16);
254913b63be4SStanislav Mekhanoshin if (MAI)
255013b63be4SStanislav Mekhanoshin return W < (int)TSchedModel.computeInstrLatency(MAI);
255113b63be4SStanislav Mekhanoshin }
255213b63be4SStanislav Mekhanoshin
255313b63be4SStanislav Mekhanoshin return false;
255413b63be4SStanislav Mekhanoshin }
2555