//===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements methods from the AMDGPUCustomBehaviour class.
///
//===----------------------------------------------------------------------===//
13dbed061bSPatrick Holland
14dbed061bSPatrick Holland #include "AMDGPUCustomBehaviour.h"
15dbed061bSPatrick Holland #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16dbed061bSPatrick Holland #include "SIInstrInfo.h"
17dbed061bSPatrick Holland #include "TargetInfo/AMDGPUTargetInfo.h"
1889b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
19dbed061bSPatrick Holland #include "llvm/Support/WithColor.h"
20dbed061bSPatrick Holland
21dbed061bSPatrick Holland namespace llvm {
22dbed061bSPatrick Holland namespace mca {
23dbed061bSPatrick Holland
postProcessInstruction(std::unique_ptr<Instruction> & Inst,const MCInst & MCI)24e4ebfb57SPatrick Holland void AMDGPUInstrPostProcess::postProcessInstruction(
25e4ebfb57SPatrick Holland std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
26e4ebfb57SPatrick Holland switch (MCI.getOpcode()) {
27e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT:
28e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT:
29e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT:
30e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT:
31e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT:
32e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
33e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
34e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10:
35e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10:
36e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx10:
37e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx6_gfx7:
38e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_vi:
39e4ebfb57SPatrick Holland return processWaitCnt(Inst, MCI);
40e4ebfb57SPatrick Holland }
41e4ebfb57SPatrick Holland }
42e4ebfb57SPatrick Holland
43e4ebfb57SPatrick Holland // s_waitcnt instructions encode important information as immediate operands
44e4ebfb57SPatrick Holland // which are lost during the MCInst -> mca::Instruction lowering.
processWaitCnt(std::unique_ptr<Instruction> & Inst,const MCInst & MCI)45e4ebfb57SPatrick Holland void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
46e4ebfb57SPatrick Holland const MCInst &MCI) {
47e4ebfb57SPatrick Holland for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
48e4ebfb57SPatrick Holland MCAOperand Op;
49e4ebfb57SPatrick Holland const MCOperand &MCOp = MCI.getOperand(Idx);
50e4ebfb57SPatrick Holland if (MCOp.isReg()) {
51e4ebfb57SPatrick Holland Op = MCAOperand::createReg(MCOp.getReg());
52e4ebfb57SPatrick Holland } else if (MCOp.isImm()) {
53e4ebfb57SPatrick Holland Op = MCAOperand::createImm(MCOp.getImm());
54e4ebfb57SPatrick Holland }
55e4ebfb57SPatrick Holland Op.setIndex(Idx);
56e4ebfb57SPatrick Holland Inst->addOperand(Op);
57e4ebfb57SPatrick Holland }
58e4ebfb57SPatrick Holland }
59e4ebfb57SPatrick Holland
AMDGPUCustomBehaviour(const MCSubtargetInfo & STI,const mca::SourceMgr & SrcMgr,const MCInstrInfo & MCII)60dbed061bSPatrick Holland AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
61dbed061bSPatrick Holland const mca::SourceMgr &SrcMgr,
62dbed061bSPatrick Holland const MCInstrInfo &MCII)
63e4ebfb57SPatrick Holland : CustomBehaviour(STI, SrcMgr, MCII) {
64e4ebfb57SPatrick Holland generateWaitCntInfo();
65e4ebfb57SPatrick Holland }
66dbed061bSPatrick Holland
checkCustomHazard(ArrayRef<InstRef> IssuedInst,const InstRef & IR)67e4ebfb57SPatrick Holland unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
68e4ebfb57SPatrick Holland const InstRef &IR) {
69e4ebfb57SPatrick Holland const Instruction &Inst = *IR.getInstruction();
70e4ebfb57SPatrick Holland unsigned Opcode = Inst.getOpcode();
71e4ebfb57SPatrick Holland
72e4ebfb57SPatrick Holland // llvm-mca is generally run on fully compiled assembly so we wouldn't see any
73e4ebfb57SPatrick Holland // pseudo instructions here. However, there are plans for the future to make
74e4ebfb57SPatrick Holland // it possible to use mca within backend passes. As such, I have left the
75e4ebfb57SPatrick Holland // pseudo version of s_waitcnt within this switch statement.
76e4ebfb57SPatrick Holland switch (Opcode) {
77e4ebfb57SPatrick Holland default:
78dbed061bSPatrick Holland return 0;
79e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT: // This instruction
80e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT:
81e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT:
82e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT:
83e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT: // to this instruction are all pseudo.
84e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
85e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
86e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10:
87e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10:
88e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx10:
89e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx6_gfx7:
90e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_vi:
91e4ebfb57SPatrick Holland // s_endpgm also behaves as if there is an implicit
92e4ebfb57SPatrick Holland // s_waitcnt 0, but I'm not sure if it would be appropriate
93e4ebfb57SPatrick Holland // to model this in llvm-mca based on how the iterations work
94e4ebfb57SPatrick Holland // while simulating the pipeline over and over.
95e4ebfb57SPatrick Holland return handleWaitCnt(IssuedInst, IR);
96e4ebfb57SPatrick Holland }
97e4ebfb57SPatrick Holland
98e4ebfb57SPatrick Holland return 0;
99e4ebfb57SPatrick Holland }
100e4ebfb57SPatrick Holland
handleWaitCnt(ArrayRef<InstRef> IssuedInst,const InstRef & IR)101e4ebfb57SPatrick Holland unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
102e4ebfb57SPatrick Holland const InstRef &IR) {
103e4ebfb57SPatrick Holland // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr.
104e4ebfb57SPatrick Holland // I do not know how that instruction works so I did not attempt to model it.
105e4ebfb57SPatrick Holland // set the max values to begin
106e4ebfb57SPatrick Holland unsigned Vmcnt = 63;
107e4ebfb57SPatrick Holland unsigned Expcnt = 7;
108e4ebfb57SPatrick Holland unsigned Lgkmcnt = 31;
109e4ebfb57SPatrick Holland unsigned Vscnt = 63;
110e4ebfb57SPatrick Holland unsigned CurrVmcnt = 0;
111e4ebfb57SPatrick Holland unsigned CurrExpcnt = 0;
112e4ebfb57SPatrick Holland unsigned CurrLgkmcnt = 0;
113e4ebfb57SPatrick Holland unsigned CurrVscnt = 0;
114e4ebfb57SPatrick Holland unsigned CyclesToWaitVm = ~0U;
115e4ebfb57SPatrick Holland unsigned CyclesToWaitExp = ~0U;
116e4ebfb57SPatrick Holland unsigned CyclesToWaitLgkm = ~0U;
117e4ebfb57SPatrick Holland unsigned CyclesToWaitVs = ~0U;
118e4ebfb57SPatrick Holland
119e4ebfb57SPatrick Holland computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
120e4ebfb57SPatrick Holland
121e4ebfb57SPatrick Holland // We will now look at each of the currently executing instructions
122e4ebfb57SPatrick Holland // to find out if this wait instruction still needs to wait.
123d395befaSKazu Hirata for (const InstRef &PrevIR : IssuedInst) {
124e4ebfb57SPatrick Holland const Instruction &PrevInst = *PrevIR.getInstruction();
125e4ebfb57SPatrick Holland const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
126e4ebfb57SPatrick Holland const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
127e4ebfb57SPatrick Holland const int CyclesLeft = PrevInst.getCyclesLeft();
128e4ebfb57SPatrick Holland assert(CyclesLeft != UNKNOWN_CYCLES &&
129e4ebfb57SPatrick Holland "We should know how many cycles are left for this instruction");
130e4ebfb57SPatrick Holland if (PrevInstWaitInfo.VmCnt) {
131e4ebfb57SPatrick Holland CurrVmcnt++;
132e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitVm)
133e4ebfb57SPatrick Holland CyclesToWaitVm = CyclesLeft;
134e4ebfb57SPatrick Holland }
135e4ebfb57SPatrick Holland if (PrevInstWaitInfo.ExpCnt) {
136e4ebfb57SPatrick Holland CurrExpcnt++;
137e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitExp)
138e4ebfb57SPatrick Holland CyclesToWaitExp = CyclesLeft;
139e4ebfb57SPatrick Holland }
140e4ebfb57SPatrick Holland if (PrevInstWaitInfo.LgkmCnt) {
141e4ebfb57SPatrick Holland CurrLgkmcnt++;
142e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
143e4ebfb57SPatrick Holland CyclesToWaitLgkm = CyclesLeft;
144e4ebfb57SPatrick Holland }
145e4ebfb57SPatrick Holland if (PrevInstWaitInfo.VsCnt) {
146e4ebfb57SPatrick Holland CurrVscnt++;
147e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitVs)
148e4ebfb57SPatrick Holland CyclesToWaitVs = CyclesLeft;
149e4ebfb57SPatrick Holland }
150e4ebfb57SPatrick Holland }
151e4ebfb57SPatrick Holland
152e4ebfb57SPatrick Holland unsigned CyclesToWait = ~0U;
153e4ebfb57SPatrick Holland if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
154e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitVm;
155e4ebfb57SPatrick Holland if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
156e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitExp;
157e4ebfb57SPatrick Holland if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
158e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitLgkm;
159e4ebfb57SPatrick Holland if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
160e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitVs;
161e4ebfb57SPatrick Holland
162e4ebfb57SPatrick Holland // We may underestimate how many cycles we need to wait, but this
163e4ebfb57SPatrick Holland // isn't a big deal. Our return value is just how many cycles until
164e4ebfb57SPatrick Holland // this function gets run again. So as long as we don't overestimate
165e4ebfb57SPatrick Holland // the wait time, we'll still end up stalling at this instruction
166e4ebfb57SPatrick Holland // for the correct number of cycles.
167e4ebfb57SPatrick Holland
168e4ebfb57SPatrick Holland if (CyclesToWait == ~0U)
169e4ebfb57SPatrick Holland return 0;
170e4ebfb57SPatrick Holland return CyclesToWait;
171e4ebfb57SPatrick Holland }
172e4ebfb57SPatrick Holland
computeWaitCnt(const InstRef & IR,unsigned & Vmcnt,unsigned & Expcnt,unsigned & Lgkmcnt,unsigned & Vscnt)173e4ebfb57SPatrick Holland void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
174e4ebfb57SPatrick Holland unsigned &Expcnt, unsigned &Lgkmcnt,
175e4ebfb57SPatrick Holland unsigned &Vscnt) {
176e4ebfb57SPatrick Holland AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
177e4ebfb57SPatrick Holland const Instruction &Inst = *IR.getInstruction();
178e4ebfb57SPatrick Holland unsigned Opcode = Inst.getOpcode();
179e4ebfb57SPatrick Holland
180e4ebfb57SPatrick Holland switch (Opcode) {
181e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
182e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
183e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10:
184e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
185e4ebfb57SPatrick Holland // Should probably be checking for nullptr
186e4ebfb57SPatrick Holland // here, but I'm not sure how I should handle the case
187e4ebfb57SPatrick Holland // where we see a nullptr.
188e4ebfb57SPatrick Holland const MCAOperand *OpReg = Inst.getOperand(0);
189e4ebfb57SPatrick Holland const MCAOperand *OpImm = Inst.getOperand(1);
190e4ebfb57SPatrick Holland assert(OpReg && OpReg->isReg() && "First operand should be a register.");
191e4ebfb57SPatrick Holland assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
192e4ebfb57SPatrick Holland if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
193e4ebfb57SPatrick Holland // Instruction is using a real register.
194e4ebfb57SPatrick Holland // Since we can't know what value this register will have,
195e4ebfb57SPatrick Holland // we can't compute what the value of this wait should be.
196e4ebfb57SPatrick Holland WithColor::warning() << "The register component of "
197e4ebfb57SPatrick Holland << MCII.getName(Opcode) << " will be completely "
198e4ebfb57SPatrick Holland << "ignored. So the wait may not be accurate.\n";
199e4ebfb57SPatrick Holland }
200e4ebfb57SPatrick Holland switch (Opcode) {
201e4ebfb57SPatrick Holland // Redundant switch so I don't have to repeat the code above
202e4ebfb57SPatrick Holland // for each case. There are more clever ways to avoid this
203e4ebfb57SPatrick Holland // extra switch and anyone can feel free to implement one of them.
204e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
205e4ebfb57SPatrick Holland Expcnt = OpImm->getImm();
206e4ebfb57SPatrick Holland break;
207e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
208e4ebfb57SPatrick Holland Lgkmcnt = OpImm->getImm();
209e4ebfb57SPatrick Holland break;
210e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10:
211e4ebfb57SPatrick Holland Vmcnt = OpImm->getImm();
212e4ebfb57SPatrick Holland break;
213e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10:
214e4ebfb57SPatrick Holland Vscnt = OpImm->getImm();
215e4ebfb57SPatrick Holland break;
216e4ebfb57SPatrick Holland }
217e4ebfb57SPatrick Holland return;
218e4ebfb57SPatrick Holland }
219e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx10:
220e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx6_gfx7:
221e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_vi:
222e4ebfb57SPatrick Holland unsigned WaitCnt = Inst.getOperand(0)->getImm();
223e4ebfb57SPatrick Holland AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
224e4ebfb57SPatrick Holland return;
225e4ebfb57SPatrick Holland }
226e4ebfb57SPatrick Holland }
227e4ebfb57SPatrick Holland
generateWaitCntInfo()228e4ebfb57SPatrick Holland void AMDGPUCustomBehaviour::generateWaitCntInfo() {
229e4ebfb57SPatrick Holland // The core logic from this function is taken from
230e4ebfb57SPatrick Holland // SIInsertWaitcnts::updateEventWaitcntAfter() In that pass, the instructions
231e4ebfb57SPatrick Holland // that are being looked at are in the MachineInstr format, whereas we have
232e4ebfb57SPatrick Holland // access to the MCInst format. The side effects of this are that we can't use
233e4ebfb57SPatrick Holland // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst)
234e4ebfb57SPatrick Holland // functions. Therefore, we conservatively assume that these functions will
235e4ebfb57SPatrick Holland // return true. This may cause a few instructions to be incorrectly tagged
236e4ebfb57SPatrick Holland // with an extra CNT. However, these are instructions that do interact with at
237e4ebfb57SPatrick Holland // least one CNT so giving them an extra CNT shouldn't cause issues in most
238e4ebfb57SPatrick Holland // scenarios.
239e4ebfb57SPatrick Holland AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
240e4ebfb57SPatrick Holland InstrWaitCntInfo.resize(SrcMgr.size());
241e4ebfb57SPatrick Holland
242*97579dccSMin-Yih Hsu for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) {
243*97579dccSMin-Yih Hsu const std::unique_ptr<Instruction> &Inst = EN.value();
244*97579dccSMin-Yih Hsu unsigned Index = EN.index();
245e4ebfb57SPatrick Holland unsigned Opcode = Inst->getOpcode();
246e4ebfb57SPatrick Holland const MCInstrDesc &MCID = MCII.get(Opcode);
247e4ebfb57SPatrick Holland if ((MCID.TSFlags & SIInstrFlags::DS) &&
248e4ebfb57SPatrick Holland (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
249e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true;
250e4ebfb57SPatrick Holland if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
251e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].ExpCnt = true;
252e4ebfb57SPatrick Holland } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
253e4ebfb57SPatrick Holland // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
254e4ebfb57SPatrick Holland // and mayAccessLDSThroughFlat(Inst) would both return true for this
255e4ebfb57SPatrick Holland // instruction. We have to do this because those functions use
256e4ebfb57SPatrick Holland // information about the memory operands that we don't have access to.
257e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true;
258e4ebfb57SPatrick Holland if (!STI.hasFeature(AMDGPU::FeatureVscnt))
259e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true;
260e4ebfb57SPatrick Holland else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
261e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true;
262e4ebfb57SPatrick Holland else
263e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VsCnt = true;
264e4ebfb57SPatrick Holland } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
265e4ebfb57SPatrick Holland if (!STI.hasFeature(AMDGPU::FeatureVscnt))
266e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true;
267e4ebfb57SPatrick Holland else if ((MCID.mayLoad() &&
268e4ebfb57SPatrick Holland !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
269e4ebfb57SPatrick Holland ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
270e4ebfb57SPatrick Holland !MCID.mayStore()))
271e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true;
272e4ebfb57SPatrick Holland else if (MCID.mayStore())
273e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VsCnt = true;
274e4ebfb57SPatrick Holland
275e4ebfb57SPatrick Holland // (IV.Major < 7) is meant to represent
276e4ebfb57SPatrick Holland // GCNTarget.vmemWriteNeedsExpWaitcnt()
277e4ebfb57SPatrick Holland // which is defined as
278e4ebfb57SPatrick Holland // { return getGeneration() < SEA_ISLANDS; }
279e4ebfb57SPatrick Holland if (IV.Major < 7 &&
280e4ebfb57SPatrick Holland (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
281e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].ExpCnt = true;
282e4ebfb57SPatrick Holland } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
283e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true;
284e4ebfb57SPatrick Holland } else if (MCID.TSFlags & SIInstrFlags::EXP) {
285e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].ExpCnt = true;
286e4ebfb57SPatrick Holland } else {
287e4ebfb57SPatrick Holland switch (Opcode) {
288e4ebfb57SPatrick Holland case AMDGPU::S_SENDMSG:
289e4ebfb57SPatrick Holland case AMDGPU::S_SENDMSGHALT:
290e4ebfb57SPatrick Holland case AMDGPU::S_MEMTIME:
291e4ebfb57SPatrick Holland case AMDGPU::S_MEMREALTIME:
292e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true;
293e4ebfb57SPatrick Holland break;
294e4ebfb57SPatrick Holland }
295e4ebfb57SPatrick Holland }
296e4ebfb57SPatrick Holland }
297e4ebfb57SPatrick Holland }
298e4ebfb57SPatrick Holland
299e4ebfb57SPatrick Holland // taken from SIInstrInfo::isVMEM()
isVMEM(const MCInstrDesc & MCID)300e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
301e4ebfb57SPatrick Holland return MCID.TSFlags & SIInstrFlags::MUBUF ||
302e4ebfb57SPatrick Holland MCID.TSFlags & SIInstrFlags::MTBUF ||
303e4ebfb57SPatrick Holland MCID.TSFlags & SIInstrFlags::MIMG;
304e4ebfb57SPatrick Holland }
305e4ebfb57SPatrick Holland
306e4ebfb57SPatrick Holland // taken from SIInstrInfo::hasModifiersSet()
hasModifiersSet(const std::unique_ptr<Instruction> & Inst,unsigned OpName) const307e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::hasModifiersSet(
308e4ebfb57SPatrick Holland const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
309e4ebfb57SPatrick Holland int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
310e4ebfb57SPatrick Holland if (Idx == -1)
311e4ebfb57SPatrick Holland return false;
312e4ebfb57SPatrick Holland
313e4ebfb57SPatrick Holland const MCAOperand *Op = Inst->getOperand(Idx);
314e4ebfb57SPatrick Holland if (Op == nullptr || !Op->isImm() || !Op->getImm())
315e4ebfb57SPatrick Holland return false;
316e4ebfb57SPatrick Holland
317e4ebfb57SPatrick Holland return true;
318e4ebfb57SPatrick Holland }
319e4ebfb57SPatrick Holland
320e4ebfb57SPatrick Holland // taken from SIInstrInfo::isAlwaysGDS()
isAlwaysGDS(uint16_t Opcode) const321e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
322e4ebfb57SPatrick Holland return Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::DS_GWS_INIT ||
323e4ebfb57SPatrick Holland Opcode == AMDGPU::DS_GWS_SEMA_V || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
324e4ebfb57SPatrick Holland Opcode == AMDGPU::DS_GWS_SEMA_P ||
325e4ebfb57SPatrick Holland Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
326e4ebfb57SPatrick Holland Opcode == AMDGPU::DS_GWS_BARRIER;
327dbed061bSPatrick Holland }
328dbed061bSPatrick Holland
329dbed061bSPatrick Holland } // namespace mca
330dbed061bSPatrick Holland } // namespace llvm
331dbed061bSPatrick Holland
332dbed061bSPatrick Holland using namespace llvm;
333dbed061bSPatrick Holland using namespace mca;
334dbed061bSPatrick Holland
335dbed061bSPatrick Holland static CustomBehaviour *
createAMDGPUCustomBehaviour(const MCSubtargetInfo & STI,const mca::SourceMgr & SrcMgr,const MCInstrInfo & MCII)336dbed061bSPatrick Holland createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
337dbed061bSPatrick Holland const mca::SourceMgr &SrcMgr,
338dbed061bSPatrick Holland const MCInstrInfo &MCII) {
339dbed061bSPatrick Holland return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
340dbed061bSPatrick Holland }
341dbed061bSPatrick Holland
342dbed061bSPatrick Holland static InstrPostProcess *
createAMDGPUInstrPostProcess(const MCSubtargetInfo & STI,const MCInstrInfo & MCII)343dbed061bSPatrick Holland createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
344dbed061bSPatrick Holland const MCInstrInfo &MCII) {
345dbed061bSPatrick Holland return new AMDGPUInstrPostProcess(STI, MCII);
346dbed061bSPatrick Holland }
347dbed061bSPatrick Holland
348dbed061bSPatrick Holland /// Extern function to initialize the targets for the AMDGPU backend
349dbed061bSPatrick Holland
LLVMInitializeAMDGPUTargetMCA()350dbed061bSPatrick Holland extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
351dbed061bSPatrick Holland TargetRegistry::RegisterCustomBehaviour(getTheAMDGPUTarget(),
352dbed061bSPatrick Holland createAMDGPUCustomBehaviour);
353dbed061bSPatrick Holland TargetRegistry::RegisterInstrPostProcess(getTheAMDGPUTarget(),
354dbed061bSPatrick Holland createAMDGPUInstrPostProcess);
355dbed061bSPatrick Holland
356dbed061bSPatrick Holland TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
357dbed061bSPatrick Holland createAMDGPUCustomBehaviour);
358dbed061bSPatrick Holland TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(),
359dbed061bSPatrick Holland createAMDGPUInstrPostProcess);
360dbed061bSPatrick Holland }
361