//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov the
// whole sequence is reverted.
//
// $old = ...
// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
// $res = VALU $dpp_value [, src1]
//
// to
//
// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
//
// Combining rules :
//
// if $row_mask and $bank_mask are fully enabled (0xF) and
//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
// -> $combined_old = undef,
//    $combined_bound_ctrl = DPP_BOUND_ZERO
//
// if the VALU op is binary and
//    $bound_ctrl==DPP_BOUND_OFF and
//    $old==identity value (immediate) for the VALU op
// -> $combined_old = src1,
//    $combined_bound_ctrl = DPP_BOUND_OFF
//
// Otherwise cancel.
//
// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//
393d9afa27SValery Pykhtin
403d9afa27SValery Pykhtin #include "AMDGPU.h"
41560d7e04Sdfukalov #include "GCNSubtarget.h"
42560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
433d9afa27SValery Pykhtin #include "llvm/ADT/Statistic.h"
443d9afa27SValery Pykhtin #include "llvm/CodeGen/MachineFunctionPass.h"
453d9afa27SValery Pykhtin
463d9afa27SValery Pykhtin using namespace llvm;
473d9afa27SValery Pykhtin
483d9afa27SValery Pykhtin #define DEBUG_TYPE "gcn-dpp-combine"
493d9afa27SValery Pykhtin
503d9afa27SValery Pykhtin STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
513d9afa27SValery Pykhtin
523d9afa27SValery Pykhtin namespace {
533d9afa27SValery Pykhtin
543d9afa27SValery Pykhtin class GCNDPPCombine : public MachineFunctionPass {
553d9afa27SValery Pykhtin MachineRegisterInfo *MRI;
563d9afa27SValery Pykhtin const SIInstrInfo *TII;
57538bda0bSJoe Nash const GCNSubtarget *ST;
583d9afa27SValery Pykhtin
593d9afa27SValery Pykhtin using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
603d9afa27SValery Pykhtin
613d9afa27SValery Pykhtin MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
623d9afa27SValery Pykhtin
63538bda0bSJoe Nash MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
647fe97f8cSValery Pykhtin RegSubRegPair CombOldVGPR,
65538bda0bSJoe Nash MachineOperand *OldOpnd, bool CombBCZ,
66538bda0bSJoe Nash bool IsShrinkable) const;
673d9afa27SValery Pykhtin
68538bda0bSJoe Nash MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
69538bda0bSJoe Nash RegSubRegPair CombOldVGPR, bool CombBCZ,
70538bda0bSJoe Nash bool IsShrinkable) const;
713d9afa27SValery Pykhtin
723d9afa27SValery Pykhtin bool hasNoImmOrEqual(MachineInstr &MI,
733d9afa27SValery Pykhtin unsigned OpndName,
743d9afa27SValery Pykhtin int64_t Value,
753d9afa27SValery Pykhtin int64_t Mask = -1) const;
763d9afa27SValery Pykhtin
773d9afa27SValery Pykhtin bool combineDPPMov(MachineInstr &MI) const;
783d9afa27SValery Pykhtin
793d9afa27SValery Pykhtin public:
803d9afa27SValery Pykhtin static char ID;
813d9afa27SValery Pykhtin
GCNDPPCombine()823d9afa27SValery Pykhtin GCNDPPCombine() : MachineFunctionPass(ID) {
833d9afa27SValery Pykhtin initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
843d9afa27SValery Pykhtin }
853d9afa27SValery Pykhtin
863d9afa27SValery Pykhtin bool runOnMachineFunction(MachineFunction &MF) override;
873d9afa27SValery Pykhtin
getPassName() const883d9afa27SValery Pykhtin StringRef getPassName() const override { return "GCN DPP Combine"; }
893d9afa27SValery Pykhtin
getAnalysisUsage(AnalysisUsage & AU) const903d9afa27SValery Pykhtin void getAnalysisUsage(AnalysisUsage &AU) const override {
913d9afa27SValery Pykhtin AU.setPreservesCFG();
923d9afa27SValery Pykhtin MachineFunctionPass::getAnalysisUsage(AU);
933d9afa27SValery Pykhtin }
94525f9c0bSDmitry Preobrazhensky
getRequiredProperties() const95d0b0b252SMatt Arsenault MachineFunctionProperties getRequiredProperties() const override {
96d0b0b252SMatt Arsenault return MachineFunctionProperties()
97d0b0b252SMatt Arsenault .set(MachineFunctionProperties::Property::IsSSA);
98d0b0b252SMatt Arsenault }
99d0b0b252SMatt Arsenault
100525f9c0bSDmitry Preobrazhensky private:
101538bda0bSJoe Nash int getDPPOp(unsigned Op, bool IsShrinkable) const;
102a02aa913SJay Foad bool isShrinkable(MachineInstr &MI) const;
1033d9afa27SValery Pykhtin };
1043d9afa27SValery Pykhtin
1053d9afa27SValery Pykhtin } // end anonymous namespace
1063d9afa27SValery Pykhtin
1073d9afa27SValery Pykhtin INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
1083d9afa27SValery Pykhtin
1093d9afa27SValery Pykhtin char GCNDPPCombine::ID = 0;
1103d9afa27SValery Pykhtin
1113d9afa27SValery Pykhtin char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
1123d9afa27SValery Pykhtin
createGCNDPPCombinePass()1133d9afa27SValery Pykhtin FunctionPass *llvm::createGCNDPPCombinePass() {
1143d9afa27SValery Pykhtin return new GCNDPPCombine();
1153d9afa27SValery Pykhtin }
1163d9afa27SValery Pykhtin
isShrinkable(MachineInstr & MI) const117a02aa913SJay Foad bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
118a02aa913SJay Foad unsigned Op = MI.getOpcode();
119a02aa913SJay Foad if (!TII->isVOP3(Op)) {
120538bda0bSJoe Nash return false;
121538bda0bSJoe Nash }
122a02aa913SJay Foad if (!TII->hasVALU32BitEncoding(Op)) {
123538bda0bSJoe Nash LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n");
124538bda0bSJoe Nash return false;
125538bda0bSJoe Nash }
126b22721f0SJay Foad if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
127b22721f0SJay Foad // Give up if there are any uses of the carry-out from instructions like
128b22721f0SJay Foad // V_ADD_CO_U32. The shrunken form of the instruction would write it to vcc
129b22721f0SJay Foad // instead of to a virtual register.
130b22721f0SJay Foad if (!MRI->use_nodbg_empty(SDst->getReg()))
131b22721f0SJay Foad return false;
132b22721f0SJay Foad }
133538bda0bSJoe Nash // check if other than abs|neg modifiers are set (opsel for example)
134538bda0bSJoe Nash const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
135a02aa913SJay Foad if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
136a02aa913SJay Foad !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
137a02aa913SJay Foad !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) ||
138a02aa913SJay Foad !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0)) {
139538bda0bSJoe Nash LLVM_DEBUG(dbgs() << " Inst has non-default modifiers\n");
140538bda0bSJoe Nash return false;
141538bda0bSJoe Nash }
142538bda0bSJoe Nash return true;
143538bda0bSJoe Nash }
144538bda0bSJoe Nash
getDPPOp(unsigned Op,bool IsShrinkable) const145538bda0bSJoe Nash int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
1460483c91eSJoe Nash int DPP32 = AMDGPU::getDPPOp32(Op);
147538bda0bSJoe Nash if (IsShrinkable) {
148538bda0bSJoe Nash assert(DPP32 == -1);
1490483c91eSJoe Nash int E32 = AMDGPU::getVOPe32(Op);
150525f9c0bSDmitry Preobrazhensky DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
151525f9c0bSDmitry Preobrazhensky }
1520483c91eSJoe Nash if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)
1530483c91eSJoe Nash return DPP32;
1540483c91eSJoe Nash int DPP64 = -1;
1550483c91eSJoe Nash if (ST->hasVOP3DPP())
1560483c91eSJoe Nash DPP64 = AMDGPU::getDPPOp64(Op);
1570483c91eSJoe Nash if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)
1580483c91eSJoe Nash return DPP64;
1590483c91eSJoe Nash return -1;
1603d9afa27SValery Pykhtin }
1613d9afa27SValery Pykhtin
1623d9afa27SValery Pykhtin // tracks the register operand definition and returns:
1633d9afa27SValery Pykhtin // 1. immediate operand used to initialize the register if found
1643d9afa27SValery Pykhtin // 2. nullptr if the register operand is undef
1653d9afa27SValery Pykhtin // 3. the operand itself otherwise
getOldOpndValue(MachineOperand & OldOpnd) const1663d9afa27SValery Pykhtin MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
1673d9afa27SValery Pykhtin auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
1683d9afa27SValery Pykhtin if (!Def)
1693d9afa27SValery Pykhtin return nullptr;
1703d9afa27SValery Pykhtin
1713d9afa27SValery Pykhtin switch(Def->getOpcode()) {
1723d9afa27SValery Pykhtin default: break;
1733d9afa27SValery Pykhtin case AMDGPU::IMPLICIT_DEF:
1743d9afa27SValery Pykhtin return nullptr;
1753d9afa27SValery Pykhtin case AMDGPU::COPY:
176a8d9d507SStanislav Mekhanoshin case AMDGPU::V_MOV_B32_e32:
17731f215abSStanislav Mekhanoshin case AMDGPU::V_MOV_B64_PSEUDO:
17831f215abSStanislav Mekhanoshin case AMDGPU::V_MOV_B64_e32:
17931f215abSStanislav Mekhanoshin case AMDGPU::V_MOV_B64_e64: {
1803d9afa27SValery Pykhtin auto &Op1 = Def->getOperand(1);
1813d9afa27SValery Pykhtin if (Op1.isImm())
1823d9afa27SValery Pykhtin return &Op1;
1833d9afa27SValery Pykhtin break;
1843d9afa27SValery Pykhtin }
1853d9afa27SValery Pykhtin }
1863d9afa27SValery Pykhtin return &OldOpnd;
1873d9afa27SValery Pykhtin }
1883d9afa27SValery Pykhtin
createDPPInst(MachineInstr & OrigMI,MachineInstr & MovMI,RegSubRegPair CombOldVGPR,bool CombBCZ,bool IsShrinkable) const1893d9afa27SValery Pykhtin MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
1903d9afa27SValery Pykhtin MachineInstr &MovMI,
1917fe97f8cSValery Pykhtin RegSubRegPair CombOldVGPR,
192538bda0bSJoe Nash bool CombBCZ,
193538bda0bSJoe Nash bool IsShrinkable) const {
194a8d9d507SStanislav Mekhanoshin assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
19531f215abSStanislav Mekhanoshin MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
196a8d9d507SStanislav Mekhanoshin MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
1973d9afa27SValery Pykhtin
1980483c91eSJoe Nash bool HasVOP3DPP = ST->hasVOP3DPP();
1993d9afa27SValery Pykhtin auto OrigOp = OrigMI.getOpcode();
200538bda0bSJoe Nash auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
2013d9afa27SValery Pykhtin if (DPPOp == -1) {
2023d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
2033d9afa27SValery Pykhtin return nullptr;
2043d9afa27SValery Pykhtin }
205dc850fbfSJoe Nash int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
206dc850fbfSJoe Nash // Prior checks cover Mask with VOPC condition, but not on purpose
207dc850fbfSJoe Nash auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
208dc850fbfSJoe Nash assert(RowMaskOpnd && RowMaskOpnd->isImm());
209dc850fbfSJoe Nash auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
210dc850fbfSJoe Nash assert(BankMaskOpnd && BankMaskOpnd->isImm());
211dc850fbfSJoe Nash const bool MaskAllLanes =
212dc850fbfSJoe Nash RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
213*bc9b964fSArthur Eubanks (void)MaskAllLanes;
214dc850fbfSJoe Nash assert(MaskAllLanes ||
215dc850fbfSJoe Nash !(TII->isVOPC(DPPOp) ||
216dc850fbfSJoe Nash (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
217dc850fbfSJoe Nash "VOPC cannot form DPP unless mask is full");
2183d9afa27SValery Pykhtin
2193d9afa27SValery Pykhtin auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
22007cd19efSMatt Arsenault OrigMI.getDebugLoc(), TII->get(DPPOp))
22107cd19efSMatt Arsenault .setMIFlags(OrigMI.getFlags());
22207cd19efSMatt Arsenault
2233d9afa27SValery Pykhtin bool Fail = false;
2243d9afa27SValery Pykhtin do {
2250483c91eSJoe Nash int NumOperands = 0;
2260483c91eSJoe Nash if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
2273d9afa27SValery Pykhtin DPPInst.add(*Dst);
2280483c91eSJoe Nash ++NumOperands;
2290483c91eSJoe Nash }
2300483c91eSJoe Nash if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
2310483c91eSJoe Nash if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
2320483c91eSJoe Nash DPPInst.add(*SDst);
2330483c91eSJoe Nash ++NumOperands;
2340483c91eSJoe Nash }
2350483c91eSJoe Nash // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
2360483c91eSJoe Nash }
2373d9afa27SValery Pykhtin
2383d9afa27SValery Pykhtin const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
2393d9afa27SValery Pykhtin if (OldIdx != -1) {
2403d9afa27SValery Pykhtin assert(OldIdx == NumOperands);
241a8d9d507SStanislav Mekhanoshin assert(isOfRegClass(
242a8d9d507SStanislav Mekhanoshin CombOldVGPR,
243a8d9d507SStanislav Mekhanoshin *MRI->getRegClass(
244a8d9d507SStanislav Mekhanoshin TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
245a8d9d507SStanislav Mekhanoshin *MRI));
24619a1a739SStanislav Mekhanoshin auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
24719a1a739SStanislav Mekhanoshin DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
24819a1a739SStanislav Mekhanoshin CombOldVGPR.SubReg);
2493d9afa27SValery Pykhtin ++NumOperands;
250b28bb8ccSJoe Nash } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
251b28bb8ccSJoe Nash TII->isVOPC(OrigOpE32))) {
252b28bb8ccSJoe Nash // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand
253b28bb8ccSJoe Nash // because they write to SGPRs not VGPRs
2547fe97f8cSValery Pykhtin } else {
2557fe97f8cSValery Pykhtin // TODO: this discards MAC/FMA instructions for now, let's add it later
2567fe97f8cSValery Pykhtin LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
2577fe97f8cSValery Pykhtin " TBD\n");
2587fe97f8cSValery Pykhtin Fail = true;
2597fe97f8cSValery Pykhtin break;
2603d9afa27SValery Pykhtin }
2613d9afa27SValery Pykhtin
2623d9afa27SValery Pykhtin if (auto *Mod0 = TII->getNamedOperand(OrigMI,
2633d9afa27SValery Pykhtin AMDGPU::OpName::src0_modifiers)) {
2643d9afa27SValery Pykhtin assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
2653d9afa27SValery Pykhtin AMDGPU::OpName::src0_modifiers));
2660483c91eSJoe Nash assert(HasVOP3DPP ||
2670483c91eSJoe Nash (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
2683d9afa27SValery Pykhtin DPPInst.addImm(Mod0->getImm());
2693d9afa27SValery Pykhtin ++NumOperands;
270c6dec1d8SStanislav Mekhanoshin } else if (AMDGPU::getNamedOperandIdx(DPPOp,
271c6dec1d8SStanislav Mekhanoshin AMDGPU::OpName::src0_modifiers) != -1) {
272c6dec1d8SStanislav Mekhanoshin DPPInst.addImm(0);
273c6dec1d8SStanislav Mekhanoshin ++NumOperands;
2743d9afa27SValery Pykhtin }
2753d9afa27SValery Pykhtin auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
2763d9afa27SValery Pykhtin assert(Src0);
2773d9afa27SValery Pykhtin if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
2783d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
2793d9afa27SValery Pykhtin Fail = true;
2803d9afa27SValery Pykhtin break;
2813d9afa27SValery Pykhtin }
2823d9afa27SValery Pykhtin DPPInst.add(*Src0);
2837fe97f8cSValery Pykhtin DPPInst->getOperand(NumOperands).setIsKill(false);
2843d9afa27SValery Pykhtin ++NumOperands;
2853d9afa27SValery Pykhtin
2863d9afa27SValery Pykhtin if (auto *Mod1 = TII->getNamedOperand(OrigMI,
2873d9afa27SValery Pykhtin AMDGPU::OpName::src1_modifiers)) {
2883d9afa27SValery Pykhtin assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
2893d9afa27SValery Pykhtin AMDGPU::OpName::src1_modifiers));
2900483c91eSJoe Nash assert(HasVOP3DPP ||
2910483c91eSJoe Nash (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
2923d9afa27SValery Pykhtin DPPInst.addImm(Mod1->getImm());
2933d9afa27SValery Pykhtin ++NumOperands;
294c6dec1d8SStanislav Mekhanoshin } else if (AMDGPU::getNamedOperandIdx(DPPOp,
295c6dec1d8SStanislav Mekhanoshin AMDGPU::OpName::src1_modifiers) != -1) {
296c6dec1d8SStanislav Mekhanoshin DPPInst.addImm(0);
297c6dec1d8SStanislav Mekhanoshin ++NumOperands;
2983d9afa27SValery Pykhtin }
2990483c91eSJoe Nash auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
3000483c91eSJoe Nash if (Src1) {
3013d9afa27SValery Pykhtin if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
3023d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
3033d9afa27SValery Pykhtin Fail = true;
3043d9afa27SValery Pykhtin break;
3053d9afa27SValery Pykhtin }
3063d9afa27SValery Pykhtin DPPInst.add(*Src1);
3073d9afa27SValery Pykhtin ++NumOperands;
3083d9afa27SValery Pykhtin }
3090483c91eSJoe Nash if (auto *Mod2 =
3100483c91eSJoe Nash TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
3110483c91eSJoe Nash assert(NumOperands ==
3120483c91eSJoe Nash AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));
3130483c91eSJoe Nash assert(HasVOP3DPP ||
3140483c91eSJoe Nash (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
3150483c91eSJoe Nash DPPInst.addImm(Mod2->getImm());
3160483c91eSJoe Nash ++NumOperands;
3170483c91eSJoe Nash }
3180483c91eSJoe Nash auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
3190483c91eSJoe Nash if (Src2) {
320c9c18e5aSvpykhtin if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
321c9c18e5aSvpykhtin !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
3223d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
3233d9afa27SValery Pykhtin Fail = true;
3243d9afa27SValery Pykhtin break;
3253d9afa27SValery Pykhtin }
3263d9afa27SValery Pykhtin DPPInst.add(*Src2);
3270483c91eSJoe Nash ++NumOperands;
3283d9afa27SValery Pykhtin }
3290483c91eSJoe Nash if (HasVOP3DPP) {
3300483c91eSJoe Nash auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
3310483c91eSJoe Nash if (ClampOpr &&
3320483c91eSJoe Nash AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) {
3330483c91eSJoe Nash DPPInst.addImm(ClampOpr->getImm());
3340483c91eSJoe Nash }
3350483c91eSJoe Nash auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
3360483c91eSJoe Nash if (VdstInOpr &&
3370483c91eSJoe Nash AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) {
3380483c91eSJoe Nash DPPInst.add(*VdstInOpr);
3390483c91eSJoe Nash }
3400483c91eSJoe Nash auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
3410483c91eSJoe Nash if (OmodOpr &&
3420483c91eSJoe Nash AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) {
3430483c91eSJoe Nash DPPInst.addImm(OmodOpr->getImm());
3440483c91eSJoe Nash }
3450483c91eSJoe Nash // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to
3460483c91eSJoe Nash // all 1.
3470483c91eSJoe Nash if (auto *OpSelOpr =
3480483c91eSJoe Nash TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
3490483c91eSJoe Nash auto OpSel = OpSelOpr->getImm();
3500483c91eSJoe Nash if (OpSel != 0) {
3510483c91eSJoe Nash LLVM_DEBUG(dbgs() << " failed: op_sel must be zero\n");
3520483c91eSJoe Nash Fail = true;
3530483c91eSJoe Nash break;
3540483c91eSJoe Nash }
3550483c91eSJoe Nash if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1)
3560483c91eSJoe Nash DPPInst.addImm(OpSel);
3570483c91eSJoe Nash }
3580483c91eSJoe Nash if (auto *OpSelHiOpr =
3590483c91eSJoe Nash TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
3600483c91eSJoe Nash auto OpSelHi = OpSelHiOpr->getImm();
3610483c91eSJoe Nash // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check
3620483c91eSJoe Nash // the bitmask for 3 op_sel_hi bits set
3630483c91eSJoe Nash assert(Src2 && "Expected vop3p with 3 operands");
3640483c91eSJoe Nash if (OpSelHi != 7) {
3650483c91eSJoe Nash LLVM_DEBUG(dbgs() << " failed: op_sel_hi must be all set to one\n");
3660483c91eSJoe Nash Fail = true;
3670483c91eSJoe Nash break;
3680483c91eSJoe Nash }
3690483c91eSJoe Nash if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1)
3700483c91eSJoe Nash DPPInst.addImm(OpSelHi);
3710483c91eSJoe Nash }
3720483c91eSJoe Nash auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
3730483c91eSJoe Nash if (NegOpr &&
3740483c91eSJoe Nash AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) {
3750483c91eSJoe Nash DPPInst.addImm(NegOpr->getImm());
3760483c91eSJoe Nash }
3770483c91eSJoe Nash auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
3780483c91eSJoe Nash if (NegHiOpr &&
3790483c91eSJoe Nash AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) {
3800483c91eSJoe Nash DPPInst.addImm(NegHiOpr->getImm());
3810483c91eSJoe Nash }
3820483c91eSJoe Nash }
3833d9afa27SValery Pykhtin DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
3843d9afa27SValery Pykhtin DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
3853d9afa27SValery Pykhtin DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
3867fe97f8cSValery Pykhtin DPPInst.addImm(CombBCZ ? 1 : 0);
3873d9afa27SValery Pykhtin } while (false);
3883d9afa27SValery Pykhtin
3893d9afa27SValery Pykhtin if (Fail) {
3903d9afa27SValery Pykhtin DPPInst.getInstr()->eraseFromParent();
3913d9afa27SValery Pykhtin return nullptr;
3923d9afa27SValery Pykhtin }
3933d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
3943d9afa27SValery Pykhtin return DPPInst.getInstr();
3953d9afa27SValery Pykhtin }
3963d9afa27SValery Pykhtin
isIdentityValue(unsigned OrigMIOp,MachineOperand * OldOpnd)3977fe97f8cSValery Pykhtin static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
3987fe97f8cSValery Pykhtin assert(OldOpnd->isImm());
3997fe97f8cSValery Pykhtin switch (OrigMIOp) {
4003d9afa27SValery Pykhtin default: break;
4017fe97f8cSValery Pykhtin case AMDGPU::V_ADD_U32_e32:
4027e0c10b5SJay Foad case AMDGPU::V_ADD_U32_e64:
40379f67caeSMatt Arsenault case AMDGPU::V_ADD_CO_U32_e32:
40479f67caeSMatt Arsenault case AMDGPU::V_ADD_CO_U32_e64:
4057fe97f8cSValery Pykhtin case AMDGPU::V_OR_B32_e32:
4067e0c10b5SJay Foad case AMDGPU::V_OR_B32_e64:
4077fe97f8cSValery Pykhtin case AMDGPU::V_SUBREV_U32_e32:
4087e0c10b5SJay Foad case AMDGPU::V_SUBREV_U32_e64:
40979f67caeSMatt Arsenault case AMDGPU::V_SUBREV_CO_U32_e32:
41079f67caeSMatt Arsenault case AMDGPU::V_SUBREV_CO_U32_e64:
4113d9afa27SValery Pykhtin case AMDGPU::V_MAX_U32_e32:
4127e0c10b5SJay Foad case AMDGPU::V_MAX_U32_e64:
4137fe97f8cSValery Pykhtin case AMDGPU::V_XOR_B32_e32:
4147e0c10b5SJay Foad case AMDGPU::V_XOR_B32_e64:
4157fe97f8cSValery Pykhtin if (OldOpnd->getImm() == 0)
4167fe97f8cSValery Pykhtin return true;
4173d9afa27SValery Pykhtin break;
4187fe97f8cSValery Pykhtin case AMDGPU::V_AND_B32_e32:
4197e0c10b5SJay Foad case AMDGPU::V_AND_B32_e64:
4207fe97f8cSValery Pykhtin case AMDGPU::V_MIN_U32_e32:
4217e0c10b5SJay Foad case AMDGPU::V_MIN_U32_e64:
4227fe97f8cSValery Pykhtin if (static_cast<uint32_t>(OldOpnd->getImm()) ==
4237fe97f8cSValery Pykhtin std::numeric_limits<uint32_t>::max())
4247fe97f8cSValery Pykhtin return true;
4253d9afa27SValery Pykhtin break;
4263d9afa27SValery Pykhtin case AMDGPU::V_MIN_I32_e32:
4277e0c10b5SJay Foad case AMDGPU::V_MIN_I32_e64:
4287fe97f8cSValery Pykhtin if (static_cast<int32_t>(OldOpnd->getImm()) ==
4297fe97f8cSValery Pykhtin std::numeric_limits<int32_t>::max())
4307fe97f8cSValery Pykhtin return true;
4313d9afa27SValery Pykhtin break;
4327fe97f8cSValery Pykhtin case AMDGPU::V_MAX_I32_e32:
4337e0c10b5SJay Foad case AMDGPU::V_MAX_I32_e64:
4347fe97f8cSValery Pykhtin if (static_cast<int32_t>(OldOpnd->getImm()) ==
4357fe97f8cSValery Pykhtin std::numeric_limits<int32_t>::min())
4367fe97f8cSValery Pykhtin return true;
4377fe97f8cSValery Pykhtin break;
4383d9afa27SValery Pykhtin case AMDGPU::V_MUL_I32_I24_e32:
4397e0c10b5SJay Foad case AMDGPU::V_MUL_I32_I24_e64:
4403d9afa27SValery Pykhtin case AMDGPU::V_MUL_U32_U24_e32:
4417e0c10b5SJay Foad case AMDGPU::V_MUL_U32_U24_e64:
4427fe97f8cSValery Pykhtin if (OldOpnd->getImm() == 1)
4437fe97f8cSValery Pykhtin return true;
4443d9afa27SValery Pykhtin break;
4453d9afa27SValery Pykhtin }
4467fe97f8cSValery Pykhtin return false;
4473d9afa27SValery Pykhtin }
4483d9afa27SValery Pykhtin
createDPPInst(MachineInstr & OrigMI,MachineInstr & MovMI,RegSubRegPair CombOldVGPR,MachineOperand * OldOpndValue,bool CombBCZ,bool IsShrinkable) const449538bda0bSJoe Nash MachineInstr *GCNDPPCombine::createDPPInst(
450538bda0bSJoe Nash MachineInstr &OrigMI, MachineInstr &MovMI, RegSubRegPair CombOldVGPR,
451538bda0bSJoe Nash MachineOperand *OldOpndValue, bool CombBCZ, bool IsShrinkable) const {
4527fe97f8cSValery Pykhtin assert(CombOldVGPR.Reg);
4537fe97f8cSValery Pykhtin if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
4547fe97f8cSValery Pykhtin auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
4557fe97f8cSValery Pykhtin if (!Src1 || !Src1->isReg()) {
4567fe97f8cSValery Pykhtin LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
4577fe97f8cSValery Pykhtin return nullptr;
4587fe97f8cSValery Pykhtin }
4597fe97f8cSValery Pykhtin if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
4600cd50b2aSJay Foad LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
4617fe97f8cSValery Pykhtin return nullptr;
4627fe97f8cSValery Pykhtin }
4637fe97f8cSValery Pykhtin CombOldVGPR = getRegSubRegPair(*Src1);
464a8d9d507SStanislav Mekhanoshin auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
465a8d9d507SStanislav Mekhanoshin const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
466a8d9d507SStanislav Mekhanoshin if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
467a8d9d507SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " failed: src1 has wrong register class\n");
4683d9afa27SValery Pykhtin return nullptr;
4693d9afa27SValery Pykhtin }
4703d9afa27SValery Pykhtin }
471538bda0bSJoe Nash return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
4723d9afa27SValery Pykhtin }
4733d9afa27SValery Pykhtin
4743d9afa27SValery Pykhtin // returns true if MI doesn't have OpndName immediate operand or the
4753d9afa27SValery Pykhtin // operand has Value
hasNoImmOrEqual(MachineInstr & MI,unsigned OpndName,int64_t Value,int64_t Mask) const4763d9afa27SValery Pykhtin bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
4773d9afa27SValery Pykhtin int64_t Value, int64_t Mask) const {
4783d9afa27SValery Pykhtin auto *Imm = TII->getNamedOperand(MI, OpndName);
4793d9afa27SValery Pykhtin if (!Imm)
4803d9afa27SValery Pykhtin return true;
4813d9afa27SValery Pykhtin
4823d9afa27SValery Pykhtin assert(Imm->isImm());
4833d9afa27SValery Pykhtin return (Imm->getImm() & Mask) == Value;
4843d9afa27SValery Pykhtin }
4853d9afa27SValery Pykhtin
combineDPPMov(MachineInstr & MovMI) const4863d9afa27SValery Pykhtin bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
487a8d9d507SStanislav Mekhanoshin assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
48831f215abSStanislav Mekhanoshin MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
489a8d9d507SStanislav Mekhanoshin MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
4907fe97f8cSValery Pykhtin LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
4917fe97f8cSValery Pykhtin
4927fe97f8cSValery Pykhtin auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
4937fe97f8cSValery Pykhtin assert(DstOpnd && DstOpnd->isReg());
4947fe97f8cSValery Pykhtin auto DPPMovReg = DstOpnd->getReg();
4953d99310cSStanislav Mekhanoshin if (DPPMovReg.isPhysical()) {
4963d99310cSStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n");
4973d99310cSStanislav Mekhanoshin return false;
4983d99310cSStanislav Mekhanoshin }
49927ec195fSJay Foad if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
5007fe97f8cSValery Pykhtin LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
5017fe97f8cSValery Pykhtin " for all uses\n");
5027fe97f8cSValery Pykhtin return false;
5037fe97f8cSValery Pykhtin }
5047fe97f8cSValery Pykhtin
50531f215abSStanislav Mekhanoshin if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
50631f215abSStanislav Mekhanoshin MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
507a8d9d507SStanislav Mekhanoshin auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
508a8d9d507SStanislav Mekhanoshin assert(DppCtrl && DppCtrl->isImm());
509a8d9d507SStanislav Mekhanoshin if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
510a8d9d507SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
511a8d9d507SStanislav Mekhanoshin " control value\n");
512a8d9d507SStanislav Mekhanoshin // Let it split, then control may become legal.
513a8d9d507SStanislav Mekhanoshin return false;
514a8d9d507SStanislav Mekhanoshin }
515a8d9d507SStanislav Mekhanoshin }
516a8d9d507SStanislav Mekhanoshin
5177fe97f8cSValery Pykhtin auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
5187fe97f8cSValery Pykhtin assert(RowMaskOpnd && RowMaskOpnd->isImm());
5197fe97f8cSValery Pykhtin auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
5207fe97f8cSValery Pykhtin assert(BankMaskOpnd && BankMaskOpnd->isImm());
5217fe97f8cSValery Pykhtin const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
5227fe97f8cSValery Pykhtin BankMaskOpnd->getImm() == 0xF;
5237fe97f8cSValery Pykhtin
5243d9afa27SValery Pykhtin auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
5253d9afa27SValery Pykhtin assert(BCZOpnd && BCZOpnd->isImm());
5267fe97f8cSValery Pykhtin bool BoundCtrlZero = BCZOpnd->getImm();
5273d9afa27SValery Pykhtin
5283d9afa27SValery Pykhtin auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
529edcd5815SStanislav Mekhanoshin auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
5303d9afa27SValery Pykhtin assert(OldOpnd && OldOpnd->isReg());
531edcd5815SStanislav Mekhanoshin assert(SrcOpnd && SrcOpnd->isReg());
532edcd5815SStanislav Mekhanoshin if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
533edcd5815SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n");
534edcd5815SStanislav Mekhanoshin return false;
535edcd5815SStanislav Mekhanoshin }
5367fe97f8cSValery Pykhtin
5377fe97f8cSValery Pykhtin auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
5387fe97f8cSValery Pykhtin // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
5397fe97f8cSValery Pykhtin // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
5407fe97f8cSValery Pykhtin // but the third option is used to distinguish undef from non-immediate
5417fe97f8cSValery Pykhtin // to reuse IMPLICIT_DEF instruction later
5423d9afa27SValery Pykhtin assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
5437fe97f8cSValery Pykhtin
5447fe97f8cSValery Pykhtin bool CombBCZ = false;
5457fe97f8cSValery Pykhtin
5467fe97f8cSValery Pykhtin if (MaskAllLanes && BoundCtrlZero) { // [1]
5477fe97f8cSValery Pykhtin CombBCZ = true;
548b7a45954SValery Pykhtin } else {
5497fe97f8cSValery Pykhtin if (!OldOpndValue || !OldOpndValue->isImm()) {
5507fe97f8cSValery Pykhtin LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
5511e0b5c71SValery Pykhtin return false;
5521e0b5c71SValery Pykhtin }
5537fe97f8cSValery Pykhtin
5547fe97f8cSValery Pykhtin if (OldOpndValue->getImm() == 0) {
5557fe97f8cSValery Pykhtin if (MaskAllLanes) {
5567fe97f8cSValery Pykhtin assert(!BoundCtrlZero); // by check [1]
5577fe97f8cSValery Pykhtin CombBCZ = true;
5587fe97f8cSValery Pykhtin }
5597fe97f8cSValery Pykhtin } else if (BoundCtrlZero) {
5607fe97f8cSValery Pykhtin assert(!MaskAllLanes); // by check [1]
5617fe97f8cSValery Pykhtin LLVM_DEBUG(dbgs() <<
5627fe97f8cSValery Pykhtin " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
5637fe97f8cSValery Pykhtin return false;
5643d9afa27SValery Pykhtin }
5653d9afa27SValery Pykhtin }
5663d9afa27SValery Pykhtin
5673d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " old=";
5683d9afa27SValery Pykhtin if (!OldOpndValue)
5693d9afa27SValery Pykhtin dbgs() << "undef";
5703d9afa27SValery Pykhtin else
5717fe97f8cSValery Pykhtin dbgs() << *OldOpndValue;
5727fe97f8cSValery Pykhtin dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
5733d9afa27SValery Pykhtin
5747fe97f8cSValery Pykhtin SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
5756e8599d9SStanislav Mekhanoshin DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
5767fe97f8cSValery Pykhtin auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
5777fe97f8cSValery Pykhtin // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
5787fe97f8cSValery Pykhtin if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
579a8d9d507SStanislav Mekhanoshin const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
5807fe97f8cSValery Pykhtin CombOldVGPR = RegSubRegPair(
581a8d9d507SStanislav Mekhanoshin MRI->createVirtualRegister(RC));
5823d9afa27SValery Pykhtin auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
5837fe97f8cSValery Pykhtin TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
5843d9afa27SValery Pykhtin DPPMIs.push_back(UndefInst.getInstr());
5853d9afa27SValery Pykhtin }
5863d9afa27SValery Pykhtin
5873d9afa27SValery Pykhtin OrigMIs.push_back(&MovMI);
5883d9afa27SValery Pykhtin bool Rollback = true;
5896e8599d9SStanislav Mekhanoshin SmallVector<MachineOperand*, 16> Uses;
5906e8599d9SStanislav Mekhanoshin
5917fe97f8cSValery Pykhtin for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
5926e8599d9SStanislav Mekhanoshin Uses.push_back(&Use);
5936e8599d9SStanislav Mekhanoshin }
5946e8599d9SStanislav Mekhanoshin
5956e8599d9SStanislav Mekhanoshin while (!Uses.empty()) {
5966e8599d9SStanislav Mekhanoshin MachineOperand *Use = Uses.pop_back_val();
5973d9afa27SValery Pykhtin Rollback = true;
5983d9afa27SValery Pykhtin
5996e8599d9SStanislav Mekhanoshin auto &OrigMI = *Use->getParent();
6007fe97f8cSValery Pykhtin LLVM_DEBUG(dbgs() << " try: " << OrigMI);
6017fe97f8cSValery Pykhtin
6023d9afa27SValery Pykhtin auto OrigOp = OrigMI.getOpcode();
6036e8599d9SStanislav Mekhanoshin if (OrigOp == AMDGPU::REG_SEQUENCE) {
6046e8599d9SStanislav Mekhanoshin Register FwdReg = OrigMI.getOperand(0).getReg();
6056e8599d9SStanislav Mekhanoshin unsigned FwdSubReg = 0;
6066e8599d9SStanislav Mekhanoshin
6076e8599d9SStanislav Mekhanoshin if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
6086e8599d9SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
6096e8599d9SStanislav Mekhanoshin " for all uses\n");
6106e8599d9SStanislav Mekhanoshin break;
6116e8599d9SStanislav Mekhanoshin }
6126e8599d9SStanislav Mekhanoshin
6136e8599d9SStanislav Mekhanoshin unsigned OpNo, E = OrigMI.getNumOperands();
6146e8599d9SStanislav Mekhanoshin for (OpNo = 1; OpNo < E; OpNo += 2) {
6156e8599d9SStanislav Mekhanoshin if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
6166e8599d9SStanislav Mekhanoshin FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
6176e8599d9SStanislav Mekhanoshin break;
6186e8599d9SStanislav Mekhanoshin }
6196e8599d9SStanislav Mekhanoshin }
6206e8599d9SStanislav Mekhanoshin
6216e8599d9SStanislav Mekhanoshin if (!FwdSubReg)
6226e8599d9SStanislav Mekhanoshin break;
6236e8599d9SStanislav Mekhanoshin
6246e8599d9SStanislav Mekhanoshin for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
6256e8599d9SStanislav Mekhanoshin if (Op.getSubReg() == FwdSubReg)
6266e8599d9SStanislav Mekhanoshin Uses.push_back(&Op);
6276e8599d9SStanislav Mekhanoshin }
6286e8599d9SStanislav Mekhanoshin RegSeqWithOpNos[&OrigMI].push_back(OpNo);
6296e8599d9SStanislav Mekhanoshin continue;
6306e8599d9SStanislav Mekhanoshin }
6316e8599d9SStanislav Mekhanoshin
632a02aa913SJay Foad bool IsShrinkable = isShrinkable(OrigMI);
6330483c91eSJoe Nash if (!(IsShrinkable ||
6340483c91eSJoe Nash ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||
6350483c91eSJoe Nash TII->isVOP3(OrigOp)) &&
6360483c91eSJoe Nash ST->hasVOP3DPP()) ||
6370483c91eSJoe Nash TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
6380483c91eSJoe Nash LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3/3P/C\n");
6390483c91eSJoe Nash break;
6400483c91eSJoe Nash }
6410483c91eSJoe Nash if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) {
6420483c91eSJoe Nash LLVM_DEBUG(dbgs() << " failed: can't combine v_cmpx\n");
6433d9afa27SValery Pykhtin break;
6443d9afa27SValery Pykhtin }
6453d9afa27SValery Pykhtin
646bb69ca82Svpykhtin auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
647bb69ca82Svpykhtin auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
648bb69ca82Svpykhtin if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
649bb69ca82Svpykhtin LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
650bb69ca82Svpykhtin break;
651bb69ca82Svpykhtin }
652bb69ca82Svpykhtin
6530483c91eSJoe Nash auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
654bb69ca82Svpykhtin assert(Src0 && "Src1 without Src0?");
6550483c91eSJoe Nash if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
6560483c91eSJoe Nash (Src2 && Src2->isIdenticalTo(*Src0)))) ||
6570483c91eSJoe Nash (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
6580483c91eSJoe Nash (Src2 && Src2->isIdenticalTo(*Src1))))) {
659bb69ca82Svpykhtin LLVM_DEBUG(
660bb69ca82Svpykhtin dbgs()
661bb69ca82Svpykhtin << " " << OrigMI
662bb69ca82Svpykhtin << " failed: DPP register is used more than once per instruction\n");
663bb69ca82Svpykhtin break;
664bb69ca82Svpykhtin }
665bb69ca82Svpykhtin
6663d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
667bb69ca82Svpykhtin if (Use == Src0) {
6687fe97f8cSValery Pykhtin if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
669538bda0bSJoe Nash OldOpndValue, CombBCZ, IsShrinkable)) {
6703d9afa27SValery Pykhtin DPPMIs.push_back(DPPInst);
6713d9afa27SValery Pykhtin Rollback = false;
6723d9afa27SValery Pykhtin }
673bb69ca82Svpykhtin } else {
674bb69ca82Svpykhtin assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
6753d9afa27SValery Pykhtin auto *BB = OrigMI.getParent();
6763d9afa27SValery Pykhtin auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
6773d9afa27SValery Pykhtin BB->insert(OrigMI, NewMI);
6783d9afa27SValery Pykhtin if (TII->commuteInstruction(*NewMI)) {
6793d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
680538bda0bSJoe Nash if (auto *DPPInst =
681538bda0bSJoe Nash createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
682538bda0bSJoe Nash IsShrinkable)) {
6833d9afa27SValery Pykhtin DPPMIs.push_back(DPPInst);
6843d9afa27SValery Pykhtin Rollback = false;
6853d9afa27SValery Pykhtin }
6863d9afa27SValery Pykhtin } else
6873d9afa27SValery Pykhtin LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
6883d9afa27SValery Pykhtin NewMI->eraseFromParent();
689bb69ca82Svpykhtin }
6903d9afa27SValery Pykhtin if (Rollback)
6913d9afa27SValery Pykhtin break;
6923d9afa27SValery Pykhtin OrigMIs.push_back(&OrigMI);
6933d9afa27SValery Pykhtin }
6943d9afa27SValery Pykhtin
6956e8599d9SStanislav Mekhanoshin Rollback |= !Uses.empty();
6966e8599d9SStanislav Mekhanoshin
6973d9afa27SValery Pykhtin for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
6983d9afa27SValery Pykhtin MI->eraseFromParent();
6993d9afa27SValery Pykhtin
7006e8599d9SStanislav Mekhanoshin if (!Rollback) {
7016e8599d9SStanislav Mekhanoshin for (auto &S : RegSeqWithOpNos) {
7026e8599d9SStanislav Mekhanoshin if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
7036e8599d9SStanislav Mekhanoshin S.first->eraseFromParent();
7046e8599d9SStanislav Mekhanoshin continue;
7056e8599d9SStanislav Mekhanoshin }
7066e8599d9SStanislav Mekhanoshin while (!S.second.empty())
7076e8599d9SStanislav Mekhanoshin S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
7086e8599d9SStanislav Mekhanoshin }
7096e8599d9SStanislav Mekhanoshin }
7106e8599d9SStanislav Mekhanoshin
7113d9afa27SValery Pykhtin return !Rollback;
7123d9afa27SValery Pykhtin }
7133d9afa27SValery Pykhtin
runOnMachineFunction(MachineFunction & MF)7143d9afa27SValery Pykhtin bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
715538bda0bSJoe Nash ST = &MF.getSubtarget<GCNSubtarget>();
716538bda0bSJoe Nash if (!ST->hasDPP() || skipFunction(MF.getFunction()))
7173d9afa27SValery Pykhtin return false;
7183d9afa27SValery Pykhtin
7193d9afa27SValery Pykhtin MRI = &MF.getRegInfo();
720538bda0bSJoe Nash TII = ST->getInstrInfo();
7213d9afa27SValery Pykhtin
7223d9afa27SValery Pykhtin bool Changed = false;
7233d9afa27SValery Pykhtin for (auto &MBB : MF) {
7244bef0304SKazu Hirata for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
7253d9afa27SValery Pykhtin if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
7263d9afa27SValery Pykhtin Changed = true;
7273d9afa27SValery Pykhtin ++NumDPPMovsCombined;
72831f215abSStanislav Mekhanoshin } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
72931f215abSStanislav Mekhanoshin MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
730538bda0bSJoe Nash if (ST->has64BitDPP() && combineDPPMov(MI)) {
731a8d9d507SStanislav Mekhanoshin Changed = true;
732a8d9d507SStanislav Mekhanoshin ++NumDPPMovsCombined;
733a8d9d507SStanislav Mekhanoshin } else {
7341184c27fSStanislav Mekhanoshin auto Split = TII->expandMovDPP64(MI);
7351184c27fSStanislav Mekhanoshin for (auto M : { Split.first, Split.second }) {
736a8d9d507SStanislav Mekhanoshin if (M && combineDPPMov(*M))
7371184c27fSStanislav Mekhanoshin ++NumDPPMovsCombined;
7381184c27fSStanislav Mekhanoshin }
7391184c27fSStanislav Mekhanoshin Changed = true;
7403d9afa27SValery Pykhtin }
7413d9afa27SValery Pykhtin }
7423d9afa27SValery Pykhtin }
743a8d9d507SStanislav Mekhanoshin }
7443d9afa27SValery Pykhtin return Changed;
7453d9afa27SValery Pykhtin }
746