13d9afa27SValery Pykhtin //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
23d9afa27SValery Pykhtin //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63d9afa27SValery Pykhtin //
73d9afa27SValery Pykhtin //===----------------------------------------------------------------------===//
// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov, the
// whole sequence is reverted.
113d9afa27SValery Pykhtin //
123d9afa27SValery Pykhtin // $old = ...
133d9afa27SValery Pykhtin // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
147fe97f8cSValery Pykhtin //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
157fe97f8cSValery Pykhtin // $res = VALU $dpp_value [, src1]
163d9afa27SValery Pykhtin //
173d9afa27SValery Pykhtin // to
183d9afa27SValery Pykhtin //
197fe97f8cSValery Pykhtin // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
207fe97f8cSValery Pykhtin //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
213d9afa27SValery Pykhtin //
223d9afa27SValery Pykhtin // Combining rules :
233d9afa27SValery Pykhtin //
247fe97f8cSValery Pykhtin // if $row_mask and $bank_mask are fully enabled (0xF) and
257fe97f8cSValery Pykhtin //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
267fe97f8cSValery Pykhtin // -> $combined_old = undef,
277fe97f8cSValery Pykhtin //    $combined_bound_ctrl = DPP_BOUND_ZERO
283d9afa27SValery Pykhtin //
297fe97f8cSValery Pykhtin // if the VALU op is binary and
307fe97f8cSValery Pykhtin //    $bound_ctrl==DPP_BOUND_OFF and
317fe97f8cSValery Pykhtin //    $old==identity value (immediate) for the VALU op
327fe97f8cSValery Pykhtin // -> $combined_old = src1,
337fe97f8cSValery Pykhtin //    $combined_bound_ctrl = DPP_BOUND_OFF
343d9afa27SValery Pykhtin //
350cd50b2aSJay Foad // Otherwise cancel.
363d9afa27SValery Pykhtin //
370cd50b2aSJay Foad // The mov_dpp instruction should reside in the same BB as all its uses
383d9afa27SValery Pykhtin //===----------------------------------------------------------------------===//
393d9afa27SValery Pykhtin 
403d9afa27SValery Pykhtin #include "AMDGPU.h"
41560d7e04Sdfukalov #include "GCNSubtarget.h"
42560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
433d9afa27SValery Pykhtin #include "llvm/ADT/Statistic.h"
443d9afa27SValery Pykhtin #include "llvm/CodeGen/MachineFunctionPass.h"
453d9afa27SValery Pykhtin 
463d9afa27SValery Pykhtin using namespace llvm;
473d9afa27SValery Pykhtin 
483d9afa27SValery Pykhtin #define DEBUG_TYPE "gcn-dpp-combine"
493d9afa27SValery Pykhtin 
503d9afa27SValery Pykhtin STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
513d9afa27SValery Pykhtin 
523d9afa27SValery Pykhtin namespace {
533d9afa27SValery Pykhtin 
543d9afa27SValery Pykhtin class GCNDPPCombine : public MachineFunctionPass {
553d9afa27SValery Pykhtin   MachineRegisterInfo *MRI;
563d9afa27SValery Pykhtin   const SIInstrInfo *TII;
57538bda0bSJoe Nash   const GCNSubtarget *ST;
583d9afa27SValery Pykhtin 
593d9afa27SValery Pykhtin   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
603d9afa27SValery Pykhtin 
613d9afa27SValery Pykhtin   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
623d9afa27SValery Pykhtin 
63538bda0bSJoe Nash   MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
647fe97f8cSValery Pykhtin                               RegSubRegPair CombOldVGPR,
65538bda0bSJoe Nash                               MachineOperand *OldOpnd, bool CombBCZ,
66538bda0bSJoe Nash                               bool IsShrinkable) const;
673d9afa27SValery Pykhtin 
68538bda0bSJoe Nash   MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
69538bda0bSJoe Nash                               RegSubRegPair CombOldVGPR, bool CombBCZ,
70538bda0bSJoe Nash                               bool IsShrinkable) const;
713d9afa27SValery Pykhtin 
723d9afa27SValery Pykhtin   bool hasNoImmOrEqual(MachineInstr &MI,
733d9afa27SValery Pykhtin                        unsigned OpndName,
743d9afa27SValery Pykhtin                        int64_t Value,
753d9afa27SValery Pykhtin                        int64_t Mask = -1) const;
763d9afa27SValery Pykhtin 
773d9afa27SValery Pykhtin   bool combineDPPMov(MachineInstr &MI) const;
783d9afa27SValery Pykhtin 
793d9afa27SValery Pykhtin public:
803d9afa27SValery Pykhtin   static char ID;
813d9afa27SValery Pykhtin 
GCNDPPCombine()823d9afa27SValery Pykhtin   GCNDPPCombine() : MachineFunctionPass(ID) {
833d9afa27SValery Pykhtin     initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
843d9afa27SValery Pykhtin   }
853d9afa27SValery Pykhtin 
863d9afa27SValery Pykhtin   bool runOnMachineFunction(MachineFunction &MF) override;
873d9afa27SValery Pykhtin 
getPassName() const883d9afa27SValery Pykhtin   StringRef getPassName() const override { return "GCN DPP Combine"; }
893d9afa27SValery Pykhtin 
getAnalysisUsage(AnalysisUsage & AU) const903d9afa27SValery Pykhtin   void getAnalysisUsage(AnalysisUsage &AU) const override {
913d9afa27SValery Pykhtin     AU.setPreservesCFG();
923d9afa27SValery Pykhtin     MachineFunctionPass::getAnalysisUsage(AU);
933d9afa27SValery Pykhtin   }
94525f9c0bSDmitry Preobrazhensky 
getRequiredProperties() const95d0b0b252SMatt Arsenault   MachineFunctionProperties getRequiredProperties() const override {
96d0b0b252SMatt Arsenault     return MachineFunctionProperties()
97d0b0b252SMatt Arsenault       .set(MachineFunctionProperties::Property::IsSSA);
98d0b0b252SMatt Arsenault   }
99d0b0b252SMatt Arsenault 
100525f9c0bSDmitry Preobrazhensky private:
101538bda0bSJoe Nash   int getDPPOp(unsigned Op, bool IsShrinkable) const;
102a02aa913SJay Foad   bool isShrinkable(MachineInstr &MI) const;
1033d9afa27SValery Pykhtin };
1043d9afa27SValery Pykhtin 
1053d9afa27SValery Pykhtin } // end anonymous namespace
1063d9afa27SValery Pykhtin 
1073d9afa27SValery Pykhtin INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
1083d9afa27SValery Pykhtin 
1093d9afa27SValery Pykhtin char GCNDPPCombine::ID = 0;
1103d9afa27SValery Pykhtin 
1113d9afa27SValery Pykhtin char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
1123d9afa27SValery Pykhtin 
createGCNDPPCombinePass()1133d9afa27SValery Pykhtin FunctionPass *llvm::createGCNDPPCombinePass() {
1143d9afa27SValery Pykhtin   return new GCNDPPCombine();
1153d9afa27SValery Pykhtin }
1163d9afa27SValery Pykhtin 
isShrinkable(MachineInstr & MI) const117a02aa913SJay Foad bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
118a02aa913SJay Foad   unsigned Op = MI.getOpcode();
119a02aa913SJay Foad   if (!TII->isVOP3(Op)) {
120538bda0bSJoe Nash     return false;
121538bda0bSJoe Nash   }
122a02aa913SJay Foad   if (!TII->hasVALU32BitEncoding(Op)) {
123538bda0bSJoe Nash     LLVM_DEBUG(dbgs() << "  Inst hasn't e32 equivalent\n");
124538bda0bSJoe Nash     return false;
125538bda0bSJoe Nash   }
126b22721f0SJay Foad   if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
127b22721f0SJay Foad     // Give up if there are any uses of the carry-out from instructions like
128b22721f0SJay Foad     // V_ADD_CO_U32. The shrunken form of the instruction would write it to vcc
129b22721f0SJay Foad     // instead of to a virtual register.
130b22721f0SJay Foad     if (!MRI->use_nodbg_empty(SDst->getReg()))
131b22721f0SJay Foad       return false;
132b22721f0SJay Foad   }
133538bda0bSJoe Nash   // check if other than abs|neg modifiers are set (opsel for example)
134538bda0bSJoe Nash   const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
135a02aa913SJay Foad   if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
136a02aa913SJay Foad       !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
137a02aa913SJay Foad       !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) ||
138a02aa913SJay Foad       !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0)) {
139538bda0bSJoe Nash     LLVM_DEBUG(dbgs() << "  Inst has non-default modifiers\n");
140538bda0bSJoe Nash     return false;
141538bda0bSJoe Nash   }
142538bda0bSJoe Nash   return true;
143538bda0bSJoe Nash }
144538bda0bSJoe Nash 
getDPPOp(unsigned Op,bool IsShrinkable) const145538bda0bSJoe Nash int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
1460483c91eSJoe Nash   int DPP32 = AMDGPU::getDPPOp32(Op);
147538bda0bSJoe Nash   if (IsShrinkable) {
148538bda0bSJoe Nash     assert(DPP32 == -1);
1490483c91eSJoe Nash     int E32 = AMDGPU::getVOPe32(Op);
150525f9c0bSDmitry Preobrazhensky     DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
151525f9c0bSDmitry Preobrazhensky   }
1520483c91eSJoe Nash   if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)
1530483c91eSJoe Nash     return DPP32;
1540483c91eSJoe Nash   int DPP64 = -1;
1550483c91eSJoe Nash   if (ST->hasVOP3DPP())
1560483c91eSJoe Nash     DPP64 = AMDGPU::getDPPOp64(Op);
1570483c91eSJoe Nash   if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)
1580483c91eSJoe Nash     return DPP64;
1590483c91eSJoe Nash   return -1;
1603d9afa27SValery Pykhtin }
1613d9afa27SValery Pykhtin 
1623d9afa27SValery Pykhtin // tracks the register operand definition and returns:
1633d9afa27SValery Pykhtin //   1. immediate operand used to initialize the register if found
1643d9afa27SValery Pykhtin //   2. nullptr if the register operand is undef
1653d9afa27SValery Pykhtin //   3. the operand itself otherwise
getOldOpndValue(MachineOperand & OldOpnd) const1663d9afa27SValery Pykhtin MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
1673d9afa27SValery Pykhtin   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
1683d9afa27SValery Pykhtin   if (!Def)
1693d9afa27SValery Pykhtin     return nullptr;
1703d9afa27SValery Pykhtin 
1713d9afa27SValery Pykhtin   switch(Def->getOpcode()) {
1723d9afa27SValery Pykhtin   default: break;
1733d9afa27SValery Pykhtin   case AMDGPU::IMPLICIT_DEF:
1743d9afa27SValery Pykhtin     return nullptr;
1753d9afa27SValery Pykhtin   case AMDGPU::COPY:
176a8d9d507SStanislav Mekhanoshin   case AMDGPU::V_MOV_B32_e32:
17731f215abSStanislav Mekhanoshin   case AMDGPU::V_MOV_B64_PSEUDO:
17831f215abSStanislav Mekhanoshin   case AMDGPU::V_MOV_B64_e32:
17931f215abSStanislav Mekhanoshin   case AMDGPU::V_MOV_B64_e64: {
1803d9afa27SValery Pykhtin     auto &Op1 = Def->getOperand(1);
1813d9afa27SValery Pykhtin     if (Op1.isImm())
1823d9afa27SValery Pykhtin       return &Op1;
1833d9afa27SValery Pykhtin     break;
1843d9afa27SValery Pykhtin   }
1853d9afa27SValery Pykhtin   }
1863d9afa27SValery Pykhtin   return &OldOpnd;
1873d9afa27SValery Pykhtin }
1883d9afa27SValery Pykhtin 
createDPPInst(MachineInstr & OrigMI,MachineInstr & MovMI,RegSubRegPair CombOldVGPR,bool CombBCZ,bool IsShrinkable) const1893d9afa27SValery Pykhtin MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
1903d9afa27SValery Pykhtin                                            MachineInstr &MovMI,
1917fe97f8cSValery Pykhtin                                            RegSubRegPair CombOldVGPR,
192538bda0bSJoe Nash                                            bool CombBCZ,
193538bda0bSJoe Nash                                            bool IsShrinkable) const {
194a8d9d507SStanislav Mekhanoshin   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
19531f215abSStanislav Mekhanoshin          MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
196a8d9d507SStanislav Mekhanoshin          MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
1973d9afa27SValery Pykhtin 
1980483c91eSJoe Nash   bool HasVOP3DPP = ST->hasVOP3DPP();
1993d9afa27SValery Pykhtin   auto OrigOp = OrigMI.getOpcode();
200538bda0bSJoe Nash   auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
2013d9afa27SValery Pykhtin   if (DPPOp == -1) {
2023d9afa27SValery Pykhtin     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
2033d9afa27SValery Pykhtin     return nullptr;
2043d9afa27SValery Pykhtin   }
205dc850fbfSJoe Nash   int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
206dc850fbfSJoe Nash   // Prior checks cover Mask with VOPC condition, but not on purpose
207dc850fbfSJoe Nash   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
208dc850fbfSJoe Nash   assert(RowMaskOpnd && RowMaskOpnd->isImm());
209dc850fbfSJoe Nash   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
210dc850fbfSJoe Nash   assert(BankMaskOpnd && BankMaskOpnd->isImm());
211dc850fbfSJoe Nash   const bool MaskAllLanes =
212dc850fbfSJoe Nash       RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
213*bc9b964fSArthur Eubanks   (void)MaskAllLanes;
214dc850fbfSJoe Nash   assert(MaskAllLanes ||
215dc850fbfSJoe Nash          !(TII->isVOPC(DPPOp) ||
216dc850fbfSJoe Nash            (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
217dc850fbfSJoe Nash              "VOPC cannot form DPP unless mask is full");
2183d9afa27SValery Pykhtin 
2193d9afa27SValery Pykhtin   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
22007cd19efSMatt Arsenault                          OrigMI.getDebugLoc(), TII->get(DPPOp))
22107cd19efSMatt Arsenault     .setMIFlags(OrigMI.getFlags());
22207cd19efSMatt Arsenault 
2233d9afa27SValery Pykhtin   bool Fail = false;
2243d9afa27SValery Pykhtin   do {
2250483c91eSJoe Nash     int NumOperands = 0;
2260483c91eSJoe Nash     if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
2273d9afa27SValery Pykhtin       DPPInst.add(*Dst);
2280483c91eSJoe Nash       ++NumOperands;
2290483c91eSJoe Nash     }
2300483c91eSJoe Nash     if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
2310483c91eSJoe Nash       if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
2320483c91eSJoe Nash         DPPInst.add(*SDst);
2330483c91eSJoe Nash         ++NumOperands;
2340483c91eSJoe Nash       }
2350483c91eSJoe Nash       // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
2360483c91eSJoe Nash     }
2373d9afa27SValery Pykhtin 
2383d9afa27SValery Pykhtin     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
2393d9afa27SValery Pykhtin     if (OldIdx != -1) {
2403d9afa27SValery Pykhtin       assert(OldIdx == NumOperands);
241a8d9d507SStanislav Mekhanoshin       assert(isOfRegClass(
242a8d9d507SStanislav Mekhanoshin           CombOldVGPR,
243a8d9d507SStanislav Mekhanoshin           *MRI->getRegClass(
244a8d9d507SStanislav Mekhanoshin               TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
245a8d9d507SStanislav Mekhanoshin           *MRI));
24619a1a739SStanislav Mekhanoshin       auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
24719a1a739SStanislav Mekhanoshin       DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
24819a1a739SStanislav Mekhanoshin                      CombOldVGPR.SubReg);
2493d9afa27SValery Pykhtin       ++NumOperands;
250b28bb8ccSJoe Nash     } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
251b28bb8ccSJoe Nash                                       TII->isVOPC(OrigOpE32))) {
252b28bb8ccSJoe Nash       // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand
253b28bb8ccSJoe Nash       // because they write to SGPRs not VGPRs
2547fe97f8cSValery Pykhtin     } else {
2557fe97f8cSValery Pykhtin       // TODO: this discards MAC/FMA instructions for now, let's add it later
2567fe97f8cSValery Pykhtin       LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
2577fe97f8cSValery Pykhtin                            " TBD\n");
2587fe97f8cSValery Pykhtin       Fail = true;
2597fe97f8cSValery Pykhtin       break;
2603d9afa27SValery Pykhtin     }
2613d9afa27SValery Pykhtin 
2623d9afa27SValery Pykhtin     if (auto *Mod0 = TII->getNamedOperand(OrigMI,
2633d9afa27SValery Pykhtin                                           AMDGPU::OpName::src0_modifiers)) {
2643d9afa27SValery Pykhtin       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
2653d9afa27SValery Pykhtin                                           AMDGPU::OpName::src0_modifiers));
2660483c91eSJoe Nash       assert(HasVOP3DPP ||
2670483c91eSJoe Nash              (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
2683d9afa27SValery Pykhtin       DPPInst.addImm(Mod0->getImm());
2693d9afa27SValery Pykhtin       ++NumOperands;
270c6dec1d8SStanislav Mekhanoshin     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
271c6dec1d8SStanislav Mekhanoshin                    AMDGPU::OpName::src0_modifiers) != -1) {
272c6dec1d8SStanislav Mekhanoshin       DPPInst.addImm(0);
273c6dec1d8SStanislav Mekhanoshin       ++NumOperands;
2743d9afa27SValery Pykhtin     }
2753d9afa27SValery Pykhtin     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
2763d9afa27SValery Pykhtin     assert(Src0);
2773d9afa27SValery Pykhtin     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
2783d9afa27SValery Pykhtin       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
2793d9afa27SValery Pykhtin       Fail = true;
2803d9afa27SValery Pykhtin       break;
2813d9afa27SValery Pykhtin     }
2823d9afa27SValery Pykhtin     DPPInst.add(*Src0);
2837fe97f8cSValery Pykhtin     DPPInst->getOperand(NumOperands).setIsKill(false);
2843d9afa27SValery Pykhtin     ++NumOperands;
2853d9afa27SValery Pykhtin 
2863d9afa27SValery Pykhtin     if (auto *Mod1 = TII->getNamedOperand(OrigMI,
2873d9afa27SValery Pykhtin                                           AMDGPU::OpName::src1_modifiers)) {
2883d9afa27SValery Pykhtin       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
2893d9afa27SValery Pykhtin                                           AMDGPU::OpName::src1_modifiers));
2900483c91eSJoe Nash       assert(HasVOP3DPP ||
2910483c91eSJoe Nash              (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
2923d9afa27SValery Pykhtin       DPPInst.addImm(Mod1->getImm());
2933d9afa27SValery Pykhtin       ++NumOperands;
294c6dec1d8SStanislav Mekhanoshin     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
295c6dec1d8SStanislav Mekhanoshin                    AMDGPU::OpName::src1_modifiers) != -1) {
296c6dec1d8SStanislav Mekhanoshin       DPPInst.addImm(0);
297c6dec1d8SStanislav Mekhanoshin       ++NumOperands;
2983d9afa27SValery Pykhtin     }
2990483c91eSJoe Nash     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
3000483c91eSJoe Nash     if (Src1) {
3013d9afa27SValery Pykhtin       if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
3023d9afa27SValery Pykhtin         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
3033d9afa27SValery Pykhtin         Fail = true;
3043d9afa27SValery Pykhtin         break;
3053d9afa27SValery Pykhtin       }
3063d9afa27SValery Pykhtin       DPPInst.add(*Src1);
3073d9afa27SValery Pykhtin       ++NumOperands;
3083d9afa27SValery Pykhtin     }
3090483c91eSJoe Nash     if (auto *Mod2 =
3100483c91eSJoe Nash             TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
3110483c91eSJoe Nash       assert(NumOperands ==
3120483c91eSJoe Nash              AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));
3130483c91eSJoe Nash       assert(HasVOP3DPP ||
3140483c91eSJoe Nash              (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
3150483c91eSJoe Nash       DPPInst.addImm(Mod2->getImm());
3160483c91eSJoe Nash       ++NumOperands;
3170483c91eSJoe Nash     }
3180483c91eSJoe Nash     auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
3190483c91eSJoe Nash     if (Src2) {
320c9c18e5aSvpykhtin       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
321c9c18e5aSvpykhtin           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
3223d9afa27SValery Pykhtin         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
3233d9afa27SValery Pykhtin         Fail = true;
3243d9afa27SValery Pykhtin         break;
3253d9afa27SValery Pykhtin       }
3263d9afa27SValery Pykhtin       DPPInst.add(*Src2);
3270483c91eSJoe Nash       ++NumOperands;
3283d9afa27SValery Pykhtin     }
3290483c91eSJoe Nash     if (HasVOP3DPP) {
3300483c91eSJoe Nash       auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
3310483c91eSJoe Nash       if (ClampOpr &&
3320483c91eSJoe Nash           AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) {
3330483c91eSJoe Nash         DPPInst.addImm(ClampOpr->getImm());
3340483c91eSJoe Nash       }
3350483c91eSJoe Nash       auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
3360483c91eSJoe Nash       if (VdstInOpr &&
3370483c91eSJoe Nash           AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) {
3380483c91eSJoe Nash         DPPInst.add(*VdstInOpr);
3390483c91eSJoe Nash       }
3400483c91eSJoe Nash       auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
3410483c91eSJoe Nash       if (OmodOpr &&
3420483c91eSJoe Nash           AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) {
3430483c91eSJoe Nash         DPPInst.addImm(OmodOpr->getImm());
3440483c91eSJoe Nash       }
3450483c91eSJoe Nash       // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to
3460483c91eSJoe Nash       // all 1.
3470483c91eSJoe Nash       if (auto *OpSelOpr =
3480483c91eSJoe Nash               TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
3490483c91eSJoe Nash         auto OpSel = OpSelOpr->getImm();
3500483c91eSJoe Nash         if (OpSel != 0) {
3510483c91eSJoe Nash           LLVM_DEBUG(dbgs() << "  failed: op_sel must be zero\n");
3520483c91eSJoe Nash           Fail = true;
3530483c91eSJoe Nash           break;
3540483c91eSJoe Nash         }
3550483c91eSJoe Nash         if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1)
3560483c91eSJoe Nash           DPPInst.addImm(OpSel);
3570483c91eSJoe Nash       }
3580483c91eSJoe Nash       if (auto *OpSelHiOpr =
3590483c91eSJoe Nash               TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
3600483c91eSJoe Nash         auto OpSelHi = OpSelHiOpr->getImm();
3610483c91eSJoe Nash         // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check
3620483c91eSJoe Nash         // the bitmask for 3 op_sel_hi bits set
3630483c91eSJoe Nash         assert(Src2 && "Expected vop3p with 3 operands");
3640483c91eSJoe Nash         if (OpSelHi != 7) {
3650483c91eSJoe Nash           LLVM_DEBUG(dbgs() << "  failed: op_sel_hi must be all set to one\n");
3660483c91eSJoe Nash           Fail = true;
3670483c91eSJoe Nash           break;
3680483c91eSJoe Nash         }
3690483c91eSJoe Nash         if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1)
3700483c91eSJoe Nash           DPPInst.addImm(OpSelHi);
3710483c91eSJoe Nash       }
3720483c91eSJoe Nash       auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
3730483c91eSJoe Nash       if (NegOpr &&
3740483c91eSJoe Nash           AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) {
3750483c91eSJoe Nash         DPPInst.addImm(NegOpr->getImm());
3760483c91eSJoe Nash       }
3770483c91eSJoe Nash       auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
3780483c91eSJoe Nash       if (NegHiOpr &&
3790483c91eSJoe Nash           AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) {
3800483c91eSJoe Nash         DPPInst.addImm(NegHiOpr->getImm());
3810483c91eSJoe Nash       }
3820483c91eSJoe Nash     }
3833d9afa27SValery Pykhtin     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
3843d9afa27SValery Pykhtin     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
3853d9afa27SValery Pykhtin     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
3867fe97f8cSValery Pykhtin     DPPInst.addImm(CombBCZ ? 1 : 0);
3873d9afa27SValery Pykhtin   } while (false);
3883d9afa27SValery Pykhtin 
3893d9afa27SValery Pykhtin   if (Fail) {
3903d9afa27SValery Pykhtin     DPPInst.getInstr()->eraseFromParent();
3913d9afa27SValery Pykhtin     return nullptr;
3923d9afa27SValery Pykhtin   }
3933d9afa27SValery Pykhtin   LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
3943d9afa27SValery Pykhtin   return DPPInst.getInstr();
3953d9afa27SValery Pykhtin }
3963d9afa27SValery Pykhtin 
isIdentityValue(unsigned OrigMIOp,MachineOperand * OldOpnd)3977fe97f8cSValery Pykhtin static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
3987fe97f8cSValery Pykhtin   assert(OldOpnd->isImm());
3997fe97f8cSValery Pykhtin   switch (OrigMIOp) {
4003d9afa27SValery Pykhtin   default: break;
4017fe97f8cSValery Pykhtin   case AMDGPU::V_ADD_U32_e32:
4027e0c10b5SJay Foad   case AMDGPU::V_ADD_U32_e64:
40379f67caeSMatt Arsenault   case AMDGPU::V_ADD_CO_U32_e32:
40479f67caeSMatt Arsenault   case AMDGPU::V_ADD_CO_U32_e64:
4057fe97f8cSValery Pykhtin   case AMDGPU::V_OR_B32_e32:
4067e0c10b5SJay Foad   case AMDGPU::V_OR_B32_e64:
4077fe97f8cSValery Pykhtin   case AMDGPU::V_SUBREV_U32_e32:
4087e0c10b5SJay Foad   case AMDGPU::V_SUBREV_U32_e64:
40979f67caeSMatt Arsenault   case AMDGPU::V_SUBREV_CO_U32_e32:
41079f67caeSMatt Arsenault   case AMDGPU::V_SUBREV_CO_U32_e64:
4113d9afa27SValery Pykhtin   case AMDGPU::V_MAX_U32_e32:
4127e0c10b5SJay Foad   case AMDGPU::V_MAX_U32_e64:
4137fe97f8cSValery Pykhtin   case AMDGPU::V_XOR_B32_e32:
4147e0c10b5SJay Foad   case AMDGPU::V_XOR_B32_e64:
4157fe97f8cSValery Pykhtin     if (OldOpnd->getImm() == 0)
4167fe97f8cSValery Pykhtin       return true;
4173d9afa27SValery Pykhtin     break;
4187fe97f8cSValery Pykhtin   case AMDGPU::V_AND_B32_e32:
4197e0c10b5SJay Foad   case AMDGPU::V_AND_B32_e64:
4207fe97f8cSValery Pykhtin   case AMDGPU::V_MIN_U32_e32:
4217e0c10b5SJay Foad   case AMDGPU::V_MIN_U32_e64:
4227fe97f8cSValery Pykhtin     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
4237fe97f8cSValery Pykhtin         std::numeric_limits<uint32_t>::max())
4247fe97f8cSValery Pykhtin       return true;
4253d9afa27SValery Pykhtin     break;
4263d9afa27SValery Pykhtin   case AMDGPU::V_MIN_I32_e32:
4277e0c10b5SJay Foad   case AMDGPU::V_MIN_I32_e64:
4287fe97f8cSValery Pykhtin     if (static_cast<int32_t>(OldOpnd->getImm()) ==
4297fe97f8cSValery Pykhtin         std::numeric_limits<int32_t>::max())
4307fe97f8cSValery Pykhtin       return true;
4313d9afa27SValery Pykhtin     break;
4327fe97f8cSValery Pykhtin   case AMDGPU::V_MAX_I32_e32:
4337e0c10b5SJay Foad   case AMDGPU::V_MAX_I32_e64:
4347fe97f8cSValery Pykhtin     if (static_cast<int32_t>(OldOpnd->getImm()) ==
4357fe97f8cSValery Pykhtin         std::numeric_limits<int32_t>::min())
4367fe97f8cSValery Pykhtin       return true;
4377fe97f8cSValery Pykhtin     break;
4383d9afa27SValery Pykhtin   case AMDGPU::V_MUL_I32_I24_e32:
4397e0c10b5SJay Foad   case AMDGPU::V_MUL_I32_I24_e64:
4403d9afa27SValery Pykhtin   case AMDGPU::V_MUL_U32_U24_e32:
4417e0c10b5SJay Foad   case AMDGPU::V_MUL_U32_U24_e64:
4427fe97f8cSValery Pykhtin     if (OldOpnd->getImm() == 1)
4437fe97f8cSValery Pykhtin       return true;
4443d9afa27SValery Pykhtin     break;
4453d9afa27SValery Pykhtin   }
4467fe97f8cSValery Pykhtin   return false;
4473d9afa27SValery Pykhtin }
4483d9afa27SValery Pykhtin 
createDPPInst(MachineInstr & OrigMI,MachineInstr & MovMI,RegSubRegPair CombOldVGPR,MachineOperand * OldOpndValue,bool CombBCZ,bool IsShrinkable) const449538bda0bSJoe Nash MachineInstr *GCNDPPCombine::createDPPInst(
450538bda0bSJoe Nash     MachineInstr &OrigMI, MachineInstr &MovMI, RegSubRegPair CombOldVGPR,
451538bda0bSJoe Nash     MachineOperand *OldOpndValue, bool CombBCZ, bool IsShrinkable) const {
4527fe97f8cSValery Pykhtin   assert(CombOldVGPR.Reg);
4537fe97f8cSValery Pykhtin   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
4547fe97f8cSValery Pykhtin     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
4557fe97f8cSValery Pykhtin     if (!Src1 || !Src1->isReg()) {
4567fe97f8cSValery Pykhtin       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
4577fe97f8cSValery Pykhtin       return nullptr;
4587fe97f8cSValery Pykhtin     }
4597fe97f8cSValery Pykhtin     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
4600cd50b2aSJay Foad       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
4617fe97f8cSValery Pykhtin       return nullptr;
4627fe97f8cSValery Pykhtin     }
4637fe97f8cSValery Pykhtin     CombOldVGPR = getRegSubRegPair(*Src1);
464a8d9d507SStanislav Mekhanoshin     auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
465a8d9d507SStanislav Mekhanoshin     const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
466a8d9d507SStanislav Mekhanoshin     if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
467a8d9d507SStanislav Mekhanoshin       LLVM_DEBUG(dbgs() << "  failed: src1 has wrong register class\n");
4683d9afa27SValery Pykhtin       return nullptr;
4693d9afa27SValery Pykhtin     }
4703d9afa27SValery Pykhtin   }
471538bda0bSJoe Nash   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
4723d9afa27SValery Pykhtin }
4733d9afa27SValery Pykhtin 
4743d9afa27SValery Pykhtin // returns true if MI doesn't have OpndName immediate operand or the
4753d9afa27SValery Pykhtin // operand has Value
hasNoImmOrEqual(MachineInstr & MI,unsigned OpndName,int64_t Value,int64_t Mask) const4763d9afa27SValery Pykhtin bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
4773d9afa27SValery Pykhtin                                     int64_t Value, int64_t Mask) const {
4783d9afa27SValery Pykhtin   auto *Imm = TII->getNamedOperand(MI, OpndName);
4793d9afa27SValery Pykhtin   if (!Imm)
4803d9afa27SValery Pykhtin     return true;
4813d9afa27SValery Pykhtin 
4823d9afa27SValery Pykhtin   assert(Imm->isImm());
4833d9afa27SValery Pykhtin   return (Imm->getImm() & Mask) == Value;
4843d9afa27SValery Pykhtin }
4853d9afa27SValery Pykhtin 
combineDPPMov(MachineInstr & MovMI) const4863d9afa27SValery Pykhtin bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
487a8d9d507SStanislav Mekhanoshin   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
48831f215abSStanislav Mekhanoshin          MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
489a8d9d507SStanislav Mekhanoshin          MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
4907fe97f8cSValery Pykhtin   LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
4917fe97f8cSValery Pykhtin 
4927fe97f8cSValery Pykhtin   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
4937fe97f8cSValery Pykhtin   assert(DstOpnd && DstOpnd->isReg());
4947fe97f8cSValery Pykhtin   auto DPPMovReg = DstOpnd->getReg();
4953d99310cSStanislav Mekhanoshin   if (DPPMovReg.isPhysical()) {
4963d99310cSStanislav Mekhanoshin     LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
4973d99310cSStanislav Mekhanoshin     return false;
4983d99310cSStanislav Mekhanoshin   }
49927ec195fSJay Foad   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
5007fe97f8cSValery Pykhtin     LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
5017fe97f8cSValery Pykhtin                          " for all uses\n");
5027fe97f8cSValery Pykhtin     return false;
5037fe97f8cSValery Pykhtin   }
5047fe97f8cSValery Pykhtin 
50531f215abSStanislav Mekhanoshin   if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
50631f215abSStanislav Mekhanoshin       MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
507a8d9d507SStanislav Mekhanoshin     auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
508a8d9d507SStanislav Mekhanoshin     assert(DppCtrl && DppCtrl->isImm());
509a8d9d507SStanislav Mekhanoshin     if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
510a8d9d507SStanislav Mekhanoshin       LLVM_DEBUG(dbgs() << "  failed: 64 bit dpp move uses unsupported"
511a8d9d507SStanislav Mekhanoshin                            " control value\n");
512a8d9d507SStanislav Mekhanoshin       // Let it split, then control may become legal.
513a8d9d507SStanislav Mekhanoshin       return false;
514a8d9d507SStanislav Mekhanoshin     }
515a8d9d507SStanislav Mekhanoshin   }
516a8d9d507SStanislav Mekhanoshin 
5177fe97f8cSValery Pykhtin   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
5187fe97f8cSValery Pykhtin   assert(RowMaskOpnd && RowMaskOpnd->isImm());
5197fe97f8cSValery Pykhtin   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
5207fe97f8cSValery Pykhtin   assert(BankMaskOpnd && BankMaskOpnd->isImm());
5217fe97f8cSValery Pykhtin   const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
5227fe97f8cSValery Pykhtin                             BankMaskOpnd->getImm() == 0xF;
5237fe97f8cSValery Pykhtin 
5243d9afa27SValery Pykhtin   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
5253d9afa27SValery Pykhtin   assert(BCZOpnd && BCZOpnd->isImm());
5267fe97f8cSValery Pykhtin   bool BoundCtrlZero = BCZOpnd->getImm();
5273d9afa27SValery Pykhtin 
5283d9afa27SValery Pykhtin   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
529edcd5815SStanislav Mekhanoshin   auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
5303d9afa27SValery Pykhtin   assert(OldOpnd && OldOpnd->isReg());
531edcd5815SStanislav Mekhanoshin   assert(SrcOpnd && SrcOpnd->isReg());
532edcd5815SStanislav Mekhanoshin   if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
533edcd5815SStanislav Mekhanoshin     LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
534edcd5815SStanislav Mekhanoshin     return false;
535edcd5815SStanislav Mekhanoshin   }
5367fe97f8cSValery Pykhtin 
5377fe97f8cSValery Pykhtin   auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
5387fe97f8cSValery Pykhtin   // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
5397fe97f8cSValery Pykhtin   // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
5407fe97f8cSValery Pykhtin   // but the third option is used to distinguish undef from non-immediate
5417fe97f8cSValery Pykhtin   // to reuse IMPLICIT_DEF instruction later
5423d9afa27SValery Pykhtin   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
5437fe97f8cSValery Pykhtin 
5447fe97f8cSValery Pykhtin   bool CombBCZ = false;
5457fe97f8cSValery Pykhtin 
5467fe97f8cSValery Pykhtin   if (MaskAllLanes && BoundCtrlZero) { // [1]
5477fe97f8cSValery Pykhtin     CombBCZ = true;
548b7a45954SValery Pykhtin   } else {
5497fe97f8cSValery Pykhtin     if (!OldOpndValue || !OldOpndValue->isImm()) {
5507fe97f8cSValery Pykhtin       LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
5511e0b5c71SValery Pykhtin       return false;
5521e0b5c71SValery Pykhtin     }
5537fe97f8cSValery Pykhtin 
5547fe97f8cSValery Pykhtin     if (OldOpndValue->getImm() == 0) {
5557fe97f8cSValery Pykhtin       if (MaskAllLanes) {
5567fe97f8cSValery Pykhtin         assert(!BoundCtrlZero); // by check [1]
5577fe97f8cSValery Pykhtin         CombBCZ = true;
5587fe97f8cSValery Pykhtin       }
5597fe97f8cSValery Pykhtin     } else if (BoundCtrlZero) {
5607fe97f8cSValery Pykhtin       assert(!MaskAllLanes); // by check [1]
5617fe97f8cSValery Pykhtin       LLVM_DEBUG(dbgs() <<
5627fe97f8cSValery Pykhtin         "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
5637fe97f8cSValery Pykhtin       return false;
5643d9afa27SValery Pykhtin     }
5653d9afa27SValery Pykhtin   }
5663d9afa27SValery Pykhtin 
5673d9afa27SValery Pykhtin   LLVM_DEBUG(dbgs() << "  old=";
5683d9afa27SValery Pykhtin     if (!OldOpndValue)
5693d9afa27SValery Pykhtin       dbgs() << "undef";
5703d9afa27SValery Pykhtin     else
5717fe97f8cSValery Pykhtin       dbgs() << *OldOpndValue;
5727fe97f8cSValery Pykhtin     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
5733d9afa27SValery Pykhtin 
5747fe97f8cSValery Pykhtin   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
5756e8599d9SStanislav Mekhanoshin   DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
5767fe97f8cSValery Pykhtin   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
5777fe97f8cSValery Pykhtin   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
5787fe97f8cSValery Pykhtin   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
579a8d9d507SStanislav Mekhanoshin     const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
5807fe97f8cSValery Pykhtin     CombOldVGPR = RegSubRegPair(
581a8d9d507SStanislav Mekhanoshin       MRI->createVirtualRegister(RC));
5823d9afa27SValery Pykhtin     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
5837fe97f8cSValery Pykhtin                              TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
5843d9afa27SValery Pykhtin     DPPMIs.push_back(UndefInst.getInstr());
5853d9afa27SValery Pykhtin   }
5863d9afa27SValery Pykhtin 
5873d9afa27SValery Pykhtin   OrigMIs.push_back(&MovMI);
5883d9afa27SValery Pykhtin   bool Rollback = true;
5896e8599d9SStanislav Mekhanoshin   SmallVector<MachineOperand*, 16> Uses;
5906e8599d9SStanislav Mekhanoshin 
5917fe97f8cSValery Pykhtin   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
5926e8599d9SStanislav Mekhanoshin     Uses.push_back(&Use);
5936e8599d9SStanislav Mekhanoshin   }
5946e8599d9SStanislav Mekhanoshin 
5956e8599d9SStanislav Mekhanoshin   while (!Uses.empty()) {
5966e8599d9SStanislav Mekhanoshin     MachineOperand *Use = Uses.pop_back_val();
5973d9afa27SValery Pykhtin     Rollback = true;
5983d9afa27SValery Pykhtin 
5996e8599d9SStanislav Mekhanoshin     auto &OrigMI = *Use->getParent();
6007fe97f8cSValery Pykhtin     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
6017fe97f8cSValery Pykhtin 
6023d9afa27SValery Pykhtin     auto OrigOp = OrigMI.getOpcode();
6036e8599d9SStanislav Mekhanoshin     if (OrigOp == AMDGPU::REG_SEQUENCE) {
6046e8599d9SStanislav Mekhanoshin       Register FwdReg = OrigMI.getOperand(0).getReg();
6056e8599d9SStanislav Mekhanoshin       unsigned FwdSubReg = 0;
6066e8599d9SStanislav Mekhanoshin 
6076e8599d9SStanislav Mekhanoshin       if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
6086e8599d9SStanislav Mekhanoshin         LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
6096e8599d9SStanislav Mekhanoshin                              " for all uses\n");
6106e8599d9SStanislav Mekhanoshin         break;
6116e8599d9SStanislav Mekhanoshin       }
6126e8599d9SStanislav Mekhanoshin 
6136e8599d9SStanislav Mekhanoshin       unsigned OpNo, E = OrigMI.getNumOperands();
6146e8599d9SStanislav Mekhanoshin       for (OpNo = 1; OpNo < E; OpNo += 2) {
6156e8599d9SStanislav Mekhanoshin         if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
6166e8599d9SStanislav Mekhanoshin           FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
6176e8599d9SStanislav Mekhanoshin           break;
6186e8599d9SStanislav Mekhanoshin         }
6196e8599d9SStanislav Mekhanoshin       }
6206e8599d9SStanislav Mekhanoshin 
6216e8599d9SStanislav Mekhanoshin       if (!FwdSubReg)
6226e8599d9SStanislav Mekhanoshin         break;
6236e8599d9SStanislav Mekhanoshin 
6246e8599d9SStanislav Mekhanoshin       for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
6256e8599d9SStanislav Mekhanoshin         if (Op.getSubReg() == FwdSubReg)
6266e8599d9SStanislav Mekhanoshin           Uses.push_back(&Op);
6276e8599d9SStanislav Mekhanoshin       }
6286e8599d9SStanislav Mekhanoshin       RegSeqWithOpNos[&OrigMI].push_back(OpNo);
6296e8599d9SStanislav Mekhanoshin       continue;
6306e8599d9SStanislav Mekhanoshin     }
6316e8599d9SStanislav Mekhanoshin 
632a02aa913SJay Foad     bool IsShrinkable = isShrinkable(OrigMI);
6330483c91eSJoe Nash     if (!(IsShrinkable ||
6340483c91eSJoe Nash           ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||
6350483c91eSJoe Nash             TII->isVOP3(OrigOp)) &&
6360483c91eSJoe Nash            ST->hasVOP3DPP()) ||
6370483c91eSJoe Nash           TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
6380483c91eSJoe Nash       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3/3P/C\n");
6390483c91eSJoe Nash       break;
6400483c91eSJoe Nash     }
6410483c91eSJoe Nash     if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) {
6420483c91eSJoe Nash       LLVM_DEBUG(dbgs() << "  failed: can't combine v_cmpx\n");
6433d9afa27SValery Pykhtin       break;
6443d9afa27SValery Pykhtin     }
6453d9afa27SValery Pykhtin 
646bb69ca82Svpykhtin     auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
647bb69ca82Svpykhtin     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
648bb69ca82Svpykhtin     if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
649bb69ca82Svpykhtin       LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
650bb69ca82Svpykhtin       break;
651bb69ca82Svpykhtin     }
652bb69ca82Svpykhtin 
6530483c91eSJoe Nash     auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
654bb69ca82Svpykhtin     assert(Src0 && "Src1 without Src0?");
6550483c91eSJoe Nash     if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
6560483c91eSJoe Nash                          (Src2 && Src2->isIdenticalTo(*Src0)))) ||
6570483c91eSJoe Nash         (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
6580483c91eSJoe Nash                          (Src2 && Src2->isIdenticalTo(*Src1))))) {
659bb69ca82Svpykhtin       LLVM_DEBUG(
660bb69ca82Svpykhtin           dbgs()
661bb69ca82Svpykhtin           << "  " << OrigMI
662bb69ca82Svpykhtin           << "  failed: DPP register is used more than once per instruction\n");
663bb69ca82Svpykhtin       break;
664bb69ca82Svpykhtin     }
665bb69ca82Svpykhtin 
6663d9afa27SValery Pykhtin     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
667bb69ca82Svpykhtin     if (Use == Src0) {
6687fe97f8cSValery Pykhtin       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
669538bda0bSJoe Nash                                         OldOpndValue, CombBCZ, IsShrinkable)) {
6703d9afa27SValery Pykhtin         DPPMIs.push_back(DPPInst);
6713d9afa27SValery Pykhtin         Rollback = false;
6723d9afa27SValery Pykhtin       }
673bb69ca82Svpykhtin     } else {
674bb69ca82Svpykhtin       assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
6753d9afa27SValery Pykhtin       auto *BB = OrigMI.getParent();
6763d9afa27SValery Pykhtin       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
6773d9afa27SValery Pykhtin       BB->insert(OrigMI, NewMI);
6783d9afa27SValery Pykhtin       if (TII->commuteInstruction(*NewMI)) {
6793d9afa27SValery Pykhtin         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
680538bda0bSJoe Nash         if (auto *DPPInst =
681538bda0bSJoe Nash                 createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
682538bda0bSJoe Nash                               IsShrinkable)) {
6833d9afa27SValery Pykhtin           DPPMIs.push_back(DPPInst);
6843d9afa27SValery Pykhtin           Rollback = false;
6853d9afa27SValery Pykhtin         }
6863d9afa27SValery Pykhtin       } else
6873d9afa27SValery Pykhtin         LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
6883d9afa27SValery Pykhtin       NewMI->eraseFromParent();
689bb69ca82Svpykhtin     }
6903d9afa27SValery Pykhtin     if (Rollback)
6913d9afa27SValery Pykhtin       break;
6923d9afa27SValery Pykhtin     OrigMIs.push_back(&OrigMI);
6933d9afa27SValery Pykhtin   }
6943d9afa27SValery Pykhtin 
6956e8599d9SStanislav Mekhanoshin   Rollback |= !Uses.empty();
6966e8599d9SStanislav Mekhanoshin 
6973d9afa27SValery Pykhtin   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
6983d9afa27SValery Pykhtin     MI->eraseFromParent();
6993d9afa27SValery Pykhtin 
7006e8599d9SStanislav Mekhanoshin   if (!Rollback) {
7016e8599d9SStanislav Mekhanoshin     for (auto &S : RegSeqWithOpNos) {
7026e8599d9SStanislav Mekhanoshin       if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
7036e8599d9SStanislav Mekhanoshin         S.first->eraseFromParent();
7046e8599d9SStanislav Mekhanoshin         continue;
7056e8599d9SStanislav Mekhanoshin       }
7066e8599d9SStanislav Mekhanoshin       while (!S.second.empty())
7076e8599d9SStanislav Mekhanoshin         S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
7086e8599d9SStanislav Mekhanoshin     }
7096e8599d9SStanislav Mekhanoshin   }
7106e8599d9SStanislav Mekhanoshin 
7113d9afa27SValery Pykhtin   return !Rollback;
7123d9afa27SValery Pykhtin }
7133d9afa27SValery Pykhtin 
runOnMachineFunction(MachineFunction & MF)7143d9afa27SValery Pykhtin bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
715538bda0bSJoe Nash   ST = &MF.getSubtarget<GCNSubtarget>();
716538bda0bSJoe Nash   if (!ST->hasDPP() || skipFunction(MF.getFunction()))
7173d9afa27SValery Pykhtin     return false;
7183d9afa27SValery Pykhtin 
7193d9afa27SValery Pykhtin   MRI = &MF.getRegInfo();
720538bda0bSJoe Nash   TII = ST->getInstrInfo();
7213d9afa27SValery Pykhtin 
7223d9afa27SValery Pykhtin   bool Changed = false;
7233d9afa27SValery Pykhtin   for (auto &MBB : MF) {
7244bef0304SKazu Hirata     for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
7253d9afa27SValery Pykhtin       if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
7263d9afa27SValery Pykhtin         Changed = true;
7273d9afa27SValery Pykhtin         ++NumDPPMovsCombined;
72831f215abSStanislav Mekhanoshin       } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
72931f215abSStanislav Mekhanoshin                  MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
730538bda0bSJoe Nash         if (ST->has64BitDPP() && combineDPPMov(MI)) {
731a8d9d507SStanislav Mekhanoshin           Changed = true;
732a8d9d507SStanislav Mekhanoshin           ++NumDPPMovsCombined;
733a8d9d507SStanislav Mekhanoshin         } else {
7341184c27fSStanislav Mekhanoshin           auto Split = TII->expandMovDPP64(MI);
7351184c27fSStanislav Mekhanoshin           for (auto M : { Split.first, Split.second }) {
736a8d9d507SStanislav Mekhanoshin             if (M && combineDPPMov(*M))
7371184c27fSStanislav Mekhanoshin               ++NumDPPMovsCombined;
7381184c27fSStanislav Mekhanoshin           }
7391184c27fSStanislav Mekhanoshin           Changed = true;
7403d9afa27SValery Pykhtin         }
7413d9afa27SValery Pykhtin       }
7423d9afa27SValery Pykhtin     }
743a8d9d507SStanislav Mekhanoshin   }
7443d9afa27SValery Pykhtin   return Changed;
7453d9afa27SValery Pykhtin }
746