//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass tries to apply several peephole SDWA patterns.
///
/// E.g. original:
///   V_LSHRREV_B32_e32 %0, 16, %1
///   V_ADD_CO_U32_e32 %2, %0, %3
///   V_LSHLREV_B32_e32 %4, 16, %2
///
/// Replace with:
///   V_ADD_CO_U32_sdwa %4, %1, %3
///       dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
///
//===----------------------------------------------------------------------===//
21f60ad58dSSam Kolton 
22f60ad58dSSam Kolton #include "AMDGPU.h"
23560d7e04Sdfukalov #include "GCNSubtarget.h"
24560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
253d5ba7c6STim Renouf #include "llvm/ADT/MapVector.h"
266bda14b3SChandler Carruth #include "llvm/ADT/Statistic.h"
27f60ad58dSSam Kolton #include "llvm/CodeGen/MachineFunctionPass.h"
28f60ad58dSSam Kolton 
29f60ad58dSSam Kolton using namespace llvm;
30f60ad58dSSam Kolton 
31f60ad58dSSam Kolton #define DEBUG_TYPE "si-peephole-sdwa"
32f60ad58dSSam Kolton 
33f60ad58dSSam Kolton STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
34f60ad58dSSam Kolton STATISTIC(NumSDWAInstructionsPeepholed,
35f60ad58dSSam Kolton           "Number of instruction converted to SDWA.");
36f60ad58dSSam Kolton 
37f60ad58dSSam Kolton namespace {
38f60ad58dSSam Kolton 
39f60ad58dSSam Kolton class SDWAOperand;
405f7f32c3SSam Kolton class SDWADstOperand;
41f60ad58dSSam Kolton 
42f60ad58dSSam Kolton class SIPeepholeSDWA : public MachineFunctionPass {
43ebfdaf73SSam Kolton public:
4459e12826SEugene Zelenko   using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
45ebfdaf73SSam Kolton 
46f60ad58dSSam Kolton private:
47f60ad58dSSam Kolton   MachineRegisterInfo *MRI;
48f60ad58dSSam Kolton   const SIRegisterInfo *TRI;
49f60ad58dSSam Kolton   const SIInstrInfo *TII;
50f60ad58dSSam Kolton 
513d5ba7c6STim Renouf   MapVector<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
523d5ba7c6STim Renouf   MapVector<MachineInstr *, SDWAOperandsVector> PotentialMatches;
5356ea488dSStanislav Mekhanoshin   SmallVector<MachineInstr *, 8> ConvertedInstructions;
54f60ad58dSSam Kolton 
5527e0f8bcSSam Kolton   Optional<int64_t> foldToImm(const MachineOperand &Op) const;
5627e0f8bcSSam Kolton 
57f60ad58dSSam Kolton public:
58f60ad58dSSam Kolton   static char ID;
59f60ad58dSSam Kolton 
SIPeepholeSDWA()60f60ad58dSSam Kolton   SIPeepholeSDWA() : MachineFunctionPass(ID) {
61f60ad58dSSam Kolton     initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
62f60ad58dSSam Kolton   }
63f60ad58dSSam Kolton 
64f60ad58dSSam Kolton   bool runOnMachineFunction(MachineFunction &MF) override;
659c2f3c48SMatt Arsenault   void matchSDWAOperands(MachineBasicBlock &MBB);
665f7f32c3SSam Kolton   std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
6716de4fd2SRon Lieberman   bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const;
6816de4fd2SRon Lieberman   void pseudoOpConvertToVOP2(MachineInstr &MI,
6916de4fd2SRon Lieberman                              const GCNSubtarget &ST) const;
70f60ad58dSSam Kolton   bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
715bfbae5cSTom Stellard   void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
72f60ad58dSSam Kolton 
getPassName() const73f60ad58dSSam Kolton   StringRef getPassName() const override { return "SI Peephole SDWA"; }
74f60ad58dSSam Kolton 
getAnalysisUsage(AnalysisUsage & AU) const75f60ad58dSSam Kolton   void getAnalysisUsage(AnalysisUsage &AU) const override {
76f60ad58dSSam Kolton     AU.setPreservesCFG();
77f60ad58dSSam Kolton     MachineFunctionPass::getAnalysisUsage(AU);
78f60ad58dSSam Kolton   }
79f60ad58dSSam Kolton };
80f60ad58dSSam Kolton 
81f60ad58dSSam Kolton class SDWAOperand {
82f60ad58dSSam Kolton private:
83f60ad58dSSam Kolton   MachineOperand *Target; // Operand that would be used in converted instruction
84f60ad58dSSam Kolton   MachineOperand *Replaced; // Operand that would be replace by Target
85f60ad58dSSam Kolton 
86f60ad58dSSam Kolton public:
SDWAOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp)87f60ad58dSSam Kolton   SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
88f60ad58dSSam Kolton       : Target(TargetOp), Replaced(ReplacedOp) {
89f60ad58dSSam Kolton     assert(Target->isReg());
90f60ad58dSSam Kolton     assert(Replaced->isReg());
91f60ad58dSSam Kolton   }
92f60ad58dSSam Kolton 
9359e12826SEugene Zelenko   virtual ~SDWAOperand() = default;
94f60ad58dSSam Kolton 
95f60ad58dSSam Kolton   virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
96f60ad58dSSam Kolton   virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
97f60ad58dSSam Kolton 
getTargetOperand() const98f60ad58dSSam Kolton   MachineOperand *getTargetOperand() const { return Target; }
getReplacedOperand() const99f60ad58dSSam Kolton   MachineOperand *getReplacedOperand() const { return Replaced; }
getParentInst() const100f60ad58dSSam Kolton   MachineInstr *getParentInst() const { return Target->getParent(); }
10159e12826SEugene Zelenko 
getMRI() const102f60ad58dSSam Kolton   MachineRegisterInfo *getMRI() const {
103f60ad58dSSam Kolton     return &getParentInst()->getParent()->getParent()->getRegInfo();
104f60ad58dSSam Kolton   }
1055f7f32c3SSam Kolton 
1065f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1075f7f32c3SSam Kolton   virtual void print(raw_ostream& OS) const = 0;
dump() const1085f7f32c3SSam Kolton   void dump() const { print(dbgs()); }
1095f7f32c3SSam Kolton #endif
110f60ad58dSSam Kolton };
111f60ad58dSSam Kolton 
112f60ad58dSSam Kolton using namespace AMDGPU::SDWA;
113f60ad58dSSam Kolton 
114f60ad58dSSam Kolton class SDWASrcOperand : public SDWAOperand {
115f60ad58dSSam Kolton private:
116f60ad58dSSam Kolton   SdwaSel SrcSel;
117f60ad58dSSam Kolton   bool Abs;
118f60ad58dSSam Kolton   bool Neg;
119f60ad58dSSam Kolton   bool Sext;
120f60ad58dSSam Kolton 
121f60ad58dSSam Kolton public:
SDWASrcOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp,SdwaSel SrcSel_=DWORD,bool Abs_=false,bool Neg_=false,bool Sext_=false)122f60ad58dSSam Kolton   SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
123f60ad58dSSam Kolton                  SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
124f60ad58dSSam Kolton                  bool Sext_ = false)
1255f7f32c3SSam Kolton       : SDWAOperand(TargetOp, ReplacedOp),
1265f7f32c3SSam Kolton         SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
127f60ad58dSSam Kolton 
12859e12826SEugene Zelenko   MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
12959e12826SEugene Zelenko   bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
130f60ad58dSSam Kolton 
getSrcSel() const131f60ad58dSSam Kolton   SdwaSel getSrcSel() const { return SrcSel; }
getAbs() const132f60ad58dSSam Kolton   bool getAbs() const { return Abs; }
getNeg() const133f60ad58dSSam Kolton   bool getNeg() const { return Neg; }
getSext() const134f60ad58dSSam Kolton   bool getSext() const { return Sext; }
135f60ad58dSSam Kolton 
13603306604SStanislav Mekhanoshin   uint64_t getSrcMods(const SIInstrInfo *TII,
13703306604SStanislav Mekhanoshin                       const MachineOperand *SrcOp) const;
1385f7f32c3SSam Kolton 
1395f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1405f7f32c3SSam Kolton   void print(raw_ostream& OS) const override;
1415f7f32c3SSam Kolton #endif
142f60ad58dSSam Kolton };
143f60ad58dSSam Kolton 
144f60ad58dSSam Kolton class SDWADstOperand : public SDWAOperand {
145f60ad58dSSam Kolton private:
146f60ad58dSSam Kolton   SdwaSel DstSel;
147f60ad58dSSam Kolton   DstUnused DstUn;
148f60ad58dSSam Kolton 
149f60ad58dSSam Kolton public:
1505f7f32c3SSam Kolton 
SDWADstOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp,SdwaSel DstSel_=DWORD,DstUnused DstUn_=UNUSED_PAD)151f60ad58dSSam Kolton   SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
152f60ad58dSSam Kolton                  SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
153f60ad58dSSam Kolton     : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
154f60ad58dSSam Kolton 
15559e12826SEugene Zelenko   MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
15659e12826SEugene Zelenko   bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
157f60ad58dSSam Kolton 
getDstSel() const158f60ad58dSSam Kolton   SdwaSel getDstSel() const { return DstSel; }
getDstUnused() const159f60ad58dSSam Kolton   DstUnused getDstUnused() const { return DstUn; }
1605f7f32c3SSam Kolton 
1615f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1625f7f32c3SSam Kolton   void print(raw_ostream& OS) const override;
1635f7f32c3SSam Kolton #endif
1645f7f32c3SSam Kolton };
1655f7f32c3SSam Kolton 
1665f7f32c3SSam Kolton class SDWADstPreserveOperand : public SDWADstOperand {
1675f7f32c3SSam Kolton private:
1685f7f32c3SSam Kolton   MachineOperand *Preserve;
1695f7f32c3SSam Kolton 
1705f7f32c3SSam Kolton public:
SDWADstPreserveOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp,MachineOperand * PreserveOp,SdwaSel DstSel_=DWORD)1715f7f32c3SSam Kolton   SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
1725f7f32c3SSam Kolton                          MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
1735f7f32c3SSam Kolton       : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
1745f7f32c3SSam Kolton         Preserve(PreserveOp) {}
1755f7f32c3SSam Kolton 
1765f7f32c3SSam Kolton   bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
1775f7f32c3SSam Kolton 
getPreservedOperand() const1785f7f32c3SSam Kolton   MachineOperand *getPreservedOperand() const { return Preserve; }
1795f7f32c3SSam Kolton 
1805f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1815f7f32c3SSam Kolton   void print(raw_ostream& OS) const override;
1825f7f32c3SSam Kolton #endif
183f60ad58dSSam Kolton };
184f60ad58dSSam Kolton 
18559e12826SEugene Zelenko } // end anonymous namespace
186f60ad58dSSam Kolton 
187f60ad58dSSam Kolton INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
188f60ad58dSSam Kolton 
189f60ad58dSSam Kolton char SIPeepholeSDWA::ID = 0;
190f60ad58dSSam Kolton 
191f60ad58dSSam Kolton char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
192f60ad58dSSam Kolton 
createSIPeepholeSDWAPass()193f60ad58dSSam Kolton FunctionPass *llvm::createSIPeepholeSDWAPass() {
194f60ad58dSSam Kolton   return new SIPeepholeSDWA();
195f60ad58dSSam Kolton }
196f60ad58dSSam Kolton 
1975f7f32c3SSam Kolton 
1985f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
operator <<(raw_ostream & OS,SdwaSel Sel)199c24d5e28SMatt Arsenault static raw_ostream& operator<<(raw_ostream &OS, SdwaSel Sel) {
200f60ad58dSSam Kolton   switch(Sel) {
201f60ad58dSSam Kolton   case BYTE_0: OS << "BYTE_0"; break;
202f60ad58dSSam Kolton   case BYTE_1: OS << "BYTE_1"; break;
203f60ad58dSSam Kolton   case BYTE_2: OS << "BYTE_2"; break;
204f60ad58dSSam Kolton   case BYTE_3: OS << "BYTE_3"; break;
205f60ad58dSSam Kolton   case WORD_0: OS << "WORD_0"; break;
206f60ad58dSSam Kolton   case WORD_1: OS << "WORD_1"; break;
207f60ad58dSSam Kolton   case DWORD:  OS << "DWORD"; break;
208f60ad58dSSam Kolton   }
209f60ad58dSSam Kolton   return OS;
210f60ad58dSSam Kolton }
211f60ad58dSSam Kolton 
operator <<(raw_ostream & OS,const DstUnused & Un)212f60ad58dSSam Kolton static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
213f60ad58dSSam Kolton   switch(Un) {
214f60ad58dSSam Kolton   case UNUSED_PAD: OS << "UNUSED_PAD"; break;
215f60ad58dSSam Kolton   case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
216f60ad58dSSam Kolton   case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
217f60ad58dSSam Kolton   }
218f60ad58dSSam Kolton   return OS;
219f60ad58dSSam Kolton }
220f60ad58dSSam Kolton 
2215f7f32c3SSam Kolton LLVM_DUMP_METHOD
print(raw_ostream & OS) const2225f7f32c3SSam Kolton void SDWASrcOperand::print(raw_ostream& OS) const {
2235f7f32c3SSam Kolton   OS << "SDWA src: " << *getTargetOperand()
2245f7f32c3SSam Kolton     << " src_sel:" << getSrcSel()
2255f7f32c3SSam Kolton     << " abs:" << getAbs() << " neg:" << getNeg()
2265f7f32c3SSam Kolton     << " sext:" << getSext() << '\n';
227f60ad58dSSam Kolton }
2285f7f32c3SSam Kolton 
2295f7f32c3SSam Kolton LLVM_DUMP_METHOD
print(raw_ostream & OS) const2305f7f32c3SSam Kolton void SDWADstOperand::print(raw_ostream& OS) const {
2315f7f32c3SSam Kolton   OS << "SDWA dst: " << *getTargetOperand()
2325f7f32c3SSam Kolton     << " dst_sel:" << getDstSel()
2335f7f32c3SSam Kolton     << " dst_unused:" << getDstUnused() << '\n';
2345f7f32c3SSam Kolton }
2355f7f32c3SSam Kolton 
2365f7f32c3SSam Kolton LLVM_DUMP_METHOD
print(raw_ostream & OS) const2375f7f32c3SSam Kolton void SDWADstPreserveOperand::print(raw_ostream& OS) const {
2385f7f32c3SSam Kolton   OS << "SDWA preserve dst: " << *getTargetOperand()
2395f7f32c3SSam Kolton     << " dst_sel:" << getDstSel()
2405f7f32c3SSam Kolton     << " preserve:" << *getPreservedOperand() << '\n';
2415f7f32c3SSam Kolton }
2425f7f32c3SSam Kolton 
243f60ad58dSSam Kolton #endif
244f60ad58dSSam Kolton 
copyRegOperand(MachineOperand & To,const MachineOperand & From)245f60ad58dSSam Kolton static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
246f60ad58dSSam Kolton   assert(To.isReg() && From.isReg());
247f60ad58dSSam Kolton   To.setReg(From.getReg());
248f60ad58dSSam Kolton   To.setSubReg(From.getSubReg());
249f60ad58dSSam Kolton   To.setIsUndef(From.isUndef());
250f60ad58dSSam Kolton   if (To.isUse()) {
251f60ad58dSSam Kolton     To.setIsKill(From.isKill());
252f60ad58dSSam Kolton   } else {
253f60ad58dSSam Kolton     To.setIsDead(From.isDead());
254f60ad58dSSam Kolton   }
255f60ad58dSSam Kolton }
256f60ad58dSSam Kolton 
isSameReg(const MachineOperand & LHS,const MachineOperand & RHS)257f60ad58dSSam Kolton static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
258f60ad58dSSam Kolton   return LHS.isReg() &&
259f60ad58dSSam Kolton          RHS.isReg() &&
260f60ad58dSSam Kolton          LHS.getReg() == RHS.getReg() &&
261f60ad58dSSam Kolton          LHS.getSubReg() == RHS.getSubReg();
262f60ad58dSSam Kolton }
263f60ad58dSSam Kolton 
/// Find the single instruction using the register defined by \p Reg.
///
/// Returns nullptr if \p Reg is not a register def, if any non-debug use
/// differs from \p Reg in its sub-register, or if the uses are spread over
/// more than one instruction. Otherwise returns the first use operand found
/// (nullptr when there are no uses at all).
static MachineOperand *findSingleRegUse(const MachineOperand *Reg,
                                        const MachineRegisterInfo *MRI) {
  if (!(Reg->isReg() && Reg->isDef()))
    return nullptr;

  MachineOperand *FirstUse = nullptr;
  for (MachineOperand &Candidate : MRI->use_nodbg_operands(Reg->getReg())) {
    // Every use must name exactly the same register/sub-register as Reg.
    if (!isSameReg(Candidate, *Reg))
      return nullptr;

    if (!FirstUse) {
      FirstUse = &Candidate;
      continue;
    }

    // A use in a second instruction disqualifies the register.
    if (FirstUse->getParent() != Candidate.getParent())
      return nullptr;
  }

  return FirstUse;
}
285f60ad58dSSam Kolton 
/// Find the explicit def operand of the unique instruction defining the
/// register in \p Reg.
///
/// Returns nullptr if \p Reg is not a register, if there is no unique
/// defining instruction, or if that instruction's defs() do not contain the
/// register.
static MachineOperand *findSingleRegDef(const MachineOperand *Reg,
                                        const MachineRegisterInfo *MRI) {
  if (!Reg->isReg())
    return nullptr;

  MachineInstr *UniqueDef = MRI->getUniqueVRegDef(Reg->getReg());
  if (UniqueDef == nullptr)
    return nullptr;

  for (auto &Candidate : UniqueDef->defs()) {
    if (!Candidate.isReg())
      continue;
    if (Candidate.getReg() == Reg->getReg())
      return &Candidate;
  }

  // Ignore implicit defs.
  return nullptr;
}
303f60ad58dSSam Kolton 
getSrcMods(const SIInstrInfo * TII,const MachineOperand * SrcOp) const30403306604SStanislav Mekhanoshin uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
30503306604SStanislav Mekhanoshin                                     const MachineOperand *SrcOp) const {
306f60ad58dSSam Kolton   uint64_t Mods = 0;
30703306604SStanislav Mekhanoshin   const auto *MI = SrcOp->getParent();
30803306604SStanislav Mekhanoshin   if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
30903306604SStanislav Mekhanoshin     if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
31003306604SStanislav Mekhanoshin       Mods = Mod->getImm();
31103306604SStanislav Mekhanoshin     }
31203306604SStanislav Mekhanoshin   } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
31303306604SStanislav Mekhanoshin     if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
31403306604SStanislav Mekhanoshin       Mods = Mod->getImm();
31503306604SStanislav Mekhanoshin     }
31603306604SStanislav Mekhanoshin   }
317f60ad58dSSam Kolton   if (Abs || Neg) {
318f60ad58dSSam Kolton     assert(!Sext &&
319*6527b2a4SSebastian Neubauer            "Float and integer src modifiers can't be set simultaneously");
320da644c02SStanislav Mekhanoshin     Mods |= Abs ? SISrcMods::ABS : 0u;
321da644c02SStanislav Mekhanoshin     Mods ^= Neg ? SISrcMods::NEG : 0u;
322f60ad58dSSam Kolton   } else if (Sext) {
323f60ad58dSSam Kolton     Mods |= SISrcMods::SEXT;
324f60ad58dSSam Kolton   }
325f60ad58dSSam Kolton 
326f60ad58dSSam Kolton   return Mods;
327f60ad58dSSam Kolton }
328f60ad58dSSam Kolton 
potentialToConvert(const SIInstrInfo * TII)329f60ad58dSSam Kolton MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
330f60ad58dSSam Kolton   // For SDWA src operand potential instruction is one that use register
331f60ad58dSSam Kolton   // defined by parent instruction
3325f7f32c3SSam Kolton   MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
3335f7f32c3SSam Kolton   if (!PotentialMO)
334f60ad58dSSam Kolton     return nullptr;
335f60ad58dSSam Kolton 
3365f7f32c3SSam Kolton   return PotentialMO->getParent();
337f60ad58dSSam Kolton }
338f60ad58dSSam Kolton 
convertToSDWA(MachineInstr & MI,const SIInstrInfo * TII)339f60ad58dSSam Kolton bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
340f60ad58dSSam Kolton   // Find operand in instruction that matches source operand and replace it with
341f60ad58dSSam Kolton   // target operand. Set corresponding src_sel
34259e5ef79SMichael Bedy   bool IsPreserveSrc = false;
343f60ad58dSSam Kolton   MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
344f60ad58dSSam Kolton   MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
345f60ad58dSSam Kolton   MachineOperand *SrcMods =
346f60ad58dSSam Kolton       TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
34756ea488dSStanislav Mekhanoshin   assert(Src && (Src->isReg() || Src->isImm()));
348f60ad58dSSam Kolton   if (!isSameReg(*Src, *getReplacedOperand())) {
34959e5ef79SMichael Bedy     // If this is not src0 then it could be src1
350f60ad58dSSam Kolton     Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
351f60ad58dSSam Kolton     SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
352f60ad58dSSam Kolton     SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
353f60ad58dSSam Kolton 
35459e5ef79SMichael Bedy     if (!Src ||
35559e5ef79SMichael Bedy         !isSameReg(*Src, *getReplacedOperand())) {
35659e5ef79SMichael Bedy       // It's possible this Src is a tied operand for
35759e5ef79SMichael Bedy       // UNUSED_PRESERVE, in which case we can either
35859e5ef79SMichael Bedy       // abandon the peephole attempt, or if legal we can
35959e5ef79SMichael Bedy       // copy the target operand into the tied slot
36059e5ef79SMichael Bedy       // if the preserve operation will effectively cause the same
36159e5ef79SMichael Bedy       // result by overwriting the rest of the dst.
36259e5ef79SMichael Bedy       MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
36359e5ef79SMichael Bedy       MachineOperand *DstUnused =
36459e5ef79SMichael Bedy         TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
36559e5ef79SMichael Bedy 
36659e5ef79SMichael Bedy       if (Dst &&
36759e5ef79SMichael Bedy           DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
368d1f45ed5SNeubauer, Sebastian         // This will work if the tied src is accessing WORD_0, and the dst is
36959e5ef79SMichael Bedy         // writing WORD_1. Modifiers don't matter because all the bits that
37059e5ef79SMichael Bedy         // would be impacted are being overwritten by the dst.
37159e5ef79SMichael Bedy         // Any other case will not work.
37259e5ef79SMichael Bedy         SdwaSel DstSel = static_cast<SdwaSel>(
37359e5ef79SMichael Bedy             TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
37459e5ef79SMichael Bedy         if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
37559e5ef79SMichael Bedy             getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
37659e5ef79SMichael Bedy           IsPreserveSrc = true;
37759e5ef79SMichael Bedy           auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
37859e5ef79SMichael Bedy                                                    AMDGPU::OpName::vdst);
37959e5ef79SMichael Bedy           auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
38059e5ef79SMichael Bedy           Src = &MI.getOperand(TiedIdx);
38159e5ef79SMichael Bedy           SrcSel = nullptr;
38259e5ef79SMichael Bedy           SrcMods = nullptr;
38359e5ef79SMichael Bedy         } else {
38459e5ef79SMichael Bedy           // Not legal to convert this src
38559e5ef79SMichael Bedy           return false;
38659e5ef79SMichael Bedy         }
38759e5ef79SMichael Bedy       }
38859e5ef79SMichael Bedy     }
389f60ad58dSSam Kolton     assert(Src && Src->isReg());
390f60ad58dSSam Kolton 
39128a1936fSStanislav Mekhanoshin     if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
39228a1936fSStanislav Mekhanoshin          MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
39328a1936fSStanislav Mekhanoshin          MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
394f60ad58dSSam Kolton          MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
395f60ad58dSSam Kolton          !isSameReg(*Src, *getReplacedOperand())) {
396f60ad58dSSam Kolton       // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
397f60ad58dSSam Kolton       // src2. This is not allowed.
398f60ad58dSSam Kolton       return false;
399f60ad58dSSam Kolton     }
400f60ad58dSSam Kolton 
40159e5ef79SMichael Bedy     assert(isSameReg(*Src, *getReplacedOperand()) &&
40259e5ef79SMichael Bedy            (IsPreserveSrc || (SrcSel && SrcMods)));
403f60ad58dSSam Kolton   }
404f60ad58dSSam Kolton   copyRegOperand(*Src, *getTargetOperand());
40559e5ef79SMichael Bedy   if (!IsPreserveSrc) {
406f60ad58dSSam Kolton     SrcSel->setImm(getSrcSel());
40703306604SStanislav Mekhanoshin     SrcMods->setImm(getSrcMods(TII, Src));
40859e5ef79SMichael Bedy   }
409f60ad58dSSam Kolton   getTargetOperand()->setIsKill(false);
410f60ad58dSSam Kolton   return true;
411f60ad58dSSam Kolton }
412f60ad58dSSam Kolton 
potentialToConvert(const SIInstrInfo * TII)413f60ad58dSSam Kolton MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
414f60ad58dSSam Kolton   // For SDWA dst operand potential instruction is one that defines register
415f60ad58dSSam Kolton   // that this operand uses
416f60ad58dSSam Kolton   MachineRegisterInfo *MRI = getMRI();
417f60ad58dSSam Kolton   MachineInstr *ParentMI = getParentInst();
418f60ad58dSSam Kolton 
4195f7f32c3SSam Kolton   MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
4205f7f32c3SSam Kolton   if (!PotentialMO)
421f60ad58dSSam Kolton     return nullptr;
422f60ad58dSSam Kolton 
423f60ad58dSSam Kolton   // Check that ParentMI is the only instruction that uses replaced register
4245f7f32c3SSam Kolton   for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
4255f7f32c3SSam Kolton     if (&UseInst != ParentMI)
426f60ad58dSSam Kolton       return nullptr;
427f60ad58dSSam Kolton   }
428f60ad58dSSam Kolton 
4295f7f32c3SSam Kolton   return PotentialMO->getParent();
430f60ad58dSSam Kolton }
431f60ad58dSSam Kolton 
convertToSDWA(MachineInstr & MI,const SIInstrInfo * TII)432f60ad58dSSam Kolton bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
433f60ad58dSSam Kolton   // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
434f60ad58dSSam Kolton 
43528a1936fSStanislav Mekhanoshin   if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
43628a1936fSStanislav Mekhanoshin        MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
43728a1936fSStanislav Mekhanoshin        MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
438f60ad58dSSam Kolton        MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
439f60ad58dSSam Kolton       getDstSel() != AMDGPU::SDWA::DWORD) {
440f60ad58dSSam Kolton     // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
441f60ad58dSSam Kolton     return false;
442f60ad58dSSam Kolton   }
443f60ad58dSSam Kolton 
444f60ad58dSSam Kolton   MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
445f60ad58dSSam Kolton   assert(Operand &&
446f60ad58dSSam Kolton          Operand->isReg() &&
447f60ad58dSSam Kolton          isSameReg(*Operand, *getReplacedOperand()));
448f60ad58dSSam Kolton   copyRegOperand(*Operand, *getTargetOperand());
449f60ad58dSSam Kolton   MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
450f60ad58dSSam Kolton   assert(DstSel);
451f60ad58dSSam Kolton   DstSel->setImm(getDstSel());
452f60ad58dSSam Kolton   MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
453f60ad58dSSam Kolton   assert(DstUnused);
454f60ad58dSSam Kolton   DstUnused->setImm(getDstUnused());
455f60ad58dSSam Kolton 
456f60ad58dSSam Kolton   // Remove original instruction  because it would conflict with our new
457f60ad58dSSam Kolton   // instruction by register definition
458f60ad58dSSam Kolton   getParentInst()->eraseFromParent();
459f60ad58dSSam Kolton   return true;
460f60ad58dSSam Kolton }
461f60ad58dSSam Kolton 
convertToSDWA(MachineInstr & MI,const SIInstrInfo * TII)4625f7f32c3SSam Kolton bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
4635f7f32c3SSam Kolton                                            const SIInstrInfo *TII) {
4645f7f32c3SSam Kolton   // MI should be moved right before v_or_b32.
4655f7f32c3SSam Kolton   // For this we should clear all kill flags on uses of MI src-operands or else
4665f7f32c3SSam Kolton   // we can encounter problem with use of killed operand.
4675f7f32c3SSam Kolton   for (MachineOperand &MO : MI.uses()) {
4685f7f32c3SSam Kolton     if (!MO.isReg())
4695f7f32c3SSam Kolton       continue;
4705f7f32c3SSam Kolton     getMRI()->clearKillFlags(MO.getReg());
4715f7f32c3SSam Kolton   }
4725f7f32c3SSam Kolton 
4735f7f32c3SSam Kolton   // Move MI before v_or_b32
4745f7f32c3SSam Kolton   auto MBB = MI.getParent();
4755f7f32c3SSam Kolton   MBB->remove(&MI);
4765f7f32c3SSam Kolton   MBB->insert(getParentInst(), &MI);
4775f7f32c3SSam Kolton 
4785f7f32c3SSam Kolton   // Add Implicit use of preserved register
4795f7f32c3SSam Kolton   MachineInstrBuilder MIB(*MBB->getParent(), MI);
4805f7f32c3SSam Kolton   MIB.addReg(getPreservedOperand()->getReg(),
4815f7f32c3SSam Kolton              RegState::ImplicitKill,
4825f7f32c3SSam Kolton              getPreservedOperand()->getSubReg());
4835f7f32c3SSam Kolton 
4845f7f32c3SSam Kolton   // Tie dst to implicit use
4855f7f32c3SSam Kolton   MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
4865f7f32c3SSam Kolton                  MI.getNumOperands() - 1);
4875f7f32c3SSam Kolton 
4885f7f32c3SSam Kolton   // Convert MI as any other SDWADstOperand and remove v_or_b32
4895f7f32c3SSam Kolton   return SDWADstOperand::convertToSDWA(MI, TII);
4905f7f32c3SSam Kolton }
4915f7f32c3SSam Kolton 
foldToImm(const MachineOperand & Op) const49227e0f8bcSSam Kolton Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
49327e0f8bcSSam Kolton   if (Op.isImm()) {
49427e0f8bcSSam Kolton     return Op.getImm();
49527e0f8bcSSam Kolton   }
49627e0f8bcSSam Kolton 
49727e0f8bcSSam Kolton   // If this is not immediate then it can be copy of immediate value, e.g.:
498a8a83d15SFrancis Visoiu Mistrih   // %1 = S_MOV_B32 255;
49927e0f8bcSSam Kolton   if (Op.isReg()) {
50027e0f8bcSSam Kolton     for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
50127e0f8bcSSam Kolton       if (!isSameReg(Op, Def))
50227e0f8bcSSam Kolton         continue;
50327e0f8bcSSam Kolton 
50427e0f8bcSSam Kolton       const MachineInstr *DefInst = Def.getParent();
505aff8341dSSam Kolton       if (!TII->isFoldableCopy(*DefInst))
50627e0f8bcSSam Kolton         return None;
50727e0f8bcSSam Kolton 
50827e0f8bcSSam Kolton       const MachineOperand &Copied = DefInst->getOperand(1);
50927e0f8bcSSam Kolton       if (!Copied.isImm())
51027e0f8bcSSam Kolton         return None;
51127e0f8bcSSam Kolton 
51227e0f8bcSSam Kolton       return Copied.getImm();
51327e0f8bcSSam Kolton     }
51427e0f8bcSSam Kolton   }
51527e0f8bcSSam Kolton 
51627e0f8bcSSam Kolton   return None;
51727e0f8bcSSam Kolton }
51827e0f8bcSSam Kolton 
5195f7f32c3SSam Kolton std::unique_ptr<SDWAOperand>
matchSDWAOperand(MachineInstr & MI)5205f7f32c3SSam Kolton SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
521f60ad58dSSam Kolton   unsigned Opcode = MI.getOpcode();
522f60ad58dSSam Kolton   switch (Opcode) {
523f60ad58dSSam Kolton   case AMDGPU::V_LSHRREV_B32_e32:
524f60ad58dSSam Kolton   case AMDGPU::V_ASHRREV_I32_e32:
52503306604SStanislav Mekhanoshin   case AMDGPU::V_LSHLREV_B32_e32:
52603306604SStanislav Mekhanoshin   case AMDGPU::V_LSHRREV_B32_e64:
52703306604SStanislav Mekhanoshin   case AMDGPU::V_ASHRREV_I32_e64:
52803306604SStanislav Mekhanoshin   case AMDGPU::V_LSHLREV_B32_e64: {
529f60ad58dSSam Kolton     // from: v_lshrrev_b32_e32 v1, 16/24, v0
530f60ad58dSSam Kolton     // to SDWA src:v0 src_sel:WORD_1/BYTE_3
531f60ad58dSSam Kolton 
532f60ad58dSSam Kolton     // from: v_ashrrev_i32_e32 v1, 16/24, v0
533f60ad58dSSam Kolton     // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
534f60ad58dSSam Kolton 
535f60ad58dSSam Kolton     // from: v_lshlrev_b32_e32 v1, 16/24, v0
536f60ad58dSSam Kolton     // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
537f60ad58dSSam Kolton     MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
53827e0f8bcSSam Kolton     auto Imm = foldToImm(*Src0);
53927e0f8bcSSam Kolton     if (!Imm)
540f60ad58dSSam Kolton       break;
541f60ad58dSSam Kolton 
54227e0f8bcSSam Kolton     if (*Imm != 16 && *Imm != 24)
543f60ad58dSSam Kolton       break;
544f60ad58dSSam Kolton 
545f60ad58dSSam Kolton     MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
546f60ad58dSSam Kolton     MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
54734978602SJay Foad     if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
548f60ad58dSSam Kolton       break;
549f60ad58dSSam Kolton 
55003306604SStanislav Mekhanoshin     if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
55103306604SStanislav Mekhanoshin         Opcode == AMDGPU::V_LSHLREV_B32_e64) {
5520eaee545SJonas Devlieghere       return std::make_unique<SDWADstOperand>(
55327e0f8bcSSam Kolton           Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
554f60ad58dSSam Kolton     } else {
5550eaee545SJonas Devlieghere       return std::make_unique<SDWASrcOperand>(
55627e0f8bcSSam Kolton           Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
55703306604SStanislav Mekhanoshin           Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
55803306604SStanislav Mekhanoshin           Opcode != AMDGPU::V_LSHRREV_B32_e64);
559f60ad58dSSam Kolton     }
560f60ad58dSSam Kolton     break;
561f60ad58dSSam Kolton   }
562f60ad58dSSam Kolton 
563f60ad58dSSam Kolton   case AMDGPU::V_LSHRREV_B16_e32:
564f60ad58dSSam Kolton   case AMDGPU::V_ASHRREV_I16_e32:
56503306604SStanislav Mekhanoshin   case AMDGPU::V_LSHLREV_B16_e32:
56603306604SStanislav Mekhanoshin   case AMDGPU::V_LSHRREV_B16_e64:
56703306604SStanislav Mekhanoshin   case AMDGPU::V_ASHRREV_I16_e64:
56803306604SStanislav Mekhanoshin   case AMDGPU::V_LSHLREV_B16_e64: {
569f60ad58dSSam Kolton     // from: v_lshrrev_b16_e32 v1, 8, v0
570f60ad58dSSam Kolton     // to SDWA src:v0 src_sel:BYTE_1
571f60ad58dSSam Kolton 
572f60ad58dSSam Kolton     // from: v_ashrrev_i16_e32 v1, 8, v0
573f60ad58dSSam Kolton     // to SDWA src:v0 src_sel:BYTE_1 sext:1
574f60ad58dSSam Kolton 
575f60ad58dSSam Kolton     // from: v_lshlrev_b16_e32 v1, 8, v0
576f60ad58dSSam Kolton     // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
577f60ad58dSSam Kolton     MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
57827e0f8bcSSam Kolton     auto Imm = foldToImm(*Src0);
57927e0f8bcSSam Kolton     if (!Imm || *Imm != 8)
580f60ad58dSSam Kolton       break;
581f60ad58dSSam Kolton 
582f60ad58dSSam Kolton     MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
583f60ad58dSSam Kolton     MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
584f60ad58dSSam Kolton 
58534978602SJay Foad     if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
586f60ad58dSSam Kolton       break;
587f60ad58dSSam Kolton 
58803306604SStanislav Mekhanoshin     if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
58903306604SStanislav Mekhanoshin         Opcode == AMDGPU::V_LSHLREV_B16_e64) {
5900eaee545SJonas Devlieghere       return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
591f60ad58dSSam Kolton     } else {
5920eaee545SJonas Devlieghere       return std::make_unique<SDWASrcOperand>(
593f60ad58dSSam Kolton             Src1, Dst, BYTE_1, false, false,
59403306604SStanislav Mekhanoshin             Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
59503306604SStanislav Mekhanoshin             Opcode != AMDGPU::V_LSHRREV_B16_e64);
596f60ad58dSSam Kolton     }
597f60ad58dSSam Kolton     break;
598f60ad58dSSam Kolton   }
599f60ad58dSSam Kolton 
600314e29edSJoe Nash   case AMDGPU::V_BFE_I32_e64:
601314e29edSJoe Nash   case AMDGPU::V_BFE_U32_e64: {
602f60ad58dSSam Kolton     // e.g.:
603f60ad58dSSam Kolton     // from: v_bfe_u32 v1, v0, 8, 8
604f60ad58dSSam Kolton     // to SDWA src:v0 src_sel:BYTE_1
605f60ad58dSSam Kolton 
606f60ad58dSSam Kolton     // offset | width | src_sel
607f60ad58dSSam Kolton     // ------------------------
608f60ad58dSSam Kolton     // 0      | 8     | BYTE_0
609f60ad58dSSam Kolton     // 0      | 16    | WORD_0
610f60ad58dSSam Kolton     // 0      | 32    | DWORD ?
611f60ad58dSSam Kolton     // 8      | 8     | BYTE_1
612f60ad58dSSam Kolton     // 16     | 8     | BYTE_2
613f60ad58dSSam Kolton     // 16     | 16    | WORD_1
614f60ad58dSSam Kolton     // 24     | 8     | BYTE_3
615f60ad58dSSam Kolton 
616f60ad58dSSam Kolton     MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
61727e0f8bcSSam Kolton     auto Offset = foldToImm(*Src1);
61827e0f8bcSSam Kolton     if (!Offset)
619f60ad58dSSam Kolton       break;
620f60ad58dSSam Kolton 
621f60ad58dSSam Kolton     MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
62227e0f8bcSSam Kolton     auto Width = foldToImm(*Src2);
62327e0f8bcSSam Kolton     if (!Width)
624f60ad58dSSam Kolton       break;
625f60ad58dSSam Kolton 
626f60ad58dSSam Kolton     SdwaSel SrcSel = DWORD;
627f60ad58dSSam Kolton 
62827e0f8bcSSam Kolton     if (*Offset == 0 && *Width == 8)
629f60ad58dSSam Kolton       SrcSel = BYTE_0;
63027e0f8bcSSam Kolton     else if (*Offset == 0 && *Width == 16)
631f60ad58dSSam Kolton       SrcSel = WORD_0;
63227e0f8bcSSam Kolton     else if (*Offset == 0 && *Width == 32)
633f60ad58dSSam Kolton       SrcSel = DWORD;
63427e0f8bcSSam Kolton     else if (*Offset == 8 && *Width == 8)
635f60ad58dSSam Kolton       SrcSel = BYTE_1;
63627e0f8bcSSam Kolton     else if (*Offset == 16 && *Width == 8)
637f60ad58dSSam Kolton       SrcSel = BYTE_2;
63827e0f8bcSSam Kolton     else if (*Offset == 16 && *Width == 16)
639f60ad58dSSam Kolton       SrcSel = WORD_1;
64027e0f8bcSSam Kolton     else if (*Offset == 24 && *Width == 8)
641f60ad58dSSam Kolton       SrcSel = BYTE_3;
642f60ad58dSSam Kolton     else
643f60ad58dSSam Kolton       break;
644f60ad58dSSam Kolton 
645f60ad58dSSam Kolton     MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
646f60ad58dSSam Kolton     MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
647f60ad58dSSam Kolton 
64834978602SJay Foad     if (Src0->getReg().isPhysical() || Dst->getReg().isPhysical())
649f60ad58dSSam Kolton       break;
650f60ad58dSSam Kolton 
6510eaee545SJonas Devlieghere     return std::make_unique<SDWASrcOperand>(
652314e29edSJoe Nash           Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
653f60ad58dSSam Kolton   }
6545f7f32c3SSam Kolton 
65503306604SStanislav Mekhanoshin   case AMDGPU::V_AND_B32_e32:
65603306604SStanislav Mekhanoshin   case AMDGPU::V_AND_B32_e64: {
657f60ad58dSSam Kolton     // e.g.:
658f60ad58dSSam Kolton     // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
659f60ad58dSSam Kolton     // to SDWA src:v0 src_sel:WORD_0/BYTE_0
660f60ad58dSSam Kolton 
661f60ad58dSSam Kolton     MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
662f60ad58dSSam Kolton     MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
66303306604SStanislav Mekhanoshin     auto ValSrc = Src1;
66403306604SStanislav Mekhanoshin     auto Imm = foldToImm(*Src0);
66503306604SStanislav Mekhanoshin 
66603306604SStanislav Mekhanoshin     if (!Imm) {
66703306604SStanislav Mekhanoshin       Imm = foldToImm(*Src1);
66803306604SStanislav Mekhanoshin       ValSrc = Src0;
66903306604SStanislav Mekhanoshin     }
67003306604SStanislav Mekhanoshin 
67103306604SStanislav Mekhanoshin     if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
67203306604SStanislav Mekhanoshin       break;
67303306604SStanislav Mekhanoshin 
674f60ad58dSSam Kolton     MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
675f60ad58dSSam Kolton 
67634978602SJay Foad     if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
677f60ad58dSSam Kolton       break;
678f60ad58dSSam Kolton 
6790eaee545SJonas Devlieghere     return std::make_unique<SDWASrcOperand>(
68003306604SStanislav Mekhanoshin         ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
6815f7f32c3SSam Kolton   }
6825f7f32c3SSam Kolton 
6835f7f32c3SSam Kolton   case AMDGPU::V_OR_B32_e32:
6845f7f32c3SSam Kolton   case AMDGPU::V_OR_B32_e64: {
6855f7f32c3SSam Kolton     // Patterns for dst_unused:UNUSED_PRESERVE.
6865f7f32c3SSam Kolton     // e.g., from:
6875f7f32c3SSam Kolton     // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
6885f7f32c3SSam Kolton     //                           src1_sel:WORD_1 src2_sel:WORD1
6895f7f32c3SSam Kolton     // v_add_f16_e32 v3, v1, v2
6905f7f32c3SSam Kolton     // v_or_b32_e32 v4, v0, v3
6915f7f32c3SSam Kolton     // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3
6925f7f32c3SSam Kolton 
6935f7f32c3SSam Kolton     // Check if one of operands of v_or_b32 is SDWA instruction
6945f7f32c3SSam Kolton     using CheckRetType = Optional<std::pair<MachineOperand *, MachineOperand *>>;
6955f7f32c3SSam Kolton     auto CheckOROperandsForSDWA =
6965f7f32c3SSam Kolton       [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
6975f7f32c3SSam Kolton         if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
6985f7f32c3SSam Kolton           return CheckRetType(None);
6995f7f32c3SSam Kolton 
7005f7f32c3SSam Kolton         MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
7015f7f32c3SSam Kolton         if (!Op1Def)
7025f7f32c3SSam Kolton           return CheckRetType(None);
7035f7f32c3SSam Kolton 
7045f7f32c3SSam Kolton         MachineInstr *Op1Inst = Op1Def->getParent();
7055f7f32c3SSam Kolton         if (!TII->isSDWA(*Op1Inst))
7065f7f32c3SSam Kolton           return CheckRetType(None);
7075f7f32c3SSam Kolton 
7085f7f32c3SSam Kolton         MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
7095f7f32c3SSam Kolton         if (!Op2Def)
7105f7f32c3SSam Kolton           return CheckRetType(None);
7115f7f32c3SSam Kolton 
7125f7f32c3SSam Kolton         return CheckRetType(std::make_pair(Op1Def, Op2Def));
7135f7f32c3SSam Kolton       };
7145f7f32c3SSam Kolton 
7155f7f32c3SSam Kolton     MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
7165f7f32c3SSam Kolton     MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
7175f7f32c3SSam Kolton     assert(OrSDWA && OrOther);
7185f7f32c3SSam Kolton     auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
7195f7f32c3SSam Kolton     if (!Res) {
7205f7f32c3SSam Kolton       OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
7215f7f32c3SSam Kolton       OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
7225f7f32c3SSam Kolton       assert(OrSDWA && OrOther);
7235f7f32c3SSam Kolton       Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
7245f7f32c3SSam Kolton       if (!Res)
725f60ad58dSSam Kolton         break;
726f60ad58dSSam Kolton     }
7275f7f32c3SSam Kolton 
7285f7f32c3SSam Kolton     MachineOperand *OrSDWADef = Res->first;
7295f7f32c3SSam Kolton     MachineOperand *OrOtherDef = Res->second;
7305f7f32c3SSam Kolton     assert(OrSDWADef && OrOtherDef);
7315f7f32c3SSam Kolton 
7325f7f32c3SSam Kolton     MachineInstr *SDWAInst = OrSDWADef->getParent();
7335f7f32c3SSam Kolton     MachineInstr *OtherInst = OrOtherDef->getParent();
7345f7f32c3SSam Kolton 
7355f7f32c3SSam Kolton     // Check that OtherInstr is actually bitwise compatible with SDWAInst = their
7365f7f32c3SSam Kolton     // destination patterns don't overlap. Compatible instruction can be either
7375f7f32c3SSam Kolton     // regular instruction with compatible bitness or SDWA instruction with
7385f7f32c3SSam Kolton     // correct dst_sel
7395f7f32c3SSam Kolton     // SDWAInst | OtherInst bitness / OtherInst dst_sel
7405f7f32c3SSam Kolton     // -----------------------------------------------------
7415f7f32c3SSam Kolton     // DWORD    | no                    / no
7425f7f32c3SSam Kolton     // WORD_0   | no                    / BYTE_2/3, WORD_1
7435f7f32c3SSam Kolton     // WORD_1   | 8/16-bit instructions / BYTE_0/1, WORD_0
7445f7f32c3SSam Kolton     // BYTE_0   | no                    / BYTE_1/2/3, WORD_1
7455f7f32c3SSam Kolton     // BYTE_1   | 8-bit                 / BYTE_0/2/3, WORD_1
7465f7f32c3SSam Kolton     // BYTE_2   | 8/16-bit              / BYTE_0/1/3. WORD_0
7475f7f32c3SSam Kolton     // BYTE_3   | 8/16/24-bit           / BYTE_0/1/2, WORD_0
7485f7f32c3SSam Kolton     // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK
7495f7f32c3SSam Kolton     // but v_add_f32 is not.
7505f7f32c3SSam Kolton 
7515f7f32c3SSam Kolton     // TODO: add support for non-SDWA instructions as OtherInst.
7525f7f32c3SSam Kolton     // For now this only works with SDWA instructions. For regular instructions
75380cf9ff5SMichael Bedy     // there is no way to determine if the instruction writes only 8/16/24-bit
75480cf9ff5SMichael Bedy     // out of full register size and all registers are at min 32-bit wide.
7555f7f32c3SSam Kolton     if (!TII->isSDWA(*OtherInst))
7565f7f32c3SSam Kolton       break;
7575f7f32c3SSam Kolton 
7585f7f32c3SSam Kolton     SdwaSel DstSel = static_cast<SdwaSel>(
7595f7f32c3SSam Kolton       TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));;
7605f7f32c3SSam Kolton     SdwaSel OtherDstSel = static_cast<SdwaSel>(
7615f7f32c3SSam Kolton       TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
7625f7f32c3SSam Kolton 
7635f7f32c3SSam Kolton     bool DstSelAgree = false;
7645f7f32c3SSam Kolton     switch (DstSel) {
7655f7f32c3SSam Kolton     case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
7665f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_3) ||
7675f7f32c3SSam Kolton                                 (OtherDstSel == WORD_1));
7685f7f32c3SSam Kolton       break;
7695f7f32c3SSam Kolton     case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7705f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_1) ||
7715f7f32c3SSam Kolton                                 (OtherDstSel == WORD_0));
7725f7f32c3SSam Kolton       break;
7735f7f32c3SSam Kolton     case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
7745f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_2) ||
7755f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_3) ||
7765f7f32c3SSam Kolton                                 (OtherDstSel == WORD_1));
7775f7f32c3SSam Kolton       break;
7785f7f32c3SSam Kolton     case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7795f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_2) ||
7805f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_3) ||
7815f7f32c3SSam Kolton                                 (OtherDstSel == WORD_1));
7825f7f32c3SSam Kolton       break;
7835f7f32c3SSam Kolton     case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7845f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_1) ||
7855f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_3) ||
7865f7f32c3SSam Kolton                                 (OtherDstSel == WORD_0));
7875f7f32c3SSam Kolton       break;
7885f7f32c3SSam Kolton     case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7895f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_1) ||
7905f7f32c3SSam Kolton                                 (OtherDstSel == BYTE_2) ||
7915f7f32c3SSam Kolton                                 (OtherDstSel == WORD_0));
7925f7f32c3SSam Kolton       break;
7935f7f32c3SSam Kolton     default: DstSelAgree = false;
7945f7f32c3SSam Kolton     }
7955f7f32c3SSam Kolton 
7965f7f32c3SSam Kolton     if (!DstSelAgree)
7975f7f32c3SSam Kolton       break;
7985f7f32c3SSam Kolton 
7995f7f32c3SSam Kolton     // Also OtherInst dst_unused should be UNUSED_PAD
8005f7f32c3SSam Kolton     DstUnused OtherDstUnused = static_cast<DstUnused>(
8015f7f32c3SSam Kolton       TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
8025f7f32c3SSam Kolton     if (OtherDstUnused != DstUnused::UNUSED_PAD)
8035f7f32c3SSam Kolton       break;
8045f7f32c3SSam Kolton 
8055f7f32c3SSam Kolton     // Create DstPreserveOperand
8065f7f32c3SSam Kolton     MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
8075f7f32c3SSam Kolton     assert(OrDst && OrDst->isReg());
8085f7f32c3SSam Kolton 
8090eaee545SJonas Devlieghere     return std::make_unique<SDWADstPreserveOperand>(
8105f7f32c3SSam Kolton       OrDst, OrSDWADef, OrOtherDef, DstSel);
8115f7f32c3SSam Kolton 
8125f7f32c3SSam Kolton   }
8135f7f32c3SSam Kolton   }
8145f7f32c3SSam Kolton 
8155f7f32c3SSam Kolton   return std::unique_ptr<SDWAOperand>(nullptr);
8165f7f32c3SSam Kolton }
8175f7f32c3SSam Kolton 
818a19de320SHans Wennborg #if !defined(NDEBUG)
operator <<(raw_ostream & OS,const SDWAOperand & Operand)819a19de320SHans Wennborg static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
820a19de320SHans Wennborg   Operand.print(OS);
821a19de320SHans Wennborg   return OS;
822a19de320SHans Wennborg }
823a19de320SHans Wennborg #endif
824a19de320SHans Wennborg 
matchSDWAOperands(MachineBasicBlock & MBB)8259c2f3c48SMatt Arsenault void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
8265f7f32c3SSam Kolton   for (MachineInstr &MI : MBB) {
8275f7f32c3SSam Kolton     if (auto Operand = matchSDWAOperand(MI)) {
828d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
8295f7f32c3SSam Kolton       SDWAOperands[&MI] = std::move(Operand);
8305f7f32c3SSam Kolton       ++NumSDWAPatternsFound;
831f60ad58dSSam Kolton     }
832f60ad58dSSam Kolton   }
833f60ad58dSSam Kolton }
834f60ad58dSSam Kolton 
83516de4fd2SRon Lieberman // Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and
83679f67caeSMatt Arsenault // V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows isConvertibleToSDWA
83779f67caeSMatt Arsenault // to perform its transformation on V_ADD_CO_U32_e32 into V_ADD_CO_U32_sdwa.
83816de4fd2SRon Lieberman //
83916de4fd2SRon Lieberman // We are transforming from a VOP3 into a VOP2 form of the instruction.
84016de4fd2SRon Lieberman //   %19:vgpr_32 = V_AND_B32_e32 255,
84116de4fd2SRon Lieberman //       killed %16:vgpr_32, implicit $exec
84279f67caeSMatt Arsenault //   %47:vgpr_32, %49:sreg_64_xexec = V_ADD_CO_U32_e64
84316de4fd2SRon Lieberman //       %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
84416de4fd2SRon Lieberman //  %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
84516de4fd2SRon Lieberman //       %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
84616de4fd2SRon Lieberman //
84716de4fd2SRon Lieberman // becomes
84879f67caeSMatt Arsenault //   %47:vgpr_32 = V_ADD_CO_U32_sdwa
84916de4fd2SRon Lieberman //       0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
85016de4fd2SRon Lieberman //       implicit-def $vcc, implicit $exec
85116de4fd2SRon Lieberman //  %48:vgpr_32 = V_ADDC_U32_e32
85216de4fd2SRon Lieberman //       0, %26.sub1:vreg_64, implicit-def $vcc, implicit $vcc, implicit $exec
pseudoOpConvertToVOP2(MachineInstr & MI,const GCNSubtarget & ST) const85316de4fd2SRon Lieberman void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
85416de4fd2SRon Lieberman                                            const GCNSubtarget &ST) const {
85516de4fd2SRon Lieberman   int Opc = MI.getOpcode();
85679f67caeSMatt Arsenault   assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
85779f67caeSMatt Arsenault          "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
85816de4fd2SRon Lieberman 
85916de4fd2SRon Lieberman   // Can the candidate MI be shrunk?
86016de4fd2SRon Lieberman   if (!TII->canShrink(MI, *MRI))
86116de4fd2SRon Lieberman     return;
86216de4fd2SRon Lieberman   Opc = AMDGPU::getVOPe32(Opc);
86316de4fd2SRon Lieberman   // Find the related ADD instruction.
86416de4fd2SRon Lieberman   const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
86516de4fd2SRon Lieberman   if (!Sdst)
86616de4fd2SRon Lieberman     return;
86716de4fd2SRon Lieberman   MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
86816de4fd2SRon Lieberman   if (!NextOp)
86916de4fd2SRon Lieberman     return;
87016de4fd2SRon Lieberman   MachineInstr &MISucc = *NextOp->getParent();
87116de4fd2SRon Lieberman   // Can the successor be shrunk?
87216de4fd2SRon Lieberman   if (!TII->canShrink(MISucc, *MRI))
87316de4fd2SRon Lieberman     return;
87416de4fd2SRon Lieberman   int SuccOpc = AMDGPU::getVOPe32(MISucc.getOpcode());
87516de4fd2SRon Lieberman   // Make sure the carry in/out are subsequently unused.
87616de4fd2SRon Lieberman   MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
87716de4fd2SRon Lieberman   if (!CarryIn)
87816de4fd2SRon Lieberman     return;
87916de4fd2SRon Lieberman   MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
88016de4fd2SRon Lieberman   if (!CarryOut)
88116de4fd2SRon Lieberman     return;
88216de4fd2SRon Lieberman   if (!MRI->hasOneUse(CarryIn->getReg()) || !MRI->use_empty(CarryOut->getReg()))
88316de4fd2SRon Lieberman     return;
88416de4fd2SRon Lieberman   // Make sure VCC or its subregs are dead before MI.
88516de4fd2SRon Lieberman   MachineBasicBlock &MBB = *MI.getParent();
88616de4fd2SRon Lieberman   auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
88716de4fd2SRon Lieberman   if (Liveness != MachineBasicBlock::LQR_Dead)
88816de4fd2SRon Lieberman     return;
88916de4fd2SRon Lieberman   // Check if VCC is referenced in range of (MI,MISucc].
89016de4fd2SRon Lieberman   for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
89116de4fd2SRon Lieberman        I != E; ++I) {
89216de4fd2SRon Lieberman     if (I->modifiesRegister(AMDGPU::VCC, TRI))
89316de4fd2SRon Lieberman       return;
89416de4fd2SRon Lieberman   }
89507cd19efSMatt Arsenault 
89616de4fd2SRon Lieberman   // Make the two new e32 instruction variants.
89716de4fd2SRon Lieberman   // Replace MI with V_{SUB|ADD}_I32_e32
89807cd19efSMatt Arsenault   BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc))
89907cd19efSMatt Arsenault     .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
90007cd19efSMatt Arsenault     .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
90107cd19efSMatt Arsenault     .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
90207cd19efSMatt Arsenault     .setMIFlags(MI.getFlags());
90307cd19efSMatt Arsenault 
90416de4fd2SRon Lieberman   MI.eraseFromParent();
90507cd19efSMatt Arsenault 
90616de4fd2SRon Lieberman   // Replace MISucc with V_{SUBB|ADDC}_U32_e32
90707cd19efSMatt Arsenault   BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc))
90807cd19efSMatt Arsenault     .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst))
90907cd19efSMatt Arsenault     .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0))
91007cd19efSMatt Arsenault     .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1))
91107cd19efSMatt Arsenault     .setMIFlags(MISucc.getFlags());
91207cd19efSMatt Arsenault 
91316de4fd2SRon Lieberman   MISucc.eraseFromParent();
91416de4fd2SRon Lieberman }
91516de4fd2SRon Lieberman 
isConvertibleToSDWA(MachineInstr & MI,const GCNSubtarget & ST) const91616de4fd2SRon Lieberman bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
9175bfbae5cSTom Stellard                                          const GCNSubtarget &ST) const {
9185f7f32c3SSam Kolton   // Check if this is already an SDWA instruction
9195f7f32c3SSam Kolton   unsigned Opc = MI.getOpcode();
9205f7f32c3SSam Kolton   if (TII->isSDWA(Opc))
9215f7f32c3SSam Kolton     return true;
9225f7f32c3SSam Kolton 
92356ea488dSStanislav Mekhanoshin   // Check if this instruction has opcode that supports SDWA
9243c4933fcSSam Kolton   if (AMDGPU::getSDWAOp(Opc) == -1)
9253c4933fcSSam Kolton     Opc = AMDGPU::getVOPe32(Opc);
9263c4933fcSSam Kolton 
9275f7f32c3SSam Kolton   if (AMDGPU::getSDWAOp(Opc) == -1)
9283c4933fcSSam Kolton     return false;
9293c4933fcSSam Kolton 
9303c4933fcSSam Kolton   if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
931549c89d2SSam Kolton     return false;
932549c89d2SSam Kolton 
933549c89d2SSam Kolton   if (TII->isVOPC(Opc)) {
9343c4933fcSSam Kolton     if (!ST.hasSDWASdst()) {
935549c89d2SSam Kolton       const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
93652500216SStanislav Mekhanoshin       if (SDst && (SDst->getReg() != AMDGPU::VCC &&
93752500216SStanislav Mekhanoshin                    SDst->getReg() != AMDGPU::VCC_LO))
93803306604SStanislav Mekhanoshin         return false;
939ebfdaf73SSam Kolton     }
940ebfdaf73SSam Kolton 
941a179d25bSSam Kolton     if (!ST.hasSDWAOutModsVOPC() &&
942a179d25bSSam Kolton         (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
943a179d25bSSam Kolton          TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
9443c4933fcSSam Kolton       return false;
9453c4933fcSSam Kolton 
946a179d25bSSam Kolton   } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
947a179d25bSSam Kolton              !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
9483c4933fcSSam Kolton     return false;
9493c4933fcSSam Kolton   }
9503c4933fcSSam Kolton 
95128a1936fSStanislav Mekhanoshin   if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
95228a1936fSStanislav Mekhanoshin                            Opc == AMDGPU::V_FMAC_F32_e32 ||
95328a1936fSStanislav Mekhanoshin                            Opc == AMDGPU::V_MAC_F16_e32 ||
9543c4933fcSSam Kolton                            Opc == AMDGPU::V_MAC_F32_e32))
9553c4933fcSSam Kolton     return false;
9563c4933fcSSam Kolton 
95728a1936fSStanislav Mekhanoshin   // Check if target supports this SDWA opcode
95828a1936fSStanislav Mekhanoshin   if (TII->pseudoToMCOpcode(Opc) == -1)
95928a1936fSStanislav Mekhanoshin     return false;
96028a1936fSStanislav Mekhanoshin 
9614c45e6ffSDmitry Preobrazhensky   // FIXME: has SDWA but require handling of implicit VCC use
9624c45e6ffSDmitry Preobrazhensky   if (Opc == AMDGPU::V_CNDMASK_B32_e32)
9634c45e6ffSDmitry Preobrazhensky     return false;
9644c45e6ffSDmitry Preobrazhensky 
9650462aef5SStanislav Mekhanoshin   if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
9660462aef5SStanislav Mekhanoshin     if (!Src0->isReg() && !Src0->isImm())
9670462aef5SStanislav Mekhanoshin       return false;
9680462aef5SStanislav Mekhanoshin   }
9690462aef5SStanislav Mekhanoshin 
9700462aef5SStanislav Mekhanoshin   if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
9710462aef5SStanislav Mekhanoshin     if (!Src1->isReg() && !Src1->isImm())
9720462aef5SStanislav Mekhanoshin       return false;
9730462aef5SStanislav Mekhanoshin   }
9740462aef5SStanislav Mekhanoshin 
9753c4933fcSSam Kolton   return true;
9763c4933fcSSam Kolton }
9773c4933fcSSam Kolton 
convertToSDWA(MachineInstr & MI,const SDWAOperandsVector & SDWAOperands)978ebfdaf73SSam Kolton bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
979ebfdaf73SSam Kolton                                    const SDWAOperandsVector &SDWAOperands) {
98059e5ef79SMichael Bedy 
981d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);
98259e5ef79SMichael Bedy 
983f60ad58dSSam Kolton   // Convert to sdwa
9845f7f32c3SSam Kolton   int SDWAOpcode;
9855f7f32c3SSam Kolton   unsigned Opcode = MI.getOpcode();
9865f7f32c3SSam Kolton   if (TII->isSDWA(Opcode)) {
9875f7f32c3SSam Kolton     SDWAOpcode = Opcode;
9885f7f32c3SSam Kolton   } else {
9895f7f32c3SSam Kolton     SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
99003306604SStanislav Mekhanoshin     if (SDWAOpcode == -1)
9915f7f32c3SSam Kolton       SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
9925f7f32c3SSam Kolton   }
993f60ad58dSSam Kolton   assert(SDWAOpcode != -1);
994f60ad58dSSam Kolton 
995f60ad58dSSam Kolton   const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
996f60ad58dSSam Kolton 
997f60ad58dSSam Kolton   // Create SDWA version of instruction MI and initialize its operands
998f60ad58dSSam Kolton   MachineInstrBuilder SDWAInst =
99907cd19efSMatt Arsenault     BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
100007cd19efSMatt Arsenault     .setMIFlags(MI.getFlags());
1001f60ad58dSSam Kolton 
1002a179d25bSSam Kolton   // Copy dst, if it is present in original then should also be present in SDWA
1003a179d25bSSam Kolton   MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
1004f60ad58dSSam Kolton   if (Dst) {
1005f60ad58dSSam Kolton     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
1006f60ad58dSSam Kolton     SDWAInst.add(*Dst);
1007a179d25bSSam Kolton   } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
1008549c89d2SSam Kolton     assert(Dst &&
1009549c89d2SSam Kolton            AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
1010549c89d2SSam Kolton     SDWAInst.add(*Dst);
1011a179d25bSSam Kolton   } else {
1012a179d25bSSam Kolton     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
101352500216SStanislav Mekhanoshin     SDWAInst.addReg(TRI->getVCC(), RegState::Define);
1014f60ad58dSSam Kolton   }
1015f60ad58dSSam Kolton 
1016f60ad58dSSam Kolton   // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
1017f60ad58dSSam Kolton   // src0_modifiers (except for v_nop_sdwa, but it can't get here)
1018f60ad58dSSam Kolton   MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1019f60ad58dSSam Kolton   assert(
1020f60ad58dSSam Kolton     Src0 &&
1021f60ad58dSSam Kolton     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
1022f60ad58dSSam Kolton     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
102303306604SStanislav Mekhanoshin   if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
102403306604SStanislav Mekhanoshin     SDWAInst.addImm(Mod->getImm());
102503306604SStanislav Mekhanoshin   else
1026f60ad58dSSam Kolton     SDWAInst.addImm(0);
1027f60ad58dSSam Kolton   SDWAInst.add(*Src0);
1028f60ad58dSSam Kolton 
1029f60ad58dSSam Kolton   // Copy src1 if present, initialize src1_modifiers.
1030f60ad58dSSam Kolton   MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1031f60ad58dSSam Kolton   if (Src1) {
1032f60ad58dSSam Kolton     assert(
1033f60ad58dSSam Kolton       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
1034f60ad58dSSam Kolton       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
103503306604SStanislav Mekhanoshin     if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
103603306604SStanislav Mekhanoshin       SDWAInst.addImm(Mod->getImm());
103703306604SStanislav Mekhanoshin     else
1038f60ad58dSSam Kolton       SDWAInst.addImm(0);
1039f60ad58dSSam Kolton     SDWAInst.add(*Src1);
1040f60ad58dSSam Kolton   }
1041f60ad58dSSam Kolton 
104228a1936fSStanislav Mekhanoshin   if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
104328a1936fSStanislav Mekhanoshin       SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
104428a1936fSStanislav Mekhanoshin       SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
1045f60ad58dSSam Kolton       SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
1046f60ad58dSSam Kolton     // v_mac_f16/32 has additional src2 operand tied to vdst
1047f60ad58dSSam Kolton     MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1048f60ad58dSSam Kolton     assert(Src2);
1049f60ad58dSSam Kolton     SDWAInst.add(*Src2);
1050f60ad58dSSam Kolton   }
1051f60ad58dSSam Kolton 
10523c4933fcSSam Kolton   // Copy clamp if present, initialize otherwise
10533c4933fcSSam Kolton   assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
10543c4933fcSSam Kolton   MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
10553c4933fcSSam Kolton   if (Clamp) {
10563c4933fcSSam Kolton     SDWAInst.add(*Clamp);
10573c4933fcSSam Kolton   } else {
1058549c89d2SSam Kolton     SDWAInst.addImm(0);
10593c4933fcSSam Kolton   }
1060549c89d2SSam Kolton 
10613c4933fcSSam Kolton   // Copy omod if present, initialize otherwise if needed
1062a179d25bSSam Kolton   if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
10633c4933fcSSam Kolton     MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
10643c4933fcSSam Kolton     if (OMod) {
10653c4933fcSSam Kolton       SDWAInst.add(*OMod);
1066a179d25bSSam Kolton     } else {
1067f60ad58dSSam Kolton       SDWAInst.addImm(0);
10683c4933fcSSam Kolton     }
1069a179d25bSSam Kolton   }
1070f60ad58dSSam Kolton 
10715f7f32c3SSam Kolton   // Copy dst_sel if present, initialize otherwise if needed
1072a179d25bSSam Kolton   if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
10735f7f32c3SSam Kolton     MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
10745f7f32c3SSam Kolton     if (DstSel) {
10755f7f32c3SSam Kolton       SDWAInst.add(*DstSel);
10765f7f32c3SSam Kolton     } else {
1077f60ad58dSSam Kolton       SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1078a179d25bSSam Kolton     }
10795f7f32c3SSam Kolton   }
1080a179d25bSSam Kolton 
10815f7f32c3SSam Kolton   // Copy dst_unused if present, initialize otherwise if needed
1082a179d25bSSam Kolton   if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
10835f7f32c3SSam Kolton     MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
10845f7f32c3SSam Kolton     if (DstUnused) {
10855f7f32c3SSam Kolton       SDWAInst.add(*DstUnused);
10865f7f32c3SSam Kolton     } else {
1087f60ad58dSSam Kolton       SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
1088f60ad58dSSam Kolton     }
10895f7f32c3SSam Kolton   }
1090f60ad58dSSam Kolton 
10915f7f32c3SSam Kolton   // Copy src0_sel if present, initialize otherwise
1092f60ad58dSSam Kolton   assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
10935f7f32c3SSam Kolton   MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
10945f7f32c3SSam Kolton   if (Src0Sel) {
10955f7f32c3SSam Kolton     SDWAInst.add(*Src0Sel);
10965f7f32c3SSam Kolton   } else {
1097f60ad58dSSam Kolton     SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
10985f7f32c3SSam Kolton   }
1099f60ad58dSSam Kolton 
11005f7f32c3SSam Kolton   // Copy src1_sel if present, initialize otherwise if needed
1101f60ad58dSSam Kolton   if (Src1) {
1102f60ad58dSSam Kolton     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
11035f7f32c3SSam Kolton     MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
11045f7f32c3SSam Kolton     if (Src1Sel) {
11055f7f32c3SSam Kolton       SDWAInst.add(*Src1Sel);
11065f7f32c3SSam Kolton     } else {
1107f60ad58dSSam Kolton       SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1108f60ad58dSSam Kolton     }
11095f7f32c3SSam Kolton   }
1110f60ad58dSSam Kolton 
111159e5ef79SMichael Bedy   // Check for a preserved register that needs to be copied.
111259e5ef79SMichael Bedy   auto DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
111359e5ef79SMichael Bedy   if (DstUnused &&
111459e5ef79SMichael Bedy       DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
111559e5ef79SMichael Bedy     // We expect, if we are here, that the instruction was already in its SDWA form,
111659e5ef79SMichael Bedy     // with a tied operand.
111759e5ef79SMichael Bedy     assert(Dst && Dst->isTied());
111859e5ef79SMichael Bedy     assert(Opcode == static_cast<unsigned int>(SDWAOpcode));
111959e5ef79SMichael Bedy     // We also expect a vdst, since sdst can't preserve.
112059e5ef79SMichael Bedy     auto PreserveDstIdx = AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst);
112159e5ef79SMichael Bedy     assert(PreserveDstIdx != -1);
112259e5ef79SMichael Bedy 
112359e5ef79SMichael Bedy     auto TiedIdx = MI.findTiedOperandIdx(PreserveDstIdx);
112459e5ef79SMichael Bedy     auto Tied = MI.getOperand(TiedIdx);
112559e5ef79SMichael Bedy 
112659e5ef79SMichael Bedy     SDWAInst.add(Tied);
112759e5ef79SMichael Bedy     SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
112859e5ef79SMichael Bedy   }
112959e5ef79SMichael Bedy 
1130c24d5e28SMatt Arsenault   // Apply all sdwa operand patterns.
1131f60ad58dSSam Kolton   bool Converted = false;
1132f60ad58dSSam Kolton   for (auto &Operand : SDWAOperands) {
1133d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
1134*6527b2a4SSebastian Neubauer     // There should be no intersection between SDWA operands and potential MIs
1135ebfdaf73SSam Kolton     // e.g.:
1136ebfdaf73SSam Kolton     // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
1137ebfdaf73SSam Kolton     // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
1138ebfdaf73SSam Kolton     // v_add_u32 v3, v4, v2
1139ebfdaf73SSam Kolton     //
1140*6527b2a4SSebastian Neubauer     // In that example it is possible that we would fold 2nd instruction into
1141*6527b2a4SSebastian Neubauer     // 3rd (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that
1142*6527b2a4SSebastian Neubauer     // was already destroyed). So if SDWAOperand is also a potential MI then do
1143*6527b2a4SSebastian Neubauer     // not apply it.
1144ebfdaf73SSam Kolton     if (PotentialMatches.count(Operand->getParentInst()) == 0)
1145f60ad58dSSam Kolton       Converted |= Operand->convertToSDWA(*SDWAInst, TII);
1146f60ad58dSSam Kolton   }
114756ea488dSStanislav Mekhanoshin   if (Converted) {
114856ea488dSStanislav Mekhanoshin     ConvertedInstructions.push_back(SDWAInst);
114956ea488dSStanislav Mekhanoshin   } else {
1150f60ad58dSSam Kolton     SDWAInst->eraseFromParent();
1151f60ad58dSSam Kolton     return false;
1152f60ad58dSSam Kolton   }
1153f60ad58dSSam Kolton 
1154d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
1155f60ad58dSSam Kolton   ++NumSDWAInstructionsPeepholed;
1156f60ad58dSSam Kolton 
1157f60ad58dSSam Kolton   MI.eraseFromParent();
1158f60ad58dSSam Kolton   return true;
1159f60ad58dSSam Kolton }
1160f60ad58dSSam Kolton 
116156ea488dSStanislav Mekhanoshin // If an instruction was converted to SDWA it should not have immediates or SGPR
11623c4933fcSSam Kolton // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
legalizeScalarOperands(MachineInstr & MI,const GCNSubtarget & ST) const1163c24d5e28SMatt Arsenault void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
11645bfbae5cSTom Stellard                                             const GCNSubtarget &ST) const {
116556ea488dSStanislav Mekhanoshin   const MCInstrDesc &Desc = TII->get(MI.getOpcode());
11663c4933fcSSam Kolton   unsigned ConstantBusCount = 0;
11673c4933fcSSam Kolton   for (MachineOperand &Op : MI.explicit_uses()) {
116856ea488dSStanislav Mekhanoshin     if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
116956ea488dSStanislav Mekhanoshin       continue;
11703c4933fcSSam Kolton 
11713c4933fcSSam Kolton     unsigned I = MI.getOperandNo(&Op);
117256ea488dSStanislav Mekhanoshin     if (Desc.OpInfo[I].RegClass == -1 ||
1173399b7de0SChristudasan Devadasan         !TRI->isVSSuperClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
117456ea488dSStanislav Mekhanoshin       continue;
11753c4933fcSSam Kolton 
11763c4933fcSSam Kolton     if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
11773c4933fcSSam Kolton         TRI->isSGPRReg(*MRI, Op.getReg())) {
11783c4933fcSSam Kolton       ++ConstantBusCount;
11793c4933fcSSam Kolton       continue;
11803c4933fcSSam Kolton     }
11813c4933fcSSam Kolton 
11820c476111SDaniel Sanders     Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
118356ea488dSStanislav Mekhanoshin     auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
118456ea488dSStanislav Mekhanoshin                         TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
118556ea488dSStanislav Mekhanoshin     if (Op.isImm())
118656ea488dSStanislav Mekhanoshin       Copy.addImm(Op.getImm());
118756ea488dSStanislav Mekhanoshin     else if (Op.isReg())
118856ea488dSStanislav Mekhanoshin       Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
118956ea488dSStanislav Mekhanoshin                   Op.getSubReg());
119056ea488dSStanislav Mekhanoshin     Op.ChangeToRegister(VGPR, false);
119156ea488dSStanislav Mekhanoshin   }
119256ea488dSStanislav Mekhanoshin }
119356ea488dSStanislav Mekhanoshin 
runOnMachineFunction(MachineFunction & MF)1194f60ad58dSSam Kolton bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
11955bfbae5cSTom Stellard   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1196f60ad58dSSam Kolton 
1197f1caa283SMatthias Braun   if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
1198f60ad58dSSam Kolton     return false;
1199f60ad58dSSam Kolton 
1200f60ad58dSSam Kolton   MRI = &MF.getRegInfo();
1201f60ad58dSSam Kolton   TRI = ST.getRegisterInfo();
1202f60ad58dSSam Kolton   TII = ST.getInstrInfo();
1203f60ad58dSSam Kolton 
1204ebfdaf73SSam Kolton   // Find all SDWA operands in MF.
12055f7f32c3SSam Kolton   bool Ret = false;
12069c2f3c48SMatt Arsenault   for (MachineBasicBlock &MBB : MF) {
12079c2f3c48SMatt Arsenault     bool Changed = false;
12085f7f32c3SSam Kolton     do {
120916de4fd2SRon Lieberman       // Preprocess the ADD/SUB pairs so they could be SDWA'ed.
121016de4fd2SRon Lieberman       // Look for a possible ADD or SUB that resulted from a previously lowered
121116de4fd2SRon Lieberman       // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
121216de4fd2SRon Lieberman       // lowers the pair of instructions into e32 form.
121316de4fd2SRon Lieberman       matchSDWAOperands(MBB);
121416de4fd2SRon Lieberman       for (const auto &OperandPair : SDWAOperands) {
121516de4fd2SRon Lieberman         const auto &Operand = OperandPair.second;
121616de4fd2SRon Lieberman         MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
121716de4fd2SRon Lieberman         if (PotentialMI &&
121879f67caeSMatt Arsenault            (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
121979f67caeSMatt Arsenault             PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
122016de4fd2SRon Lieberman           pseudoOpConvertToVOP2(*PotentialMI, ST);
122116de4fd2SRon Lieberman       }
122216de4fd2SRon Lieberman       SDWAOperands.clear();
122316de4fd2SRon Lieberman 
122416de4fd2SRon Lieberman       // Generate potential match list.
12259c2f3c48SMatt Arsenault       matchSDWAOperands(MBB);
1226f60ad58dSSam Kolton 
1227ebfdaf73SSam Kolton       for (const auto &OperandPair : SDWAOperands) {
1228ebfdaf73SSam Kolton         const auto &Operand = OperandPair.second;
1229f60ad58dSSam Kolton         MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
12303c4933fcSSam Kolton         if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
1231ebfdaf73SSam Kolton           PotentialMatches[PotentialMI].push_back(Operand.get());
1232f60ad58dSSam Kolton         }
1233f60ad58dSSam Kolton       }
1234f60ad58dSSam Kolton 
1235f60ad58dSSam Kolton       for (auto &PotentialPair : PotentialMatches) {
1236f60ad58dSSam Kolton         MachineInstr &PotentialMI = *PotentialPair.first;
1237f60ad58dSSam Kolton         convertToSDWA(PotentialMI, PotentialPair.second);
1238f60ad58dSSam Kolton       }
1239aff8341dSSam Kolton 
1240ebfdaf73SSam Kolton       PotentialMatches.clear();
1241aff8341dSSam Kolton       SDWAOperands.clear();
124256ea488dSStanislav Mekhanoshin 
12435f7f32c3SSam Kolton       Changed = !ConvertedInstructions.empty();
12445f7f32c3SSam Kolton 
12455f7f32c3SSam Kolton       if (Changed)
12465f7f32c3SSam Kolton         Ret = true;
124756ea488dSStanislav Mekhanoshin       while (!ConvertedInstructions.empty())
12483c4933fcSSam Kolton         legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
12495f7f32c3SSam Kolton     } while (Changed);
12509c2f3c48SMatt Arsenault   }
125156ea488dSStanislav Mekhanoshin 
1252e4cda741SStanislav Mekhanoshin   return Ret;
1253f60ad58dSSam Kolton }
1254