//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass tries to apply several peephole SDWA patterns.
///
/// E.g. original:
///   V_LSHRREV_B32_e32 %0, 16, %1
///   V_ADD_CO_U32_e32 %2, %0, %3
///   V_LSHLREV_B32_e32 %4, 16, %2
///
/// Replace:
///   V_ADD_CO_U32_sdwa %4, %1, %3
///       dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
///
//===----------------------------------------------------------------------===//
21f60ad58dSSam Kolton
22f60ad58dSSam Kolton #include "AMDGPU.h"
23560d7e04Sdfukalov #include "GCNSubtarget.h"
24560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
253d5ba7c6STim Renouf #include "llvm/ADT/MapVector.h"
266bda14b3SChandler Carruth #include "llvm/ADT/Statistic.h"
27f60ad58dSSam Kolton #include "llvm/CodeGen/MachineFunctionPass.h"
28f60ad58dSSam Kolton
29f60ad58dSSam Kolton using namespace llvm;
30f60ad58dSSam Kolton
31f60ad58dSSam Kolton #define DEBUG_TYPE "si-peephole-sdwa"
32f60ad58dSSam Kolton
33f60ad58dSSam Kolton STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
34f60ad58dSSam Kolton STATISTIC(NumSDWAInstructionsPeepholed,
35f60ad58dSSam Kolton "Number of instruction converted to SDWA.");
36f60ad58dSSam Kolton
37f60ad58dSSam Kolton namespace {
38f60ad58dSSam Kolton
39f60ad58dSSam Kolton class SDWAOperand;
405f7f32c3SSam Kolton class SDWADstOperand;
41f60ad58dSSam Kolton
42f60ad58dSSam Kolton class SIPeepholeSDWA : public MachineFunctionPass {
43ebfdaf73SSam Kolton public:
4459e12826SEugene Zelenko using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
45ebfdaf73SSam Kolton
46f60ad58dSSam Kolton private:
47f60ad58dSSam Kolton MachineRegisterInfo *MRI;
48f60ad58dSSam Kolton const SIRegisterInfo *TRI;
49f60ad58dSSam Kolton const SIInstrInfo *TII;
50f60ad58dSSam Kolton
513d5ba7c6STim Renouf MapVector<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
523d5ba7c6STim Renouf MapVector<MachineInstr *, SDWAOperandsVector> PotentialMatches;
5356ea488dSStanislav Mekhanoshin SmallVector<MachineInstr *, 8> ConvertedInstructions;
54f60ad58dSSam Kolton
5527e0f8bcSSam Kolton Optional<int64_t> foldToImm(const MachineOperand &Op) const;
5627e0f8bcSSam Kolton
57f60ad58dSSam Kolton public:
58f60ad58dSSam Kolton static char ID;
59f60ad58dSSam Kolton
SIPeepholeSDWA()60f60ad58dSSam Kolton SIPeepholeSDWA() : MachineFunctionPass(ID) {
61f60ad58dSSam Kolton initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
62f60ad58dSSam Kolton }
63f60ad58dSSam Kolton
64f60ad58dSSam Kolton bool runOnMachineFunction(MachineFunction &MF) override;
659c2f3c48SMatt Arsenault void matchSDWAOperands(MachineBasicBlock &MBB);
665f7f32c3SSam Kolton std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
6716de4fd2SRon Lieberman bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const;
6816de4fd2SRon Lieberman void pseudoOpConvertToVOP2(MachineInstr &MI,
6916de4fd2SRon Lieberman const GCNSubtarget &ST) const;
70f60ad58dSSam Kolton bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
715bfbae5cSTom Stellard void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
72f60ad58dSSam Kolton
getPassName() const73f60ad58dSSam Kolton StringRef getPassName() const override { return "SI Peephole SDWA"; }
74f60ad58dSSam Kolton
getAnalysisUsage(AnalysisUsage & AU) const75f60ad58dSSam Kolton void getAnalysisUsage(AnalysisUsage &AU) const override {
76f60ad58dSSam Kolton AU.setPreservesCFG();
77f60ad58dSSam Kolton MachineFunctionPass::getAnalysisUsage(AU);
78f60ad58dSSam Kolton }
79f60ad58dSSam Kolton };
80f60ad58dSSam Kolton
81f60ad58dSSam Kolton class SDWAOperand {
82f60ad58dSSam Kolton private:
83f60ad58dSSam Kolton MachineOperand *Target; // Operand that would be used in converted instruction
84f60ad58dSSam Kolton MachineOperand *Replaced; // Operand that would be replace by Target
85f60ad58dSSam Kolton
86f60ad58dSSam Kolton public:
SDWAOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp)87f60ad58dSSam Kolton SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
88f60ad58dSSam Kolton : Target(TargetOp), Replaced(ReplacedOp) {
89f60ad58dSSam Kolton assert(Target->isReg());
90f60ad58dSSam Kolton assert(Replaced->isReg());
91f60ad58dSSam Kolton }
92f60ad58dSSam Kolton
9359e12826SEugene Zelenko virtual ~SDWAOperand() = default;
94f60ad58dSSam Kolton
95f60ad58dSSam Kolton virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
96f60ad58dSSam Kolton virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
97f60ad58dSSam Kolton
getTargetOperand() const98f60ad58dSSam Kolton MachineOperand *getTargetOperand() const { return Target; }
getReplacedOperand() const99f60ad58dSSam Kolton MachineOperand *getReplacedOperand() const { return Replaced; }
getParentInst() const100f60ad58dSSam Kolton MachineInstr *getParentInst() const { return Target->getParent(); }
10159e12826SEugene Zelenko
getMRI() const102f60ad58dSSam Kolton MachineRegisterInfo *getMRI() const {
103f60ad58dSSam Kolton return &getParentInst()->getParent()->getParent()->getRegInfo();
104f60ad58dSSam Kolton }
1055f7f32c3SSam Kolton
1065f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1075f7f32c3SSam Kolton virtual void print(raw_ostream& OS) const = 0;
dump() const1085f7f32c3SSam Kolton void dump() const { print(dbgs()); }
1095f7f32c3SSam Kolton #endif
110f60ad58dSSam Kolton };
111f60ad58dSSam Kolton
112f60ad58dSSam Kolton using namespace AMDGPU::SDWA;
113f60ad58dSSam Kolton
114f60ad58dSSam Kolton class SDWASrcOperand : public SDWAOperand {
115f60ad58dSSam Kolton private:
116f60ad58dSSam Kolton SdwaSel SrcSel;
117f60ad58dSSam Kolton bool Abs;
118f60ad58dSSam Kolton bool Neg;
119f60ad58dSSam Kolton bool Sext;
120f60ad58dSSam Kolton
121f60ad58dSSam Kolton public:
SDWASrcOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp,SdwaSel SrcSel_=DWORD,bool Abs_=false,bool Neg_=false,bool Sext_=false)122f60ad58dSSam Kolton SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
123f60ad58dSSam Kolton SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
124f60ad58dSSam Kolton bool Sext_ = false)
1255f7f32c3SSam Kolton : SDWAOperand(TargetOp, ReplacedOp),
1265f7f32c3SSam Kolton SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
127f60ad58dSSam Kolton
12859e12826SEugene Zelenko MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
12959e12826SEugene Zelenko bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
130f60ad58dSSam Kolton
getSrcSel() const131f60ad58dSSam Kolton SdwaSel getSrcSel() const { return SrcSel; }
getAbs() const132f60ad58dSSam Kolton bool getAbs() const { return Abs; }
getNeg() const133f60ad58dSSam Kolton bool getNeg() const { return Neg; }
getSext() const134f60ad58dSSam Kolton bool getSext() const { return Sext; }
135f60ad58dSSam Kolton
13603306604SStanislav Mekhanoshin uint64_t getSrcMods(const SIInstrInfo *TII,
13703306604SStanislav Mekhanoshin const MachineOperand *SrcOp) const;
1385f7f32c3SSam Kolton
1395f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1405f7f32c3SSam Kolton void print(raw_ostream& OS) const override;
1415f7f32c3SSam Kolton #endif
142f60ad58dSSam Kolton };
143f60ad58dSSam Kolton
144f60ad58dSSam Kolton class SDWADstOperand : public SDWAOperand {
145f60ad58dSSam Kolton private:
146f60ad58dSSam Kolton SdwaSel DstSel;
147f60ad58dSSam Kolton DstUnused DstUn;
148f60ad58dSSam Kolton
149f60ad58dSSam Kolton public:
1505f7f32c3SSam Kolton
SDWADstOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp,SdwaSel DstSel_=DWORD,DstUnused DstUn_=UNUSED_PAD)151f60ad58dSSam Kolton SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
152f60ad58dSSam Kolton SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
153f60ad58dSSam Kolton : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
154f60ad58dSSam Kolton
15559e12826SEugene Zelenko MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
15659e12826SEugene Zelenko bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
157f60ad58dSSam Kolton
getDstSel() const158f60ad58dSSam Kolton SdwaSel getDstSel() const { return DstSel; }
getDstUnused() const159f60ad58dSSam Kolton DstUnused getDstUnused() const { return DstUn; }
1605f7f32c3SSam Kolton
1615f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1625f7f32c3SSam Kolton void print(raw_ostream& OS) const override;
1635f7f32c3SSam Kolton #endif
1645f7f32c3SSam Kolton };
1655f7f32c3SSam Kolton
1665f7f32c3SSam Kolton class SDWADstPreserveOperand : public SDWADstOperand {
1675f7f32c3SSam Kolton private:
1685f7f32c3SSam Kolton MachineOperand *Preserve;
1695f7f32c3SSam Kolton
1705f7f32c3SSam Kolton public:
SDWADstPreserveOperand(MachineOperand * TargetOp,MachineOperand * ReplacedOp,MachineOperand * PreserveOp,SdwaSel DstSel_=DWORD)1715f7f32c3SSam Kolton SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
1725f7f32c3SSam Kolton MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
1735f7f32c3SSam Kolton : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
1745f7f32c3SSam Kolton Preserve(PreserveOp) {}
1755f7f32c3SSam Kolton
1765f7f32c3SSam Kolton bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
1775f7f32c3SSam Kolton
getPreservedOperand() const1785f7f32c3SSam Kolton MachineOperand *getPreservedOperand() const { return Preserve; }
1795f7f32c3SSam Kolton
1805f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1815f7f32c3SSam Kolton void print(raw_ostream& OS) const override;
1825f7f32c3SSam Kolton #endif
183f60ad58dSSam Kolton };
184f60ad58dSSam Kolton
18559e12826SEugene Zelenko } // end anonymous namespace
186f60ad58dSSam Kolton
187f60ad58dSSam Kolton INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
188f60ad58dSSam Kolton
189f60ad58dSSam Kolton char SIPeepholeSDWA::ID = 0;
190f60ad58dSSam Kolton
191f60ad58dSSam Kolton char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
192f60ad58dSSam Kolton
createSIPeepholeSDWAPass()193f60ad58dSSam Kolton FunctionPass *llvm::createSIPeepholeSDWAPass() {
194f60ad58dSSam Kolton return new SIPeepholeSDWA();
195f60ad58dSSam Kolton }
196f60ad58dSSam Kolton
1975f7f32c3SSam Kolton
1985f7f32c3SSam Kolton #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
operator <<(raw_ostream & OS,SdwaSel Sel)199c24d5e28SMatt Arsenault static raw_ostream& operator<<(raw_ostream &OS, SdwaSel Sel) {
200f60ad58dSSam Kolton switch(Sel) {
201f60ad58dSSam Kolton case BYTE_0: OS << "BYTE_0"; break;
202f60ad58dSSam Kolton case BYTE_1: OS << "BYTE_1"; break;
203f60ad58dSSam Kolton case BYTE_2: OS << "BYTE_2"; break;
204f60ad58dSSam Kolton case BYTE_3: OS << "BYTE_3"; break;
205f60ad58dSSam Kolton case WORD_0: OS << "WORD_0"; break;
206f60ad58dSSam Kolton case WORD_1: OS << "WORD_1"; break;
207f60ad58dSSam Kolton case DWORD: OS << "DWORD"; break;
208f60ad58dSSam Kolton }
209f60ad58dSSam Kolton return OS;
210f60ad58dSSam Kolton }
211f60ad58dSSam Kolton
operator <<(raw_ostream & OS,const DstUnused & Un)212f60ad58dSSam Kolton static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
213f60ad58dSSam Kolton switch(Un) {
214f60ad58dSSam Kolton case UNUSED_PAD: OS << "UNUSED_PAD"; break;
215f60ad58dSSam Kolton case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
216f60ad58dSSam Kolton case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
217f60ad58dSSam Kolton }
218f60ad58dSSam Kolton return OS;
219f60ad58dSSam Kolton }
220f60ad58dSSam Kolton
2215f7f32c3SSam Kolton LLVM_DUMP_METHOD
print(raw_ostream & OS) const2225f7f32c3SSam Kolton void SDWASrcOperand::print(raw_ostream& OS) const {
2235f7f32c3SSam Kolton OS << "SDWA src: " << *getTargetOperand()
2245f7f32c3SSam Kolton << " src_sel:" << getSrcSel()
2255f7f32c3SSam Kolton << " abs:" << getAbs() << " neg:" << getNeg()
2265f7f32c3SSam Kolton << " sext:" << getSext() << '\n';
227f60ad58dSSam Kolton }
2285f7f32c3SSam Kolton
2295f7f32c3SSam Kolton LLVM_DUMP_METHOD
print(raw_ostream & OS) const2305f7f32c3SSam Kolton void SDWADstOperand::print(raw_ostream& OS) const {
2315f7f32c3SSam Kolton OS << "SDWA dst: " << *getTargetOperand()
2325f7f32c3SSam Kolton << " dst_sel:" << getDstSel()
2335f7f32c3SSam Kolton << " dst_unused:" << getDstUnused() << '\n';
2345f7f32c3SSam Kolton }
2355f7f32c3SSam Kolton
2365f7f32c3SSam Kolton LLVM_DUMP_METHOD
print(raw_ostream & OS) const2375f7f32c3SSam Kolton void SDWADstPreserveOperand::print(raw_ostream& OS) const {
2385f7f32c3SSam Kolton OS << "SDWA preserve dst: " << *getTargetOperand()
2395f7f32c3SSam Kolton << " dst_sel:" << getDstSel()
2405f7f32c3SSam Kolton << " preserve:" << *getPreservedOperand() << '\n';
2415f7f32c3SSam Kolton }
2425f7f32c3SSam Kolton
243f60ad58dSSam Kolton #endif
244f60ad58dSSam Kolton
copyRegOperand(MachineOperand & To,const MachineOperand & From)245f60ad58dSSam Kolton static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
246f60ad58dSSam Kolton assert(To.isReg() && From.isReg());
247f60ad58dSSam Kolton To.setReg(From.getReg());
248f60ad58dSSam Kolton To.setSubReg(From.getSubReg());
249f60ad58dSSam Kolton To.setIsUndef(From.isUndef());
250f60ad58dSSam Kolton if (To.isUse()) {
251f60ad58dSSam Kolton To.setIsKill(From.isKill());
252f60ad58dSSam Kolton } else {
253f60ad58dSSam Kolton To.setIsDead(From.isDead());
254f60ad58dSSam Kolton }
255f60ad58dSSam Kolton }
256f60ad58dSSam Kolton
isSameReg(const MachineOperand & LHS,const MachineOperand & RHS)257f60ad58dSSam Kolton static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
258f60ad58dSSam Kolton return LHS.isReg() &&
259f60ad58dSSam Kolton RHS.isReg() &&
260f60ad58dSSam Kolton LHS.getReg() == RHS.getReg() &&
261f60ad58dSSam Kolton LHS.getSubReg() == RHS.getSubReg();
262f60ad58dSSam Kolton }
263f60ad58dSSam Kolton
/// Given a register def operand \p Reg, returns a non-debug use of it provided
/// that every non-debug use names exactly the same register/subregister and
/// all of them sit in one instruction. Returns nullptr otherwise, including
/// when \p Reg is not a register def or has no non-debug uses.
static MachineOperand *findSingleRegUse(const MachineOperand *Reg,
                                        const MachineRegisterInfo *MRI) {
  if (!Reg->isReg() || !Reg->isDef())
    return nullptr;

  MachineOperand *FirstUse = nullptr;
  for (MachineOperand &Use : MRI->use_nodbg_operands(Reg->getReg())) {
    // A use that differs in subregister disqualifies the register.
    if (!isSameReg(Use, *Reg))
      return nullptr;

    // Every use must belong to the instruction of the first use seen.
    if (FirstUse && FirstUse->getParent() != Use.getParent())
      return nullptr;

    if (!FirstUse)
      FirstUse = &Use;
  }

  return FirstUse;
}
285f60ad58dSSam Kolton
/// Returns the explicit def operand of the unique instruction defining the
/// register of \p Reg, or nullptr when \p Reg is not a register, has no unique
/// defining instruction, or is only defined implicitly there.
static MachineOperand *findSingleRegDef(const MachineOperand *Reg,
                                        const MachineRegisterInfo *MRI) {
  if (!Reg->isReg())
    return nullptr;

  MachineInstr *DefMI = MRI->getUniqueVRegDef(Reg->getReg());
  if (!DefMI)
    return nullptr;

  for (MachineOperand &Candidate : DefMI->defs())
    if (Candidate.isReg() && Candidate.getReg() == Reg->getReg())
      return &Candidate;

  // No explicit def matched: implicit defs are ignored.
  return nullptr;
}
303f60ad58dSSam Kolton
getSrcMods(const SIInstrInfo * TII,const MachineOperand * SrcOp) const30403306604SStanislav Mekhanoshin uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
30503306604SStanislav Mekhanoshin const MachineOperand *SrcOp) const {
306f60ad58dSSam Kolton uint64_t Mods = 0;
30703306604SStanislav Mekhanoshin const auto *MI = SrcOp->getParent();
30803306604SStanislav Mekhanoshin if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
30903306604SStanislav Mekhanoshin if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
31003306604SStanislav Mekhanoshin Mods = Mod->getImm();
31103306604SStanislav Mekhanoshin }
31203306604SStanislav Mekhanoshin } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
31303306604SStanislav Mekhanoshin if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
31403306604SStanislav Mekhanoshin Mods = Mod->getImm();
31503306604SStanislav Mekhanoshin }
31603306604SStanislav Mekhanoshin }
317f60ad58dSSam Kolton if (Abs || Neg) {
318f60ad58dSSam Kolton assert(!Sext &&
319*6527b2a4SSebastian Neubauer "Float and integer src modifiers can't be set simultaneously");
320da644c02SStanislav Mekhanoshin Mods |= Abs ? SISrcMods::ABS : 0u;
321da644c02SStanislav Mekhanoshin Mods ^= Neg ? SISrcMods::NEG : 0u;
322f60ad58dSSam Kolton } else if (Sext) {
323f60ad58dSSam Kolton Mods |= SISrcMods::SEXT;
324f60ad58dSSam Kolton }
325f60ad58dSSam Kolton
326f60ad58dSSam Kolton return Mods;
327f60ad58dSSam Kolton }
328f60ad58dSSam Kolton
potentialToConvert(const SIInstrInfo * TII)329f60ad58dSSam Kolton MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
330f60ad58dSSam Kolton // For SDWA src operand potential instruction is one that use register
331f60ad58dSSam Kolton // defined by parent instruction
3325f7f32c3SSam Kolton MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
3335f7f32c3SSam Kolton if (!PotentialMO)
334f60ad58dSSam Kolton return nullptr;
335f60ad58dSSam Kolton
3365f7f32c3SSam Kolton return PotentialMO->getParent();
337f60ad58dSSam Kolton }
338f60ad58dSSam Kolton
convertToSDWA(MachineInstr & MI,const SIInstrInfo * TII)339f60ad58dSSam Kolton bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
340f60ad58dSSam Kolton // Find operand in instruction that matches source operand and replace it with
341f60ad58dSSam Kolton // target operand. Set corresponding src_sel
34259e5ef79SMichael Bedy bool IsPreserveSrc = false;
343f60ad58dSSam Kolton MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
344f60ad58dSSam Kolton MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
345f60ad58dSSam Kolton MachineOperand *SrcMods =
346f60ad58dSSam Kolton TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
34756ea488dSStanislav Mekhanoshin assert(Src && (Src->isReg() || Src->isImm()));
348f60ad58dSSam Kolton if (!isSameReg(*Src, *getReplacedOperand())) {
34959e5ef79SMichael Bedy // If this is not src0 then it could be src1
350f60ad58dSSam Kolton Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
351f60ad58dSSam Kolton SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
352f60ad58dSSam Kolton SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
353f60ad58dSSam Kolton
35459e5ef79SMichael Bedy if (!Src ||
35559e5ef79SMichael Bedy !isSameReg(*Src, *getReplacedOperand())) {
35659e5ef79SMichael Bedy // It's possible this Src is a tied operand for
35759e5ef79SMichael Bedy // UNUSED_PRESERVE, in which case we can either
35859e5ef79SMichael Bedy // abandon the peephole attempt, or if legal we can
35959e5ef79SMichael Bedy // copy the target operand into the tied slot
36059e5ef79SMichael Bedy // if the preserve operation will effectively cause the same
36159e5ef79SMichael Bedy // result by overwriting the rest of the dst.
36259e5ef79SMichael Bedy MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
36359e5ef79SMichael Bedy MachineOperand *DstUnused =
36459e5ef79SMichael Bedy TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
36559e5ef79SMichael Bedy
36659e5ef79SMichael Bedy if (Dst &&
36759e5ef79SMichael Bedy DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
368d1f45ed5SNeubauer, Sebastian // This will work if the tied src is accessing WORD_0, and the dst is
36959e5ef79SMichael Bedy // writing WORD_1. Modifiers don't matter because all the bits that
37059e5ef79SMichael Bedy // would be impacted are being overwritten by the dst.
37159e5ef79SMichael Bedy // Any other case will not work.
37259e5ef79SMichael Bedy SdwaSel DstSel = static_cast<SdwaSel>(
37359e5ef79SMichael Bedy TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
37459e5ef79SMichael Bedy if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
37559e5ef79SMichael Bedy getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
37659e5ef79SMichael Bedy IsPreserveSrc = true;
37759e5ef79SMichael Bedy auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
37859e5ef79SMichael Bedy AMDGPU::OpName::vdst);
37959e5ef79SMichael Bedy auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
38059e5ef79SMichael Bedy Src = &MI.getOperand(TiedIdx);
38159e5ef79SMichael Bedy SrcSel = nullptr;
38259e5ef79SMichael Bedy SrcMods = nullptr;
38359e5ef79SMichael Bedy } else {
38459e5ef79SMichael Bedy // Not legal to convert this src
38559e5ef79SMichael Bedy return false;
38659e5ef79SMichael Bedy }
38759e5ef79SMichael Bedy }
38859e5ef79SMichael Bedy }
389f60ad58dSSam Kolton assert(Src && Src->isReg());
390f60ad58dSSam Kolton
39128a1936fSStanislav Mekhanoshin if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
39228a1936fSStanislav Mekhanoshin MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
39328a1936fSStanislav Mekhanoshin MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
394f60ad58dSSam Kolton MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
395f60ad58dSSam Kolton !isSameReg(*Src, *getReplacedOperand())) {
396f60ad58dSSam Kolton // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
397f60ad58dSSam Kolton // src2. This is not allowed.
398f60ad58dSSam Kolton return false;
399f60ad58dSSam Kolton }
400f60ad58dSSam Kolton
40159e5ef79SMichael Bedy assert(isSameReg(*Src, *getReplacedOperand()) &&
40259e5ef79SMichael Bedy (IsPreserveSrc || (SrcSel && SrcMods)));
403f60ad58dSSam Kolton }
404f60ad58dSSam Kolton copyRegOperand(*Src, *getTargetOperand());
40559e5ef79SMichael Bedy if (!IsPreserveSrc) {
406f60ad58dSSam Kolton SrcSel->setImm(getSrcSel());
40703306604SStanislav Mekhanoshin SrcMods->setImm(getSrcMods(TII, Src));
40859e5ef79SMichael Bedy }
409f60ad58dSSam Kolton getTargetOperand()->setIsKill(false);
410f60ad58dSSam Kolton return true;
411f60ad58dSSam Kolton }
412f60ad58dSSam Kolton
potentialToConvert(const SIInstrInfo * TII)413f60ad58dSSam Kolton MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
414f60ad58dSSam Kolton // For SDWA dst operand potential instruction is one that defines register
415f60ad58dSSam Kolton // that this operand uses
416f60ad58dSSam Kolton MachineRegisterInfo *MRI = getMRI();
417f60ad58dSSam Kolton MachineInstr *ParentMI = getParentInst();
418f60ad58dSSam Kolton
4195f7f32c3SSam Kolton MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
4205f7f32c3SSam Kolton if (!PotentialMO)
421f60ad58dSSam Kolton return nullptr;
422f60ad58dSSam Kolton
423f60ad58dSSam Kolton // Check that ParentMI is the only instruction that uses replaced register
4245f7f32c3SSam Kolton for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
4255f7f32c3SSam Kolton if (&UseInst != ParentMI)
426f60ad58dSSam Kolton return nullptr;
427f60ad58dSSam Kolton }
428f60ad58dSSam Kolton
4295f7f32c3SSam Kolton return PotentialMO->getParent();
430f60ad58dSSam Kolton }
431f60ad58dSSam Kolton
convertToSDWA(MachineInstr & MI,const SIInstrInfo * TII)432f60ad58dSSam Kolton bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
433f60ad58dSSam Kolton // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
434f60ad58dSSam Kolton
43528a1936fSStanislav Mekhanoshin if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
43628a1936fSStanislav Mekhanoshin MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
43728a1936fSStanislav Mekhanoshin MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
438f60ad58dSSam Kolton MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
439f60ad58dSSam Kolton getDstSel() != AMDGPU::SDWA::DWORD) {
440f60ad58dSSam Kolton // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
441f60ad58dSSam Kolton return false;
442f60ad58dSSam Kolton }
443f60ad58dSSam Kolton
444f60ad58dSSam Kolton MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
445f60ad58dSSam Kolton assert(Operand &&
446f60ad58dSSam Kolton Operand->isReg() &&
447f60ad58dSSam Kolton isSameReg(*Operand, *getReplacedOperand()));
448f60ad58dSSam Kolton copyRegOperand(*Operand, *getTargetOperand());
449f60ad58dSSam Kolton MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
450f60ad58dSSam Kolton assert(DstSel);
451f60ad58dSSam Kolton DstSel->setImm(getDstSel());
452f60ad58dSSam Kolton MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
453f60ad58dSSam Kolton assert(DstUnused);
454f60ad58dSSam Kolton DstUnused->setImm(getDstUnused());
455f60ad58dSSam Kolton
456f60ad58dSSam Kolton // Remove original instruction because it would conflict with our new
457f60ad58dSSam Kolton // instruction by register definition
458f60ad58dSSam Kolton getParentInst()->eraseFromParent();
459f60ad58dSSam Kolton return true;
460f60ad58dSSam Kolton }
461f60ad58dSSam Kolton
convertToSDWA(MachineInstr & MI,const SIInstrInfo * TII)4625f7f32c3SSam Kolton bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
4635f7f32c3SSam Kolton const SIInstrInfo *TII) {
4645f7f32c3SSam Kolton // MI should be moved right before v_or_b32.
4655f7f32c3SSam Kolton // For this we should clear all kill flags on uses of MI src-operands or else
4665f7f32c3SSam Kolton // we can encounter problem with use of killed operand.
4675f7f32c3SSam Kolton for (MachineOperand &MO : MI.uses()) {
4685f7f32c3SSam Kolton if (!MO.isReg())
4695f7f32c3SSam Kolton continue;
4705f7f32c3SSam Kolton getMRI()->clearKillFlags(MO.getReg());
4715f7f32c3SSam Kolton }
4725f7f32c3SSam Kolton
4735f7f32c3SSam Kolton // Move MI before v_or_b32
4745f7f32c3SSam Kolton auto MBB = MI.getParent();
4755f7f32c3SSam Kolton MBB->remove(&MI);
4765f7f32c3SSam Kolton MBB->insert(getParentInst(), &MI);
4775f7f32c3SSam Kolton
4785f7f32c3SSam Kolton // Add Implicit use of preserved register
4795f7f32c3SSam Kolton MachineInstrBuilder MIB(*MBB->getParent(), MI);
4805f7f32c3SSam Kolton MIB.addReg(getPreservedOperand()->getReg(),
4815f7f32c3SSam Kolton RegState::ImplicitKill,
4825f7f32c3SSam Kolton getPreservedOperand()->getSubReg());
4835f7f32c3SSam Kolton
4845f7f32c3SSam Kolton // Tie dst to implicit use
4855f7f32c3SSam Kolton MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
4865f7f32c3SSam Kolton MI.getNumOperands() - 1);
4875f7f32c3SSam Kolton
4885f7f32c3SSam Kolton // Convert MI as any other SDWADstOperand and remove v_or_b32
4895f7f32c3SSam Kolton return SDWADstOperand::convertToSDWA(MI, TII);
4905f7f32c3SSam Kolton }
4915f7f32c3SSam Kolton
foldToImm(const MachineOperand & Op) const49227e0f8bcSSam Kolton Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
49327e0f8bcSSam Kolton if (Op.isImm()) {
49427e0f8bcSSam Kolton return Op.getImm();
49527e0f8bcSSam Kolton }
49627e0f8bcSSam Kolton
49727e0f8bcSSam Kolton // If this is not immediate then it can be copy of immediate value, e.g.:
498a8a83d15SFrancis Visoiu Mistrih // %1 = S_MOV_B32 255;
49927e0f8bcSSam Kolton if (Op.isReg()) {
50027e0f8bcSSam Kolton for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
50127e0f8bcSSam Kolton if (!isSameReg(Op, Def))
50227e0f8bcSSam Kolton continue;
50327e0f8bcSSam Kolton
50427e0f8bcSSam Kolton const MachineInstr *DefInst = Def.getParent();
505aff8341dSSam Kolton if (!TII->isFoldableCopy(*DefInst))
50627e0f8bcSSam Kolton return None;
50727e0f8bcSSam Kolton
50827e0f8bcSSam Kolton const MachineOperand &Copied = DefInst->getOperand(1);
50927e0f8bcSSam Kolton if (!Copied.isImm())
51027e0f8bcSSam Kolton return None;
51127e0f8bcSSam Kolton
51227e0f8bcSSam Kolton return Copied.getImm();
51327e0f8bcSSam Kolton }
51427e0f8bcSSam Kolton }
51527e0f8bcSSam Kolton
51627e0f8bcSSam Kolton return None;
51727e0f8bcSSam Kolton }
51827e0f8bcSSam Kolton
5195f7f32c3SSam Kolton std::unique_ptr<SDWAOperand>
matchSDWAOperand(MachineInstr & MI)5205f7f32c3SSam Kolton SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
521f60ad58dSSam Kolton unsigned Opcode = MI.getOpcode();
522f60ad58dSSam Kolton switch (Opcode) {
523f60ad58dSSam Kolton case AMDGPU::V_LSHRREV_B32_e32:
524f60ad58dSSam Kolton case AMDGPU::V_ASHRREV_I32_e32:
52503306604SStanislav Mekhanoshin case AMDGPU::V_LSHLREV_B32_e32:
52603306604SStanislav Mekhanoshin case AMDGPU::V_LSHRREV_B32_e64:
52703306604SStanislav Mekhanoshin case AMDGPU::V_ASHRREV_I32_e64:
52803306604SStanislav Mekhanoshin case AMDGPU::V_LSHLREV_B32_e64: {
529f60ad58dSSam Kolton // from: v_lshrrev_b32_e32 v1, 16/24, v0
530f60ad58dSSam Kolton // to SDWA src:v0 src_sel:WORD_1/BYTE_3
531f60ad58dSSam Kolton
532f60ad58dSSam Kolton // from: v_ashrrev_i32_e32 v1, 16/24, v0
533f60ad58dSSam Kolton // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
534f60ad58dSSam Kolton
535f60ad58dSSam Kolton // from: v_lshlrev_b32_e32 v1, 16/24, v0
536f60ad58dSSam Kolton // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
537f60ad58dSSam Kolton MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
53827e0f8bcSSam Kolton auto Imm = foldToImm(*Src0);
53927e0f8bcSSam Kolton if (!Imm)
540f60ad58dSSam Kolton break;
541f60ad58dSSam Kolton
54227e0f8bcSSam Kolton if (*Imm != 16 && *Imm != 24)
543f60ad58dSSam Kolton break;
544f60ad58dSSam Kolton
545f60ad58dSSam Kolton MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
546f60ad58dSSam Kolton MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
54734978602SJay Foad if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
548f60ad58dSSam Kolton break;
549f60ad58dSSam Kolton
55003306604SStanislav Mekhanoshin if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
55103306604SStanislav Mekhanoshin Opcode == AMDGPU::V_LSHLREV_B32_e64) {
5520eaee545SJonas Devlieghere return std::make_unique<SDWADstOperand>(
55327e0f8bcSSam Kolton Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
554f60ad58dSSam Kolton } else {
5550eaee545SJonas Devlieghere return std::make_unique<SDWASrcOperand>(
55627e0f8bcSSam Kolton Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
55703306604SStanislav Mekhanoshin Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
55803306604SStanislav Mekhanoshin Opcode != AMDGPU::V_LSHRREV_B32_e64);
559f60ad58dSSam Kolton }
560f60ad58dSSam Kolton break;
561f60ad58dSSam Kolton }
562f60ad58dSSam Kolton
563f60ad58dSSam Kolton case AMDGPU::V_LSHRREV_B16_e32:
564f60ad58dSSam Kolton case AMDGPU::V_ASHRREV_I16_e32:
56503306604SStanislav Mekhanoshin case AMDGPU::V_LSHLREV_B16_e32:
56603306604SStanislav Mekhanoshin case AMDGPU::V_LSHRREV_B16_e64:
56703306604SStanislav Mekhanoshin case AMDGPU::V_ASHRREV_I16_e64:
56803306604SStanislav Mekhanoshin case AMDGPU::V_LSHLREV_B16_e64: {
569f60ad58dSSam Kolton // from: v_lshrrev_b16_e32 v1, 8, v0
570f60ad58dSSam Kolton // to SDWA src:v0 src_sel:BYTE_1
571f60ad58dSSam Kolton
572f60ad58dSSam Kolton // from: v_ashrrev_i16_e32 v1, 8, v0
573f60ad58dSSam Kolton // to SDWA src:v0 src_sel:BYTE_1 sext:1
574f60ad58dSSam Kolton
575f60ad58dSSam Kolton // from: v_lshlrev_b16_e32 v1, 8, v0
576f60ad58dSSam Kolton // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
577f60ad58dSSam Kolton MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
57827e0f8bcSSam Kolton auto Imm = foldToImm(*Src0);
57927e0f8bcSSam Kolton if (!Imm || *Imm != 8)
580f60ad58dSSam Kolton break;
581f60ad58dSSam Kolton
582f60ad58dSSam Kolton MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
583f60ad58dSSam Kolton MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
584f60ad58dSSam Kolton
58534978602SJay Foad if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
586f60ad58dSSam Kolton break;
587f60ad58dSSam Kolton
58803306604SStanislav Mekhanoshin if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
58903306604SStanislav Mekhanoshin Opcode == AMDGPU::V_LSHLREV_B16_e64) {
5900eaee545SJonas Devlieghere return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
591f60ad58dSSam Kolton } else {
5920eaee545SJonas Devlieghere return std::make_unique<SDWASrcOperand>(
593f60ad58dSSam Kolton Src1, Dst, BYTE_1, false, false,
59403306604SStanislav Mekhanoshin Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
59503306604SStanislav Mekhanoshin Opcode != AMDGPU::V_LSHRREV_B16_e64);
596f60ad58dSSam Kolton }
597f60ad58dSSam Kolton break;
598f60ad58dSSam Kolton }
599f60ad58dSSam Kolton
600314e29edSJoe Nash case AMDGPU::V_BFE_I32_e64:
601314e29edSJoe Nash case AMDGPU::V_BFE_U32_e64: {
602f60ad58dSSam Kolton // e.g.:
603f60ad58dSSam Kolton // from: v_bfe_u32 v1, v0, 8, 8
604f60ad58dSSam Kolton // to SDWA src:v0 src_sel:BYTE_1
605f60ad58dSSam Kolton
606f60ad58dSSam Kolton // offset | width | src_sel
607f60ad58dSSam Kolton // ------------------------
608f60ad58dSSam Kolton // 0 | 8 | BYTE_0
609f60ad58dSSam Kolton // 0 | 16 | WORD_0
610f60ad58dSSam Kolton // 0 | 32 | DWORD ?
611f60ad58dSSam Kolton // 8 | 8 | BYTE_1
612f60ad58dSSam Kolton // 16 | 8 | BYTE_2
613f60ad58dSSam Kolton // 16 | 16 | WORD_1
614f60ad58dSSam Kolton // 24 | 8 | BYTE_3
615f60ad58dSSam Kolton
616f60ad58dSSam Kolton MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
61727e0f8bcSSam Kolton auto Offset = foldToImm(*Src1);
61827e0f8bcSSam Kolton if (!Offset)
619f60ad58dSSam Kolton break;
620f60ad58dSSam Kolton
621f60ad58dSSam Kolton MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
62227e0f8bcSSam Kolton auto Width = foldToImm(*Src2);
62327e0f8bcSSam Kolton if (!Width)
624f60ad58dSSam Kolton break;
625f60ad58dSSam Kolton
626f60ad58dSSam Kolton SdwaSel SrcSel = DWORD;
627f60ad58dSSam Kolton
62827e0f8bcSSam Kolton if (*Offset == 0 && *Width == 8)
629f60ad58dSSam Kolton SrcSel = BYTE_0;
63027e0f8bcSSam Kolton else if (*Offset == 0 && *Width == 16)
631f60ad58dSSam Kolton SrcSel = WORD_0;
63227e0f8bcSSam Kolton else if (*Offset == 0 && *Width == 32)
633f60ad58dSSam Kolton SrcSel = DWORD;
63427e0f8bcSSam Kolton else if (*Offset == 8 && *Width == 8)
635f60ad58dSSam Kolton SrcSel = BYTE_1;
63627e0f8bcSSam Kolton else if (*Offset == 16 && *Width == 8)
637f60ad58dSSam Kolton SrcSel = BYTE_2;
63827e0f8bcSSam Kolton else if (*Offset == 16 && *Width == 16)
639f60ad58dSSam Kolton SrcSel = WORD_1;
64027e0f8bcSSam Kolton else if (*Offset == 24 && *Width == 8)
641f60ad58dSSam Kolton SrcSel = BYTE_3;
642f60ad58dSSam Kolton else
643f60ad58dSSam Kolton break;
644f60ad58dSSam Kolton
645f60ad58dSSam Kolton MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
646f60ad58dSSam Kolton MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
647f60ad58dSSam Kolton
64834978602SJay Foad if (Src0->getReg().isPhysical() || Dst->getReg().isPhysical())
649f60ad58dSSam Kolton break;
650f60ad58dSSam Kolton
6510eaee545SJonas Devlieghere return std::make_unique<SDWASrcOperand>(
652314e29edSJoe Nash Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
653f60ad58dSSam Kolton }
6545f7f32c3SSam Kolton
65503306604SStanislav Mekhanoshin case AMDGPU::V_AND_B32_e32:
65603306604SStanislav Mekhanoshin case AMDGPU::V_AND_B32_e64: {
657f60ad58dSSam Kolton // e.g.:
658f60ad58dSSam Kolton // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
659f60ad58dSSam Kolton // to SDWA src:v0 src_sel:WORD_0/BYTE_0
660f60ad58dSSam Kolton
661f60ad58dSSam Kolton MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
662f60ad58dSSam Kolton MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
66303306604SStanislav Mekhanoshin auto ValSrc = Src1;
66403306604SStanislav Mekhanoshin auto Imm = foldToImm(*Src0);
66503306604SStanislav Mekhanoshin
66603306604SStanislav Mekhanoshin if (!Imm) {
66703306604SStanislav Mekhanoshin Imm = foldToImm(*Src1);
66803306604SStanislav Mekhanoshin ValSrc = Src0;
66903306604SStanislav Mekhanoshin }
67003306604SStanislav Mekhanoshin
67103306604SStanislav Mekhanoshin if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
67203306604SStanislav Mekhanoshin break;
67303306604SStanislav Mekhanoshin
674f60ad58dSSam Kolton MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
675f60ad58dSSam Kolton
67634978602SJay Foad if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
677f60ad58dSSam Kolton break;
678f60ad58dSSam Kolton
6790eaee545SJonas Devlieghere return std::make_unique<SDWASrcOperand>(
68003306604SStanislav Mekhanoshin ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
6815f7f32c3SSam Kolton }
6825f7f32c3SSam Kolton
6835f7f32c3SSam Kolton case AMDGPU::V_OR_B32_e32:
6845f7f32c3SSam Kolton case AMDGPU::V_OR_B32_e64: {
6855f7f32c3SSam Kolton // Patterns for dst_unused:UNUSED_PRESERVE.
6865f7f32c3SSam Kolton // e.g., from:
6875f7f32c3SSam Kolton // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
6885f7f32c3SSam Kolton // src1_sel:WORD_1 src2_sel:WORD1
6895f7f32c3SSam Kolton // v_add_f16_e32 v3, v1, v2
6905f7f32c3SSam Kolton // v_or_b32_e32 v4, v0, v3
6915f7f32c3SSam Kolton // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3
6925f7f32c3SSam Kolton
6935f7f32c3SSam Kolton // Check if one of operands of v_or_b32 is SDWA instruction
6945f7f32c3SSam Kolton using CheckRetType = Optional<std::pair<MachineOperand *, MachineOperand *>>;
6955f7f32c3SSam Kolton auto CheckOROperandsForSDWA =
6965f7f32c3SSam Kolton [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
6975f7f32c3SSam Kolton if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
6985f7f32c3SSam Kolton return CheckRetType(None);
6995f7f32c3SSam Kolton
7005f7f32c3SSam Kolton MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
7015f7f32c3SSam Kolton if (!Op1Def)
7025f7f32c3SSam Kolton return CheckRetType(None);
7035f7f32c3SSam Kolton
7045f7f32c3SSam Kolton MachineInstr *Op1Inst = Op1Def->getParent();
7055f7f32c3SSam Kolton if (!TII->isSDWA(*Op1Inst))
7065f7f32c3SSam Kolton return CheckRetType(None);
7075f7f32c3SSam Kolton
7085f7f32c3SSam Kolton MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
7095f7f32c3SSam Kolton if (!Op2Def)
7105f7f32c3SSam Kolton return CheckRetType(None);
7115f7f32c3SSam Kolton
7125f7f32c3SSam Kolton return CheckRetType(std::make_pair(Op1Def, Op2Def));
7135f7f32c3SSam Kolton };
7145f7f32c3SSam Kolton
7155f7f32c3SSam Kolton MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
7165f7f32c3SSam Kolton MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
7175f7f32c3SSam Kolton assert(OrSDWA && OrOther);
7185f7f32c3SSam Kolton auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
7195f7f32c3SSam Kolton if (!Res) {
7205f7f32c3SSam Kolton OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
7215f7f32c3SSam Kolton OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
7225f7f32c3SSam Kolton assert(OrSDWA && OrOther);
7235f7f32c3SSam Kolton Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
7245f7f32c3SSam Kolton if (!Res)
725f60ad58dSSam Kolton break;
726f60ad58dSSam Kolton }
7275f7f32c3SSam Kolton
7285f7f32c3SSam Kolton MachineOperand *OrSDWADef = Res->first;
7295f7f32c3SSam Kolton MachineOperand *OrOtherDef = Res->second;
7305f7f32c3SSam Kolton assert(OrSDWADef && OrOtherDef);
7315f7f32c3SSam Kolton
7325f7f32c3SSam Kolton MachineInstr *SDWAInst = OrSDWADef->getParent();
7335f7f32c3SSam Kolton MachineInstr *OtherInst = OrOtherDef->getParent();
7345f7f32c3SSam Kolton
7355f7f32c3SSam Kolton // Check that OtherInstr is actually bitwise compatible with SDWAInst = their
7365f7f32c3SSam Kolton // destination patterns don't overlap. Compatible instruction can be either
7375f7f32c3SSam Kolton // regular instruction with compatible bitness or SDWA instruction with
7385f7f32c3SSam Kolton // correct dst_sel
7395f7f32c3SSam Kolton // SDWAInst | OtherInst bitness / OtherInst dst_sel
7405f7f32c3SSam Kolton // -----------------------------------------------------
7415f7f32c3SSam Kolton // DWORD | no / no
7425f7f32c3SSam Kolton // WORD_0 | no / BYTE_2/3, WORD_1
7435f7f32c3SSam Kolton // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0
7445f7f32c3SSam Kolton // BYTE_0 | no / BYTE_1/2/3, WORD_1
7455f7f32c3SSam Kolton // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1
7465f7f32c3SSam Kolton // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0
7475f7f32c3SSam Kolton // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0
7485f7f32c3SSam Kolton // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK
7495f7f32c3SSam Kolton // but v_add_f32 is not.
7505f7f32c3SSam Kolton
7515f7f32c3SSam Kolton // TODO: add support for non-SDWA instructions as OtherInst.
7525f7f32c3SSam Kolton // For now this only works with SDWA instructions. For regular instructions
75380cf9ff5SMichael Bedy // there is no way to determine if the instruction writes only 8/16/24-bit
75480cf9ff5SMichael Bedy // out of full register size and all registers are at min 32-bit wide.
7555f7f32c3SSam Kolton if (!TII->isSDWA(*OtherInst))
7565f7f32c3SSam Kolton break;
7575f7f32c3SSam Kolton
7585f7f32c3SSam Kolton SdwaSel DstSel = static_cast<SdwaSel>(
7595f7f32c3SSam Kolton TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));;
7605f7f32c3SSam Kolton SdwaSel OtherDstSel = static_cast<SdwaSel>(
7615f7f32c3SSam Kolton TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
7625f7f32c3SSam Kolton
7635f7f32c3SSam Kolton bool DstSelAgree = false;
7645f7f32c3SSam Kolton switch (DstSel) {
7655f7f32c3SSam Kolton case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
7665f7f32c3SSam Kolton (OtherDstSel == BYTE_3) ||
7675f7f32c3SSam Kolton (OtherDstSel == WORD_1));
7685f7f32c3SSam Kolton break;
7695f7f32c3SSam Kolton case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7705f7f32c3SSam Kolton (OtherDstSel == BYTE_1) ||
7715f7f32c3SSam Kolton (OtherDstSel == WORD_0));
7725f7f32c3SSam Kolton break;
7735f7f32c3SSam Kolton case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
7745f7f32c3SSam Kolton (OtherDstSel == BYTE_2) ||
7755f7f32c3SSam Kolton (OtherDstSel == BYTE_3) ||
7765f7f32c3SSam Kolton (OtherDstSel == WORD_1));
7775f7f32c3SSam Kolton break;
7785f7f32c3SSam Kolton case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7795f7f32c3SSam Kolton (OtherDstSel == BYTE_2) ||
7805f7f32c3SSam Kolton (OtherDstSel == BYTE_3) ||
7815f7f32c3SSam Kolton (OtherDstSel == WORD_1));
7825f7f32c3SSam Kolton break;
7835f7f32c3SSam Kolton case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7845f7f32c3SSam Kolton (OtherDstSel == BYTE_1) ||
7855f7f32c3SSam Kolton (OtherDstSel == BYTE_3) ||
7865f7f32c3SSam Kolton (OtherDstSel == WORD_0));
7875f7f32c3SSam Kolton break;
7885f7f32c3SSam Kolton case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
7895f7f32c3SSam Kolton (OtherDstSel == BYTE_1) ||
7905f7f32c3SSam Kolton (OtherDstSel == BYTE_2) ||
7915f7f32c3SSam Kolton (OtherDstSel == WORD_0));
7925f7f32c3SSam Kolton break;
7935f7f32c3SSam Kolton default: DstSelAgree = false;
7945f7f32c3SSam Kolton }
7955f7f32c3SSam Kolton
7965f7f32c3SSam Kolton if (!DstSelAgree)
7975f7f32c3SSam Kolton break;
7985f7f32c3SSam Kolton
7995f7f32c3SSam Kolton // Also OtherInst dst_unused should be UNUSED_PAD
8005f7f32c3SSam Kolton DstUnused OtherDstUnused = static_cast<DstUnused>(
8015f7f32c3SSam Kolton TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
8025f7f32c3SSam Kolton if (OtherDstUnused != DstUnused::UNUSED_PAD)
8035f7f32c3SSam Kolton break;
8045f7f32c3SSam Kolton
8055f7f32c3SSam Kolton // Create DstPreserveOperand
8065f7f32c3SSam Kolton MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
8075f7f32c3SSam Kolton assert(OrDst && OrDst->isReg());
8085f7f32c3SSam Kolton
8090eaee545SJonas Devlieghere return std::make_unique<SDWADstPreserveOperand>(
8105f7f32c3SSam Kolton OrDst, OrSDWADef, OrOtherDef, DstSel);
8115f7f32c3SSam Kolton
8125f7f32c3SSam Kolton }
8135f7f32c3SSam Kolton }
8145f7f32c3SSam Kolton
8155f7f32c3SSam Kolton return std::unique_ptr<SDWAOperand>(nullptr);
8165f7f32c3SSam Kolton }
8175f7f32c3SSam Kolton
818a19de320SHans Wennborg #if !defined(NDEBUG)
operator <<(raw_ostream & OS,const SDWAOperand & Operand)819a19de320SHans Wennborg static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
820a19de320SHans Wennborg Operand.print(OS);
821a19de320SHans Wennborg return OS;
822a19de320SHans Wennborg }
823a19de320SHans Wennborg #endif
824a19de320SHans Wennborg
matchSDWAOperands(MachineBasicBlock & MBB)8259c2f3c48SMatt Arsenault void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
8265f7f32c3SSam Kolton for (MachineInstr &MI : MBB) {
8275f7f32c3SSam Kolton if (auto Operand = matchSDWAOperand(MI)) {
828d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
8295f7f32c3SSam Kolton SDWAOperands[&MI] = std::move(Operand);
8305f7f32c3SSam Kolton ++NumSDWAPatternsFound;
831f60ad58dSSam Kolton }
832f60ad58dSSam Kolton }
833f60ad58dSSam Kolton }
834f60ad58dSSam Kolton
83516de4fd2SRon Lieberman // Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and
83679f67caeSMatt Arsenault // V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows isConvertibleToSDWA
83779f67caeSMatt Arsenault // to perform its transformation on V_ADD_CO_U32_e32 into V_ADD_CO_U32_sdwa.
83816de4fd2SRon Lieberman //
83916de4fd2SRon Lieberman // We are transforming from a VOP3 into a VOP2 form of the instruction.
84016de4fd2SRon Lieberman // %19:vgpr_32 = V_AND_B32_e32 255,
84116de4fd2SRon Lieberman // killed %16:vgpr_32, implicit $exec
84279f67caeSMatt Arsenault // %47:vgpr_32, %49:sreg_64_xexec = V_ADD_CO_U32_e64
84316de4fd2SRon Lieberman // %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
84416de4fd2SRon Lieberman // %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
84516de4fd2SRon Lieberman // %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
84616de4fd2SRon Lieberman //
84716de4fd2SRon Lieberman // becomes
84879f67caeSMatt Arsenault // %47:vgpr_32 = V_ADD_CO_U32_sdwa
84916de4fd2SRon Lieberman // 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
85016de4fd2SRon Lieberman // implicit-def $vcc, implicit $exec
85116de4fd2SRon Lieberman // %48:vgpr_32 = V_ADDC_U32_e32
85216de4fd2SRon Lieberman // 0, %26.sub1:vreg_64, implicit-def $vcc, implicit $vcc, implicit $exec
pseudoOpConvertToVOP2(MachineInstr & MI,const GCNSubtarget & ST) const85316de4fd2SRon Lieberman void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
85416de4fd2SRon Lieberman const GCNSubtarget &ST) const {
85516de4fd2SRon Lieberman int Opc = MI.getOpcode();
85679f67caeSMatt Arsenault assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
85779f67caeSMatt Arsenault "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
85816de4fd2SRon Lieberman
85916de4fd2SRon Lieberman // Can the candidate MI be shrunk?
86016de4fd2SRon Lieberman if (!TII->canShrink(MI, *MRI))
86116de4fd2SRon Lieberman return;
86216de4fd2SRon Lieberman Opc = AMDGPU::getVOPe32(Opc);
86316de4fd2SRon Lieberman // Find the related ADD instruction.
86416de4fd2SRon Lieberman const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
86516de4fd2SRon Lieberman if (!Sdst)
86616de4fd2SRon Lieberman return;
86716de4fd2SRon Lieberman MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
86816de4fd2SRon Lieberman if (!NextOp)
86916de4fd2SRon Lieberman return;
87016de4fd2SRon Lieberman MachineInstr &MISucc = *NextOp->getParent();
87116de4fd2SRon Lieberman // Can the successor be shrunk?
87216de4fd2SRon Lieberman if (!TII->canShrink(MISucc, *MRI))
87316de4fd2SRon Lieberman return;
87416de4fd2SRon Lieberman int SuccOpc = AMDGPU::getVOPe32(MISucc.getOpcode());
87516de4fd2SRon Lieberman // Make sure the carry in/out are subsequently unused.
87616de4fd2SRon Lieberman MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
87716de4fd2SRon Lieberman if (!CarryIn)
87816de4fd2SRon Lieberman return;
87916de4fd2SRon Lieberman MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
88016de4fd2SRon Lieberman if (!CarryOut)
88116de4fd2SRon Lieberman return;
88216de4fd2SRon Lieberman if (!MRI->hasOneUse(CarryIn->getReg()) || !MRI->use_empty(CarryOut->getReg()))
88316de4fd2SRon Lieberman return;
88416de4fd2SRon Lieberman // Make sure VCC or its subregs are dead before MI.
88516de4fd2SRon Lieberman MachineBasicBlock &MBB = *MI.getParent();
88616de4fd2SRon Lieberman auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
88716de4fd2SRon Lieberman if (Liveness != MachineBasicBlock::LQR_Dead)
88816de4fd2SRon Lieberman return;
88916de4fd2SRon Lieberman // Check if VCC is referenced in range of (MI,MISucc].
89016de4fd2SRon Lieberman for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
89116de4fd2SRon Lieberman I != E; ++I) {
89216de4fd2SRon Lieberman if (I->modifiesRegister(AMDGPU::VCC, TRI))
89316de4fd2SRon Lieberman return;
89416de4fd2SRon Lieberman }
89507cd19efSMatt Arsenault
89616de4fd2SRon Lieberman // Make the two new e32 instruction variants.
89716de4fd2SRon Lieberman // Replace MI with V_{SUB|ADD}_I32_e32
89807cd19efSMatt Arsenault BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc))
89907cd19efSMatt Arsenault .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
90007cd19efSMatt Arsenault .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
90107cd19efSMatt Arsenault .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
90207cd19efSMatt Arsenault .setMIFlags(MI.getFlags());
90307cd19efSMatt Arsenault
90416de4fd2SRon Lieberman MI.eraseFromParent();
90507cd19efSMatt Arsenault
90616de4fd2SRon Lieberman // Replace MISucc with V_{SUBB|ADDC}_U32_e32
90707cd19efSMatt Arsenault BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc))
90807cd19efSMatt Arsenault .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst))
90907cd19efSMatt Arsenault .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0))
91007cd19efSMatt Arsenault .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1))
91107cd19efSMatt Arsenault .setMIFlags(MISucc.getFlags());
91207cd19efSMatt Arsenault
91316de4fd2SRon Lieberman MISucc.eraseFromParent();
91416de4fd2SRon Lieberman }
91516de4fd2SRon Lieberman
isConvertibleToSDWA(MachineInstr & MI,const GCNSubtarget & ST) const91616de4fd2SRon Lieberman bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
9175bfbae5cSTom Stellard const GCNSubtarget &ST) const {
9185f7f32c3SSam Kolton // Check if this is already an SDWA instruction
9195f7f32c3SSam Kolton unsigned Opc = MI.getOpcode();
9205f7f32c3SSam Kolton if (TII->isSDWA(Opc))
9215f7f32c3SSam Kolton return true;
9225f7f32c3SSam Kolton
92356ea488dSStanislav Mekhanoshin // Check if this instruction has opcode that supports SDWA
9243c4933fcSSam Kolton if (AMDGPU::getSDWAOp(Opc) == -1)
9253c4933fcSSam Kolton Opc = AMDGPU::getVOPe32(Opc);
9263c4933fcSSam Kolton
9275f7f32c3SSam Kolton if (AMDGPU::getSDWAOp(Opc) == -1)
9283c4933fcSSam Kolton return false;
9293c4933fcSSam Kolton
9303c4933fcSSam Kolton if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
931549c89d2SSam Kolton return false;
932549c89d2SSam Kolton
933549c89d2SSam Kolton if (TII->isVOPC(Opc)) {
9343c4933fcSSam Kolton if (!ST.hasSDWASdst()) {
935549c89d2SSam Kolton const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
93652500216SStanislav Mekhanoshin if (SDst && (SDst->getReg() != AMDGPU::VCC &&
93752500216SStanislav Mekhanoshin SDst->getReg() != AMDGPU::VCC_LO))
93803306604SStanislav Mekhanoshin return false;
939ebfdaf73SSam Kolton }
940ebfdaf73SSam Kolton
941a179d25bSSam Kolton if (!ST.hasSDWAOutModsVOPC() &&
942a179d25bSSam Kolton (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
943a179d25bSSam Kolton TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
9443c4933fcSSam Kolton return false;
9453c4933fcSSam Kolton
946a179d25bSSam Kolton } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
947a179d25bSSam Kolton !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
9483c4933fcSSam Kolton return false;
9493c4933fcSSam Kolton }
9503c4933fcSSam Kolton
95128a1936fSStanislav Mekhanoshin if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
95228a1936fSStanislav Mekhanoshin Opc == AMDGPU::V_FMAC_F32_e32 ||
95328a1936fSStanislav Mekhanoshin Opc == AMDGPU::V_MAC_F16_e32 ||
9543c4933fcSSam Kolton Opc == AMDGPU::V_MAC_F32_e32))
9553c4933fcSSam Kolton return false;
9563c4933fcSSam Kolton
95728a1936fSStanislav Mekhanoshin // Check if target supports this SDWA opcode
95828a1936fSStanislav Mekhanoshin if (TII->pseudoToMCOpcode(Opc) == -1)
95928a1936fSStanislav Mekhanoshin return false;
96028a1936fSStanislav Mekhanoshin
9614c45e6ffSDmitry Preobrazhensky // FIXME: has SDWA but require handling of implicit VCC use
9624c45e6ffSDmitry Preobrazhensky if (Opc == AMDGPU::V_CNDMASK_B32_e32)
9634c45e6ffSDmitry Preobrazhensky return false;
9644c45e6ffSDmitry Preobrazhensky
9650462aef5SStanislav Mekhanoshin if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
9660462aef5SStanislav Mekhanoshin if (!Src0->isReg() && !Src0->isImm())
9670462aef5SStanislav Mekhanoshin return false;
9680462aef5SStanislav Mekhanoshin }
9690462aef5SStanislav Mekhanoshin
9700462aef5SStanislav Mekhanoshin if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
9710462aef5SStanislav Mekhanoshin if (!Src1->isReg() && !Src1->isImm())
9720462aef5SStanislav Mekhanoshin return false;
9730462aef5SStanislav Mekhanoshin }
9740462aef5SStanislav Mekhanoshin
9753c4933fcSSam Kolton return true;
9763c4933fcSSam Kolton }
9773c4933fcSSam Kolton
convertToSDWA(MachineInstr & MI,const SDWAOperandsVector & SDWAOperands)978ebfdaf73SSam Kolton bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
979ebfdaf73SSam Kolton const SDWAOperandsVector &SDWAOperands) {
98059e5ef79SMichael Bedy
981d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);
98259e5ef79SMichael Bedy
983f60ad58dSSam Kolton // Convert to sdwa
9845f7f32c3SSam Kolton int SDWAOpcode;
9855f7f32c3SSam Kolton unsigned Opcode = MI.getOpcode();
9865f7f32c3SSam Kolton if (TII->isSDWA(Opcode)) {
9875f7f32c3SSam Kolton SDWAOpcode = Opcode;
9885f7f32c3SSam Kolton } else {
9895f7f32c3SSam Kolton SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
99003306604SStanislav Mekhanoshin if (SDWAOpcode == -1)
9915f7f32c3SSam Kolton SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
9925f7f32c3SSam Kolton }
993f60ad58dSSam Kolton assert(SDWAOpcode != -1);
994f60ad58dSSam Kolton
995f60ad58dSSam Kolton const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
996f60ad58dSSam Kolton
997f60ad58dSSam Kolton // Create SDWA version of instruction MI and initialize its operands
998f60ad58dSSam Kolton MachineInstrBuilder SDWAInst =
99907cd19efSMatt Arsenault BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
100007cd19efSMatt Arsenault .setMIFlags(MI.getFlags());
1001f60ad58dSSam Kolton
1002a179d25bSSam Kolton // Copy dst, if it is present in original then should also be present in SDWA
1003a179d25bSSam Kolton MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
1004f60ad58dSSam Kolton if (Dst) {
1005f60ad58dSSam Kolton assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
1006f60ad58dSSam Kolton SDWAInst.add(*Dst);
1007a179d25bSSam Kolton } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
1008549c89d2SSam Kolton assert(Dst &&
1009549c89d2SSam Kolton AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
1010549c89d2SSam Kolton SDWAInst.add(*Dst);
1011a179d25bSSam Kolton } else {
1012a179d25bSSam Kolton assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
101352500216SStanislav Mekhanoshin SDWAInst.addReg(TRI->getVCC(), RegState::Define);
1014f60ad58dSSam Kolton }
1015f60ad58dSSam Kolton
1016f60ad58dSSam Kolton // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
1017f60ad58dSSam Kolton // src0_modifiers (except for v_nop_sdwa, but it can't get here)
1018f60ad58dSSam Kolton MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1019f60ad58dSSam Kolton assert(
1020f60ad58dSSam Kolton Src0 &&
1021f60ad58dSSam Kolton AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
1022f60ad58dSSam Kolton AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
102303306604SStanislav Mekhanoshin if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
102403306604SStanislav Mekhanoshin SDWAInst.addImm(Mod->getImm());
102503306604SStanislav Mekhanoshin else
1026f60ad58dSSam Kolton SDWAInst.addImm(0);
1027f60ad58dSSam Kolton SDWAInst.add(*Src0);
1028f60ad58dSSam Kolton
1029f60ad58dSSam Kolton // Copy src1 if present, initialize src1_modifiers.
1030f60ad58dSSam Kolton MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1031f60ad58dSSam Kolton if (Src1) {
1032f60ad58dSSam Kolton assert(
1033f60ad58dSSam Kolton AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
1034f60ad58dSSam Kolton AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
103503306604SStanislav Mekhanoshin if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
103603306604SStanislav Mekhanoshin SDWAInst.addImm(Mod->getImm());
103703306604SStanislav Mekhanoshin else
1038f60ad58dSSam Kolton SDWAInst.addImm(0);
1039f60ad58dSSam Kolton SDWAInst.add(*Src1);
1040f60ad58dSSam Kolton }
1041f60ad58dSSam Kolton
104228a1936fSStanislav Mekhanoshin if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
104328a1936fSStanislav Mekhanoshin SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
104428a1936fSStanislav Mekhanoshin SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
1045f60ad58dSSam Kolton SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
1046f60ad58dSSam Kolton // v_mac_f16/32 has additional src2 operand tied to vdst
1047f60ad58dSSam Kolton MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1048f60ad58dSSam Kolton assert(Src2);
1049f60ad58dSSam Kolton SDWAInst.add(*Src2);
1050f60ad58dSSam Kolton }
1051f60ad58dSSam Kolton
10523c4933fcSSam Kolton // Copy clamp if present, initialize otherwise
10533c4933fcSSam Kolton assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
10543c4933fcSSam Kolton MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
10553c4933fcSSam Kolton if (Clamp) {
10563c4933fcSSam Kolton SDWAInst.add(*Clamp);
10573c4933fcSSam Kolton } else {
1058549c89d2SSam Kolton SDWAInst.addImm(0);
10593c4933fcSSam Kolton }
1060549c89d2SSam Kolton
10613c4933fcSSam Kolton // Copy omod if present, initialize otherwise if needed
1062a179d25bSSam Kolton if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
10633c4933fcSSam Kolton MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
10643c4933fcSSam Kolton if (OMod) {
10653c4933fcSSam Kolton SDWAInst.add(*OMod);
1066a179d25bSSam Kolton } else {
1067f60ad58dSSam Kolton SDWAInst.addImm(0);
10683c4933fcSSam Kolton }
1069a179d25bSSam Kolton }
1070f60ad58dSSam Kolton
10715f7f32c3SSam Kolton // Copy dst_sel if present, initialize otherwise if needed
1072a179d25bSSam Kolton if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
10735f7f32c3SSam Kolton MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
10745f7f32c3SSam Kolton if (DstSel) {
10755f7f32c3SSam Kolton SDWAInst.add(*DstSel);
10765f7f32c3SSam Kolton } else {
1077f60ad58dSSam Kolton SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1078a179d25bSSam Kolton }
10795f7f32c3SSam Kolton }
1080a179d25bSSam Kolton
10815f7f32c3SSam Kolton // Copy dst_unused if present, initialize otherwise if needed
1082a179d25bSSam Kolton if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
10835f7f32c3SSam Kolton MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
10845f7f32c3SSam Kolton if (DstUnused) {
10855f7f32c3SSam Kolton SDWAInst.add(*DstUnused);
10865f7f32c3SSam Kolton } else {
1087f60ad58dSSam Kolton SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
1088f60ad58dSSam Kolton }
10895f7f32c3SSam Kolton }
1090f60ad58dSSam Kolton
10915f7f32c3SSam Kolton // Copy src0_sel if present, initialize otherwise
1092f60ad58dSSam Kolton assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
10935f7f32c3SSam Kolton MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
10945f7f32c3SSam Kolton if (Src0Sel) {
10955f7f32c3SSam Kolton SDWAInst.add(*Src0Sel);
10965f7f32c3SSam Kolton } else {
1097f60ad58dSSam Kolton SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
10985f7f32c3SSam Kolton }
1099f60ad58dSSam Kolton
11005f7f32c3SSam Kolton // Copy src1_sel if present, initialize otherwise if needed
1101f60ad58dSSam Kolton if (Src1) {
1102f60ad58dSSam Kolton assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
11035f7f32c3SSam Kolton MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
11045f7f32c3SSam Kolton if (Src1Sel) {
11055f7f32c3SSam Kolton SDWAInst.add(*Src1Sel);
11065f7f32c3SSam Kolton } else {
1107f60ad58dSSam Kolton SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1108f60ad58dSSam Kolton }
11095f7f32c3SSam Kolton }
1110f60ad58dSSam Kolton
111159e5ef79SMichael Bedy // Check for a preserved register that needs to be copied.
111259e5ef79SMichael Bedy auto DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
111359e5ef79SMichael Bedy if (DstUnused &&
111459e5ef79SMichael Bedy DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
111559e5ef79SMichael Bedy // We expect, if we are here, that the instruction was already in it's SDWA form,
111659e5ef79SMichael Bedy // with a tied operand.
111759e5ef79SMichael Bedy assert(Dst && Dst->isTied());
111859e5ef79SMichael Bedy assert(Opcode == static_cast<unsigned int>(SDWAOpcode));
111959e5ef79SMichael Bedy // We also expect a vdst, since sdst can't preserve.
112059e5ef79SMichael Bedy auto PreserveDstIdx = AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst);
112159e5ef79SMichael Bedy assert(PreserveDstIdx != -1);
112259e5ef79SMichael Bedy
112359e5ef79SMichael Bedy auto TiedIdx = MI.findTiedOperandIdx(PreserveDstIdx);
112459e5ef79SMichael Bedy auto Tied = MI.getOperand(TiedIdx);
112559e5ef79SMichael Bedy
112659e5ef79SMichael Bedy SDWAInst.add(Tied);
112759e5ef79SMichael Bedy SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
112859e5ef79SMichael Bedy }
112959e5ef79SMichael Bedy
1130c24d5e28SMatt Arsenault // Apply all sdwa operand patterns.
1131f60ad58dSSam Kolton bool Converted = false;
1132f60ad58dSSam Kolton for (auto &Operand : SDWAOperands) {
1133d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
1134*6527b2a4SSebastian Neubauer // There should be no intersection between SDWA operands and potential MIs
1135ebfdaf73SSam Kolton // e.g.:
1136ebfdaf73SSam Kolton // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
1137ebfdaf73SSam Kolton // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
1138ebfdaf73SSam Kolton // v_add_u32 v3, v4, v2
1139ebfdaf73SSam Kolton //
1140*6527b2a4SSebastian Neubauer // In that example it is possible that we would fold 2nd instruction into
1141*6527b2a4SSebastian Neubauer // 3rd (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that
1142*6527b2a4SSebastian Neubauer // was already destroyed). So if SDWAOperand is also a potential MI then do
1143*6527b2a4SSebastian Neubauer // not apply it.
1144ebfdaf73SSam Kolton if (PotentialMatches.count(Operand->getParentInst()) == 0)
1145f60ad58dSSam Kolton Converted |= Operand->convertToSDWA(*SDWAInst, TII);
1146f60ad58dSSam Kolton }
114756ea488dSStanislav Mekhanoshin if (Converted) {
114856ea488dSStanislav Mekhanoshin ConvertedInstructions.push_back(SDWAInst);
114956ea488dSStanislav Mekhanoshin } else {
1150f60ad58dSSam Kolton SDWAInst->eraseFromParent();
1151f60ad58dSSam Kolton return false;
1152f60ad58dSSam Kolton }
1153f60ad58dSSam Kolton
1154d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
1155f60ad58dSSam Kolton ++NumSDWAInstructionsPeepholed;
1156f60ad58dSSam Kolton
1157f60ad58dSSam Kolton MI.eraseFromParent();
1158f60ad58dSSam Kolton return true;
1159f60ad58dSSam Kolton }
1160f60ad58dSSam Kolton
116156ea488dSStanislav Mekhanoshin // If an instruction was converted to SDWA it should not have immediates or SGPR
11623c4933fcSSam Kolton // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
legalizeScalarOperands(MachineInstr & MI,const GCNSubtarget & ST) const1163c24d5e28SMatt Arsenault void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
11645bfbae5cSTom Stellard const GCNSubtarget &ST) const {
116556ea488dSStanislav Mekhanoshin const MCInstrDesc &Desc = TII->get(MI.getOpcode());
11663c4933fcSSam Kolton unsigned ConstantBusCount = 0;
11673c4933fcSSam Kolton for (MachineOperand &Op : MI.explicit_uses()) {
116856ea488dSStanislav Mekhanoshin if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
116956ea488dSStanislav Mekhanoshin continue;
11703c4933fcSSam Kolton
11713c4933fcSSam Kolton unsigned I = MI.getOperandNo(&Op);
117256ea488dSStanislav Mekhanoshin if (Desc.OpInfo[I].RegClass == -1 ||
1173399b7de0SChristudasan Devadasan !TRI->isVSSuperClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
117456ea488dSStanislav Mekhanoshin continue;
11753c4933fcSSam Kolton
11763c4933fcSSam Kolton if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
11773c4933fcSSam Kolton TRI->isSGPRReg(*MRI, Op.getReg())) {
11783c4933fcSSam Kolton ++ConstantBusCount;
11793c4933fcSSam Kolton continue;
11803c4933fcSSam Kolton }
11813c4933fcSSam Kolton
11820c476111SDaniel Sanders Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
118356ea488dSStanislav Mekhanoshin auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
118456ea488dSStanislav Mekhanoshin TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
118556ea488dSStanislav Mekhanoshin if (Op.isImm())
118656ea488dSStanislav Mekhanoshin Copy.addImm(Op.getImm());
118756ea488dSStanislav Mekhanoshin else if (Op.isReg())
118856ea488dSStanislav Mekhanoshin Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
118956ea488dSStanislav Mekhanoshin Op.getSubReg());
119056ea488dSStanislav Mekhanoshin Op.ChangeToRegister(VGPR, false);
119156ea488dSStanislav Mekhanoshin }
119256ea488dSStanislav Mekhanoshin }
119356ea488dSStanislav Mekhanoshin
runOnMachineFunction(MachineFunction & MF)1194f60ad58dSSam Kolton bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
11955bfbae5cSTom Stellard const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1196f60ad58dSSam Kolton
1197f1caa283SMatthias Braun if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
1198f60ad58dSSam Kolton return false;
1199f60ad58dSSam Kolton
1200f60ad58dSSam Kolton MRI = &MF.getRegInfo();
1201f60ad58dSSam Kolton TRI = ST.getRegisterInfo();
1202f60ad58dSSam Kolton TII = ST.getInstrInfo();
1203f60ad58dSSam Kolton
1204ebfdaf73SSam Kolton // Find all SDWA operands in MF.
12055f7f32c3SSam Kolton bool Ret = false;
12069c2f3c48SMatt Arsenault for (MachineBasicBlock &MBB : MF) {
12079c2f3c48SMatt Arsenault bool Changed = false;
12085f7f32c3SSam Kolton do {
120916de4fd2SRon Lieberman // Preprocess the ADD/SUB pairs so they could be SDWA'ed.
121016de4fd2SRon Lieberman // Look for a possible ADD or SUB that resulted from a previously lowered
121116de4fd2SRon Lieberman // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
121216de4fd2SRon Lieberman // lowers the pair of instructions into e32 form.
121316de4fd2SRon Lieberman matchSDWAOperands(MBB);
121416de4fd2SRon Lieberman for (const auto &OperandPair : SDWAOperands) {
121516de4fd2SRon Lieberman const auto &Operand = OperandPair.second;
121616de4fd2SRon Lieberman MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
121716de4fd2SRon Lieberman if (PotentialMI &&
121879f67caeSMatt Arsenault (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
121979f67caeSMatt Arsenault PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
122016de4fd2SRon Lieberman pseudoOpConvertToVOP2(*PotentialMI, ST);
122116de4fd2SRon Lieberman }
122216de4fd2SRon Lieberman SDWAOperands.clear();
122316de4fd2SRon Lieberman
122416de4fd2SRon Lieberman // Generate potential match list.
12259c2f3c48SMatt Arsenault matchSDWAOperands(MBB);
1226f60ad58dSSam Kolton
1227ebfdaf73SSam Kolton for (const auto &OperandPair : SDWAOperands) {
1228ebfdaf73SSam Kolton const auto &Operand = OperandPair.second;
1229f60ad58dSSam Kolton MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
12303c4933fcSSam Kolton if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
1231ebfdaf73SSam Kolton PotentialMatches[PotentialMI].push_back(Operand.get());
1232f60ad58dSSam Kolton }
1233f60ad58dSSam Kolton }
1234f60ad58dSSam Kolton
1235f60ad58dSSam Kolton for (auto &PotentialPair : PotentialMatches) {
1236f60ad58dSSam Kolton MachineInstr &PotentialMI = *PotentialPair.first;
1237f60ad58dSSam Kolton convertToSDWA(PotentialMI, PotentialPair.second);
1238f60ad58dSSam Kolton }
1239aff8341dSSam Kolton
1240ebfdaf73SSam Kolton PotentialMatches.clear();
1241aff8341dSSam Kolton SDWAOperands.clear();
124256ea488dSStanislav Mekhanoshin
12435f7f32c3SSam Kolton Changed = !ConvertedInstructions.empty();
12445f7f32c3SSam Kolton
12455f7f32c3SSam Kolton if (Changed)
12465f7f32c3SSam Kolton Ret = true;
124756ea488dSStanislav Mekhanoshin while (!ConvertedInstructions.empty())
12483c4933fcSSam Kolton legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
12495f7f32c3SSam Kolton } while (Changed);
12509c2f3c48SMatt Arsenault }
125156ea488dSStanislav Mekhanoshin
1252e4cda741SStanislav Mekhanoshin return Ret;
1253f60ad58dSSam Kolton }
1254