//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//
13fee41517SMatt Arsenault 
146a87e9b0Sdfukalov #include "AMDGPU.h"
15db6bc2abSMirko Brkusanin #include "AMDGPUCombinerHelper.h"
16fee41517SMatt Arsenault #include "AMDGPULegalizerInfo.h"
17560d7e04Sdfukalov #include "GCNSubtarget.h"
18560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/Combiner.h"
20fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24fee41517SMatt Arsenault #include "llvm/CodeGen/MachineDominators.h"
25fee41517SMatt Arsenault #include "llvm/CodeGen/TargetPassConfig.h"
26*ca57b80cSMateja Marjanovic #include "llvm/IR/IntrinsicsAMDGPU.h"
276a87e9b0Sdfukalov #include "llvm/Target/TargetMachine.h"
28fee41517SMatt Arsenault 
29fee41517SMatt Arsenault #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
30fee41517SMatt Arsenault 
31fee41517SMatt Arsenault using namespace llvm;
32fee41517SMatt Arsenault using namespace MIPatternMatch;
33fee41517SMatt Arsenault 
340031418dSPetar Avramovic class AMDGPUPostLegalizerCombinerHelper {
350031418dSPetar Avramovic protected:
360031418dSPetar Avramovic   MachineIRBuilder &B;
370031418dSPetar Avramovic   MachineFunction &MF;
380031418dSPetar Avramovic   MachineRegisterInfo &MRI;
39db6bc2abSMirko Brkusanin   AMDGPUCombinerHelper &Helper;
400031418dSPetar Avramovic 
410031418dSPetar Avramovic public:
AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder & B,AMDGPUCombinerHelper & Helper)42db6bc2abSMirko Brkusanin   AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
43db6bc2abSMirko Brkusanin                                     AMDGPUCombinerHelper &Helper)
440031418dSPetar Avramovic       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
450031418dSPetar Avramovic 
46fee41517SMatt Arsenault   struct FMinFMaxLegacyInfo {
47fee41517SMatt Arsenault     Register LHS;
48fee41517SMatt Arsenault     Register RHS;
49fee41517SMatt Arsenault     Register True;
50fee41517SMatt Arsenault     Register False;
51fee41517SMatt Arsenault     CmpInst::Predicate Pred;
52fee41517SMatt Arsenault   };
53fee41517SMatt Arsenault 
54fee41517SMatt Arsenault   // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
550031418dSPetar Avramovic   bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
560031418dSPetar Avramovic   void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
570031418dSPetar Avramovic                                          const FMinFMaxLegacyInfo &Info);
580031418dSPetar Avramovic 
590031418dSPetar Avramovic   bool matchUCharToFloat(MachineInstr &MI);
600031418dSPetar Avramovic   void applyUCharToFloat(MachineInstr &MI);
610031418dSPetar Avramovic 
62*ca57b80cSMateja Marjanovic   bool matchRcpSqrtToRsq(MachineInstr &MI,
63*ca57b80cSMateja Marjanovic                          std::function<void(MachineIRBuilder &)> &MatchInfo);
64*ca57b80cSMateja Marjanovic 
650031418dSPetar Avramovic   // FIXME: Should be able to have 2 separate matchdatas rather than custom
660031418dSPetar Avramovic   // struct boilerplate.
670031418dSPetar Avramovic   struct CvtF32UByteMatchInfo {
680031418dSPetar Avramovic     Register CvtVal;
690031418dSPetar Avramovic     unsigned ShiftOffset;
700031418dSPetar Avramovic   };
710031418dSPetar Avramovic 
720031418dSPetar Avramovic   bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
730031418dSPetar Avramovic   void applyCvtF32UByteN(MachineInstr &MI,
740031418dSPetar Avramovic                          const CvtF32UByteMatchInfo &MatchInfo);
75fb7be0d9SPetar Avramovic 
76fb7be0d9SPetar Avramovic   bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
770031418dSPetar Avramovic };
780031418dSPetar Avramovic 
matchFMinFMaxLegacy(MachineInstr & MI,FMinFMaxLegacyInfo & Info)790031418dSPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
800031418dSPetar Avramovic     MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
81fee41517SMatt Arsenault   // FIXME: Combines should have subtarget predicates, and we shouldn't need
82fee41517SMatt Arsenault   // this here.
83fee41517SMatt Arsenault   if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
84fee41517SMatt Arsenault     return false;
85fee41517SMatt Arsenault 
86fee41517SMatt Arsenault   // FIXME: Type predicate on pattern
87fee41517SMatt Arsenault   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
88fee41517SMatt Arsenault     return false;
89fee41517SMatt Arsenault 
90fee41517SMatt Arsenault   Register Cond = MI.getOperand(1).getReg();
91fee41517SMatt Arsenault   if (!MRI.hasOneNonDBGUse(Cond) ||
92fee41517SMatt Arsenault       !mi_match(Cond, MRI,
93fee41517SMatt Arsenault                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
94fee41517SMatt Arsenault     return false;
95fee41517SMatt Arsenault 
96fee41517SMatt Arsenault   Info.True = MI.getOperand(2).getReg();
97fee41517SMatt Arsenault   Info.False = MI.getOperand(3).getReg();
98fee41517SMatt Arsenault 
99fee41517SMatt Arsenault   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
100fee41517SMatt Arsenault       !(Info.LHS == Info.False && Info.RHS == Info.True))
101fee41517SMatt Arsenault     return false;
102fee41517SMatt Arsenault 
103fee41517SMatt Arsenault   switch (Info.Pred) {
104fee41517SMatt Arsenault   case CmpInst::FCMP_FALSE:
105fee41517SMatt Arsenault   case CmpInst::FCMP_OEQ:
106fee41517SMatt Arsenault   case CmpInst::FCMP_ONE:
107fee41517SMatt Arsenault   case CmpInst::FCMP_ORD:
108fee41517SMatt Arsenault   case CmpInst::FCMP_UNO:
109fee41517SMatt Arsenault   case CmpInst::FCMP_UEQ:
110fee41517SMatt Arsenault   case CmpInst::FCMP_UNE:
111fee41517SMatt Arsenault   case CmpInst::FCMP_TRUE:
112fee41517SMatt Arsenault     return false;
113fee41517SMatt Arsenault   default:
114fee41517SMatt Arsenault     return true;
115fee41517SMatt Arsenault   }
116fee41517SMatt Arsenault }
117fee41517SMatt Arsenault 
applySelectFCmpToFMinToFMaxLegacy(MachineInstr & MI,const FMinFMaxLegacyInfo & Info)1180031418dSPetar Avramovic void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
1190031418dSPetar Avramovic     MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
1200031418dSPetar Avramovic   B.setInstrAndDebugLoc(MI);
1210031418dSPetar Avramovic   auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
1220031418dSPetar Avramovic     B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
123fee41517SMatt Arsenault   };
124fee41517SMatt Arsenault 
125fee41517SMatt Arsenault   switch (Info.Pred) {
126fee41517SMatt Arsenault   case CmpInst::FCMP_ULT:
127fee41517SMatt Arsenault   case CmpInst::FCMP_ULE:
128fee41517SMatt Arsenault     if (Info.LHS == Info.True)
129fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
130fee41517SMatt Arsenault     else
131fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
132fee41517SMatt Arsenault     break;
133fee41517SMatt Arsenault   case CmpInst::FCMP_OLE:
134fee41517SMatt Arsenault   case CmpInst::FCMP_OLT: {
135fee41517SMatt Arsenault     // We need to permute the operands to get the correct NaN behavior. The
136fee41517SMatt Arsenault     // selected operand is the second one based on the failing compare with NaN,
137fee41517SMatt Arsenault     // so permute it based on the compare type the hardware uses.
138fee41517SMatt Arsenault     if (Info.LHS == Info.True)
139fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
140fee41517SMatt Arsenault     else
141fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
142fee41517SMatt Arsenault     break;
143fee41517SMatt Arsenault   }
144fee41517SMatt Arsenault   case CmpInst::FCMP_UGE:
145fee41517SMatt Arsenault   case CmpInst::FCMP_UGT: {
146fee41517SMatt Arsenault     if (Info.LHS == Info.True)
147fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
148fee41517SMatt Arsenault     else
149fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
150fee41517SMatt Arsenault     break;
151fee41517SMatt Arsenault   }
152fee41517SMatt Arsenault   case CmpInst::FCMP_OGT:
153fee41517SMatt Arsenault   case CmpInst::FCMP_OGE: {
154fee41517SMatt Arsenault     if (Info.LHS == Info.True)
155fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
156fee41517SMatt Arsenault     else
157fee41517SMatt Arsenault       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
158fee41517SMatt Arsenault     break;
159fee41517SMatt Arsenault   }
160fee41517SMatt Arsenault   default:
161fee41517SMatt Arsenault     llvm_unreachable("predicate should not have matched");
162fee41517SMatt Arsenault   }
163fee41517SMatt Arsenault 
164fee41517SMatt Arsenault   MI.eraseFromParent();
165fee41517SMatt Arsenault }
166fee41517SMatt Arsenault 
matchUCharToFloat(MachineInstr & MI)1670031418dSPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
168b27d255eSMatt Arsenault   Register DstReg = MI.getOperand(0).getReg();
169b27d255eSMatt Arsenault 
170b27d255eSMatt Arsenault   // TODO: We could try to match extracting the higher bytes, which would be
171b27d255eSMatt Arsenault   // easier if i8 vectors weren't promoted to i32 vectors, particularly after
172b27d255eSMatt Arsenault   // types are legalized. v4i8 -> v4f32 is probably the only case to worry
173b27d255eSMatt Arsenault   // about in practice.
174b27d255eSMatt Arsenault   LLT Ty = MRI.getType(DstReg);
175b27d255eSMatt Arsenault   if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
176db777eaeSMatt Arsenault     Register SrcReg = MI.getOperand(1).getReg();
177db777eaeSMatt Arsenault     unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
178db777eaeSMatt Arsenault     assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
179db777eaeSMatt Arsenault     const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
180db777eaeSMatt Arsenault     return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
181b27d255eSMatt Arsenault   }
182b27d255eSMatt Arsenault 
183b27d255eSMatt Arsenault   return false;
184b27d255eSMatt Arsenault }
185b27d255eSMatt Arsenault 
applyUCharToFloat(MachineInstr & MI)1860031418dSPetar Avramovic void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
1870031418dSPetar Avramovic   B.setInstrAndDebugLoc(MI);
188b27d255eSMatt Arsenault 
189b27d255eSMatt Arsenault   const LLT S32 = LLT::scalar(32);
190b27d255eSMatt Arsenault 
191b27d255eSMatt Arsenault   Register DstReg = MI.getOperand(0).getReg();
192db777eaeSMatt Arsenault   Register SrcReg = MI.getOperand(1).getReg();
1930031418dSPetar Avramovic   LLT Ty = MRI.getType(DstReg);
1940031418dSPetar Avramovic   LLT SrcTy = MRI.getType(SrcReg);
195db777eaeSMatt Arsenault   if (SrcTy != S32)
196db777eaeSMatt Arsenault     SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
197b27d255eSMatt Arsenault 
198b27d255eSMatt Arsenault   if (Ty == S32) {
199ecbed4e0SThomas Symalla     B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
200ecbed4e0SThomas Symalla                    {SrcReg}, MI.getFlags());
201b27d255eSMatt Arsenault   } else {
202ecbed4e0SThomas Symalla     auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
203ecbed4e0SThomas Symalla                              {SrcReg}, MI.getFlags());
204b27d255eSMatt Arsenault     B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
205b27d255eSMatt Arsenault   }
206b27d255eSMatt Arsenault 
207b27d255eSMatt Arsenault   MI.eraseFromParent();
208b27d255eSMatt Arsenault }
209fee41517SMatt Arsenault 
matchRcpSqrtToRsq(MachineInstr & MI,std::function<void (MachineIRBuilder &)> & MatchInfo)210*ca57b80cSMateja Marjanovic bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
211*ca57b80cSMateja Marjanovic     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
212*ca57b80cSMateja Marjanovic 
213*ca57b80cSMateja Marjanovic   auto getRcpSrc = [=](const MachineInstr &MI) {
214*ca57b80cSMateja Marjanovic     MachineInstr *ResMI = nullptr;
215*ca57b80cSMateja Marjanovic     if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
216*ca57b80cSMateja Marjanovic         MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
217*ca57b80cSMateja Marjanovic       ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
218*ca57b80cSMateja Marjanovic 
219*ca57b80cSMateja Marjanovic     return ResMI;
220*ca57b80cSMateja Marjanovic   };
221*ca57b80cSMateja Marjanovic 
222*ca57b80cSMateja Marjanovic   auto getSqrtSrc = [=](const MachineInstr &MI) {
223*ca57b80cSMateja Marjanovic     MachineInstr *SqrtSrcMI = nullptr;
224*ca57b80cSMateja Marjanovic     mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
225*ca57b80cSMateja Marjanovic     return SqrtSrcMI;
226*ca57b80cSMateja Marjanovic   };
227*ca57b80cSMateja Marjanovic 
228*ca57b80cSMateja Marjanovic   MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
229*ca57b80cSMateja Marjanovic   // rcp(sqrt(x))
230*ca57b80cSMateja Marjanovic   if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
231*ca57b80cSMateja Marjanovic     MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
232*ca57b80cSMateja Marjanovic       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
233*ca57b80cSMateja Marjanovic           .addUse(SqrtSrcMI->getOperand(0).getReg())
234*ca57b80cSMateja Marjanovic           .setMIFlags(MI.getFlags());
235*ca57b80cSMateja Marjanovic     };
236*ca57b80cSMateja Marjanovic     return true;
237*ca57b80cSMateja Marjanovic   }
238*ca57b80cSMateja Marjanovic 
239*ca57b80cSMateja Marjanovic   // sqrt(rcp(x))
240*ca57b80cSMateja Marjanovic   if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
241*ca57b80cSMateja Marjanovic     MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
242*ca57b80cSMateja Marjanovic       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
243*ca57b80cSMateja Marjanovic           .addUse(RcpSrcMI->getOperand(0).getReg())
244*ca57b80cSMateja Marjanovic           .setMIFlags(MI.getFlags());
245*ca57b80cSMateja Marjanovic     };
246*ca57b80cSMateja Marjanovic     return true;
247*ca57b80cSMateja Marjanovic   }
248*ca57b80cSMateja Marjanovic 
249*ca57b80cSMateja Marjanovic   return false;
250*ca57b80cSMateja Marjanovic }
251*ca57b80cSMateja Marjanovic 
matchCvtF32UByteN(MachineInstr & MI,CvtF32UByteMatchInfo & MatchInfo)2520031418dSPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
2530031418dSPetar Avramovic     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
2540ba40d4cSMatt Arsenault   Register SrcReg = MI.getOperand(1).getReg();
2550ba40d4cSMatt Arsenault 
2560ba40d4cSMatt Arsenault   // Look through G_ZEXT.
2570ba40d4cSMatt Arsenault   mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
2580ba40d4cSMatt Arsenault 
2590ba40d4cSMatt Arsenault   Register Src0;
2600ba40d4cSMatt Arsenault   int64_t ShiftAmt;
2610ba40d4cSMatt Arsenault   bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
2620ba40d4cSMatt Arsenault   if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
2630ba40d4cSMatt Arsenault     const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
2640ba40d4cSMatt Arsenault 
2650ba40d4cSMatt Arsenault     unsigned ShiftOffset = 8 * Offset;
2660ba40d4cSMatt Arsenault     if (IsShr)
2670ba40d4cSMatt Arsenault       ShiftOffset += ShiftAmt;
2680ba40d4cSMatt Arsenault     else
2690ba40d4cSMatt Arsenault       ShiftOffset -= ShiftAmt;
2700ba40d4cSMatt Arsenault 
2710ba40d4cSMatt Arsenault     MatchInfo.CvtVal = Src0;
2720ba40d4cSMatt Arsenault     MatchInfo.ShiftOffset = ShiftOffset;
2730ba40d4cSMatt Arsenault     return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
2740ba40d4cSMatt Arsenault   }
2750ba40d4cSMatt Arsenault 
2760ba40d4cSMatt Arsenault   // TODO: Simplify demanded bits.
2770ba40d4cSMatt Arsenault   return false;
2780ba40d4cSMatt Arsenault }
2790ba40d4cSMatt Arsenault 
applyCvtF32UByteN(MachineInstr & MI,const CvtF32UByteMatchInfo & MatchInfo)2800031418dSPetar Avramovic void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
2810031418dSPetar Avramovic     MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
2820031418dSPetar Avramovic   B.setInstrAndDebugLoc(MI);
2830ba40d4cSMatt Arsenault   unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
2840ba40d4cSMatt Arsenault 
2850ba40d4cSMatt Arsenault   const LLT S32 = LLT::scalar(32);
2860ba40d4cSMatt Arsenault   Register CvtSrc = MatchInfo.CvtVal;
2870031418dSPetar Avramovic   LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
2880ba40d4cSMatt Arsenault   if (SrcTy != S32) {
2890ba40d4cSMatt Arsenault     assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
2900ba40d4cSMatt Arsenault     CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
2910ba40d4cSMatt Arsenault   }
2920ba40d4cSMatt Arsenault 
2930ba40d4cSMatt Arsenault   assert(MI.getOpcode() != NewOpc);
2940ba40d4cSMatt Arsenault   B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
2950ba40d4cSMatt Arsenault   MI.eraseFromParent();
2960ba40d4cSMatt Arsenault }
2970ba40d4cSMatt Arsenault 
matchRemoveFcanonicalize(MachineInstr & MI,Register & Reg)298fb7be0d9SPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
299fb7be0d9SPetar Avramovic     MachineInstr &MI, Register &Reg) {
300fb7be0d9SPetar Avramovic   const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
301fb7be0d9SPetar Avramovic       MF.getSubtarget().getTargetLowering());
302fb7be0d9SPetar Avramovic   Reg = MI.getOperand(1).getReg();
303fb7be0d9SPetar Avramovic   return TLI->isCanonicalized(Reg, MF);
304fb7be0d9SPetar Avramovic }
305fb7be0d9SPetar Avramovic 
3060031418dSPetar Avramovic class AMDGPUPostLegalizerCombinerHelperState {
3070031418dSPetar Avramovic protected:
308db6bc2abSMirko Brkusanin   AMDGPUCombinerHelper &Helper;
3090031418dSPetar Avramovic   AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
3100031418dSPetar Avramovic 
3110031418dSPetar Avramovic public:
AMDGPUPostLegalizerCombinerHelperState(AMDGPUCombinerHelper & Helper,AMDGPUPostLegalizerCombinerHelper & PostLegalizerHelper)3120031418dSPetar Avramovic   AMDGPUPostLegalizerCombinerHelperState(
313db6bc2abSMirko Brkusanin       AMDGPUCombinerHelper &Helper,
3140031418dSPetar Avramovic       AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
3150031418dSPetar Avramovic       : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
3160031418dSPetar Avramovic };
3170031418dSPetar Avramovic 
318fee41517SMatt Arsenault #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
319fee41517SMatt Arsenault #include "AMDGPUGenPostLegalizeGICombiner.inc"
320fee41517SMatt Arsenault #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
321fee41517SMatt Arsenault 
322fee41517SMatt Arsenault namespace {
323fee41517SMatt Arsenault #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
324fee41517SMatt Arsenault #include "AMDGPUGenPostLegalizeGICombiner.inc"
325fee41517SMatt Arsenault #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
326fee41517SMatt Arsenault 
32716bcd545SMatt Arsenault class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
328fee41517SMatt Arsenault   GISelKnownBits *KB;
329fee41517SMatt Arsenault   MachineDominatorTree *MDT;
330fee41517SMatt Arsenault 
331fee41517SMatt Arsenault public:
332e35ba099SDaniel Sanders   AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
333fee41517SMatt Arsenault 
AMDGPUPostLegalizerCombinerInfo(bool EnableOpt,bool OptSize,bool MinSize,const AMDGPULegalizerInfo * LI,GISelKnownBits * KB,MachineDominatorTree * MDT)334fee41517SMatt Arsenault   AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
335fee41517SMatt Arsenault                                   const AMDGPULegalizerInfo *LI,
336fee41517SMatt Arsenault                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
337fee41517SMatt Arsenault       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
338fee41517SMatt Arsenault                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
339fee41517SMatt Arsenault         KB(KB), MDT(MDT) {
340e35ba099SDaniel Sanders     if (!GeneratedRuleCfg.parseCommandLineOption())
341fee41517SMatt Arsenault       report_fatal_error("Invalid rule identifier");
342fee41517SMatt Arsenault   }
343fee41517SMatt Arsenault 
344d15723efSMatt Arsenault   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
345fee41517SMatt Arsenault                MachineIRBuilder &B) const override;
346fee41517SMatt Arsenault };
347fee41517SMatt Arsenault 
combine(GISelChangeObserver & Observer,MachineInstr & MI,MachineIRBuilder & B) const348fee41517SMatt Arsenault bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
349fee41517SMatt Arsenault                                               MachineInstr &MI,
350fee41517SMatt Arsenault                                               MachineIRBuilder &B) const {
351db6bc2abSMirko Brkusanin   AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo);
3520031418dSPetar Avramovic   AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
3530031418dSPetar Avramovic   AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
3540031418dSPetar Avramovic                                                  PostLegalizerHelper);
355fee41517SMatt Arsenault 
3560031418dSPetar Avramovic   if (Generated.tryCombineAll(Observer, MI, B))
357fee41517SMatt Arsenault     return true;
358fee41517SMatt Arsenault 
359fee41517SMatt Arsenault   switch (MI.getOpcode()) {
360fee41517SMatt Arsenault   case TargetOpcode::G_SHL:
361fee41517SMatt Arsenault   case TargetOpcode::G_LSHR:
362fee41517SMatt Arsenault   case TargetOpcode::G_ASHR:
363fee41517SMatt Arsenault     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
364fee41517SMatt Arsenault     // common case, splitting this into a move and a 32-bit shift is faster and
365fee41517SMatt Arsenault     // the same code size.
366fee41517SMatt Arsenault     return Helper.tryCombineShiftToUnmerge(MI, 32);
367fee41517SMatt Arsenault   }
368fee41517SMatt Arsenault 
369fee41517SMatt Arsenault   return false;
370fee41517SMatt Arsenault }
371fee41517SMatt Arsenault 
372fee41517SMatt Arsenault #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
373fee41517SMatt Arsenault #include "AMDGPUGenPostLegalizeGICombiner.inc"
374fee41517SMatt Arsenault #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
375fee41517SMatt Arsenault 
376fee41517SMatt Arsenault // Pass boilerplate
377fee41517SMatt Arsenault // ================
378fee41517SMatt Arsenault 
379fee41517SMatt Arsenault class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
380fee41517SMatt Arsenault public:
381fee41517SMatt Arsenault   static char ID;
382fee41517SMatt Arsenault 
383fee41517SMatt Arsenault   AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
384fee41517SMatt Arsenault 
getPassName() const385fee41517SMatt Arsenault   StringRef getPassName() const override {
386fee41517SMatt Arsenault     return "AMDGPUPostLegalizerCombiner";
387fee41517SMatt Arsenault   }
388fee41517SMatt Arsenault 
389fee41517SMatt Arsenault   bool runOnMachineFunction(MachineFunction &MF) override;
390fee41517SMatt Arsenault 
391fee41517SMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override;
392fee41517SMatt Arsenault private:
393fee41517SMatt Arsenault   bool IsOptNone;
394fee41517SMatt Arsenault };
395fee41517SMatt Arsenault } // end anonymous namespace
396fee41517SMatt Arsenault 
getAnalysisUsage(AnalysisUsage & AU) const397fee41517SMatt Arsenault void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
398fee41517SMatt Arsenault   AU.addRequired<TargetPassConfig>();
399fee41517SMatt Arsenault   AU.setPreservesCFG();
400fee41517SMatt Arsenault   getSelectionDAGFallbackAnalysisUsage(AU);
401fee41517SMatt Arsenault   AU.addRequired<GISelKnownBitsAnalysis>();
402fee41517SMatt Arsenault   AU.addPreserved<GISelKnownBitsAnalysis>();
403fee41517SMatt Arsenault   if (!IsOptNone) {
404fee41517SMatt Arsenault     AU.addRequired<MachineDominatorTree>();
405fee41517SMatt Arsenault     AU.addPreserved<MachineDominatorTree>();
406fee41517SMatt Arsenault   }
407fee41517SMatt Arsenault   MachineFunctionPass::getAnalysisUsage(AU);
408fee41517SMatt Arsenault }
409fee41517SMatt Arsenault 
AMDGPUPostLegalizerCombiner(bool IsOptNone)410fee41517SMatt Arsenault AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
411fee41517SMatt Arsenault   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
412fee41517SMatt Arsenault   initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
413fee41517SMatt Arsenault }
414fee41517SMatt Arsenault 
runOnMachineFunction(MachineFunction & MF)415fee41517SMatt Arsenault bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
416fee41517SMatt Arsenault   if (MF.getProperties().hasProperty(
417fee41517SMatt Arsenault           MachineFunctionProperties::Property::FailedISel))
418fee41517SMatt Arsenault     return false;
419fee41517SMatt Arsenault   auto *TPC = &getAnalysis<TargetPassConfig>();
420fee41517SMatt Arsenault   const Function &F = MF.getFunction();
421fee41517SMatt Arsenault   bool EnableOpt =
422fee41517SMatt Arsenault       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
423fee41517SMatt Arsenault 
424fee41517SMatt Arsenault   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
425bcd6c2d2SThomas Symalla   const AMDGPULegalizerInfo *LI
426bcd6c2d2SThomas Symalla     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
427fee41517SMatt Arsenault 
428fee41517SMatt Arsenault   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
429fee41517SMatt Arsenault   MachineDominatorTree *MDT =
430fee41517SMatt Arsenault       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
431fee41517SMatt Arsenault   AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
432fee41517SMatt Arsenault                                          F.hasMinSize(), LI, KB, MDT);
433fee41517SMatt Arsenault   Combiner C(PCInfo, TPC);
434fee41517SMatt Arsenault   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
435fee41517SMatt Arsenault }
436fee41517SMatt Arsenault 
437fee41517SMatt Arsenault char AMDGPUPostLegalizerCombiner::ID = 0;
438fee41517SMatt Arsenault INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
439bcd6c2d2SThomas Symalla                       "Combine AMDGPU machine instrs after legalization",
440bcd6c2d2SThomas Symalla                       false, false)
441fee41517SMatt Arsenault INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
442fee41517SMatt Arsenault INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
443fee41517SMatt Arsenault INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
444fee41517SMatt Arsenault                     "Combine AMDGPU machine instrs after legalization", false,
445fee41517SMatt Arsenault                     false)
446fee41517SMatt Arsenault 
447fee41517SMatt Arsenault namespace llvm {
createAMDGPUPostLegalizeCombiner(bool IsOptNone)448fee41517SMatt Arsenault FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
449fee41517SMatt Arsenault   return new AMDGPUPostLegalizerCombiner(IsOptNone);
450fee41517SMatt Arsenault }
451fee41517SMatt Arsenault } // end namespace llvm
452