19a8da909SThomas Symalla //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
2fee41517SMatt Arsenault //
3fee41517SMatt Arsenault // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fee41517SMatt Arsenault // See https://llvm.org/LICENSE.txt for license information.
5fee41517SMatt Arsenault // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fee41517SMatt Arsenault //
7fee41517SMatt Arsenault //===----------------------------------------------------------------------===//
8fee41517SMatt Arsenault //
9fee41517SMatt Arsenault // This pass does combining of machine instructions at the generic MI level,
10fee41517SMatt Arsenault // after the legalizer.
11fee41517SMatt Arsenault //
12fee41517SMatt Arsenault //===----------------------------------------------------------------------===//
13fee41517SMatt Arsenault
146a87e9b0Sdfukalov #include "AMDGPU.h"
15db6bc2abSMirko Brkusanin #include "AMDGPUCombinerHelper.h"
16fee41517SMatt Arsenault #include "AMDGPULegalizerInfo.h"
17560d7e04Sdfukalov #include "GCNSubtarget.h"
18560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/Combiner.h"
20fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23fee41517SMatt Arsenault #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24fee41517SMatt Arsenault #include "llvm/CodeGen/MachineDominators.h"
25fee41517SMatt Arsenault #include "llvm/CodeGen/TargetPassConfig.h"
26*ca57b80cSMateja Marjanovic #include "llvm/IR/IntrinsicsAMDGPU.h"
276a87e9b0Sdfukalov #include "llvm/Target/TargetMachine.h"
28fee41517SMatt Arsenault
29fee41517SMatt Arsenault #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
30fee41517SMatt Arsenault
31fee41517SMatt Arsenault using namespace llvm;
32fee41517SMatt Arsenault using namespace MIPatternMatch;
33fee41517SMatt Arsenault
340031418dSPetar Avramovic class AMDGPUPostLegalizerCombinerHelper {
350031418dSPetar Avramovic protected:
360031418dSPetar Avramovic MachineIRBuilder &B;
370031418dSPetar Avramovic MachineFunction &MF;
380031418dSPetar Avramovic MachineRegisterInfo &MRI;
39db6bc2abSMirko Brkusanin AMDGPUCombinerHelper &Helper;
400031418dSPetar Avramovic
410031418dSPetar Avramovic public:
AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder & B,AMDGPUCombinerHelper & Helper)42db6bc2abSMirko Brkusanin AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
43db6bc2abSMirko Brkusanin AMDGPUCombinerHelper &Helper)
440031418dSPetar Avramovic : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
450031418dSPetar Avramovic
46fee41517SMatt Arsenault struct FMinFMaxLegacyInfo {
47fee41517SMatt Arsenault Register LHS;
48fee41517SMatt Arsenault Register RHS;
49fee41517SMatt Arsenault Register True;
50fee41517SMatt Arsenault Register False;
51fee41517SMatt Arsenault CmpInst::Predicate Pred;
52fee41517SMatt Arsenault };
53fee41517SMatt Arsenault
54fee41517SMatt Arsenault // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
550031418dSPetar Avramovic bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
560031418dSPetar Avramovic void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
570031418dSPetar Avramovic const FMinFMaxLegacyInfo &Info);
580031418dSPetar Avramovic
590031418dSPetar Avramovic bool matchUCharToFloat(MachineInstr &MI);
600031418dSPetar Avramovic void applyUCharToFloat(MachineInstr &MI);
610031418dSPetar Avramovic
62*ca57b80cSMateja Marjanovic bool matchRcpSqrtToRsq(MachineInstr &MI,
63*ca57b80cSMateja Marjanovic std::function<void(MachineIRBuilder &)> &MatchInfo);
64*ca57b80cSMateja Marjanovic
650031418dSPetar Avramovic // FIXME: Should be able to have 2 separate matchdatas rather than custom
660031418dSPetar Avramovic // struct boilerplate.
670031418dSPetar Avramovic struct CvtF32UByteMatchInfo {
680031418dSPetar Avramovic Register CvtVal;
690031418dSPetar Avramovic unsigned ShiftOffset;
700031418dSPetar Avramovic };
710031418dSPetar Avramovic
720031418dSPetar Avramovic bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
730031418dSPetar Avramovic void applyCvtF32UByteN(MachineInstr &MI,
740031418dSPetar Avramovic const CvtF32UByteMatchInfo &MatchInfo);
75fb7be0d9SPetar Avramovic
76fb7be0d9SPetar Avramovic bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
770031418dSPetar Avramovic };
780031418dSPetar Avramovic
matchFMinFMaxLegacy(MachineInstr & MI,FMinFMaxLegacyInfo & Info)790031418dSPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
800031418dSPetar Avramovic MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
81fee41517SMatt Arsenault // FIXME: Combines should have subtarget predicates, and we shouldn't need
82fee41517SMatt Arsenault // this here.
83fee41517SMatt Arsenault if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
84fee41517SMatt Arsenault return false;
85fee41517SMatt Arsenault
86fee41517SMatt Arsenault // FIXME: Type predicate on pattern
87fee41517SMatt Arsenault if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
88fee41517SMatt Arsenault return false;
89fee41517SMatt Arsenault
90fee41517SMatt Arsenault Register Cond = MI.getOperand(1).getReg();
91fee41517SMatt Arsenault if (!MRI.hasOneNonDBGUse(Cond) ||
92fee41517SMatt Arsenault !mi_match(Cond, MRI,
93fee41517SMatt Arsenault m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
94fee41517SMatt Arsenault return false;
95fee41517SMatt Arsenault
96fee41517SMatt Arsenault Info.True = MI.getOperand(2).getReg();
97fee41517SMatt Arsenault Info.False = MI.getOperand(3).getReg();
98fee41517SMatt Arsenault
99fee41517SMatt Arsenault if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
100fee41517SMatt Arsenault !(Info.LHS == Info.False && Info.RHS == Info.True))
101fee41517SMatt Arsenault return false;
102fee41517SMatt Arsenault
103fee41517SMatt Arsenault switch (Info.Pred) {
104fee41517SMatt Arsenault case CmpInst::FCMP_FALSE:
105fee41517SMatt Arsenault case CmpInst::FCMP_OEQ:
106fee41517SMatt Arsenault case CmpInst::FCMP_ONE:
107fee41517SMatt Arsenault case CmpInst::FCMP_ORD:
108fee41517SMatt Arsenault case CmpInst::FCMP_UNO:
109fee41517SMatt Arsenault case CmpInst::FCMP_UEQ:
110fee41517SMatt Arsenault case CmpInst::FCMP_UNE:
111fee41517SMatt Arsenault case CmpInst::FCMP_TRUE:
112fee41517SMatt Arsenault return false;
113fee41517SMatt Arsenault default:
114fee41517SMatt Arsenault return true;
115fee41517SMatt Arsenault }
116fee41517SMatt Arsenault }
117fee41517SMatt Arsenault
// Rewrite the matched select(fcmp) into G_AMDGPU_FMIN_LEGACY /
// G_AMDGPU_FMAX_LEGACY. The operand order of the new instruction encodes the
// NaN behavior, so each predicate case picks both the opcode and the operand
// order; do not reorder these cases or operands.
void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
  B.setInstrAndDebugLoc(MI);
  // Builds the replacement min/max, reusing the select's def and flags.
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    // Mirror of the unordered less-than cases with min/max exchanged.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    // Mirror of the ordered less-than cases with min/max exchanged.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    // matchFMinFMaxLegacy rejected every other predicate.
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}
166fee41517SMatt Arsenault
matchUCharToFloat(MachineInstr & MI)1670031418dSPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
168b27d255eSMatt Arsenault Register DstReg = MI.getOperand(0).getReg();
169b27d255eSMatt Arsenault
170b27d255eSMatt Arsenault // TODO: We could try to match extracting the higher bytes, which would be
171b27d255eSMatt Arsenault // easier if i8 vectors weren't promoted to i32 vectors, particularly after
172b27d255eSMatt Arsenault // types are legalized. v4i8 -> v4f32 is probably the only case to worry
173b27d255eSMatt Arsenault // about in practice.
174b27d255eSMatt Arsenault LLT Ty = MRI.getType(DstReg);
175b27d255eSMatt Arsenault if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
176db777eaeSMatt Arsenault Register SrcReg = MI.getOperand(1).getReg();
177db777eaeSMatt Arsenault unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
178db777eaeSMatt Arsenault assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
179db777eaeSMatt Arsenault const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
180db777eaeSMatt Arsenault return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
181b27d255eSMatt Arsenault }
182b27d255eSMatt Arsenault
183b27d255eSMatt Arsenault return false;
184b27d255eSMatt Arsenault }
185b27d255eSMatt Arsenault
applyUCharToFloat(MachineInstr & MI)1860031418dSPetar Avramovic void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
1870031418dSPetar Avramovic B.setInstrAndDebugLoc(MI);
188b27d255eSMatt Arsenault
189b27d255eSMatt Arsenault const LLT S32 = LLT::scalar(32);
190b27d255eSMatt Arsenault
191b27d255eSMatt Arsenault Register DstReg = MI.getOperand(0).getReg();
192db777eaeSMatt Arsenault Register SrcReg = MI.getOperand(1).getReg();
1930031418dSPetar Avramovic LLT Ty = MRI.getType(DstReg);
1940031418dSPetar Avramovic LLT SrcTy = MRI.getType(SrcReg);
195db777eaeSMatt Arsenault if (SrcTy != S32)
196db777eaeSMatt Arsenault SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
197b27d255eSMatt Arsenault
198b27d255eSMatt Arsenault if (Ty == S32) {
199ecbed4e0SThomas Symalla B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
200ecbed4e0SThomas Symalla {SrcReg}, MI.getFlags());
201b27d255eSMatt Arsenault } else {
202ecbed4e0SThomas Symalla auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
203ecbed4e0SThomas Symalla {SrcReg}, MI.getFlags());
204b27d255eSMatt Arsenault B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
205b27d255eSMatt Arsenault }
206b27d255eSMatt Arsenault
207b27d255eSMatt Arsenault MI.eraseFromParent();
208b27d255eSMatt Arsenault }
209fee41517SMatt Arsenault
matchRcpSqrtToRsq(MachineInstr & MI,std::function<void (MachineIRBuilder &)> & MatchInfo)210*ca57b80cSMateja Marjanovic bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
211*ca57b80cSMateja Marjanovic MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
212*ca57b80cSMateja Marjanovic
213*ca57b80cSMateja Marjanovic auto getRcpSrc = [=](const MachineInstr &MI) {
214*ca57b80cSMateja Marjanovic MachineInstr *ResMI = nullptr;
215*ca57b80cSMateja Marjanovic if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
216*ca57b80cSMateja Marjanovic MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
217*ca57b80cSMateja Marjanovic ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
218*ca57b80cSMateja Marjanovic
219*ca57b80cSMateja Marjanovic return ResMI;
220*ca57b80cSMateja Marjanovic };
221*ca57b80cSMateja Marjanovic
222*ca57b80cSMateja Marjanovic auto getSqrtSrc = [=](const MachineInstr &MI) {
223*ca57b80cSMateja Marjanovic MachineInstr *SqrtSrcMI = nullptr;
224*ca57b80cSMateja Marjanovic mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
225*ca57b80cSMateja Marjanovic return SqrtSrcMI;
226*ca57b80cSMateja Marjanovic };
227*ca57b80cSMateja Marjanovic
228*ca57b80cSMateja Marjanovic MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
229*ca57b80cSMateja Marjanovic // rcp(sqrt(x))
230*ca57b80cSMateja Marjanovic if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
231*ca57b80cSMateja Marjanovic MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
232*ca57b80cSMateja Marjanovic B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
233*ca57b80cSMateja Marjanovic .addUse(SqrtSrcMI->getOperand(0).getReg())
234*ca57b80cSMateja Marjanovic .setMIFlags(MI.getFlags());
235*ca57b80cSMateja Marjanovic };
236*ca57b80cSMateja Marjanovic return true;
237*ca57b80cSMateja Marjanovic }
238*ca57b80cSMateja Marjanovic
239*ca57b80cSMateja Marjanovic // sqrt(rcp(x))
240*ca57b80cSMateja Marjanovic if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
241*ca57b80cSMateja Marjanovic MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
242*ca57b80cSMateja Marjanovic B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
243*ca57b80cSMateja Marjanovic .addUse(RcpSrcMI->getOperand(0).getReg())
244*ca57b80cSMateja Marjanovic .setMIFlags(MI.getFlags());
245*ca57b80cSMateja Marjanovic };
246*ca57b80cSMateja Marjanovic return true;
247*ca57b80cSMateja Marjanovic }
248*ca57b80cSMateja Marjanovic
249*ca57b80cSMateja Marjanovic return false;
250*ca57b80cSMateja Marjanovic }
251*ca57b80cSMateja Marjanovic
matchCvtF32UByteN(MachineInstr & MI,CvtF32UByteMatchInfo & MatchInfo)2520031418dSPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
2530031418dSPetar Avramovic MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
2540ba40d4cSMatt Arsenault Register SrcReg = MI.getOperand(1).getReg();
2550ba40d4cSMatt Arsenault
2560ba40d4cSMatt Arsenault // Look through G_ZEXT.
2570ba40d4cSMatt Arsenault mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
2580ba40d4cSMatt Arsenault
2590ba40d4cSMatt Arsenault Register Src0;
2600ba40d4cSMatt Arsenault int64_t ShiftAmt;
2610ba40d4cSMatt Arsenault bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
2620ba40d4cSMatt Arsenault if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
2630ba40d4cSMatt Arsenault const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
2640ba40d4cSMatt Arsenault
2650ba40d4cSMatt Arsenault unsigned ShiftOffset = 8 * Offset;
2660ba40d4cSMatt Arsenault if (IsShr)
2670ba40d4cSMatt Arsenault ShiftOffset += ShiftAmt;
2680ba40d4cSMatt Arsenault else
2690ba40d4cSMatt Arsenault ShiftOffset -= ShiftAmt;
2700ba40d4cSMatt Arsenault
2710ba40d4cSMatt Arsenault MatchInfo.CvtVal = Src0;
2720ba40d4cSMatt Arsenault MatchInfo.ShiftOffset = ShiftOffset;
2730ba40d4cSMatt Arsenault return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
2740ba40d4cSMatt Arsenault }
2750ba40d4cSMatt Arsenault
2760ba40d4cSMatt Arsenault // TODO: Simplify demanded bits.
2770ba40d4cSMatt Arsenault return false;
2780ba40d4cSMatt Arsenault }
2790ba40d4cSMatt Arsenault
applyCvtF32UByteN(MachineInstr & MI,const CvtF32UByteMatchInfo & MatchInfo)2800031418dSPetar Avramovic void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
2810031418dSPetar Avramovic MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
2820031418dSPetar Avramovic B.setInstrAndDebugLoc(MI);
2830ba40d4cSMatt Arsenault unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
2840ba40d4cSMatt Arsenault
2850ba40d4cSMatt Arsenault const LLT S32 = LLT::scalar(32);
2860ba40d4cSMatt Arsenault Register CvtSrc = MatchInfo.CvtVal;
2870031418dSPetar Avramovic LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
2880ba40d4cSMatt Arsenault if (SrcTy != S32) {
2890ba40d4cSMatt Arsenault assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
2900ba40d4cSMatt Arsenault CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
2910ba40d4cSMatt Arsenault }
2920ba40d4cSMatt Arsenault
2930ba40d4cSMatt Arsenault assert(MI.getOpcode() != NewOpc);
2940ba40d4cSMatt Arsenault B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
2950ba40d4cSMatt Arsenault MI.eraseFromParent();
2960ba40d4cSMatt Arsenault }
2970ba40d4cSMatt Arsenault
matchRemoveFcanonicalize(MachineInstr & MI,Register & Reg)298fb7be0d9SPetar Avramovic bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
299fb7be0d9SPetar Avramovic MachineInstr &MI, Register &Reg) {
300fb7be0d9SPetar Avramovic const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
301fb7be0d9SPetar Avramovic MF.getSubtarget().getTargetLowering());
302fb7be0d9SPetar Avramovic Reg = MI.getOperand(1).getReg();
303fb7be0d9SPetar Avramovic return TLI->isCanonicalized(Reg, MF);
304fb7be0d9SPetar Avramovic }
305fb7be0d9SPetar Avramovic
// State object the tablegen-generated combiner derives from: bundles the
// generic AMDGPUCombinerHelper with the hand-written post-legalizer helper so
// generated rules can call match/apply routines on either.
class AMDGPUPostLegalizerCombinerHelperState {
protected:
  AMDGPUCombinerHelper &Helper;
  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;

public:
  AMDGPUPostLegalizerCombinerHelperState(
      AMDGPUCombinerHelper &Helper,
      AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
      : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
};
3170031418dSPetar Avramovic
318fee41517SMatt Arsenault #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
319fee41517SMatt Arsenault #include "AMDGPUGenPostLegalizeGICombiner.inc"
320fee41517SMatt Arsenault #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
321fee41517SMatt Arsenault
322fee41517SMatt Arsenault namespace {
323fee41517SMatt Arsenault #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
324fee41517SMatt Arsenault #include "AMDGPUGenPostLegalizeGICombiner.inc"
325fee41517SMatt Arsenault #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
326fee41517SMatt Arsenault
// CombinerInfo driving the post-legalizer combine: holds the analyses the
// rules need and validates the rule-selection command-line option up front.
class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT; // Null at -O0 (see runOnMachineFunction).

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    // Fail fast on a malformed rule-disable option rather than silently
    // running with an unexpected rule set.
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};
347fee41517SMatt Arsenault
combine(GISelChangeObserver & Observer,MachineInstr & MI,MachineIRBuilder & B) const348fee41517SMatt Arsenault bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
349fee41517SMatt Arsenault MachineInstr &MI,
350fee41517SMatt Arsenault MachineIRBuilder &B) const {
351db6bc2abSMirko Brkusanin AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo);
3520031418dSPetar Avramovic AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
3530031418dSPetar Avramovic AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
3540031418dSPetar Avramovic PostLegalizerHelper);
355fee41517SMatt Arsenault
3560031418dSPetar Avramovic if (Generated.tryCombineAll(Observer, MI, B))
357fee41517SMatt Arsenault return true;
358fee41517SMatt Arsenault
359fee41517SMatt Arsenault switch (MI.getOpcode()) {
360fee41517SMatt Arsenault case TargetOpcode::G_SHL:
361fee41517SMatt Arsenault case TargetOpcode::G_LSHR:
362fee41517SMatt Arsenault case TargetOpcode::G_ASHR:
363fee41517SMatt Arsenault // On some subtargets, 64-bit shift is a quarter rate instruction. In the
364fee41517SMatt Arsenault // common case, splitting this into a move and a 32-bit shift is faster and
365fee41517SMatt Arsenault // the same code size.
366fee41517SMatt Arsenault return Helper.tryCombineShiftToUnmerge(MI, 32);
367fee41517SMatt Arsenault }
368fee41517SMatt Arsenault
369fee41517SMatt Arsenault return false;
370fee41517SMatt Arsenault }
371fee41517SMatt Arsenault
372fee41517SMatt Arsenault #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
373fee41517SMatt Arsenault #include "AMDGPUGenPostLegalizeGICombiner.inc"
374fee41517SMatt Arsenault #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
375fee41517SMatt Arsenault
376fee41517SMatt Arsenault // Pass boilerplate
377fee41517SMatt Arsenault // ================
378fee41517SMatt Arsenault
// Legacy-pass wrapper that runs the post-legalizer combiner over a machine
// function.
class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
  bool IsOptNone; // True at -O0; skips dominator-tree-dependent combines.
};
395fee41517SMatt Arsenault } // end anonymous namespace
396fee41517SMatt Arsenault
// Declare the analyses this pass uses/preserves. Known-bits is always needed
// (matchUCharToFloat queries it); the dominator tree is only requested when
// optimizing, matching the null MDT handed to the CombinerInfo at -O0.
void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}
409fee41517SMatt Arsenault
// Construct the pass; registers it with the global pass registry so
// INITIALIZE_PASS dependencies resolve.
AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
414fee41517SMatt Arsenault
runOnMachineFunction(MachineFunction & MF)415fee41517SMatt Arsenault bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
416fee41517SMatt Arsenault if (MF.getProperties().hasProperty(
417fee41517SMatt Arsenault MachineFunctionProperties::Property::FailedISel))
418fee41517SMatt Arsenault return false;
419fee41517SMatt Arsenault auto *TPC = &getAnalysis<TargetPassConfig>();
420fee41517SMatt Arsenault const Function &F = MF.getFunction();
421fee41517SMatt Arsenault bool EnableOpt =
422fee41517SMatt Arsenault MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
423fee41517SMatt Arsenault
424fee41517SMatt Arsenault const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
425bcd6c2d2SThomas Symalla const AMDGPULegalizerInfo *LI
426bcd6c2d2SThomas Symalla = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
427fee41517SMatt Arsenault
428fee41517SMatt Arsenault GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
429fee41517SMatt Arsenault MachineDominatorTree *MDT =
430fee41517SMatt Arsenault IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
431fee41517SMatt Arsenault AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
432fee41517SMatt Arsenault F.hasMinSize(), LI, KB, MDT);
433fee41517SMatt Arsenault Combiner C(PCInfo, TPC);
434fee41517SMatt Arsenault return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
435fee41517SMatt Arsenault }
436fee41517SMatt Arsenault
// Pass registration and factory.
char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
// Factory used by the AMDGPU target pass pipeline to create this pass.
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
452