1*db6bc2abSMirko Brkusanin //=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
2*db6bc2abSMirko Brkusanin //
3*db6bc2abSMirko Brkusanin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*db6bc2abSMirko Brkusanin // See https://llvm.org/LICENSE.txt for license information.
5*db6bc2abSMirko Brkusanin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*db6bc2abSMirko Brkusanin //
7*db6bc2abSMirko Brkusanin //===----------------------------------------------------------------------===//
8*db6bc2abSMirko Brkusanin 
9*db6bc2abSMirko Brkusanin #include "AMDGPUCombinerHelper.h"
10*db6bc2abSMirko Brkusanin #include "GCNSubtarget.h"
11*db6bc2abSMirko Brkusanin #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12*db6bc2abSMirko Brkusanin #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
13*db6bc2abSMirko Brkusanin #include "llvm/IR/IntrinsicsAMDGPU.h"
14*db6bc2abSMirko Brkusanin #include "llvm/Target/TargetMachine.h"
15*db6bc2abSMirko Brkusanin 
16*db6bc2abSMirko Brkusanin using namespace llvm;
17*db6bc2abSMirko Brkusanin using namespace MIPatternMatch;
18*db6bc2abSMirko Brkusanin 
19*db6bc2abSMirko Brkusanin LLVM_READNONE
20*db6bc2abSMirko Brkusanin static bool fnegFoldsIntoMI(const MachineInstr &MI) {
21*db6bc2abSMirko Brkusanin   switch (MI.getOpcode()) {
22*db6bc2abSMirko Brkusanin   case AMDGPU::G_FADD:
23*db6bc2abSMirko Brkusanin   case AMDGPU::G_FSUB:
24*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMUL:
25*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMA:
26*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAD:
27*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMINNUM:
28*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAXNUM:
29*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMINNUM_IEEE:
30*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAXNUM_IEEE:
31*db6bc2abSMirko Brkusanin   case AMDGPU::G_FSIN:
32*db6bc2abSMirko Brkusanin   case AMDGPU::G_FPEXT:
33*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC_TRUNC:
34*db6bc2abSMirko Brkusanin   case AMDGPU::G_FPTRUNC:
35*db6bc2abSMirko Brkusanin   case AMDGPU::G_FRINT:
36*db6bc2abSMirko Brkusanin   case AMDGPU::G_FNEARBYINT:
37*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC_ROUND:
38*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC_ROUNDEVEN:
39*db6bc2abSMirko Brkusanin   case AMDGPU::G_FCANONICALIZE:
40*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_RCP_IFLAG:
41*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
42*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_FMAX_LEGACY:
43*db6bc2abSMirko Brkusanin     return true;
44*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC: {
45*db6bc2abSMirko Brkusanin     unsigned IntrinsicID = MI.getIntrinsicID();
46*db6bc2abSMirko Brkusanin     switch (IntrinsicID) {
47*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_rcp:
48*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_rcp_legacy:
49*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_sin:
50*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_fmul_legacy:
51*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_fmed3:
52*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_fma_legacy:
53*db6bc2abSMirko Brkusanin       return true;
54*db6bc2abSMirko Brkusanin     default:
55*db6bc2abSMirko Brkusanin       return false;
56*db6bc2abSMirko Brkusanin     }
57*db6bc2abSMirko Brkusanin   }
58*db6bc2abSMirko Brkusanin   default:
59*db6bc2abSMirko Brkusanin     return false;
60*db6bc2abSMirko Brkusanin   }
61*db6bc2abSMirko Brkusanin }
62*db6bc2abSMirko Brkusanin 
63*db6bc2abSMirko Brkusanin /// \p returns true if the operation will definitely need to use a 64-bit
64*db6bc2abSMirko Brkusanin /// encoding, and thus will use a VOP3 encoding regardless of the source
65*db6bc2abSMirko Brkusanin /// modifiers.
66*db6bc2abSMirko Brkusanin LLVM_READONLY
67*db6bc2abSMirko Brkusanin static bool opMustUseVOP3Encoding(const MachineInstr &MI,
68*db6bc2abSMirko Brkusanin                                   const MachineRegisterInfo &MRI) {
69*db6bc2abSMirko Brkusanin   return MI.getNumOperands() >
70*db6bc2abSMirko Brkusanin              (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4 : 3) ||
71*db6bc2abSMirko Brkusanin          MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
72*db6bc2abSMirko Brkusanin }
73*db6bc2abSMirko Brkusanin 
74*db6bc2abSMirko Brkusanin // Most FP instructions support source modifiers.
75*db6bc2abSMirko Brkusanin LLVM_READONLY
76*db6bc2abSMirko Brkusanin static bool hasSourceMods(const MachineInstr &MI) {
77*db6bc2abSMirko Brkusanin   if (!MI.memoperands().empty())
78*db6bc2abSMirko Brkusanin     return false;
79*db6bc2abSMirko Brkusanin 
80*db6bc2abSMirko Brkusanin   switch (MI.getOpcode()) {
81*db6bc2abSMirko Brkusanin   case AMDGPU::COPY:
82*db6bc2abSMirko Brkusanin   case AMDGPU::G_SELECT:
83*db6bc2abSMirko Brkusanin   case AMDGPU::G_FDIV:
84*db6bc2abSMirko Brkusanin   case AMDGPU::G_FREM:
85*db6bc2abSMirko Brkusanin   case TargetOpcode::INLINEASM:
86*db6bc2abSMirko Brkusanin   case TargetOpcode::INLINEASM_BR:
87*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
88*db6bc2abSMirko Brkusanin   case AMDGPU::G_BITCAST:
89*db6bc2abSMirko Brkusanin   case AMDGPU::G_ANYEXT:
90*db6bc2abSMirko Brkusanin   case AMDGPU::G_BUILD_VECTOR:
91*db6bc2abSMirko Brkusanin   case AMDGPU::G_BUILD_VECTOR_TRUNC:
92*db6bc2abSMirko Brkusanin   case AMDGPU::G_PHI:
93*db6bc2abSMirko Brkusanin     return false;
94*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC: {
95*db6bc2abSMirko Brkusanin     unsigned IntrinsicID = MI.getIntrinsicID();
96*db6bc2abSMirko Brkusanin     switch (IntrinsicID) {
97*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_interp_p1:
98*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_interp_p2:
99*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_interp_mov:
100*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_interp_p1_f16:
101*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_interp_p2_f16:
102*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_div_scale:
103*db6bc2abSMirko Brkusanin       return false;
104*db6bc2abSMirko Brkusanin     default:
105*db6bc2abSMirko Brkusanin       return true;
106*db6bc2abSMirko Brkusanin     }
107*db6bc2abSMirko Brkusanin   }
108*db6bc2abSMirko Brkusanin   default:
109*db6bc2abSMirko Brkusanin     return true;
110*db6bc2abSMirko Brkusanin   }
111*db6bc2abSMirko Brkusanin }
112*db6bc2abSMirko Brkusanin 
113*db6bc2abSMirko Brkusanin static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
114*db6bc2abSMirko Brkusanin                                   unsigned CostThreshold = 4) {
115*db6bc2abSMirko Brkusanin   // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
116*db6bc2abSMirko Brkusanin   // it is truly free to use a source modifier in all cases. If there are
117*db6bc2abSMirko Brkusanin   // multiple users but for each one will necessitate using VOP3, there will be
118*db6bc2abSMirko Brkusanin   // a code size increase. Try to avoid increasing code size unless we know it
119*db6bc2abSMirko Brkusanin   // will save on the instruction count.
120*db6bc2abSMirko Brkusanin   unsigned NumMayIncreaseSize = 0;
121*db6bc2abSMirko Brkusanin   Register Dst = MI.getOperand(0).getReg();
122*db6bc2abSMirko Brkusanin   for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
123*db6bc2abSMirko Brkusanin     if (!hasSourceMods(Use))
124*db6bc2abSMirko Brkusanin       return false;
125*db6bc2abSMirko Brkusanin 
126*db6bc2abSMirko Brkusanin     if (!opMustUseVOP3Encoding(Use, MRI)) {
127*db6bc2abSMirko Brkusanin       if (++NumMayIncreaseSize > CostThreshold)
128*db6bc2abSMirko Brkusanin         return false;
129*db6bc2abSMirko Brkusanin     }
130*db6bc2abSMirko Brkusanin   }
131*db6bc2abSMirko Brkusanin   return true;
132*db6bc2abSMirko Brkusanin }
133*db6bc2abSMirko Brkusanin 
134*db6bc2abSMirko Brkusanin static bool mayIgnoreSignedZero(MachineInstr &MI) {
135*db6bc2abSMirko Brkusanin   const TargetOptions &Options = MI.getMF()->getTarget().Options;
136*db6bc2abSMirko Brkusanin   return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
137*db6bc2abSMirko Brkusanin }
138*db6bc2abSMirko Brkusanin 
139*db6bc2abSMirko Brkusanin static bool isInv2Pi(const APFloat &APF) {
140*db6bc2abSMirko Brkusanin   static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
141*db6bc2abSMirko Brkusanin   static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
142*db6bc2abSMirko Brkusanin   static const APFloat KF64(APFloat::IEEEdouble(),
143*db6bc2abSMirko Brkusanin                             APInt(64, 0x3fc45f306dc9c882));
144*db6bc2abSMirko Brkusanin 
145*db6bc2abSMirko Brkusanin   return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
146*db6bc2abSMirko Brkusanin          APF.bitwiseIsEqual(KF64);
147*db6bc2abSMirko Brkusanin }
148*db6bc2abSMirko Brkusanin 
149*db6bc2abSMirko Brkusanin // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an
150*db6bc2abSMirko Brkusanin // additional cost to negate them.
151*db6bc2abSMirko Brkusanin static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
152*db6bc2abSMirko Brkusanin                                        MachineRegisterInfo &MRI) {
153*db6bc2abSMirko Brkusanin   Optional<FPValueAndVReg> FPValReg;
154*db6bc2abSMirko Brkusanin   if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
155*db6bc2abSMirko Brkusanin     if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
156*db6bc2abSMirko Brkusanin       return true;
157*db6bc2abSMirko Brkusanin 
158*db6bc2abSMirko Brkusanin     const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
159*db6bc2abSMirko Brkusanin     if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
160*db6bc2abSMirko Brkusanin       return true;
161*db6bc2abSMirko Brkusanin   }
162*db6bc2abSMirko Brkusanin   return false;
163*db6bc2abSMirko Brkusanin }
164*db6bc2abSMirko Brkusanin 
165*db6bc2abSMirko Brkusanin static unsigned inverseMinMax(unsigned Opc) {
166*db6bc2abSMirko Brkusanin   switch (Opc) {
167*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAXNUM:
168*db6bc2abSMirko Brkusanin     return AMDGPU::G_FMINNUM;
169*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMINNUM:
170*db6bc2abSMirko Brkusanin     return AMDGPU::G_FMAXNUM;
171*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAXNUM_IEEE:
172*db6bc2abSMirko Brkusanin     return AMDGPU::G_FMINNUM_IEEE;
173*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMINNUM_IEEE:
174*db6bc2abSMirko Brkusanin     return AMDGPU::G_FMAXNUM_IEEE;
175*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_FMAX_LEGACY:
176*db6bc2abSMirko Brkusanin     return AMDGPU::G_AMDGPU_FMIN_LEGACY;
177*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
178*db6bc2abSMirko Brkusanin     return AMDGPU::G_AMDGPU_FMAX_LEGACY;
179*db6bc2abSMirko Brkusanin   default:
180*db6bc2abSMirko Brkusanin     llvm_unreachable("invalid min/max opcode");
181*db6bc2abSMirko Brkusanin   }
182*db6bc2abSMirko Brkusanin }
183*db6bc2abSMirko Brkusanin 
/// Match a G_FNEG (\p MI) whose negation can be folded into the instruction
/// defining its source. On success, \p MatchInfo points at that defining
/// instruction; applyFoldableFneg performs the rewrite.
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    // Single use of Src (this fneg): skip the fold when all of the fneg's own
    // users can absorb the negation as a free source modifier instead
    // (threshold 0: no code-size growth allowed).
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    // Multiple uses of Src: bail out when the negate is known foldable but
    // either the fneg's users could take modifiers themselves, or some other
    // user of Src cannot.
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  // Per-opcode legality/profitability of folding the fneg into MatchInfo.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    // -(a + b) == -a + -b only holds when signed zeros may be ignored.
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    // These commute with / distribute over negation unconditionally.
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      // Same signed-zero caveat as G_FMA above.
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
249*db6bc2abSMirko Brkusanin 
250*db6bc2abSMirko Brkusanin void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
251*db6bc2abSMirko Brkusanin                                              MachineInstr *&MatchInfo) {
252*db6bc2abSMirko Brkusanin   // Transform:
253*db6bc2abSMirko Brkusanin   // %A = inst %Op1, ...
254*db6bc2abSMirko Brkusanin   // %B = fneg %A
255*db6bc2abSMirko Brkusanin   //
256*db6bc2abSMirko Brkusanin   // into:
257*db6bc2abSMirko Brkusanin   //
258*db6bc2abSMirko Brkusanin   // (if %A has one use, specifically fneg above)
259*db6bc2abSMirko Brkusanin   // %B = inst (maybe fneg %Op1), ...
260*db6bc2abSMirko Brkusanin   //
261*db6bc2abSMirko Brkusanin   // (if %A has multiple uses)
262*db6bc2abSMirko Brkusanin   // %B = inst (maybe fneg %Op1), ...
263*db6bc2abSMirko Brkusanin   // %A = fneg %B
264*db6bc2abSMirko Brkusanin 
265*db6bc2abSMirko Brkusanin   // Replace register in operand with a register holding negated value.
266*db6bc2abSMirko Brkusanin   auto NegateOperand = [&](MachineOperand &Op) {
267*db6bc2abSMirko Brkusanin     Register Reg = Op.getReg();
268*db6bc2abSMirko Brkusanin     if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
269*db6bc2abSMirko Brkusanin       Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
270*db6bc2abSMirko Brkusanin     replaceRegOpWith(MRI, Op, Reg);
271*db6bc2abSMirko Brkusanin   };
272*db6bc2abSMirko Brkusanin 
273*db6bc2abSMirko Brkusanin   // Replace either register in operands with a register holding negated value.
274*db6bc2abSMirko Brkusanin   auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
275*db6bc2abSMirko Brkusanin     Register XReg = X.getReg();
276*db6bc2abSMirko Brkusanin     Register YReg = Y.getReg();
277*db6bc2abSMirko Brkusanin     if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
278*db6bc2abSMirko Brkusanin       replaceRegOpWith(MRI, X, XReg);
279*db6bc2abSMirko Brkusanin     else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
280*db6bc2abSMirko Brkusanin       replaceRegOpWith(MRI, Y, YReg);
281*db6bc2abSMirko Brkusanin     else {
282*db6bc2abSMirko Brkusanin       YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
283*db6bc2abSMirko Brkusanin       replaceRegOpWith(MRI, Y, YReg);
284*db6bc2abSMirko Brkusanin     }
285*db6bc2abSMirko Brkusanin   };
286*db6bc2abSMirko Brkusanin 
287*db6bc2abSMirko Brkusanin   Builder.setInstrAndDebugLoc(*MatchInfo);
288*db6bc2abSMirko Brkusanin 
289*db6bc2abSMirko Brkusanin   // Negate appropriate operands so that resulting value of MatchInfo is
290*db6bc2abSMirko Brkusanin   // negated.
291*db6bc2abSMirko Brkusanin   switch (MatchInfo->getOpcode()) {
292*db6bc2abSMirko Brkusanin   case AMDGPU::G_FADD:
293*db6bc2abSMirko Brkusanin   case AMDGPU::G_FSUB:
294*db6bc2abSMirko Brkusanin     NegateOperand(MatchInfo->getOperand(1));
295*db6bc2abSMirko Brkusanin     NegateOperand(MatchInfo->getOperand(2));
296*db6bc2abSMirko Brkusanin     break;
297*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMUL:
298*db6bc2abSMirko Brkusanin     NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
299*db6bc2abSMirko Brkusanin     break;
300*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMINNUM:
301*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAXNUM:
302*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMINNUM_IEEE:
303*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAXNUM_IEEE:
304*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
305*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
306*db6bc2abSMirko Brkusanin     NegateOperand(MatchInfo->getOperand(1));
307*db6bc2abSMirko Brkusanin     NegateOperand(MatchInfo->getOperand(2));
308*db6bc2abSMirko Brkusanin     unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
309*db6bc2abSMirko Brkusanin     replaceOpcodeWith(*MatchInfo, Opposite);
310*db6bc2abSMirko Brkusanin     break;
311*db6bc2abSMirko Brkusanin   }
312*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMA:
313*db6bc2abSMirko Brkusanin   case AMDGPU::G_FMAD:
314*db6bc2abSMirko Brkusanin     NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
315*db6bc2abSMirko Brkusanin     NegateOperand(MatchInfo->getOperand(3));
316*db6bc2abSMirko Brkusanin     break;
317*db6bc2abSMirko Brkusanin   case AMDGPU::G_FPEXT:
318*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC_TRUNC:
319*db6bc2abSMirko Brkusanin   case AMDGPU::G_FRINT:
320*db6bc2abSMirko Brkusanin   case AMDGPU::G_FNEARBYINT:
321*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC_ROUND:
322*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC_ROUNDEVEN:
323*db6bc2abSMirko Brkusanin   case AMDGPU::G_FSIN:
324*db6bc2abSMirko Brkusanin   case AMDGPU::G_FCANONICALIZE:
325*db6bc2abSMirko Brkusanin   case AMDGPU::G_AMDGPU_RCP_IFLAG:
326*db6bc2abSMirko Brkusanin   case AMDGPU::G_FPTRUNC:
327*db6bc2abSMirko Brkusanin     NegateOperand(MatchInfo->getOperand(1));
328*db6bc2abSMirko Brkusanin     break;
329*db6bc2abSMirko Brkusanin   case AMDGPU::G_INTRINSIC: {
330*db6bc2abSMirko Brkusanin     unsigned IntrinsicID = MatchInfo->getIntrinsicID();
331*db6bc2abSMirko Brkusanin     switch (IntrinsicID) {
332*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_rcp:
333*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_rcp_legacy:
334*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_sin:
335*db6bc2abSMirko Brkusanin       NegateOperand(MatchInfo->getOperand(2));
336*db6bc2abSMirko Brkusanin       break;
337*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_fmul_legacy:
338*db6bc2abSMirko Brkusanin       NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
339*db6bc2abSMirko Brkusanin       break;
340*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_fmed3:
341*db6bc2abSMirko Brkusanin       NegateOperand(MatchInfo->getOperand(2));
342*db6bc2abSMirko Brkusanin       NegateOperand(MatchInfo->getOperand(3));
343*db6bc2abSMirko Brkusanin       NegateOperand(MatchInfo->getOperand(4));
344*db6bc2abSMirko Brkusanin       break;
345*db6bc2abSMirko Brkusanin     case Intrinsic::amdgcn_fma_legacy:
346*db6bc2abSMirko Brkusanin       NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
347*db6bc2abSMirko Brkusanin       NegateOperand(MatchInfo->getOperand(4));
348*db6bc2abSMirko Brkusanin       break;
349*db6bc2abSMirko Brkusanin     default:
350*db6bc2abSMirko Brkusanin       llvm_unreachable("folding fneg not supported for this intrinsic");
351*db6bc2abSMirko Brkusanin     }
352*db6bc2abSMirko Brkusanin     break;
353*db6bc2abSMirko Brkusanin   }
354*db6bc2abSMirko Brkusanin   default:
355*db6bc2abSMirko Brkusanin     llvm_unreachable("folding fneg not supported for this instruction");
356*db6bc2abSMirko Brkusanin   }
357*db6bc2abSMirko Brkusanin 
358*db6bc2abSMirko Brkusanin   Register Dst = MI.getOperand(0).getReg();
359*db6bc2abSMirko Brkusanin   Register MatchInfoDst = MatchInfo->getOperand(0).getReg();
360*db6bc2abSMirko Brkusanin 
361*db6bc2abSMirko Brkusanin   if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
362*db6bc2abSMirko Brkusanin     // MatchInfo now has negated value so use that instead of old Dst.
363*db6bc2abSMirko Brkusanin     replaceRegWith(MRI, Dst, MatchInfoDst);
364*db6bc2abSMirko Brkusanin   } else {
365*db6bc2abSMirko Brkusanin     // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
366*db6bc2abSMirko Brkusanin     // but replaceRegWith will replace defs as well. It is easier to replace one
367*db6bc2abSMirko Brkusanin     // def with a new register.
368*db6bc2abSMirko Brkusanin     LLT Type = MRI.getType(Dst);
369*db6bc2abSMirko Brkusanin     Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
370*db6bc2abSMirko Brkusanin     replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
371*db6bc2abSMirko Brkusanin 
372*db6bc2abSMirko Brkusanin     // MatchInfo now has negated value so use that instead of old Dst.
373*db6bc2abSMirko Brkusanin     replaceRegWith(MRI, Dst, NegatedMatchInfo);
374*db6bc2abSMirko Brkusanin 
375*db6bc2abSMirko Brkusanin     // Recreate non negated value for other uses of old MatchInfoDst
376*db6bc2abSMirko Brkusanin     Builder.setInstrAndDebugLoc(MI);
377*db6bc2abSMirko Brkusanin     Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
378*db6bc2abSMirko Brkusanin   }
379*db6bc2abSMirko Brkusanin 
380*db6bc2abSMirko Brkusanin   MI.eraseFromParent();
381*db6bc2abSMirko Brkusanin   return;
382*db6bc2abSMirko Brkusanin }
383