//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8db6bc2abSMirko Brkusanin
9db6bc2abSMirko Brkusanin #include "AMDGPUCombinerHelper.h"
10db6bc2abSMirko Brkusanin #include "GCNSubtarget.h"
11db6bc2abSMirko Brkusanin #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12db6bc2abSMirko Brkusanin #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
13db6bc2abSMirko Brkusanin #include "llvm/IR/IntrinsicsAMDGPU.h"
14db6bc2abSMirko Brkusanin #include "llvm/Target/TargetMachine.h"
15db6bc2abSMirko Brkusanin
16db6bc2abSMirko Brkusanin using namespace llvm;
17db6bc2abSMirko Brkusanin using namespace MIPatternMatch;
18db6bc2abSMirko Brkusanin
19db6bc2abSMirko Brkusanin LLVM_READNONE
fnegFoldsIntoMI(const MachineInstr & MI)20db6bc2abSMirko Brkusanin static bool fnegFoldsIntoMI(const MachineInstr &MI) {
21db6bc2abSMirko Brkusanin switch (MI.getOpcode()) {
22db6bc2abSMirko Brkusanin case AMDGPU::G_FADD:
23db6bc2abSMirko Brkusanin case AMDGPU::G_FSUB:
24db6bc2abSMirko Brkusanin case AMDGPU::G_FMUL:
25db6bc2abSMirko Brkusanin case AMDGPU::G_FMA:
26db6bc2abSMirko Brkusanin case AMDGPU::G_FMAD:
27db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM:
28db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM:
29db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM_IEEE:
30db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM_IEEE:
31db6bc2abSMirko Brkusanin case AMDGPU::G_FSIN:
32db6bc2abSMirko Brkusanin case AMDGPU::G_FPEXT:
33db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_TRUNC:
34db6bc2abSMirko Brkusanin case AMDGPU::G_FPTRUNC:
35db6bc2abSMirko Brkusanin case AMDGPU::G_FRINT:
36db6bc2abSMirko Brkusanin case AMDGPU::G_FNEARBYINT:
37db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_ROUND:
38db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_ROUNDEVEN:
39db6bc2abSMirko Brkusanin case AMDGPU::G_FCANONICALIZE:
40db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_RCP_IFLAG:
41db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMIN_LEGACY:
42db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMAX_LEGACY:
43db6bc2abSMirko Brkusanin return true;
44db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC: {
45db6bc2abSMirko Brkusanin unsigned IntrinsicID = MI.getIntrinsicID();
46db6bc2abSMirko Brkusanin switch (IntrinsicID) {
47db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_rcp:
48db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_rcp_legacy:
49db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_sin:
50db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fmul_legacy:
51db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fmed3:
52db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fma_legacy:
53db6bc2abSMirko Brkusanin return true;
54db6bc2abSMirko Brkusanin default:
55db6bc2abSMirko Brkusanin return false;
56db6bc2abSMirko Brkusanin }
57db6bc2abSMirko Brkusanin }
58db6bc2abSMirko Brkusanin default:
59db6bc2abSMirko Brkusanin return false;
60db6bc2abSMirko Brkusanin }
61db6bc2abSMirko Brkusanin }
62db6bc2abSMirko Brkusanin
63db6bc2abSMirko Brkusanin /// \p returns true if the operation will definitely need to use a 64-bit
64db6bc2abSMirko Brkusanin /// encoding, and thus will use a VOP3 encoding regardless of the source
65db6bc2abSMirko Brkusanin /// modifiers.
66db6bc2abSMirko Brkusanin LLVM_READONLY
opMustUseVOP3Encoding(const MachineInstr & MI,const MachineRegisterInfo & MRI)67db6bc2abSMirko Brkusanin static bool opMustUseVOP3Encoding(const MachineInstr &MI,
68db6bc2abSMirko Brkusanin const MachineRegisterInfo &MRI) {
69db6bc2abSMirko Brkusanin return MI.getNumOperands() >
703020608bSSimon Pilgrim (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
71db6bc2abSMirko Brkusanin MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
72db6bc2abSMirko Brkusanin }
73db6bc2abSMirko Brkusanin
74db6bc2abSMirko Brkusanin // Most FP instructions support source modifiers.
75db6bc2abSMirko Brkusanin LLVM_READONLY
hasSourceMods(const MachineInstr & MI)76db6bc2abSMirko Brkusanin static bool hasSourceMods(const MachineInstr &MI) {
77db6bc2abSMirko Brkusanin if (!MI.memoperands().empty())
78db6bc2abSMirko Brkusanin return false;
79db6bc2abSMirko Brkusanin
80db6bc2abSMirko Brkusanin switch (MI.getOpcode()) {
81db6bc2abSMirko Brkusanin case AMDGPU::COPY:
82db6bc2abSMirko Brkusanin case AMDGPU::G_SELECT:
83db6bc2abSMirko Brkusanin case AMDGPU::G_FDIV:
84db6bc2abSMirko Brkusanin case AMDGPU::G_FREM:
85db6bc2abSMirko Brkusanin case TargetOpcode::INLINEASM:
86db6bc2abSMirko Brkusanin case TargetOpcode::INLINEASM_BR:
87db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
88db6bc2abSMirko Brkusanin case AMDGPU::G_BITCAST:
89db6bc2abSMirko Brkusanin case AMDGPU::G_ANYEXT:
90db6bc2abSMirko Brkusanin case AMDGPU::G_BUILD_VECTOR:
91db6bc2abSMirko Brkusanin case AMDGPU::G_BUILD_VECTOR_TRUNC:
92db6bc2abSMirko Brkusanin case AMDGPU::G_PHI:
93db6bc2abSMirko Brkusanin return false;
94db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC: {
95db6bc2abSMirko Brkusanin unsigned IntrinsicID = MI.getIntrinsicID();
96db6bc2abSMirko Brkusanin switch (IntrinsicID) {
97db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_interp_p1:
98db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_interp_p2:
99db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_interp_mov:
100db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_interp_p1_f16:
101db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_interp_p2_f16:
102db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_div_scale:
103db6bc2abSMirko Brkusanin return false;
104db6bc2abSMirko Brkusanin default:
105db6bc2abSMirko Brkusanin return true;
106db6bc2abSMirko Brkusanin }
107db6bc2abSMirko Brkusanin }
108db6bc2abSMirko Brkusanin default:
109db6bc2abSMirko Brkusanin return true;
110db6bc2abSMirko Brkusanin }
111db6bc2abSMirko Brkusanin }
112db6bc2abSMirko Brkusanin
allUsesHaveSourceMods(MachineInstr & MI,MachineRegisterInfo & MRI,unsigned CostThreshold=4)113db6bc2abSMirko Brkusanin static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
114db6bc2abSMirko Brkusanin unsigned CostThreshold = 4) {
115db6bc2abSMirko Brkusanin // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
116db6bc2abSMirko Brkusanin // it is truly free to use a source modifier in all cases. If there are
117db6bc2abSMirko Brkusanin // multiple users but for each one will necessitate using VOP3, there will be
118db6bc2abSMirko Brkusanin // a code size increase. Try to avoid increasing code size unless we know it
119db6bc2abSMirko Brkusanin // will save on the instruction count.
120db6bc2abSMirko Brkusanin unsigned NumMayIncreaseSize = 0;
121db6bc2abSMirko Brkusanin Register Dst = MI.getOperand(0).getReg();
122db6bc2abSMirko Brkusanin for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
123db6bc2abSMirko Brkusanin if (!hasSourceMods(Use))
124db6bc2abSMirko Brkusanin return false;
125db6bc2abSMirko Brkusanin
126db6bc2abSMirko Brkusanin if (!opMustUseVOP3Encoding(Use, MRI)) {
127db6bc2abSMirko Brkusanin if (++NumMayIncreaseSize > CostThreshold)
128db6bc2abSMirko Brkusanin return false;
129db6bc2abSMirko Brkusanin }
130db6bc2abSMirko Brkusanin }
131db6bc2abSMirko Brkusanin return true;
132db6bc2abSMirko Brkusanin }
133db6bc2abSMirko Brkusanin
mayIgnoreSignedZero(MachineInstr & MI)134db6bc2abSMirko Brkusanin static bool mayIgnoreSignedZero(MachineInstr &MI) {
135db6bc2abSMirko Brkusanin const TargetOptions &Options = MI.getMF()->getTarget().Options;
136db6bc2abSMirko Brkusanin return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
137db6bc2abSMirko Brkusanin }
138db6bc2abSMirko Brkusanin
isInv2Pi(const APFloat & APF)139db6bc2abSMirko Brkusanin static bool isInv2Pi(const APFloat &APF) {
140db6bc2abSMirko Brkusanin static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
141db6bc2abSMirko Brkusanin static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
142db6bc2abSMirko Brkusanin static const APFloat KF64(APFloat::IEEEdouble(),
143db6bc2abSMirko Brkusanin APInt(64, 0x3fc45f306dc9c882));
144db6bc2abSMirko Brkusanin
145db6bc2abSMirko Brkusanin return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
146db6bc2abSMirko Brkusanin APF.bitwiseIsEqual(KF64);
147db6bc2abSMirko Brkusanin }
148db6bc2abSMirko Brkusanin
149db6bc2abSMirko Brkusanin // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an
150db6bc2abSMirko Brkusanin // additional cost to negate them.
isConstantCostlierToNegate(MachineInstr & MI,Register Reg,MachineRegisterInfo & MRI)151db6bc2abSMirko Brkusanin static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
152db6bc2abSMirko Brkusanin MachineRegisterInfo &MRI) {
153db6bc2abSMirko Brkusanin Optional<FPValueAndVReg> FPValReg;
154db6bc2abSMirko Brkusanin if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
155db6bc2abSMirko Brkusanin if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
156db6bc2abSMirko Brkusanin return true;
157db6bc2abSMirko Brkusanin
158db6bc2abSMirko Brkusanin const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
159db6bc2abSMirko Brkusanin if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
160db6bc2abSMirko Brkusanin return true;
161db6bc2abSMirko Brkusanin }
162db6bc2abSMirko Brkusanin return false;
163db6bc2abSMirko Brkusanin }
164db6bc2abSMirko Brkusanin
inverseMinMax(unsigned Opc)165db6bc2abSMirko Brkusanin static unsigned inverseMinMax(unsigned Opc) {
166db6bc2abSMirko Brkusanin switch (Opc) {
167db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM:
168db6bc2abSMirko Brkusanin return AMDGPU::G_FMINNUM;
169db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM:
170db6bc2abSMirko Brkusanin return AMDGPU::G_FMAXNUM;
171db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM_IEEE:
172db6bc2abSMirko Brkusanin return AMDGPU::G_FMINNUM_IEEE;
173db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM_IEEE:
174db6bc2abSMirko Brkusanin return AMDGPU::G_FMAXNUM_IEEE;
175db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMAX_LEGACY:
176db6bc2abSMirko Brkusanin return AMDGPU::G_AMDGPU_FMIN_LEGACY;
177db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMIN_LEGACY:
178db6bc2abSMirko Brkusanin return AMDGPU::G_AMDGPU_FMAX_LEGACY;
179db6bc2abSMirko Brkusanin default:
180db6bc2abSMirko Brkusanin llvm_unreachable("invalid min/max opcode");
181db6bc2abSMirko Brkusanin }
182db6bc2abSMirko Brkusanin }
183db6bc2abSMirko Brkusanin
matchFoldableFneg(MachineInstr & MI,MachineInstr * & MatchInfo)184db6bc2abSMirko Brkusanin bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
185db6bc2abSMirko Brkusanin MachineInstr *&MatchInfo) {
186db6bc2abSMirko Brkusanin Register Src = MI.getOperand(1).getReg();
187db6bc2abSMirko Brkusanin MatchInfo = MRI.getVRegDef(Src);
188db6bc2abSMirko Brkusanin
189db6bc2abSMirko Brkusanin // If the input has multiple uses and we can either fold the negate down, or
190db6bc2abSMirko Brkusanin // the other uses cannot, give up. This both prevents unprofitable
191db6bc2abSMirko Brkusanin // transformations and infinite loops: we won't repeatedly try to fold around
192db6bc2abSMirko Brkusanin // a negate that has no 'good' form.
193db6bc2abSMirko Brkusanin if (MRI.hasOneNonDBGUse(Src)) {
194db6bc2abSMirko Brkusanin if (allUsesHaveSourceMods(MI, MRI, 0))
195db6bc2abSMirko Brkusanin return false;
196db6bc2abSMirko Brkusanin } else {
197db6bc2abSMirko Brkusanin if (fnegFoldsIntoMI(*MatchInfo) &&
198db6bc2abSMirko Brkusanin (allUsesHaveSourceMods(MI, MRI) ||
199db6bc2abSMirko Brkusanin !allUsesHaveSourceMods(*MatchInfo, MRI)))
200db6bc2abSMirko Brkusanin return false;
201db6bc2abSMirko Brkusanin }
202db6bc2abSMirko Brkusanin
203db6bc2abSMirko Brkusanin switch (MatchInfo->getOpcode()) {
204db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM:
205db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM:
206db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM_IEEE:
207db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM_IEEE:
208db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMIN_LEGACY:
209db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMAX_LEGACY:
210db6bc2abSMirko Brkusanin // 0 doesn't have a negated inline immediate.
211db6bc2abSMirko Brkusanin return !isConstantCostlierToNegate(*MatchInfo,
212db6bc2abSMirko Brkusanin MatchInfo->getOperand(2).getReg(), MRI);
213db6bc2abSMirko Brkusanin case AMDGPU::G_FADD:
214db6bc2abSMirko Brkusanin case AMDGPU::G_FSUB:
215db6bc2abSMirko Brkusanin case AMDGPU::G_FMA:
216db6bc2abSMirko Brkusanin case AMDGPU::G_FMAD:
217db6bc2abSMirko Brkusanin return mayIgnoreSignedZero(*MatchInfo);
218db6bc2abSMirko Brkusanin case AMDGPU::G_FMUL:
219db6bc2abSMirko Brkusanin case AMDGPU::G_FPEXT:
220db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_TRUNC:
221db6bc2abSMirko Brkusanin case AMDGPU::G_FPTRUNC:
222db6bc2abSMirko Brkusanin case AMDGPU::G_FRINT:
223db6bc2abSMirko Brkusanin case AMDGPU::G_FNEARBYINT:
224db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_ROUND:
225db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_ROUNDEVEN:
226db6bc2abSMirko Brkusanin case AMDGPU::G_FSIN:
227db6bc2abSMirko Brkusanin case AMDGPU::G_FCANONICALIZE:
228db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_RCP_IFLAG:
229db6bc2abSMirko Brkusanin return true;
230db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC: {
231db6bc2abSMirko Brkusanin unsigned IntrinsicID = MatchInfo->getIntrinsicID();
232db6bc2abSMirko Brkusanin switch (IntrinsicID) {
233db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_rcp:
234db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_rcp_legacy:
235db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_sin:
236db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fmul_legacy:
237db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fmed3:
238db6bc2abSMirko Brkusanin return true;
239db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fma_legacy:
240db6bc2abSMirko Brkusanin return mayIgnoreSignedZero(*MatchInfo);
241db6bc2abSMirko Brkusanin default:
242db6bc2abSMirko Brkusanin return false;
243db6bc2abSMirko Brkusanin }
244db6bc2abSMirko Brkusanin }
245db6bc2abSMirko Brkusanin default:
246db6bc2abSMirko Brkusanin return false;
247db6bc2abSMirko Brkusanin }
248db6bc2abSMirko Brkusanin }
249db6bc2abSMirko Brkusanin
applyFoldableFneg(MachineInstr & MI,MachineInstr * & MatchInfo)250db6bc2abSMirko Brkusanin void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
251db6bc2abSMirko Brkusanin MachineInstr *&MatchInfo) {
252db6bc2abSMirko Brkusanin // Transform:
253db6bc2abSMirko Brkusanin // %A = inst %Op1, ...
254db6bc2abSMirko Brkusanin // %B = fneg %A
255db6bc2abSMirko Brkusanin //
256db6bc2abSMirko Brkusanin // into:
257db6bc2abSMirko Brkusanin //
258db6bc2abSMirko Brkusanin // (if %A has one use, specifically fneg above)
259db6bc2abSMirko Brkusanin // %B = inst (maybe fneg %Op1), ...
260db6bc2abSMirko Brkusanin //
261db6bc2abSMirko Brkusanin // (if %A has multiple uses)
262db6bc2abSMirko Brkusanin // %B = inst (maybe fneg %Op1), ...
263db6bc2abSMirko Brkusanin // %A = fneg %B
264db6bc2abSMirko Brkusanin
265db6bc2abSMirko Brkusanin // Replace register in operand with a register holding negated value.
266db6bc2abSMirko Brkusanin auto NegateOperand = [&](MachineOperand &Op) {
267db6bc2abSMirko Brkusanin Register Reg = Op.getReg();
268db6bc2abSMirko Brkusanin if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
269db6bc2abSMirko Brkusanin Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
270db6bc2abSMirko Brkusanin replaceRegOpWith(MRI, Op, Reg);
271db6bc2abSMirko Brkusanin };
272db6bc2abSMirko Brkusanin
273db6bc2abSMirko Brkusanin // Replace either register in operands with a register holding negated value.
274db6bc2abSMirko Brkusanin auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
275db6bc2abSMirko Brkusanin Register XReg = X.getReg();
276db6bc2abSMirko Brkusanin Register YReg = Y.getReg();
277db6bc2abSMirko Brkusanin if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
278db6bc2abSMirko Brkusanin replaceRegOpWith(MRI, X, XReg);
279db6bc2abSMirko Brkusanin else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
280db6bc2abSMirko Brkusanin replaceRegOpWith(MRI, Y, YReg);
281db6bc2abSMirko Brkusanin else {
282db6bc2abSMirko Brkusanin YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
283db6bc2abSMirko Brkusanin replaceRegOpWith(MRI, Y, YReg);
284db6bc2abSMirko Brkusanin }
285db6bc2abSMirko Brkusanin };
286db6bc2abSMirko Brkusanin
287db6bc2abSMirko Brkusanin Builder.setInstrAndDebugLoc(*MatchInfo);
288db6bc2abSMirko Brkusanin
289db6bc2abSMirko Brkusanin // Negate appropriate operands so that resulting value of MatchInfo is
290db6bc2abSMirko Brkusanin // negated.
291db6bc2abSMirko Brkusanin switch (MatchInfo->getOpcode()) {
292db6bc2abSMirko Brkusanin case AMDGPU::G_FADD:
293db6bc2abSMirko Brkusanin case AMDGPU::G_FSUB:
294db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(1));
295db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(2));
296db6bc2abSMirko Brkusanin break;
297db6bc2abSMirko Brkusanin case AMDGPU::G_FMUL:
298db6bc2abSMirko Brkusanin NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
299db6bc2abSMirko Brkusanin break;
300db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM:
301db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM:
302db6bc2abSMirko Brkusanin case AMDGPU::G_FMINNUM_IEEE:
303db6bc2abSMirko Brkusanin case AMDGPU::G_FMAXNUM_IEEE:
304db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMIN_LEGACY:
305db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
306db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(1));
307db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(2));
308db6bc2abSMirko Brkusanin unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
309db6bc2abSMirko Brkusanin replaceOpcodeWith(*MatchInfo, Opposite);
310db6bc2abSMirko Brkusanin break;
311db6bc2abSMirko Brkusanin }
312db6bc2abSMirko Brkusanin case AMDGPU::G_FMA:
313db6bc2abSMirko Brkusanin case AMDGPU::G_FMAD:
314db6bc2abSMirko Brkusanin NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
315db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(3));
316db6bc2abSMirko Brkusanin break;
317db6bc2abSMirko Brkusanin case AMDGPU::G_FPEXT:
318db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_TRUNC:
319db6bc2abSMirko Brkusanin case AMDGPU::G_FRINT:
320db6bc2abSMirko Brkusanin case AMDGPU::G_FNEARBYINT:
321db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_ROUND:
322db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC_ROUNDEVEN:
323db6bc2abSMirko Brkusanin case AMDGPU::G_FSIN:
324db6bc2abSMirko Brkusanin case AMDGPU::G_FCANONICALIZE:
325db6bc2abSMirko Brkusanin case AMDGPU::G_AMDGPU_RCP_IFLAG:
326db6bc2abSMirko Brkusanin case AMDGPU::G_FPTRUNC:
327db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(1));
328db6bc2abSMirko Brkusanin break;
329db6bc2abSMirko Brkusanin case AMDGPU::G_INTRINSIC: {
330db6bc2abSMirko Brkusanin unsigned IntrinsicID = MatchInfo->getIntrinsicID();
331db6bc2abSMirko Brkusanin switch (IntrinsicID) {
332db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_rcp:
333db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_rcp_legacy:
334db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_sin:
335db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(2));
336db6bc2abSMirko Brkusanin break;
337db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fmul_legacy:
338db6bc2abSMirko Brkusanin NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
339db6bc2abSMirko Brkusanin break;
340db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fmed3:
341db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(2));
342db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(3));
343db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(4));
344db6bc2abSMirko Brkusanin break;
345db6bc2abSMirko Brkusanin case Intrinsic::amdgcn_fma_legacy:
346db6bc2abSMirko Brkusanin NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
347db6bc2abSMirko Brkusanin NegateOperand(MatchInfo->getOperand(4));
348db6bc2abSMirko Brkusanin break;
349db6bc2abSMirko Brkusanin default:
350db6bc2abSMirko Brkusanin llvm_unreachable("folding fneg not supported for this intrinsic");
351db6bc2abSMirko Brkusanin }
352db6bc2abSMirko Brkusanin break;
353db6bc2abSMirko Brkusanin }
354db6bc2abSMirko Brkusanin default:
355db6bc2abSMirko Brkusanin llvm_unreachable("folding fneg not supported for this instruction");
356db6bc2abSMirko Brkusanin }
357db6bc2abSMirko Brkusanin
358db6bc2abSMirko Brkusanin Register Dst = MI.getOperand(0).getReg();
359db6bc2abSMirko Brkusanin Register MatchInfoDst = MatchInfo->getOperand(0).getReg();
360db6bc2abSMirko Brkusanin
361db6bc2abSMirko Brkusanin if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
362db6bc2abSMirko Brkusanin // MatchInfo now has negated value so use that instead of old Dst.
363db6bc2abSMirko Brkusanin replaceRegWith(MRI, Dst, MatchInfoDst);
364db6bc2abSMirko Brkusanin } else {
365db6bc2abSMirko Brkusanin // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
366db6bc2abSMirko Brkusanin // but replaceRegWith will replace defs as well. It is easier to replace one
367db6bc2abSMirko Brkusanin // def with a new register.
368db6bc2abSMirko Brkusanin LLT Type = MRI.getType(Dst);
369db6bc2abSMirko Brkusanin Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
370db6bc2abSMirko Brkusanin replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
371db6bc2abSMirko Brkusanin
372db6bc2abSMirko Brkusanin // MatchInfo now has negated value so use that instead of old Dst.
373db6bc2abSMirko Brkusanin replaceRegWith(MRI, Dst, NegatedMatchInfo);
374db6bc2abSMirko Brkusanin
375db6bc2abSMirko Brkusanin // Recreate non negated value for other uses of old MatchInfoDst
376*5ff35ba8SMirko Brkusanin auto NextInst = ++MatchInfo->getIterator();
377*5ff35ba8SMirko Brkusanin Builder.setInstrAndDebugLoc(*NextInst);
378db6bc2abSMirko Brkusanin Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
379db6bc2abSMirko Brkusanin }
380db6bc2abSMirko Brkusanin
381db6bc2abSMirko Brkusanin MI.eraseFromParent();
382db6bc2abSMirko Brkusanin }
383