//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;

LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() >
             (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4 : 3) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier in all cases. If there are
  // multiple users, but each one would necessitate using VOP3, there will be
  // a code size increase. Try to avoid increasing code size unless we know it
  // will save on the instruction count.
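  // Count the users that do not already require a VOP3 encoding; each of
  // these may grow from a 32-bit to a 64-bit encoding if it has to carry a
  // source modifier.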
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// 0 and 1.0 / (2.0 * pi) do not have negated inline immediates, so there is an
// additional cost to negate them.
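// The check below matches the unnegated constants: +0.0, and 1/(2*pi) on
// subtargets that provide an inline immediate for it (see isInv2Pi above).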
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  Optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
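    // Refuse the fold if negating the constant operand would require
    // materializing a literal (see isConstantCostlierToNegate).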
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B

  // Replace register in operand with a register holding negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace either register in operands with a register holding negated value.
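  // For multiplicative operations, negating a single operand negates the
  // result, so reuse an existing fneg on either operand when possible and
  // otherwise insert a new fneg on the second operand.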
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };
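
  // Any new fneg instructions are built immediately before MatchInfo, reusing
  // its debug location.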
  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate the appropriate operands so that the resulting value of MatchInfo
  // is negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now has the negated value, so use that instead of the old Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice
    // versa, but replaceRegWith would replace defs as well. It is easier to
    // replace one def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now has the negated value, so use that instead of the old Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for the other uses of the old
    // MatchInfoDst.
    Builder.setInstrAndDebugLoc(MI);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}