//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. Each operand field is emitted only when the profile
// says the operand exists (HasSrc0 / HasSrc1 / EmitDst); otherwise the field
// is encoded as zero.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; //encoding
}

// 64-bit variant of the VOP2 encoding: the low 32 bits are laid out exactly
// like VOP2e and bits 63-32 carry the 32-bit 'imm' operand.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// SDWA form of the VOP2 encoding: the src0 field (bits 8-0) is fixed to 0xf9;
// src1, vdst and the opcode occupy the same bit positions as in VOP2e.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// Codegen-only pseudo instruction for the 32-bit form of a VOP2 operation.
// It is built from the 32-bit operand lists of profile P and carries no asm
// string of its own; Mnemonic, AsmOperands and Pfl are stored so that the
// real (encoded) instruction definitions can read them back from the pseudo.
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  InstSI <P.Outs32, P.Ins32, "", pattern>,
  VOP <opName>,
  SIMCInstr <opName#suffix, SIEncodingFamily.NONE>,
  MnemonicAlias<opName#suffix, opName> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let UseNamedOperandTable = 1;

  string Mnemonic = opName;
  string AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let SubtargetPredicate = isGCN;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = [EXEC];

  let AsmVariantName = AMDGPUAsmVariants.Default;

  VOPProfile Pfl = P;
}

// Real (encodable) counterpart of a VOP2_Pseudo for one encoding family.
// Operand lists and the asm string (Mnemonic # AsmOperands) are taken from the
// pseudo 'ps'; the fields below are copied from it as well.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding are assigned once here; an earlier
  // duplicate pair of identical assignments was redundant.)
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let AsmVariantName     = ps.AsmVariantName;
  let Constraints        = ps.Constraints;
  let DisableEncoding    = ps.DisableEncoding;
  let TSFlags            = ps.TSFlags;
}

// SDWA pseudo for VOP2 operations; only selects the VOP2-specific asm match
// converter on top of VOP_SDWA_Pseudo.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// Builds the selection pattern list used for the _e64 form of a VOP2
// operation. With P.HasModifiers the sources are matched through
// VOP3Mods0 / VOP3Mods; otherwise the plain two-operand node is matched.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines the _e32, _e64 and _sdwa pseudos for one VOP2 operation.
// 'revOp' names the operation with swapped operands; it defaults to opName,
// and the second Commutable_REV argument is !eq(revOp, opName).
multiclass VOP2Inst <string opName,
                     VOPProfile P,
                     SDPatternOperator node = null_frag,
                     string revOp = opName> {

  def _e32 : VOP2_Pseudo <opName, P>,
             Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

  def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
             Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

  def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
              Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
}

// TODO: add SDWA pseudo instructions for VOP2eInst

// Like VOP2Inst, but the _e32 and _sdwa forms implicitly define VCC, and also
// implicitly use VCC when useSGPRInput is set (by default: when the profile
// has three source arguments). The _e64 form gets no implicit VCC def/use.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {

  let SchedRW = [Write32Bit, WriteSALU] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
      def _e32 : VOP2_Pseudo <opName, P>,
                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

      def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
                  Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
    }
    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  }
}

// Like VOP2bInst but without the implicit VCC definition and without an
// _sdwa form: only the _e32 form may implicitly use VCC.
multiclass VOP2eInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      def _e32 : VOP2_Pseudo <opName, P>,
                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
    }
    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  }
}

// Profile for the MADAK form: the immediate is the last source operand.
// The immediate operand type is chosen from the size of 'vt'.
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field bit HasExt = 0;
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile for the MADMK form: the immediate sits between src0 and src1.
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field bit HasExt = 0;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// Profile for the MAC instructions. $src2 appears in every operand list, but
// HasSrc2 / HasSrc2Mods are cleared and the asm strings are generated for two
// sources only.
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
                       HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    VGPR_32:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     VGPR_32:$src2, // stub argument
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  let Asm32 = getAsm32<1, 2, vt>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;
  let HasExt = 1;
}

def VOP_MAC_F16 : VOP_MAC <f16> {
  // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
  // 'not a string initializer' error.
  let Asm64 = getAsm64<1, 2, HasModifiers, f16>.ret;
}

def VOP_MAC_F32 : VOP_MAC <f32> {
  // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
  // 'not a string initializer' error.
  let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
}

// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Asm32 = "$vdst, vcc, $src0, $src1";
  let Asm64 = "$vdst, $sdst, $src0, $src1";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
  // We use VCSrc_b32 to exclude literal constants, even though the
  // encoding normally allows them since the implicit VCC use means
  // using one would always violate the constant bus
  // restriction. SGPRs are still allowed because it should
  // technically be possible to use VCC again as src0.
  let Src0RC32 = VCSrc_b32;
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  // The SDWA and DPP operand lists likewise carry no explicit src2; their asm
  // strings above spell the carry-in as a literal "vcc".
  let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0,
                     Src1Mod:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
                    Src1Mod:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let HasExt = 1;
}

// Read in from vcc or arbitrary SGPR
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
  let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
  let Asm32 = "$vdst, $src0, $src1, vcc";
  let Asm64 = "$vdst, $src0, $src1, $src2";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}

// Profile for v_readlane_b32: the result is written to an SReg_32 and the
// 64-bit operand lists / asm string simply reuse the 32-bit ones.
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;
}

// Profile for v_writelane_b32: the result is written to a VGPR_32 and src0 is
// an SReg_32; the 64-bit operand lists / asm string reuse the 32-bit ones.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGCN in {

defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32>;

let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;

// The destination is tied to $src2 and $src2 is left out of the encoding.
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}

def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32>;

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
} // End isCommutable = 1

// These are special and do not read the exec mask.
// (Uses is overridden with an empty register list, and the pseudos are
// created with an empty name suffix instead of the default "_e32".)
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;

def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">;
} // End isConvergent = 1, Uses = []

defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>;
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;

} // End SubtargetPredicate = isGCN


// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {

defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;

let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
} // End isCommutable = 1

} // End SubtargetPredicate = isSICI

// These instructions are only defined under the isVI predicate.
let SubtargetPredicate = isVI in {

def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16>;
defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;

let isCommutable = 1 in {
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16>;
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum>;
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>;

// The destination is tied to $src2 and $src2 is left out of the encoding.
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End isCommutable = 1

} // End SubtargetPredicate = isVI

// Note: 16-bit instructions produce a 0 result in the high 16-bits.
// Selection patterns for a 16-bit binary operation 'op' implemented by 'inst':
// the plain i16 result, the result zero-extended to i32 (same instruction,
// no extra extend), and the result zero-extended to i64 (instruction result in
// sub0, a zero moved into sub1).
multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {

def : Pat<
  (op i16:$src0, i16:$src1),
  (inst $src0, $src1)
>;

def : Pat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  (inst $src0, $src1)
>;

def : Pat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     (inst $src0, $src1), sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;

}

// Same three patterns as Arithmetic_i16_Pats, but the instruction receives
// its operands in swapped order ($src1, $src0).
multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst> {

def : Pat<
  (op i16:$src0, i16:$src1),
  (inst $src1, $src0)
>;

def : Pat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  (inst $src1, $src0)
>;


def : Pat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     (inst $src1, $src0), sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Extension of an i1 to i16 through V_CNDMASK_B32_e64 choosing between the
// constants 0 and 1 based on $src.
class ZExt_i16_i1_Pat <SDNode ext> : Pat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
>;

let Predicates = [isVI] in {

defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64>;

// 16-bit bitwise operations are selected to the 32-bit instructions.
def : Pat <
  (and i16:$src0, i16:$src1),
  (V_AND_B32_e64 $src0, $src1)
>;

def : Pat <
  (or i16:$src0, i16:$src1),
  (V_OR_B32_e64 $src0, $src1)
>;

def : Pat <
  (xor i16:$src0, i16:$src1),
  (V_XOR_B32_e64 $src0, $src1)
>;

// Shifts map onto the *REV instructions, hence the swapped-operand patterns.
defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64>;
defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64>;
defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64>;

def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

// Sign extension of an i1 selects between 0 and -1 instead of 0 and 1.
def : Pat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
>;

} // End Predicates = [isVI]

//===----------------------------------------------------------------------===//
// SI
//===----------------------------------------------------------------------===//

let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {

// Real SI encoding for a pseudo whose record name is NAME itself (no
// _e32 / _e64 suffix).
multiclass VOP2_Real_si <bits<6> op> {
  def _si :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// Real SI encoding using the 64-bit VOP2_MADKe layout.
multiclass VOP2_Real_MADK_si <bits<6> op> {
  def _si : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// Real SI encoding of the NAME#"_e32" pseudo.
multiclass VOP2_Real_e32_si <bits<6> op> {
  def _e32_si :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// _e32 plus _e64 real SI encodings. The VOP3 opcode passed to VOP3e_si is the
// 6-bit VOP2 opcode with the bits 1, 0, 0 prepended.
multiclass VOP2_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
  def _e64_si :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
    VOP3e_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// Same as VOP2_Real_e32e64_si, but the _e64 form uses the VOP3be_si encoding.
multiclass VOP2be_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
  def _e64_si :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
    VOP3be_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"

defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>;
defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>;
defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>;
defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>;
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>;
defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>;
defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>;
defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>;
defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>;
defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>;
defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>;
defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>;
defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>;
defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>;
defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>;
defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>;
defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>;
defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>;
defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>;
defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>;
defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>;
defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>;
defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>;
defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>;
defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>;
// Carry-out / carry-in instructions: the _e64 form uses the VOP3be encoding.
defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>;
defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>;
defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>;
defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>;
defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;

// The lane-access pseudos are defined without an _e32 / _e64 suffix, so they
// use the multiclass that casts NAME directly.
defm V_READLANE_B32 : VOP2_Real_si <0x01>;
defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;

// Instructions whose pseudos are defined under the isSICI predicate.
defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>;
defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>;
defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>;
defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>;

// These get both _e32 and _e64 encodings here; the VI section of this file
// gives the same instructions an _e64 encoding only.
defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>;
defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>;
defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>;
defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>;
defm V_CVT_PK_U16_U32 : VOP2_Real_e32e64_si <0x30>;
defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// DPP form of a VOP2 instruction. Implicit defs/uses, scheduling info,
// side-effect flag and operand constraints are copied from the pseudo 'ps'.
// The src0 field (bits 8-0) is fixed to 0xfa; src1, vdst and the opcode occupy
// bits 16-9, 24-17 and 30-25.
class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPP <ps.OpName, P> {
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0} = 0xfa; //dpp
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; //encoding
}

let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {

// Real VI instruction for a VOP2_Pseudo named NAME that is encoded with the
// VOP3e_vi layout and a 10-bit opcode.
multiclass VOP32_Real_vi <bits<10> op> {
  def _vi :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
    VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// Real VI encoding using the 64-bit VOP2_MADKe layout.
multiclass VOP2_Real_MADK_vi <bits<6> op> {
  def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// Real VI encoding of the NAME#"_e32" pseudo.
multiclass VOP2_Real_e32_vi <bits<6> op> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// Real VI encoding of the NAME#"_e64" pseudo; 'op' is the full 10-bit VOP3
// opcode.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// _e32 plus a VOP3be_vi-encoded _e64. The VOP3 opcode is the 6-bit VOP2
// opcode with the bits 0, 1, 0, 0 prepended.
multiclass Base_VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// _e32 plus a VOP3e_vi-encoded _e64, using the same opcode prefix as above.
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"

// Real VI encoding of the NAME#"_sdwa" pseudo.
multiclass VOP2_SDWA_Real <bits<6> op> {
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

// e32 + e64 (VOP3be) + SDWA + DPP real definitions for one opcode.
multiclass VOP2be_Real_e32e64_vi <bits<6> op> :
  Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
  // For now left dpp only for asm/dasm
  // TODO: add corresponding pseudo
  def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}

// e32 + e64 (VOP3e) + SDWA + DPP real definitions for one opcode.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
  // For now left dpp only for asm/dasm
  // TODO: add corresponding pseudo
  def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}

// V_CNDMASK_B32 uses the Base_ multiclass: only _e32 and _e64, no SDWA / DPP.
defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;

// The lane-access instructions are given 10-bit opcodes and the VOP3e_vi
// layout here.
defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;

// The following get only an _e64 real encoding for VI (no _e32_vi record).
defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>;
defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>;
defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>;
defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>;
defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>;

defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>;
defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>;
defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>;
defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>;
defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>;
defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>;
defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>;
defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>;
defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>;
defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>;
defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>;

let SubtargetPredicate = isVI in {

// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
// The alias accepts the plain three-operand syntax and fills every other
// operand of the VOP3 instruction with 0.
class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
  name#" $dst, $src0, $src1",
  (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
>, PredicateControl {
  let UseInstAsmMatchConverter = 0;
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
}

def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

} // End SubtargetPredicate = isVI