//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//
// In an SDTypeProfile<NumResults, NumOperands, [...]>, constraint indices
// count the results first and the operands after them.
//===----------------------------------------------------------------------===//

// Integer ternary operation: the result and the first two operands share one
// integer type; the third operand is an integer that may have another type.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;

// FP result with the same type as the first operand, plus an integer second
// operand.
def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;

// Same shape as AMDGPUTrigPreOp: FP result and first operand of one type,
// integer second operand.
def AMDGPULdExpOp : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;

// Integer result computed from an FP first operand and an integer second
// operand.
def AMDGPUFPClassOp : SDTypeProfile<1, 2,
  [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
>;

// Two results (an FP value and an integer) and three operands that all have
// the type of the FP result.
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
  [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;

// float, float, float, vcc
// The FP result and the first three operands share a type; the fourth operand
// is an integer.
def AMDGPUFmasOp : SDTypeProfile<1, 4,
  [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
>;

// No result; a single integer operand.
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//===----------------------------------------------------------------------===//

// NOTE(review): the profile below lists SDTCisVT<0, iPTR> twice, which leaves
// operand 1 without any type constraint. The second entry was presumably meant
// to be SDTCisVT<1, iPTR>. It is kept as-is here because tightening the
// profile affects every pattern that uses this node -- confirm against those
// patterns before changing it.
def AMDGPUconstdata_ptr : SDNode<
  "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
                                                     SDTCisVT<0, iPTR>]>
>;

// The argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;

// Hardware cosine and sine nodes (COS_HW / SIN_HW).
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;

// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// out = 1.0 / a
def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a)
def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;

// Legacy variants of the two nodes above:
//   rcp_legacy: out = 1.0 / a
//   rsq_legacy: out = 1.0 / sqrt(a)
def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;

def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;

// out = max(a, b) a and b are floats, where a nan comparison fails.
// This is not commutative because this gives the second operand:
//   x < nan ? x : nan -> nan
//   nan < x ? nan : x -> x
def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
  []
>;

def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;

// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = max(a, b) a and b are unsigned ints
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are floats, where a nan comparison fails.
// Like AMDGPUfmax_legacy, this node is declared without SDNPCommutative.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
  []
>;

// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands.
// out = max(a, b, c) a, b and c are floats
def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are signed ints
def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are unsigned ints
def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are floats
def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are signed ints
def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are unsigned ints
def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;

// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;

// Profile for AMDGPUsetcc: an i64 result, two operands of the same type, and
// a third operand of type OtherVT.
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [        // setcc
  SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
]>;

def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

// Profile for AMDGPUsetreg: no result, two integer operands.
def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
  SDTCisInt<0>, SDTCisInt<1>
]>;

def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
  SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;

// FMA and FMUL variants that carry a chain and optional glue.
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
  SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp, [
  SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// Integer-to-FP conversion nodes, one per CVT_F32_UBYTE0..3 opcode.
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
  SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
  SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
  SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
  SDTIntToFPOp, []>;


// urecip - This operation is a helper for integer division, it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

// Special case divide preop and flags.
def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;

// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;

// Single or double precision division fixup.
// Special case divide fixup and flags (src0 = Quotient, src1 =
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;

// Chained load: one result, a pointer-typed first operand and an integer
// second operand.
def AMDGPUregister_load : SDNode<
  "AMDGPUISD::REGISTER_LOAD",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayLoad]
>;

// Chained store: no result and three operands, with the same two type
// constraints as AMDGPUregister_load.
def AMDGPUregister_store : SDNode<
  "AMDGPUISD::REGISTER_STORE",
  SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayStore]
>;

// MSKOR instructions are atomic memory instructions used mainly for storing
// 8-bit and 16-bit values.  The definition is:
//
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<
  "AMDGPUISD::STORE_MSKOR",
  SDTypeProfile<0, 2, []>,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]
>;

// Compare-and-swap node: one result, a pointer-typed first operand and a
// vector-typed second operand.
def AMDGPUatomic_cmp_swap : SDNode<
  "AMDGPUISD::ATOMIC_CMP_SWAP",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]
>;

// Wraps the generic ISD::FROUND opcode with an FP unary profile (result and
// operand share one FP type).
def AMDGPUround : SDNode<
  "ISD::FROUND",
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>
>;

// Bitfield nodes (BFE_U32, BFE_I32, BFI, BFM).
def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

// Integer unary nodes for the FFBH_U32 and FFBH_I32 opcodes.
def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;

// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
// when performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<
  "AMDGPUISD::MUL_U24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24 : SDNode<
  "AMDGPUISD::MUL_I24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;

// MULHI_U24 / MULHI_I24 integer binary nodes, declared with the same
// properties as the mul_u24 / mul_i24 nodes.
def AMDGPUmulhi_u24 : SDNode<
  "AMDGPUISD::MULHI_U24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmulhi_i24 : SDNode<
  "AMDGPUISD::MULHI_I24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;

// MAD_U24 / MAD_I24 integer ternary nodes, with no node properties.
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, []>;
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp, []>;

// Three-operand SMED3 / UMED3 / FMED3 nodes, with no node properties.
def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp, []>;

def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, []>;

def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;

// Message-send nodes: a single integer operand, chained, with incoming glue.
def AMDGPUsendmsg : SDNode<
  "AMDGPUISD::SENDMSG",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPInGlue]
>;

def AMDGPUsendmsghalt : SDNode<
  "AMDGPUISD::SENDMSGHALT",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPInGlue]
>;

// Interpolation nodes: each produces one FP result and takes incoming glue;
// only INTERP_P1 also produces glue.
def AMDGPUinterp_mov : SDNode<
  "AMDGPUISD::INTERP_MOV",
  SDTypeProfile<1, 3, [SDTCisFP<0>]>,
  [SDNPInGlue]
>;

def AMDGPUinterp_p1 : SDNode<
  "AMDGPUISD::INTERP_P1",
  SDTypeProfile<1, 3, [SDTCisFP<0>]>,
  [SDNPInGlue, SDNPOutGlue]
>;

def AMDGPUinterp_p2 : SDNode<
  "AMDGPUISD::INTERP_P2",
  SDTypeProfile<1, 4, [SDTCisFP<0>]>,
  [SDNPInGlue]
>;


def AMDGPUkill : SDNode<
  "AMDGPUISD::KILL", AMDGPUKillSDT, [SDNPHasChain, SDNPSideEffect]
>;

// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
  SDTCisInt<0>,        // i8 en
  SDTCisInt<1>,        // i1 vm
                       // skip done
  SDTCisInt<2>,        // i8 tgt
  SDTCisSameAs<3, 1>,  // i1 compr
  SDTCisFP<4>,         // f32 src0
  SDTCisSameAs<5, 4>,  // f32 src1
  SDTCisSameAs<6, 4>,  // f32 src2
  SDTCisSameAs<7, 4>   // f32 src3
]>;

// EXPORT and EXPORT_DONE share AMDGPUExportOp; the EXPORT_DONE node is
// additionally marked SDNPMayLoad.
def AMDGPUexport : SDNode<
  "AMDGPUISD::EXPORT", AMDGPUExportOp, [SDNPHasChain, SDNPMayStore]
>;

def AMDGPUexport_done : SDNode<
  "AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
  [SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;


// R600 export: seven operands, of which the first is FP and the second is an
// integer; the remaining operands are unconstrained.
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

def R600_EXPORT : SDNode<
  "AMDGPUISD::R600_EXPORT", R600ExportOp, [SDNPHasChain, SDNPSideEffect]
>;

//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Branch instruction where second and third are basic blocks
// NOTE(review): the profile declares only two operands and constrains just
// the first one (OtherVT), so the "third" above looks stale -- confirm.
def SDTIL_BRCond : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>]>;

//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def AMDGPUendpgm : SDNode<
  "AMDGPUISD::ENDPGM", SDTNone, [SDNPHasChain, SDNPOptInGlue]
>;

def AMDGPUreturn : SDNode<
  "AMDGPUISD::RETURN", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;