// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WebAssembly SIMD operand code-gen constructs.
///
//===----------------------------------------------------------------------===//

// Wrapper around the generic `I` multiclass for SIMD instructions. The opcode
// handed to `I` is the low byte of `simdop` OR'ed with 0xfd00 (the simd128
// prefix byte), and every definition is predicated on HasSIMD128.
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                  list<dag> pattern_r, string asmstr_r = "",
                  string asmstr_s = "", bits<32> simdop = -1> {
  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
              !or(0xfd00, !and(0xff, simdop))>,
            Requires<[HasSIMD128]>;
}

// One ARGUMENT definition per vector value type carried in V128.
defm "" : ARGUMENT<V128, v16i8>;
defm "" : ARGUMENT<V128, v8i16>;
defm "" : ARGUMENT<V128, v4i32>;
defm "" : ARGUMENT<V128, v2i64>;
defm "" : ARGUMENT<V128, v4f32>;
defm "" : ARGUMENT<V128, v2f64>;

// Constrained immediate argument types.
// ImmI<N> accepts i32 immediates in [-(1 << (N-1)), 1 << (N-1)).
foreach BITS = [8, 16] in
def ImmI#BITS : ImmLeaf<i32,
  "return -(1 << ("#BITS#" - 1)) <= Imm && Imm < (1 << ("#BITS#" - 1));"
>;
// LaneIdx<N> accepts i32 immediates in [0, N).
foreach LANES = [2, 4, 8, 16, 32] in
def LaneIdx#LANES : ImmLeaf<i32, "return 0 <= Imm && Imm < "#LANES#";">;

//===----------------------------------------------------------------------===//
// Load and store
//===----------------------------------------------------------------------===//

// Load: v128.load
// The _A32 and _A64 variants differ only in the offset operand type and the
// register class of the address.
let mayLoad = 1, UseNamedOperandTable = 1 in {
defm LOAD_V128_A32 :
  SIMD_I<(outs V128:$dst),
         (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
         (outs),
         (ins P2Align:$p2align, offset32_op:$off),
         [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align",
         0>;
defm LOAD_V128_A64 :
  SIMD_I<(outs V128:$dst),
         (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
         (outs),
         (ins P2Align:$p2align, offset64_op:$off),
         [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align",
         0>;
}

// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LoadPatNoOffset<vt, load, "LOAD_V128">;
defm : LoadPatImmOff<vt, load, regPlusImm, "LOAD_V128">;
defm : LoadPatImmOff<vt, load, or_is_add, "LOAD_V128">;
defm : LoadPatOffsetOnly<vt, load, "LOAD_V128">;
defm : LoadPatGlobalAddrOffOnly<vt, load, "LOAD_V128">;
}

// vNxM.load_splat
// `vec` is both the assembly mnemonic prefix and part of the record name.
multiclass SIMDLoadSplat<string vec, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_SPLAT_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off),
           [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align",
           simdop>;
  defm LOAD_SPLAT_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off),
           [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align",
           simdop>;
  }
}

defm "" : SIMDLoadSplat<"v8x16", 7>;
defm "" : SIMDLoadSplat<"v16x8", 8>;
defm "" : SIMDLoadSplat<"v32x4", 9>;
defm "" : SIMDLoadSplat<"v64x2", 10>;

// Target node for splat loads, plus a PatFrag so it can be fed to the
// generic load pattern multiclasses.
def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;

// Each entry is [value type name, load_splat instruction suffix].
foreach pair = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
                ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
defm : LoadPatNoOffset<!cast<ValueType>(pair[0]),
                       load_splat,
                       "LOAD_SPLAT_"#pair[1]>;
defm : LoadPatImmOff<!cast<ValueType>(pair[0]),
                     load_splat,
                     regPlusImm,
                     "LOAD_SPLAT_"#pair[1]>;
defm : LoadPatImmOff<!cast<ValueType>(pair[0]),
                     load_splat,
                     or_is_add,
                     "LOAD_SPLAT_"#pair[1]>;
defm : LoadPatOffsetOnly<!cast<ValueType>(pair[0]),
                         load_splat,
                         "LOAD_SPLAT_"#pair[1]>;
defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(pair[0]),
                                load_splat,
                                "LOAD_SPLAT_"#pair[1]>;
}

// Load and extend
// The signed form uses `simdop`; the unsigned form uses `simdop + 1`.
multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_EXTEND_S_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off),
           [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align",
           simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off),
           [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align",
           !add(simdop, 1)>;
  defm LOAD_EXTEND_S_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off),
           [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align",
           simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off),
           [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align",
           !add(simdop, 1)>;
  }
}

defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>;
defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>;
defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>;

// `tys` is [result vector type, memory element type]; `ext` is
// [load PatFrag name prefix, instruction suffix]. Note that extloadv is
// mapped to the unsigned ("_U") instruction.
foreach tys = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in
foreach ext = [["sextloadv", "_S"],
               ["zextloadv", "_U"],
               ["extloadv", "_U"]] in {
defm : LoadPatNoOffset<tys[0], !cast<PatFrag>(ext[0]#tys[1]),
                       "LOAD_EXTEND"#ext[1]#"_"#tys[0]>;
defm : LoadPatImmOff<tys[0], !cast<PatFrag>(ext[0]#tys[1]), regPlusImm,
                     "LOAD_EXTEND"#ext[1]#"_"#tys[0]>;
defm : LoadPatImmOff<tys[0], !cast<PatFrag>(ext[0]#tys[1]), or_is_add,
                     "LOAD_EXTEND"#ext[1]#"_"#tys[0]>;
defm : LoadPatOffsetOnly<tys[0], !cast<PatFrag>(ext[0]#tys[1]),
                         "LOAD_EXTEND"#ext[1]#"_"#tys[0]>;
defm : LoadPatGlobalAddrOffOnly<tys[0], !cast<PatFrag>(ext[0]#tys[1]),
                                "LOAD_EXTEND"#ext[1]#"_"#tys[0]>;
}

// Load lane into zero vector
multiclass SIMDLoadZero<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_ZERO_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off),
           [],
           name#"\t$dst, ${off}(${addr})$p2align",
           name#"\t$off$p2align",
           simdop>;
  defm LOAD_ZERO_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off),
           [],
           name#"\t$dst, ${off}(${addr})$p2align",
           name#"\t$off$p2align",
           simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

// TODO: Also support v4f32 and v2f64 once the instructions are merged
// to the proposal
defm "" : SIMDLoadZero<v4i32, "v128.load32_zero", 252>;
defm "" : SIMDLoadZero<v2i64, "v128.load64_zero", 253>;

// The load-zero instructions are selected from the corresponding intrinsics.
defm : LoadPatNoOffset<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
defm : LoadPatNoOffset<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

defm : LoadPatImmOff<v4i32, int_wasm_load32_zero, regPlusImm, "LOAD_ZERO_v4i32">;
defm : LoadPatImmOff<v2i64, int_wasm_load64_zero, regPlusImm, "LOAD_ZERO_v2i64">;

defm : LoadPatImmOff<v4i32, int_wasm_load32_zero, or_is_add, "LOAD_ZERO_v4i32">;
defm : LoadPatImmOff<v2i64, int_wasm_load64_zero, or_is_add, "LOAD_ZERO_v2i64">;

defm : LoadPatOffsetOnly<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
defm : LoadPatOffsetOnly<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

defm : LoadPatGlobalAddrOffOnly<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
defm : LoadPatGlobalAddrOffOnly<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

// Store: v128.store
let mayStore = 1, UseNamedOperandTable = 1 in {
defm STORE_V128_A32 :
  SIMD_I<(outs),
         (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
         (outs),
         (ins P2Align:$p2align, offset32_op:$off),
         [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align",
         11>;
defm STORE_V128_A64 :
  SIMD_I<(outs),
         (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
         (outs),
         (ins P2Align:$p2align, offset64_op:$off),
         [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align",
         11>;
}
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
defm : StorePatNoOffset<vt, store, "STORE_V128">;
defm : StorePatImmOff<vt, store, regPlusImm, "STORE_V128">;
defm : StorePatImmOff<vt, store, or_is_add, "STORE_V128">;
defm : StorePatOffsetOnly<vt, store, "STORE_V128">;
defm : StorePatGlobalAddrOffOnly<vt, store, "STORE_V128">;
}

//===----------------------------------------------------------------------===//
// Constructing SIMD values
//===----------------------------------------------------------------------===//

// Constant: v128.const
// `ops` is used for both the register and stack forms; `args` is the operand
// list appended to the mnemonic. Definitions are gated on
// HasUnimplementedSIMD128.
multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
  let isMoveImm = 1, isReMaterializable = 1,
      Predicates = [HasUnimplementedSIMD128] in
  defm CONST_V128_#vec_t :
    SIMD_I<(outs V128:$dst), ops, (outs), ops,
           [(set V128:$dst, (vec_t pat))],
           "v128.const\t$dst, "#args,
           "v128.const\t"#args,
           12>;
}

defm "" : ConstVec<v16i8,
                   (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
                        vec_i8imm_op:$i2, vec_i8imm_op:$i3,
                        vec_i8imm_op:$i4, vec_i8imm_op:$i5,
                        vec_i8imm_op:$i6, vec_i8imm_op:$i7,
                        vec_i8imm_op:$i8, vec_i8imm_op:$i9,
                        vec_i8imm_op:$iA, vec_i8imm_op:$iB,
                        vec_i8imm_op:$iC, vec_i8imm_op:$iD,
                        vec_i8imm_op:$iE, vec_i8imm_op:$iF),
                   (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
                                 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
                                 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
                                 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, "#
                   "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF">;
defm "" : ConstVec<v8i16,
                   (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
                        vec_i16imm_op:$i2, vec_i16imm_op:$i3,
                        vec_i16imm_op:$i4, vec_i16imm_op:$i5,
                        vec_i16imm_op:$i6, vec_i16imm_op:$i7),
                   (build_vector
                     ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                     ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
let IsCanonical = 1 in
defm "" : ConstVec<v4i32,
                   (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                        vec_i32imm_op:$i2, vec_i32imm_op:$i3),
                   (build_vector (i32 imm:$i0), (i32 imm:$i1),
                                 (i32 imm:$i2), (i32 imm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2i64,
                   (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
                   (build_vector (i64 imm:$i0), (i64 imm:$i1)),
                   "$i0, $i1">;
defm "" : ConstVec<v4f32,
                   (ins f32imm_op:$i0, f32imm_op:$i1,
                        f32imm_op:$i2, f32imm_op:$i3),
                   (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
                                 (f32 fpimm:$i2), (f32 fpimm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2f64,
                   (ins f64imm_op:$i0, f64imm_op:$i1),
                   (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
                   "$i0, $i1">;

// Shuffle lanes: shuffle
// Two vector inputs plus sixteen immediate lane selectors. No selection
// pattern is attached here; selection is done by the Pat records below.
defm SHUFFLE :
  SIMD_I<(outs V128:$dst),
         (ins V128:$x, V128:$y,
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         (outs),
         (ins
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         [],
         "v8x16.shuffle\t$dst, $x, $y, "#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         "v8x16.shuffle\t"#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         13>;

// Shuffles after custom lowering
// The custom node has 18 operands: two vectors followed by sixteen lane
// indices, each constrained to [0, 32) by LaneIdx32.
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
def : Pat<(vt (wasm_shuffle (vt V128:$x), (vt V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
          (vt (SHUFFLE (vt V128:$x), (vt V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
}

// Swizzle lanes:
// v8x16.swizzle
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
defm SWIZZLE :
  SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask),
         (outs), (ins),
         [(set (v16i8 V128:$dst),
           (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
         "v8x16.swizzle\t$dst, $src, $mask",
         "v8x16.swizzle",
         14>;

// The swizzle intrinsic selects the same instruction as the custom node.
def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
          (SWIZZLE V128:$src, V128:$mask)>;

// Create vector with identical lanes: splat
// splatN matches a build_vector whose N operands are all the same node.
def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
def splat4 : PatFrag<(ops node:$x),
                     (build_vector node:$x, node:$x, node:$x, node:$x)>;
def splat8 : PatFrag<(ops node:$x),
                     (build_vector node:$x, node:$x, node:$x, node:$x,
                                   node:$x, node:$x, node:$x, node:$x)>;
def splat16 : PatFrag<(ops node:$x),
                      (build_vector node:$x, node:$x, node:$x, node:$x,
                                    node:$x, node:$x, node:$x, node:$x,
                                    node:$x, node:$x, node:$x, node:$x,
                                    node:$x, node:$x, node:$x, node:$x)>;

multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                 PatFrag splat_pat, bits<32> simdop> {
  defm SPLAT_#vec_t :
    SIMD_I<(outs V128:$dst), (ins reg_t:$x),
           (outs), (ins),
           [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
           vec#".splat\t$dst, $x",
           vec#".splat",
           simdop>;
}

defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>;
defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>;
defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>;
defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>;
defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>;
defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>;

// scalar_to_vector leaves high lanes undefined, so can be a splat
class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
                     WebAssemblyRegClass reg_t> :
  Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))),
      (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>;

def : ScalarSplatPat<v16i8, i32, I32>;
def : ScalarSplatPat<v8i16, i32, I32>;
def : ScalarSplatPat<v4i32, i32, I32>;
def : ScalarSplatPat<v2i64, i64, I64>;
def : ScalarSplatPat<v4f32, f32, F32>;
def : ScalarSplatPat<v2f64, f64, F64>;

//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//

// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
// `suffix` is appended to both the record name and the mnemonic. The
// instruction itself carries no pattern; see the Pat records that follow.
multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                       bits<32> simdop, string suffix = ""> {
  defm EXTRACT_LANE_#vec_t#suffix :
    SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
           (outs), (ins vec_i8imm_op:$idx),
           [],
           vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
           vec#".extract_lane"#suffix#"\t$idx",
           simdop>;
}

defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">;
defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">;
defm "" : ExtractLane<v4i32, "i32x4", I32, 27>;
defm "" : ExtractLane<v2i64, "i64x2", I64, 29>;
defm "" : ExtractLane<v4f32, "f32x4", F32, 31>;
defm "" : ExtractLane<v2f64, "f64x2", F64, 33>;

// Plain vector_extract. The narrow integer vectors select the "_u" form.
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
          (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
          (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;

// Fold an explicit sign extension (sext_inreg) or mask (and) of an extracted
// narrow lane into the "_s" / "_u" extract instruction respectively.
def : Pat<
  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
  (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
  (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<
  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
  (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
  (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;

// Replace lane value: replace_lane
// `imm_t` is the lane-index predicate used for the $idx operand.
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
                       WebAssemblyRegClass reg_t, ValueType lane_t,
                       bits<32> simdop> {
  defm REPLACE_LANE_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
           (outs), (ins vec_i8imm_op:$idx),
           [(set V128:$dst,
             (vector_insert
               (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
           vec#".replace_lane\t$dst, $vec, $idx, $x",
           vec#".replace_lane\t$idx",
           simdop>;
}

defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>;
defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>;
defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>;
defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>;
defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>;
defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>;

// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
          (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
          (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
          (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;

//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//

// One lane-wise comparison instruction. `vec_t` is the operand type and
// `out_t` the result type of the matched setcc.
multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
                         string name, CondCode cond, bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
           (outs), (ins),
           [(set (out_t V128:$dst),
             (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
           )],
           vec#"."#name#"\t$dst, $lhs, $rhs",
           vec#"."#name,
           simdop>;
}

// Integer comparisons: i8x16 at baseInst, i16x8 at +10, i32x4 at +20.
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
  defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
                          !add(baseInst, 10)>;
  defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
                          !add(baseInst, 20)>;
}

// Floating-point comparisons: f32x4 at baseInst, f64x2 at +6. Results are
// integer vectors with the same lane count as the operands.
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
  defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
                          !add(baseInst, 6)>;
}

// Equality: eq
let isCommutable = 1 in {
defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
} // isCommutable = 1

// Non-equality: ne
let isCommutable = 1 in {
defm NE : SIMDConditionInt<"ne", SETNE, 36>;
defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
} // isCommutable = 1

// Less than: lt_s / lt_u / lt
defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
defm LT : SIMDConditionFP<"lt", SETOLT, 67>;

// Greater than: gt_s / gt_u
// / gt
defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
defm GT : SIMDConditionFP<"gt", SETOGT, 68>;

// Less than or equal: le_s / le_u / le
defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
defm LE : SIMDConditionFP<"le", SETOLE, 69>;

// Greater than or equal: ge_s / ge_u / ge
defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
defm GE : SIMDConditionFP<"ge", SETOGE, 70>;

// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated with nnan and in the
// target-independent expansion of unordered comparisons and ordered ne.
// Each entry is [don't-care comparison fragment, instruction to select].
foreach cmp = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32],
               [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in
def : Pat<(v4i32 (cmp[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (v4i32 (cmp[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;

foreach cmp = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64],
               [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in
def : Pat<(v2i64 (cmp[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (v2i64 (cmp[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;


//===----------------------------------------------------------------------===//
// Bitwise operations
//===----------------------------------------------------------------------===//

// Generic two-operand vector instruction matching `node` on `vec_t`.
multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
                      bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
           (outs), (ins),
           [(set (vec_t V128:$dst),
             (node (vec_t V128:$lhs), (vec_t V128:$rhs))
           )],
           vec#"."#name#"\t$dst, $lhs, $rhs",
           vec#"."#name,
           simdop>;
}

// Bitwise operations share one mnemonic prefix ("v128") and one opcode across
// all integer vector types.
multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
  defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
}

// Generic one-operand vector instruction matching `node` on `vec_t`.
multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$vec),
           (outs), (ins),
           [(set (vec_t V128:$dst),
             (vec_t (node (vec_t V128:$vec)))
           )],
           vec#"."#name#"\t$dst, $vec",
           vec#"."#name,
           simdop>;
}

// Bitwise logic: v128.not
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
defm NOT: SIMDUnary<vt, "v128", vnot, "not", 77>;

// Bitwise logic: v128.and / v128.or / v128.xor
let isCommutable = 1 in {
defm AND : SIMDBitwise<and, "and", 78>;
defm OR : SIMDBitwise<or, "or", 80>;
defm XOR : SIMDBitwise<xor, "xor", 81>;
} // isCommutable = 1

// Bitwise logic: v128.andnot
// andnot(left, right) is left & ~right.
def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;

// Bitwise select: v128.bitselect
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
  defm BITSELECT_#vt :
    SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c),
           (outs), (ins),
           [(set (vt V128:$dst),
             (vt (int_wasm_bitselect
               (vt V128:$v1), (vt V128:$v2), (vt V128:$c)
             ))
           )],
           "v128.bitselect\t$dst, $v1, $v2, $c",
           "v128.bitselect",
           82>;

// Bitselect is equivalent to (c & v1) | (~c & v2)
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
  def : Pat<(vt (or (and (vt V128:$c), (vt V128:$v1)),
                    (and (vnot V128:$c), (vt V128:$v2)))),
            (!cast<Instruction>("BITSELECT_"#vt)
              V128:$v1, V128:$v2, V128:$c)>;

// Also implement vselect in terms of bitselect
// Each entry is [value type, condition (mask) type].
foreach tys = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
               [v4f32, v4i32], [v2f64, v2i64]] in
  def : Pat<(tys[0] (vselect
              (tys[1] V128:$c), (tys[0] V128:$v1), (tys[0] V128:$v2)
            )),
            (!cast<Instruction>("BITSELECT_"#tys[0])
              V128:$v1, V128:$v2, V128:$c
            )>;

// MVP select on v128 values
// These go through `I` directly rather than SIMD_I, so they use the plain
// opcode 0x1b and are not given the HasSIMD128 requirement by this file.
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm SELECT_#vt :
  I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond),
    (outs), (ins),
    [(set V128:$dst,
      (select I32:$cond,
        (vt V128:$lhs), (vt V128:$rhs)
      )
    )],
    "v128.select\t$dst, $lhs, $rhs, $cond",
    "v128.select",
    0x1b>;

// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
def : Pat<(select
            (i32 (setne I32:$cond, 0)), (vt V128:$lhs), (vt V128:$rhs)
          ),
          (!cast<Instruction>("SELECT_"#vt)
            V128:$lhs, V128:$rhs, I32:$cond
          )>;

// And again, this time with seteq instead of setne and the arms reversed.
def : Pat<(select
            (i32 (seteq I32:$cond, 0)), (vt V128:$lhs), (vt V128:$rhs)
          ),
          (!cast<Instruction>("SELECT_"#vt)
            V128:$rhs, V128:$lhs, I32:$cond
          )>;
} // foreach vt

//===----------------------------------------------------------------------===//
// Integer unary arithmetic
//===----------------------------------------------------------------------===//

// Integer unary op for all four lane widths; opcodes step by 32 per width.
multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// Vector-to-i32 reduction instruction matching `op` on `vec_t`.
multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
                         bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs I32:$dst), (ins V128:$vec),
           (outs), (ins),
           [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
           vec#"."#name#"\t$dst, $vec",
           vec#"."#name,
           simdop>;
}

// Reduction for all four lane widths; opcodes step by 32 per width.
multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
  defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
  defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>;
  defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>;
  defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>;
}

// Integer vector negation
def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;

// Integer absolute value: abs
defm ABS : SIMDUnaryInt<abs, "abs", 96>;

// Integer negation: neg
defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;

// Any lane true: any_true
defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>;

// All lanes true: all_true
defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>;

// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
// can be folded out
// `red` is [intrinsic name, instruction name prefix]; each of the three
// redundant wrappers around the intrinsic selects the bare reduction.
foreach red =
  [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
foreach vt = [v16i8, v8i16, v4i32, v2i64] in {
def : Pat<(i32 (and
            (i32 (!cast<Intrinsic>(red[0]) (vt V128:$x))),
            (i32 1)
          )),
          (i32 (!cast<NI>(red[1]#"_"#vt) (vt V128:$x)))>;
def : Pat<(i32 (setne
            (i32 (!cast<Intrinsic>(red[0]) (vt V128:$x))),
            (i32 0)
          )),
          (i32 (!cast<NI>(red[1]#"_"#vt) (vt V128:$x)))>;
def : Pat<(i32 (seteq
            (i32 (!cast<Intrinsic>(red[0]) (vt V128:$x))),
            (i32 1)
          )),
          (i32 (!cast<NI>(red[1]#"_"#vt) (vt V128:$x)))>;
}

// Bitmask: selected from int_wasm_bitmask, producing an i32 from a vector.
multiclass SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs I32:$dst), (ins V128:$vec),
           (outs), (ins),
           [(set I32:$dst,
             (i32 (int_wasm_bitmask (vec_t V128:$vec)))
           )],
           vec#".bitmask\t$dst, $vec",
           vec#".bitmask",
           simdop>;
}

defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>;
defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>;
defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>;

//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//

// Vector shift whose shift amount is a scalar I32 operand.
multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
           (outs), (ins),
           [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))],
           vec#"."#name#"\t$dst, $vec, $x",
           vec#"."#name,
           simdop>;
}

// Shift for all four lane widths; opcodes step by 32 per width.
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// WebAssembly SIMD shifts are nonstandard
// in that the shift amount is
// an i32 rather than a vector, so they need custom nodes.
def wasm_shift_t : SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
>;
def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;

// Left shift by scalar: shl
defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;

// Right shift by scalar: shr_s / shr_u
defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;

//===----------------------------------------------------------------------===//
// Integer binary arithmetic
//===----------------------------------------------------------------------===//

// The helpers below instantiate SIMDBinary for subsets of the integer lane
// widths. In every case the opcode is baseInst + 0/32/64/96 for
// i8x16/i16x8/i32x4/i64x2 respectively.

// i16x8, i32x4 and i64x2 only.
multiclass SIMDBinaryIntNoI8x16<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// i8x16 and i16x8 only.
multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
}

// i8x16, i16x8 and i32x4.
multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
  defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
}

// All four integer lane widths.
multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
  defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// Integer addition: add / add_saturate_s / add_saturate_u
let isCommutable = 1 in {
defm ADD : SIMDBinaryInt<add, "add", 110>;
defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 111>;
defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 112>;
} // isCommutable = 1

// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
defm SUB : SIMDBinaryInt<sub, "sub", 113>;
defm SUB_SAT_S :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 114>;
defm SUB_SAT_U :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 115>;

// Integer multiplication: mul
let isCommutable = 1 in
defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;

// Integer min_s / min_u / max_s / max_u
let isCommutable = 1 in {
defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
} // isCommutable = 1

// Integer unsigned rounding average: avgr_u
let isCommutable = 1 in {
defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>;
defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>;
}

// An add whose node carries the no-unsigned-wrap flag.
def add_nuw : PatFrag<(ops node:$lhs, node:$rhs),
                      (add node:$lhs, node:$rhs),
                      "return N->getFlags().hasNoUnsignedWrap();">;

// Recognise ((lhs + rhs) + splat(1)) >> 1, with both adds nuw and an unsigned
// shift, as avgr_u. Each entry is [vector type, matching splat fragment].
foreach avg = [[v16i8, splat16], [v8i16, splat8]] in
def : Pat<(wasm_shr_u
            (add_nuw
              (add_nuw (avg[0] V128:$lhs), (avg[0] V128:$rhs)),
              (avg[1] (i32 1))
            ),
            (i32 1)
          ),
          (!cast<NI>("AVGR_U_"#avg[0]) V128:$lhs, V128:$rhs)>;

// Widening dot product: i32x4.dot_i16x8_s
let isCommutable = 1 in
defm DOT :
  SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
         (outs), (ins),
         [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
         "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs",
         "i32x4.dot_i16x8_s",
         186>;

//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates SIMDUnary for both floating-point vector types. The f32x4 form
// uses opcode baseInst and the f64x2 form uses baseInst + 12.
multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
  defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}

// Absolute value: abs
defm ABS : SIMDUnaryFP<fabs, "abs", 224>;

// Negation: neg
defm NEG : SIMDUnaryFP<fneg, "neg", 225>;

// Square root: sqrt
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;

// Rounding: ceil, floor, trunc, nearest
// These are selected from the int_wasm_* intrinsics and are instantiated one
// by one, each with its own explicit opcode, rather than through SIMDUnaryFP.
defm CEIL    : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>;
defm FLOOR   : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>;
defm TRUNC   : SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>;
defm NEAREST : SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>;
defm CEIL    : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>;
defm FLOOR   : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>;
defm TRUNC   : SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>;
defm NEAREST : SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>;

//===----------------------------------------------------------------------===//
// Floating-point binary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates SIMDBinary for both floating-point vector types. The f32x4
// form uses opcode baseInst and the f64x2 form uses baseInst + 12.
multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
  defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}

// Addition: add
let isCommutable = 1 in {
defm ADD : SIMDBinaryFP<fadd, "add", 228>;
} // isCommutable = 1

// Subtraction: sub
defm SUB : SIMDBinaryFP<fsub, "sub", 229>;

// Multiplication: mul
let isCommutable = 1 in {
defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
} // isCommutable = 1

// Division: div
defm DIV : SIMDBinaryFP<fdiv, "div", 231>;

// NaN-propagating minimum: min
defm MIN : SIMDBinaryFP<fminimum, "min", 232>;

// NaN-propagating maximum: max
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;

// Pseudo-minimum: pmin
defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;

// Pseudo-maximum: pmax
defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;

//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//

// One V128 -> V128 conversion instruction, selected from `op` applied to an
// arg_t value and producing a vec_t value. The record name is built from the
// node and both types: <op>_<vec_t>_<arg_t>.
//   name   - full assembly mnemonic
//   simdop - SIMD opcode
multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
                       string name, bits<32> simdop> {
  defm op#_#vec_t#_#arg_t :
    SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
           [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
           name#"\t$dst, $vec", name, simdop>;
}

// Floating point to integer with saturation: trunc_sat
defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>;
defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>;

// Integer to floating point: convert
defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>;
defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>;

// Lower llvm.wasm.trunc.saturate.* to saturating instructions
// (the same records that fp_to_sint / fp_to_uint select above).
def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;

// Widening operations
// Custom nodes taking one vector operand and producing one vector result.
def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def widen_low_s  : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>;
def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>;
def widen_low_u  : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>;
def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>;

// Defines the four widening conversions from arg_t to vec_t. Opcodes are
// consecutive: low_s = baseInst, high_s = +1, low_u = +2, high_u = +3.
multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
                     bits<32> baseInst> {
  defm "" : SIMDConvert<vec_t, arg_t, widen_low_s,
                        vec#".widen_low_"#arg#"_s", baseInst>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_high_s,
                        vec#".widen_high_"#arg#"_s", !add(baseInst, 1)>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_low_u,
                        vec#".widen_low_"#arg#"_u", !add(baseInst, 2)>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_high_u,
                        vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>;
}

defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>;
defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>;

// Narrowing operations
// Defines NARROW_S_<vec_t> and NARROW_U_<vec_t>: two arg_t inputs ($low,
// $high) are combined into one vec_t result via the int_wasm_narrow_signed /
// int_wasm_narrow_unsigned intrinsics. The unsigned form's opcode is
// baseInst + 1.
multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg,
                      bits<32> baseInst> {
  defm NARROW_S_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
           [(set (vec_t V128:$dst),
                 (vec_t (int_wasm_narrow_signed
                          (arg_t V128:$low), (arg_t V128:$high))))],
           vec#".narrow_"#arg#"_s\t$dst, $low, $high", vec#".narrow_"#arg#"_s",
           baseInst>;
  defm NARROW_U_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
           [(set (vec_t V128:$dst),
                 (vec_t (int_wasm_narrow_unsigned
                          (arg_t V128:$low), (arg_t V128:$high))))],
           vec#".narrow_"#arg#"_u\t$dst, $low, $high", vec#".narrow_"#arg#"_u",
           !add(baseInst, 1)>;
}

defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>;
defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>;

// Use narrowing operations for truncating stores. Since the narrowing
// operations are saturating instead of truncating, we need to mask
// the stored values first.
// TODO: Use consts instead of splats

// Output fragment for a v8i16 -> v8i8 truncating store: AND the value with a
// splat of 0x00ff00ff, narrow it with NARROW_U_v16i8, and extract 64-bit
// lane 0 of the result.
def store_v8i8_trunc_v8i16 :
  OutPatFrag<(ops node:$val),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v16i8
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x00ff00ff)), node:$val),
                 node:$val // Unused input
               ),
               0
             )>;

// Output fragment for a v4i32 -> v4i16 truncating store: AND the value with a
// splat of 0x0000ffff, narrow it with NARROW_U_v8i16, and extract 64-bit
// lane 0 of the result.
def store_v4i16_trunc_v4i32 :
  OutPatFrag<(ops node:$val),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v8i16
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x0000ffff)), node:$val),
                 node:$val // Unused input
               ),
               0
             )>;

// Store patterns adapted from WebAssemblyInstrMemory.td
// Every multiclass below emits a pair of patterns, one selecting STORE_I64_A32
// (requires HasAddr32) and one selecting STORE_I64_A64 (requires HasAddr64).
// The stored value is always the i64 produced by the `out` fragment, and the
// first store operand is always the literal 0 (presumably p2align, by analogy
// with the load operand order - confirm against WebAssemblyInstrMemory.td).

// Address is a plain register; the offset operand is 0.
multiclass NarrowingStorePatNoOffset<ValueType ty, PatFrag node,
                                     OutPatFrag out> {
  def : Pat<(node ty:$val, I32:$addr),
            (STORE_I64_A32 0, 0, I32:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(node ty:$val, I64:$addr),
            (STORE_I64_A64 0, 0, I64:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatNoOffset<v8i16, truncstorevi8, store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatNoOffset<v4i32, truncstorevi16,
                                 store_v4i16_trunc_v4i32>;

// Address is `operand` applied to (register, immediate); the immediate is
// passed as the store's offset operand.
multiclass NarrowingStorePatImmOff<ValueType ty, PatFrag kind,
                                   PatFrag operand, OutPatFrag out> {
  def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
            (STORE_I64_A32 0, imm:$off, I32:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)),
            (STORE_I64_A64 0, imm:$off, I64:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, regPlusImm,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, regPlusImm,
                               store_v4i16_trunc_v4i32>;
defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, or_is_add,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, or_is_add,
                               store_v4i16_trunc_v4i32>;

// Address is an immediate only; a constant 0 is materialized as the base.
multiclass NarrowingStorePatOffsetOnly<ValueType ty, PatFrag kind,
                                       OutPatFrag out> {
  def : Pat<(kind ty:$val, imm:$off),
            (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(kind ty:$val, imm:$off),
            (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatOffsetOnly<v8i16, truncstorevi8,
                                   store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatOffsetOnly<v4i32, truncstorevi16,
                                   store_v4i16_trunc_v4i32>;

// Address is a wrapped global address, used as the offset operand with a
// constant 0 base. These patterns additionally require IsNotPIC.
multiclass NarrowingStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind,
                                              OutPatFrag out> {
  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A32
              0, tglobaladdr:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
        Requires<[IsNotPIC, HasAddr32]>;
  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A64
              0, tglobaladdr:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
        Requires<[IsNotPIC, HasAddr64]>;
}

defm : NarrowingStorePatGlobalAddrOffOnly<v8i16, truncstorevi8,
                                          store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatGlobalAddrOffOnly<v4i32, truncstorevi16,
                                          store_v4i16_trunc_v4i32>;

// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
// The inner list is the full type list with t1 filtered out by comparing the
// record names as strings.
foreach t1 = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
foreach t2 = !foldl(
  []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
  acc, cur, !if(!eq(!cast<string>(t1), !cast<string>(cur)),
    acc, !listconcat(acc, [cur])
  )
) in
def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;

//===----------------------------------------------------------------------===//
// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS)
//===----------------------------------------------------------------------===//

// Defines QFMA_<vec_t> and QFMS_<vec_t>, three-operand instructions selected
// from the int_wasm_qfma / int_wasm_qfms intrinsics.
//   vec_t   - vector value type; also used as the record-name suffix
//   vec     - assembly type prefix
//   simdopA - SIMD opcode of the qfma form
//   simdopS - SIMD opcode of the qfms form
multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> simdopA,
                   bits<32> simdopS> {
  defm QFMA_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec_t V128:$dst),
                 (int_wasm_qfma
                   (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
           vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", simdopA>;
  defm QFMS_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec_t V128:$dst),
                 (int_wasm_qfms
                   (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
           vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", simdopS>;
}

defm "" : SIMDQFM<v4f32, "f32x4", 180, 212>;
defm "" : SIMDQFM<v2f64, "f64x2", 254, 255>;