// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WebAssembly SIMD operand code-gen constructs.
///
//===----------------------------------------------------------------------===//

// Instructions requiring HasSIMD128 and the simd128 prefix byte
//
// All arguments except `simdop` are forwarded unchanged to the `I` multiclass.
// The opcode passed to `I` is 0xfd00 OR'd with the low 8 bits of `simdop`, and
// every resulting definition carries Requires<[HasSIMD128]>.
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                  list<dag> pattern_r, string asmstr_r = "",
                  string asmstr_s = "", bits<32> simdop = -1> {
  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
              !or(0xfd00, !and(0xff, simdop))>,
            Requires<[HasSIMD128]>;
}

// One ARGUMENT instantiation per vector value type, all using the V128
// register class.
defm "" : ARGUMENT<V128, v16i8>;
defm "" : ARGUMENT<V128, v8i16>;
defm "" : ARGUMENT<V128, v4i32>;
defm "" : ARGUMENT<V128, v2i64>;
defm "" : ARGUMENT<V128, v4f32>;
defm "" : ARGUMENT<V128, v2f64>;

// Constrained immediate argument types
//
// ImmI8 / ImmI16 match i32 immediates in [-(1 << (SIZE-1)), 1 << (SIZE-1)).
// LaneIdx2 .. LaneIdx32 match i32 immediates in [0, SIZE).
foreach SIZE = [8, 16] in
def ImmI#SIZE : ImmLeaf<i32,
  "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));"
>;
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;

//===----------------------------------------------------------------------===//
// Load and store
//===----------------------------------------------------------------------===//

// Load: v128.load
//
// The _A32 and _A64 variants differ only in the offset operand type and the
// address register class. Both are defined with an empty pattern list; the
// selection patterns are attached by the foreach that follows.
let mayLoad = 1, UseNamedOperandTable = 1 in {
defm LOAD_V128_A32 :
  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
defm LOAD_V128_A64 :
  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
}

// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LoadPatNoOffset<vec_t, load, "LOAD_V128">;
defm : LoadPatImmOff<vec_t, load, regPlusImm, "LOAD_V128">;
defm : LoadPatImmOff<vec_t, load, or_is_add, "LOAD_V128">;
defm : LoadPatOffsetOnly<vec_t, load, "LOAD_V128">;
defm : LoadPatGlobalAddrOffOnly<vec_t, load, "LOAD_V128">;
}

// vNxM.load_splat
//
// Defines LOAD_SPLAT_<vec>_A32 and LOAD_SPLAT_<vec>_A64 with opcode `simdop`.
// `vec` is used both in the record name and as the assembly mnemonic prefix.
multiclass SIMDLoadSplat<string vec, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_SPLAT_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off), [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align", simdop>;
  defm LOAD_SPLAT_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off), [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align", simdop>;
  }
}

defm "" : SIMDLoadSplat<"v8x16", 7>;
defm "" : SIMDLoadSplat<"v16x8", 8>;
defm "" : SIMDLoadSplat<"v32x4", 9>;
defm "" : SIMDLoadSplat<"v64x2", 10>;

// Target-specific LOAD_SPLAT node (chained, may load, carries a memory
// operand) and a PatFrag wrapper so it can be passed to the LoadPat*
// multiclasses below.
def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;

// Each entry is [value type, load_splat mnemonic prefix]. The float types
// reuse the v32x4 / v64x2 instructions.
foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
                ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
defm : LoadPatNoOffset<!cast<ValueType>(args[0]),
                       load_splat,
                       "LOAD_SPLAT_"#args[1]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     load_splat,
                     regPlusImm,
                     "LOAD_SPLAT_"#args[1]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     load_splat,
                     or_is_add,
                     "LOAD_SPLAT_"#args[1]>;
defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
                         load_splat,
                         "LOAD_SPLAT_"#args[1]>;
defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
                                load_splat,
                                "LOAD_SPLAT_"#args[1]>;
}

// Load and extend
//
// Defines four instructions per vector type: LOAD_EXTEND_S_* uses opcode
// `simdop` and LOAD_EXTEND_U_* uses `simdop + 1`, each in _A32 and _A64 form.
multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_EXTEND_S_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align", !add(simdop, 1)>;
  defm LOAD_EXTEND_S_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align", !add(simdop, 1)>;
  }
}

defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>;
defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>;
defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>;

// `types` is [result vector type, memory lane type]; `exts` is
// [load fragment name prefix, instruction suffix]. sextloadv* selects the _S
// instruction; zextloadv* and extloadv* both select the _U instruction.
foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in
foreach exts = [["sextloadv", "_S"],
                ["zextloadv", "_U"],
                ["extloadv", "_U"]] in {
defm : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]),
                       "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm,
                     "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
                     "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
                         "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
                                "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
}


// Store: v128.store
//
// As with the loads, the instructions are defined without patterns; the
// foreach below attaches the store patterns for every vector type.
let mayStore = 1, UseNamedOperandTable = 1 in {
defm STORE_V128_A32 :
  SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
defm STORE_V128_A64 :
  SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
defm : StorePatNoOffset<vec_t, store, "STORE_V128">;
defm : StorePatImmOff<vec_t, store, regPlusImm, "STORE_V128">;
defm : StorePatImmOff<vec_t, store, or_is_add, "STORE_V128">;
defm : StorePatOffsetOnly<vec_t, store, "STORE_V128">;
defm : StorePatGlobalAddrOffOnly<vec_t, store, "STORE_V128">;
}

//===----------------------------------------------------------------------===//
// Constructing SIMD values
//===----------------------------------------------------------------------===//

// Constant: v128.const
//
// Defines CONST_V128_<vec_t> (opcode 12). `ops` is the immediate operand list
// used for both the register and stack forms, `pat` is the build_vector dag
// to match, and `args` is the operand portion of the assembly string. The
// definition is made under `let Predicates = [HasUnimplementedSIMD128]`.
multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
  let isMoveImm = 1, isReMaterializable = 1,
      Predicates = [HasUnimplementedSIMD128] in
  defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                  [(set V128:$dst, (vec_t pat))],
                                  "v128.const\t$dst, "#args,
                                  "v128.const\t"#args, 12>;
}

defm "" : ConstVec<v16i8,
                   (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
                        vec_i8imm_op:$i2, vec_i8imm_op:$i3,
                        vec_i8imm_op:$i4, vec_i8imm_op:$i5,
                        vec_i8imm_op:$i6, vec_i8imm_op:$i7,
                        vec_i8imm_op:$i8, vec_i8imm_op:$i9,
                        vec_i8imm_op:$iA, vec_i8imm_op:$iB,
                        vec_i8imm_op:$iC, vec_i8imm_op:$iD,
                        vec_i8imm_op:$iE, vec_i8imm_op:$iF),
                   (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
                                 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
                                 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
                                 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
                   !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
                              "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
defm "" : ConstVec<v8i16,
                   (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
                        vec_i16imm_op:$i2, vec_i16imm_op:$i3,
                        vec_i16imm_op:$i4, vec_i16imm_op:$i5,
                        vec_i16imm_op:$i6, vec_i16imm_op:$i7),
                   (build_vector
                     ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                     ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
// Only the v4i32 form is marked IsCanonical.
let IsCanonical = 1 in
defm "" : ConstVec<v4i32,
                   (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                        vec_i32imm_op:$i2, vec_i32imm_op:$i3),
                   (build_vector (i32 imm:$i0), (i32 imm:$i1),
                                 (i32 imm:$i2), (i32 imm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2i64,
                   (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
                   (build_vector (i64 imm:$i0), (i64 imm:$i1)),
                   "$i0, $i1">;
defm "" : ConstVec<v4f32,
                   (ins f32imm_op:$i0, f32imm_op:$i1,
                        f32imm_op:$i2, f32imm_op:$i3),
                   (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
                                 (f32 fpimm:$i2), (f32 fpimm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2f64,
                   (ins f64imm_op:$i0, f64imm_op:$i1),
                   (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
                   "$i0, $i1">;

// Shuffle lanes: shuffle
//
// Takes two vectors plus sixteen immediate lane indices. The instruction has
// an empty pattern list; it is selected through the wasm_shuffle patterns
// defined after it.
defm SHUFFLE :
  SIMD_I<(outs V128:$dst),
         (ins V128:$x, V128:$y,
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         (outs),
         (ins
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         [],
         "v8x16.shuffle\t$dst, $x, $y, "#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         "v8x16.shuffle\t"#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         13>;

// Shuffles after custom lowering
//
// The SHUFFLE node has 18 operands: the two input vectors followed by sixteen
// lane indices, each constrained by LaneIdx32 (0..31).
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
          (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
}

// Swizzle lanes: v8x16.swizzle
//
// Selected both from the WebAssemblyISD::SWIZZLE node (pattern on the
// instruction) and from the int_wasm_swizzle intrinsic (separate Pat below).
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
defm SWIZZLE :
  SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
         [(set (v16i8 V128:$dst),
           (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
         "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>;

def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
          (SWIZZLE V128:$src, V128:$mask)>;

// Create vector with identical lanes: splat
//
// splatN matches a build_vector whose N operands are all the same node.
def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
def splat4 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x)>;
def splat8 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x,
                       node:$x, node:$x, node:$x, node:$x)>;
def splat16 : PatFrag<(ops node:$x), (build_vector
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x)>;

// Defines SPLAT_<vec_t>, taking a scalar in `reg_t` and matching `splat_pat`.
multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                 PatFrag splat_pat, bits<32> simdop> {
  defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
                             [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
                             vec#".splat\t$dst, $x", vec#".splat", simdop>;
}

defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>;
defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>;
defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>;
defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>;
defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>;
defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>;

// scalar_to_vector leaves high lanes undefined, so can be a splat
class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
                     WebAssemblyRegClass reg_t> :
  Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))),
      (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>;

def : ScalarSplatPat<v16i8, i32, I32>;
def : ScalarSplatPat<v8i16, i32, I32>;
def : ScalarSplatPat<v4i32, i32, I32>;
def : ScalarSplatPat<v2i64, i64, I64>;
def : ScalarSplatPat<v4f32, f32, F32>;
def : ScalarSplatPat<v2f64, f64, F64>;

//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//

// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
//
// Defines EXTRACT_LANE_<vec_t><suffix> with an empty pattern list; the
// selection patterns are the standalone Pat records that follow.
multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                       bits<32> simdop, string suffix = ""> {
  defm EXTRACT_LANE_#vec_t#suffix :
      SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
             (outs), (ins vec_i8imm_op:$idx), [],
             vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
             vec#".extract_lane"#suffix#"\t$idx", simdop>;
}

defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">;
defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">;
defm "" : ExtractLane<v4i32, "i32x4", I32, 27>;
defm "" : ExtractLane<v2i64, "i64x2", I64, 29>;
defm "" : ExtractLane<v4f32, "f32x4", F32, 31>;
defm "" : ExtractLane<v2f64, "f64x2", F64, 33>;

// A bare vector_extract with an in-range constant index selects the _u
// instruction for the i8x16 and i16x8 cases and the unsuffixed instruction
// otherwise.
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
          (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
          (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;

// Fold an explicit sext_inreg or mask (0xff / 0xffff) of an extracted
// i8 / i16 lane into the _s / _u instruction respectively.
def : Pat<
  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
  (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
  (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<
  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
  (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
  (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;

// Replace lane value: replace_lane
//
// Defines REPLACE_LANE_<vec_t>, matching vector_insert with a constant lane
// index constrained by `imm_t`. `reg_t` / `lane_t` give the register class
// and value type of the inserted scalar.
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
                       WebAssemblyRegClass reg_t, ValueType lane_t,
                       bits<32> simdop> {
  defm REPLACE_LANE_#vec_t :
      SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
             (outs), (ins vec_i8imm_op:$idx),
             [(set V128:$dst, (vector_insert
               (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
             vec#".replace_lane\t$dst, $vec, $idx, $x",
             vec#".replace_lane\t$idx", simdop>;
}

defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>;
defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>;
defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>;
defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>;
defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>;
defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>;

// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
          (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
          (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
          (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;

//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//

// Defines <prefix>_<vec_t>, matching setcc with condition code `cond` on two
// `vec_t` operands and producing an `out_t` result.
multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
                         string name, CondCode cond, bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
           [(set (out_t V128:$dst),
             (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
           )],
           vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
}

// Integer comparisons for i8x16, i16x8 and i32x4; opcodes are baseInst,
// baseInst + 10 and baseInst + 20.
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
  defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
                          !add(baseInst, 10)>;
  defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
                          !add(baseInst, 20)>;
}

// Floating-point comparisons for f32x4 and f64x2; opcodes are baseInst and
// baseInst + 6. Results are the integer vector types v4i32 / v2i64.
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
  defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
                          !add(baseInst, 6)>;
}

// Equality: eq
let isCommutable = 1 in {
defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
} // isCommutable = 1

// Non-equality: ne
let isCommutable = 1 in {
defm NE : SIMDConditionInt<"ne", SETNE, 36>;
defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
} // isCommutable = 1

// Less than: lt_s / lt_u / lt
defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
defm LT : SIMDConditionFP<"lt", SETOLT, 67>;

// Greater than: gt_s / gt_u / gt
defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
defm GT : SIMDConditionFP<"gt", SETOGT, 68>;

// Less than or equal: le_s / le_u / le
defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
defm LE : SIMDConditionFP<"le", SETOLE, 69>;

// Greater than or equal: ge_s / ge_u / ge
defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
defm GE : SIMDConditionFP<"ge", SETOGE, 70>;

// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated with nnan and in the
// target-independent expansion of unordered comparisons and ordered ne.
foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32],
                 [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in
def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;

foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64],
                 [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in
def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;


//===----------------------------------------------------------------------===//
// Bitwise operations
//===----------------------------------------------------------------------===//

// Defines <prefix>_<vec_t>: a two-operand instruction matching `node` applied
// to two `vec_t` operands.
multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
                      bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
                        (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (node (vec_t V128:$lhs), (vec_t V128:$rhs))
                        )],
                        vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
                        simdop>;
}

// Instantiates SIMDBinary for every integer vector type with the same opcode
// and the "v128" mnemonic prefix.
multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
  defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
}

// Defines <prefix>_<vec_t>: a one-operand instruction matching `node` applied
// to a `vec_t` operand and producing a `vec_t` result.
multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (vec_t (node (vec_t V128:$vec)))
                        )],
                        vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
}

// Bitwise logic: v128.not
foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 77>;

// Bitwise logic: v128.and / v128.or / v128.xor
let isCommutable = 1 in {
defm AND : SIMDBitwise<and, "and", 78>;
defm OR : SIMDBitwise<or, "or", 80>;
defm XOR : SIMDBitwise<xor, "xor", 81>;
} // isCommutable = 1

// Bitwise logic: v128.andnot
// andnot(left, right) matches left & ~right.
def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;

// Bitwise select: v128.bitselect
// Operand order is (v1, v2, c); selected directly from int_wasm_bitselect.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
  defm BITSELECT_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
           [(set (vec_t V128:$dst),
             (vec_t (int_wasm_bitselect
               (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c)
             ))
           )],
           "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;

// Bitselect is equivalent to (c & v1) | (~c & v2)
foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
  def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
            (and (vnot V128:$c), (vec_t V128:$v2)))),
            (!cast<Instruction>("BITSELECT_"#vec_t)
              V128:$v1, V128:$v2, V128:$c)>;

// Also
// implement vselect in terms of bitselect
//
// Each entry is [value type, condition type]; the float value types take an
// integer condition vector (v4i32 / v2i64).
foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
                 [v4f32, v4i32], [v2f64, v2i64]] in
  def : Pat<(types[0] (vselect
              (types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2)
            )),
            (!cast<Instruction>("BITSELECT_"#types[0])
              V128:$v1, V128:$v2, V128:$c
            )>;

// MVP select on v128 values
//
// Note that SELECT_<vec_t> is defined with `I` directly (opcode 0x1b) rather
// than through SIMD_I, so it gets neither the 0xfd prefix nor the HasSIMD128
// requirement added by that wrapper.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm SELECT_#vec_t : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond),
                       (outs), (ins),
                       [(set V128:$dst,
                         (select I32:$cond,
                           (vec_t V128:$lhs), (vec_t V128:$rhs)
                         )
                       )],
                       "v128.select\t$dst, $lhs, $rhs, $cond",
                       "v128.select", 0x1b>;

// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
def : Pat<(select
            (i32 (setne I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs)
          ),
          (!cast<Instruction>("SELECT_"#vec_t)
            V128:$lhs, V128:$rhs, I32:$cond
          )>;

// And again, this time with seteq instead of setne and the arms reversed.
def : Pat<(select
            (i32 (seteq I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs)
          ),
          (!cast<Instruction>("SELECT_"#vec_t)
            V128:$rhs, V128:$lhs, I32:$cond
          )>;
} // foreach vec_t

//===----------------------------------------------------------------------===//
// Integer unary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates SIMDUnary for i8x16, i16x8, i32x4 and i64x2; opcodes are
// baseInst, baseInst + 32, baseInst + 64 and baseInst + 96.
multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// Defines <prefix>_<vec_t>: takes one vector and produces an i32 in I32,
// matching `op` applied to a `vec_t` operand.
multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
                         bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
                        [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
                        vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
}

// Instantiates SIMDReduceVec for the four integer vector types, using the
// same +32 / +64 / +96 opcode spacing as SIMDUnaryInt.
multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
  defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
  defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>;
  defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>;
  defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>;
}

// Integer vector negation
// Matched as a subtraction from the all-zeros vector.
def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;

// Integer absolute value: abs
defm ABS : SIMDUnaryInt<abs, "abs", 96>;

// Integer negation: neg
defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;

// Any lane true: any_true
defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>;

// All lanes true: all_true
defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>;

// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
// can be folded out
// For each reduction intrinsic and integer vector type, three redundant
// wrappers around the intrinsic result are folded into the bare reduction
// instruction: (and r, 1), (setne r, 0) and (seteq r, 1).
foreach reduction =
  [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
foreach ty = [v16i8, v8i16, v4i32, v2i64] in {
def : Pat<(i32 (and
            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
            (i32 1)
          )),
          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
def : Pat<(i32 (setne
            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
            (i32 0)
          )),
          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
def : Pat<(i32 (seteq
            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
            (i32 1)
          )),
          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
}

// Defines <prefix>_<vec_t>: takes one vector and produces an i32, selected
// from int_wasm_bitmask on a `vec_t` operand.
multiclass SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
                        [(set I32:$dst,
                          (i32 (int_wasm_bitmask (vec_t V128:$vec)))
                        )],
                        vec#".bitmask\t$dst, $vec", vec#".bitmask", simdop>;
}

// Only the i8x16, i16x8 and i32x4 forms are instantiated here.
defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>;
defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>;
defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>;

//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//

// Defines <prefix>_<vec_t>: shifts a vector by a scalar amount held in I32.
multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
                        (outs), (ins),
                        [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))],
                        vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
}

// Instantiates SIMDShift for the four integer vector types; opcodes are
// baseInst, baseInst + 32, baseInst + 64 and baseInst + 96.
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// WebAssembly SIMD shifts are nonstandard
// in that the shift amount is
// an i32 rather than a vector, so they need custom nodes.
//
// The type profile constrains the result to be a vector, the first operand to
// have the same type as the result, and the second operand to be i32.
def wasm_shift_t : SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
>;
def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;

// Left shift by scalar: shl
defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;

// Right shift by scalar: shr_s / shr_u
defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;

//===----------------------------------------------------------------------===//
// Integer binary arithmetic
//===----------------------------------------------------------------------===//

// The multiclasses below instantiate SIMDBinary for different subsets of the
// integer vector types. In all of them the opcode for i8x16 is baseInst and
// the i16x8 / i32x4 / i64x2 opcodes are baseInst + 32 / + 64 / + 96.

// i16x8, i32x4 and i64x2 only.
multiclass SIMDBinaryIntNoI8x16<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// i8x16 and i16x8 only.
multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
}

// i8x16, i16x8 and i32x4.
multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
  defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
}

// All four integer vector types.
multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
  defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// Integer addition: add / add_saturate_s / add_saturate_u
let isCommutable = 1 in {
defm ADD : SIMDBinaryInt<add, "add", 110>;
defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 111>;
defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 112>;
} // isCommutable = 1

// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
defm SUB : SIMDBinaryInt<sub, "sub", 113>;
defm SUB_SAT_S :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 114>;
defm SUB_SAT_U :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 115>;

// Integer multiplication: mul
let isCommutable = 1 in
defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;

// Integer min_s / min_u / max_s / max_u
let isCommutable = 1 in {
defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
} // isCommutable = 1

// Integer unsigned rounding average: avgr_u
let isCommutable = 1 in {
defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>;
defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>;
}

// An `add` node whose flags include no-unsigned-wrap.
def add_nuw : PatFrag<(ops node:$lhs, node:$rhs),
                      (add node:$lhs, node:$rhs),
                      "return N->getFlags().hasNoUnsignedWrap();">;

// Recognize the open-coded form ((lhs + rhs) + splat(1)) >> 1, where both
// additions are add_nuw and the shift is wasm_shr_u, and select AVGR_U.
foreach nodes = [[v16i8, splat16], [v8i16, splat8]] in
def : Pat<(wasm_shr_u
            (add_nuw
              (add_nuw (nodes[0] V128:$lhs), (nodes[0] V128:$rhs)),
              (nodes[1] (i32 1))
            ),
            (i32 1)
          ),
          (!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>;

// Widening dot product: i32x4.dot_i16x8_s
let isCommutable = 1 in
defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
                  [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
                  "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
                  180>;

//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates a unary FP instruction for both shapes: f32x4 at `baseInst`
// and f64x2 at `baseInst + 12`.
multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
  defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}

// Absolute value: abs
defm ABS : SIMDUnaryFP<fabs, "abs", 224>;

// Negation: neg
defm NEG : SIMDUnaryFP<fneg, "neg", 225>;

// Square root: sqrt
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;

// Rounding: ceil, floor, trunc, nearest
// These are instantiated per shape rather than through SIMDUnaryFP; the
// f32x4 forms use opcodes 216-219 and the f64x2 forms use 220-223.
defm CEIL    : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>;
defm FLOOR   : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>;
defm TRUNC   : SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>;
defm NEAREST : SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>;
defm CEIL    : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>;
defm FLOOR   : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>;
defm TRUNC   : SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>;
defm NEAREST : SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>;

//===----------------------------------------------------------------------===//
// Floating-point binary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates a binary FP instruction for both shapes: f32x4 at `baseInst`
// and f64x2 at `baseInst + 12`.
multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
  defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}

// Addition: add
let isCommutable = 1 in {
defm ADD : SIMDBinaryFP<fadd, "add", 228>;
} // isCommutable = 1

// Subtraction: sub
defm SUB : SIMDBinaryFP<fsub, "sub", 229>;

// Multiplication: mul
let isCommutable = 1 in {
defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
} // isCommutable = 1

// Division: div
defm DIV : SIMDBinaryFP<fdiv, "div", 231>;

// NaN-propagating minimum: min
defm MIN : SIMDBinaryFP<fminimum, "min", 232>;

// NaN-propagating maximum: max
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;

// Pseudo-minimum: pmin
defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;

// Pseudo-maximum: pmax
defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;

//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//

// One-operand conversion from `arg_t` to `vec_t` selected from `op`. The
// generated record name is built by pasting `op`, `vec_t` and `arg_t`
// together with underscores (e.g. fp_to_sint_v4i32_v4f32, which the
// trunc_saturate patterns further down refer to by name).
multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
                       string name, bits<32> simdop> {
  defm op#_#vec_t#_#arg_t :
    SIMD_I<(outs V128:$dst), (ins V128:$vec),
           (outs), (ins),
           [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
           name#"\t$dst, $vec",
           name,
           simdop>;
}

// Floating point to integer with saturation: trunc_sat
defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>;
defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>;

// Integer to floating point: convert
defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>;
defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>;

// Lower llvm.wasm.trunc.saturate.* to saturating instructions
def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;

// Widening operations
// Type profile: one vector result, one vector operand.
def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def widen_low_s  : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>;
def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>;
def widen_low_u  : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>;
def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>;

// Emits the four widening conversions from `arg_t` to `vec_t` at consecutive
// opcodes: low_s = baseInst, high_s = +1, low_u = +2, high_u = +3.
multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
                     bits<32> baseInst> {
  defm "" : SIMDConvert<vec_t, arg_t, widen_low_s,
                        vec#".widen_low_"#arg#"_s",
                        baseInst>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_high_s,
                        vec#".widen_high_"#arg#"_s",
                        !add(baseInst, 1)>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_low_u,
                        vec#".widen_low_"#arg#"_u",
                        !add(baseInst, 2)>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_high_u,
                        vec#".widen_high_"#arg#"_u",
                        !add(baseInst, 3)>;
}

defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>;
defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>;

// Narrowing operations
// Emits NARROW_S_<vec_t> at `baseInst` and NARROW_U_<vec_t> at `baseInst + 1`.
// Both take two `arg_t` inputs ($low, $high) and produce one `vec_t` result.
multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg,
                      bits<32> baseInst> {
  defm NARROW_S_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high),
           (outs), (ins),
           [(set (vec_t V128:$dst),
                 (vec_t (int_wasm_narrow_signed
                          (arg_t V128:$low), (arg_t V128:$high))))],
           vec#".narrow_"#arg#"_s\t$dst, $low, $high",
           vec#".narrow_"#arg#"_s",
           baseInst>;
  defm NARROW_U_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high),
           (outs), (ins),
           [(set (vec_t V128:$dst),
                 (vec_t (int_wasm_narrow_unsigned
                          (arg_t V128:$low), (arg_t V128:$high))))],
           vec#".narrow_"#arg#"_u\t$dst, $low, $high",
           vec#".narrow_"#arg#"_u",
           !add(baseInst, 1)>;
}

defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>;
defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>;

// Use narrowing operations for truncating stores. Since the narrowing
// operations are saturating instead of truncating, we need to mask
// the stored values first.
// TODO: Use consts instead of splats
//
// Output fragments that turn a vector into the i64 to be stored: AND the
// input with a splatted mask, narrow it with the unsigned narrowing
// instruction, then extract i64 lane 0 of the narrowed vector.

// v8i16 -> 8 x i8: the splatted 0x00ff00ff constant masks each 16-bit lane
// to its low byte.
def store_v8i8_trunc_v8i16 :
  OutPatFrag<(ops node:$src),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v16i8
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x00ff00ff)), node:$src),
                 node:$src // Unused input
               ),
               0)>;

// v4i32 -> 4 x i16: the splatted 0x0000ffff constant masks each 32-bit lane
// to its low half.
def store_v4i16_trunc_v4i32 :
  OutPatFrag<(ops node:$src),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v8i16
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x0000ffff)), node:$src),
                 node:$src // Unused input
               ),
               0)>;

// Store patterns adapted from WebAssemblyInstrMemory.td
//
// Every multiclass below emits a pair of patterns, one guarded by HasAddr32
// that selects STORE_I64_A32 and one guarded by HasAddr64 that selects
// STORE_I64_A64. The stored value is always `(i64 (out ty:$val))`.
// NOTE(review): the two leading store operands are presumably p2align and
// offset, mirroring the load operand order -- confirm against
// WebAssemblyInstrMemory.td.

// Address is a bare register; both leading immediates are 0.
multiclass NarrowingStorePatNoOffset<ValueType ty, PatFrag node,
                                     OutPatFrag out> {
  def : Pat<(node ty:$val, I32:$addr),
            (STORE_I64_A32 0, 0, I32:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(node ty:$val, I64:$addr),
            (STORE_I64_A64 0, 0, I64:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatNoOffset<v8i16, truncstorevi8, store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatNoOffset<v4i32, truncstorevi16,
                                 store_v4i16_trunc_v4i32>;

// Address is `(operand reg, imm)`; the immediate is forwarded as $off.
multiclass NarrowingStorePatImmOff<ValueType ty, PatFrag kind,
                                   PatFrag operand, OutPatFrag out> {
  def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
            (STORE_I64_A32 0, imm:$off, I32:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)),
            (STORE_I64_A64 0, imm:$off, I64:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, regPlusImm,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, regPlusImm,
                               store_v4i16_trunc_v4i32>;
defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, or_is_add,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, or_is_add,
                               store_v4i16_trunc_v4i32>;

// Address is an immediate only; a constant 0 is supplied as the address
// operand and the immediate is forwarded as $off.
multiclass NarrowingStorePatOffsetOnly<ValueType ty, PatFrag kind,
                                       OutPatFrag out> {
  def : Pat<(kind ty:$val, imm:$off),
            (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(kind ty:$val, imm:$off),
            (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatOffsetOnly<v8i16, truncstorevi8,
                                   store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatOffsetOnly<v4i32, truncstorevi16,
                                   store_v4i16_trunc_v4i32>;

// Address is a wrapped target global address; a constant 0 is supplied as
// the address operand and the global is forwarded as $off. These patterns
// additionally require IsNotPIC.
multiclass NarrowingStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind,
                                              OutPatFrag out> {
  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A32
              0, tglobaladdr:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
        Requires<[IsNotPIC, HasAddr32]>;
  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A64
              0, tglobaladdr:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
        Requires<[IsNotPIC, HasAddr64]>;
}

defm : NarrowingStorePatGlobalAddrOffOnly<v8i16, truncstorevi8,
                                          store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatGlobalAddrOffOnly<v4i32, truncstorevi16,
                                          store_v4i16_trunc_v4i32>;

// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
//
// For each destination type, the !foldl builds the list of the other five
// vector types (comparing by record name), and a bitconvert from each of
// those is selected as the unchanged V128 register.
foreach dst_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
foreach src_t = !foldl(
  []<ValueType>,
  [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
  kept, cand,
  !if(!eq(!cast<string>(dst_t), !cast<string>(cand)),
      kept,
      !listconcat(kept, [cand]))
) in
def : Pat<(dst_t (bitconvert (src_t V128:$v))), (dst_t V128:$v)>;

//===----------------------------------------------------------------------===//
// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS)
//===----------------------------------------------------------------------===//

// Emits QFMA_<vec_t> at `baseInst` and QFMS_<vec_t> at `baseInst + 1`. Both
// take three `vec_t` operands ($a, $b, $c) and are selected from the
// int_wasm_qfma / int_wasm_qfms intrinsics respectively.
multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {
  defm QFMA_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec_t V128:$dst),
                 (int_wasm_qfma
                   (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
           vec#".qfma\t$dst, $a, $b, $c",
           vec#".qfma",
           baseInst>;
  defm QFMS_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
           (outs), (ins),
           [(set (vec_t V128:$dst),
                 (int_wasm_qfms
                   (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
           vec#".qfms\t$dst, $a, $b, $c",
           vec#".qfms",
           !add(baseInst, 1)>;
}

defm "" : SIMDQFM<v4f32, "f32x4", 252>;
defm "" : SIMDQFM<v2f64, "f64x2", 254>;