// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WebAssembly SIMD operand code-gen constructs.
///
//===----------------------------------------------------------------------===//

// Instructions requiring HasSIMD128 and the simd128 prefix byte
//
// Thin wrapper around the `I` multiclass: forwards every operand list, the
// pattern and both asm strings unchanged, and builds the opcode as 0xfd00
// ORed with the low 8 bits of `simdop`.
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                  list<dag> pattern_r, string asmstr_r = "",
                  string asmstr_s = "", bits<32> simdop = -1> {
  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
              !or(0xfd00, !and(0xff, simdop))>,
            Requires<[HasSIMD128]>;
}

// One ARGUMENT instantiation per vector value type, all using V128.
defm "" : ARGUMENT<V128, v16i8>;
defm "" : ARGUMENT<V128, v8i16>;
defm "" : ARGUMENT<V128, v4i32>;
defm "" : ARGUMENT<V128, v2i64>;
defm "" : ARGUMENT<V128, v4f32>;
defm "" : ARGUMENT<V128, v2f64>;

// Constrained immediate argument types
// ImmI<SIZE> accepts immediates in [-(2^(SIZE-1)), 2^(SIZE-1)).
foreach SIZE = [8, 16] in
def ImmI#SIZE : ImmLeaf<i32,
  "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));"
>;
// LaneIdx<SIZE> accepts immediates in [0, SIZE).
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;

//===----------------------------------------------------------------------===//
// Load and store
//===----------------------------------------------------------------------===//

// Load: v128.load
// The _A32 and _A64 variants differ only in the offset operand type and the
// address register class (offset32_op/I32 vs. offset64_op/I64).
let mayLoad = 1, UseNamedOperandTable = 1 in {
defm LOAD_V128_A32 :
  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
defm LOAD_V128_A64 :
  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
}

// Def load patterns from WebAssemblyInstrMemory.td for vector types
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LoadPatNoOffset<vec_t, load, "LOAD_V128">;
defm : LoadPatImmOff<vec_t, load, regPlusImm, "LOAD_V128">;
defm : LoadPatImmOff<vec_t, load, or_is_add, "LOAD_V128">;
defm : LoadPatOffsetOnly<vec_t, load, "LOAD_V128">;
defm : LoadPatGlobalAddrOffOnly<vec_t, load, "LOAD_V128">;
}

// vNxM.load_splat
// Defines LOAD_SPLAT_<vec>_A32 and LOAD_SPLAT_<vec>_A64; both share `simdop`.
multiclass SIMDLoadSplat<string vec, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_SPLAT_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off), [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align", simdop>;
  defm LOAD_SPLAT_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off), [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align", simdop>;
  }
}

defm "" : SIMDLoadSplat<"v8x16", 7>;
defm "" : SIMDLoadSplat<"v16x8", 8>;
defm "" : SIMDLoadSplat<"v32x4", 9>;
defm "" : SIMDLoadSplat<"v64x2", 10>;

// Custom node for splat loads: one result, one pointer-typed operand.
def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;

// args[0] is the value type name, args[1] the instruction name suffix; the
// float types reuse the same-width integer splat instructions.
foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
                ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
defm : LoadPatNoOffset<!cast<ValueType>(args[0]),
                       load_splat,
                       "LOAD_SPLAT_"#args[1]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     load_splat,
                     regPlusImm,
                     "LOAD_SPLAT_"#args[1]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     load_splat,
                     or_is_add,
                     "LOAD_SPLAT_"#args[1]>;
defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
                         load_splat,
                         "LOAD_SPLAT_"#args[1]>;
defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
                                load_splat,
                                "LOAD_SPLAT_"#args[1]>;
}

// Load and extend
// Defines signed (_S) and unsigned (_U) variants for both address widths.
// The unsigned variant uses opcode `simdop + 1`.
multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_EXTEND_S_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align", !add(simdop, 1)>;
  defm LOAD_EXTEND_S_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align", !add(simdop, 1)>;
  }
}

defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>;
defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>;
defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>;

// types[0] is the result vector type, types[1] the memory element type.
// exts pairs a load fragment prefix with the instruction suffix; note that
// the any-extending fragment (extloadv) is selected to the _U instruction.
foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in
foreach exts = [["sextloadv", "_S"],
                ["zextloadv", "_U"],
                ["extloadv", "_U"]] in {
defm : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]),
                       "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm,
                     "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
                     "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
                         "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
                                "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
}

// Load lane into zero vector
// Defines LOAD_ZERO_<vec_t>_A32 and LOAD_ZERO_<vec_t>_A64 with the same opcode.
multiclass SIMDLoadZero<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_ZERO_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"\t$dst, ${off}(${addr})$p2align",
           name#"\t$off$p2align", simdop>;
  defm LOAD_ZERO_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"\t$dst, ${off}(${addr})$p2align",
           name#"\t$off$p2align", simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

// TODO: Also support v4f32 and v2f64 once the instructions are merged
// to the proposal
defm "" : SIMDLoadZero<v4i32, "v128.load32_zero", 252>;
defm "" : SIMDLoadZero<v2i64, "v128.load64_zero", 253>;

// The load-zero instructions are selected from the int_wasm_load{32,64}_zero
// intrinsics using the same addressing-mode pattern multiclasses as the
// plain loads above.
defm : LoadPatNoOffset<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
defm : LoadPatNoOffset<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

defm : LoadPatImmOff<v4i32, int_wasm_load32_zero, regPlusImm, "LOAD_ZERO_v4i32">;
defm : LoadPatImmOff<v2i64, int_wasm_load64_zero, regPlusImm, "LOAD_ZERO_v2i64">;

defm : LoadPatImmOff<v4i32, int_wasm_load32_zero, or_is_add, "LOAD_ZERO_v4i32">;
defm : LoadPatImmOff<v2i64, int_wasm_load64_zero, or_is_add, "LOAD_ZERO_v2i64">;

defm : LoadPatOffsetOnly<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
defm : LoadPatOffsetOnly<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

defm : LoadPatGlobalAddrOffOnly<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
defm : LoadPatGlobalAddrOffOnly<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

// Load lane
// In addition to the usual memory operands, the register form takes a lane
// index immediate ($idx) and the input vector ($vec).
multiclass SIMDLoadLane<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_LANE_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
                I32:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
           [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
           name#"\t$off$p2align, $idx", simdop>;
  defm LOAD_LANE_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
                I64:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
           [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
           name#"\t$off$p2align, $idx", simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

// TODO: Also support v4f32 and v2f64 once the instructions are merged
// to the proposal
defm "" : SIMDLoadLane<v16i8, "v128.load8_lane", 88>;
defm "" : SIMDLoadLane<v8i16, "v128.load16_lane", 89>;
defm "" : SIMDLoadLane<v4i32, "v128.load32_lane", 90>;
defm "" : SIMDLoadLane<v2i64, "v128.load64_lane", 91>;

// Select loads with no constant offset.
// Matches `kind` applied to (address, vector, lane index) and selects the
// corresponding LOAD_LANE_<ty>_A32/_A64 instruction. The two leading zeros
// fill the $p2align and $off operands; `lane_imm_t` constrains the lane index.
multiclass LoadLanePatNoOffset<ValueType ty, PatFrag kind, ImmLeaf lane_imm_t> {
  def : Pat<(ty (kind (i32 I32:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx))),
            (!cast<NI>("LOAD_LANE_"#ty#"_A32") 0, 0, imm:$idx, I32:$addr, V128:$vec)>,
        Requires<[HasAddr32]>;
  def : Pat<(ty (kind (i64 I64:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx))),
            (!cast<NI>("LOAD_LANE_"#ty#"_A64") 0, 0, imm:$idx, I64:$addr, V128:$vec)>,
        Requires<[HasAddr64]>;
}

defm : LoadLanePatNoOffset<v16i8, int_wasm_load8_lane, LaneIdx16>;
defm : LoadLanePatNoOffset<v8i16, int_wasm_load16_lane, LaneIdx8>;
defm : LoadLanePatNoOffset<v4i32, int_wasm_load32_lane, LaneIdx4>;
defm : LoadLanePatNoOffset<v2i64, int_wasm_load64_lane, LaneIdx2>;

// TODO: Also support the other load patterns for load_lane once the instructions
// are merged to the proposal.

// Store: v128.store
// Stores define no results: both variants use an empty (outs) list.
let mayStore = 1, UseNamedOperandTable = 1 in {
defm STORE_V128_A32 :
  SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
defm STORE_V128_A64 :
  SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
}

// Def store patterns from WebAssemblyInstrMemory.td for vector types
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : StorePatNoOffset<vec_t, store, "STORE_V128">;
defm : StorePatImmOff<vec_t, store, regPlusImm, "STORE_V128">;
defm : StorePatImmOff<vec_t, store, or_is_add, "STORE_V128">;
defm : StorePatOffsetOnly<vec_t, store, "STORE_V128">;
defm : StorePatGlobalAddrOffOnly<vec_t, store, "STORE_V128">;
}

// Store lane
// Defines STORE_LANE_<vec_t>_A32 and STORE_LANE_<vec_t>_A64. The register
// form takes the lane index immediate ($idx), the address and the vector to
// read the lane from ($vec).
//
// Like v128.store, a lane store produces no value, so both variants must
// declare an empty (outs) list. The _A64 variant previously declared
// (outs V128:$dst): a result that the asm string never prints and that the
// store-lane selection patterns (which supply exactly the five input
// operands) never define.
multiclass SIMDStoreLane<ValueType vec_t, string name, bits<32> simdop> {
  let mayStore = 1, UseNamedOperandTable = 1 in {
  defm STORE_LANE_#vec_t#_A32 :
    SIMD_I<(outs),
           (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
                I32:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
           [], name#"\t${off}(${addr})$p2align, $vec, $idx",
           name#"\t$off$p2align, $idx", simdop>;
  defm STORE_LANE_#vec_t#_A64 :
    SIMD_I<(outs),
           (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
                I64:$addr, V128:$vec),
           (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
           [], name#"\t${off}(${addr})$p2align, $vec, $idx",
           name#"\t$off$p2align, $idx", simdop>;
  } // mayStore = 1, UseNamedOperandTable = 1
}

// TODO: Also support v4f32 and v2f64 once the instructions are merged
// to the proposal
defm "" : SIMDStoreLane<v16i8, "v128.store8_lane", 92>;
defm "" : SIMDStoreLane<v8i16, "v128.store16_lane", 93>;
defm "" : SIMDStoreLane<v4i32, "v128.store32_lane", 94>;
defm "" : SIMDStoreLane<v2i64, "v128.store64_lane", 95>;

// Select stores with no constant offset.
// Matches `kind` applied to (address, vector, lane index) and selects the
// corresponding STORE_LANE_<ty>_A32/_A64 instruction. The two leading zeros
// fill the $p2align and $off operands; `lane_imm_t` constrains the lane index.
multiclass StoreLanePatNoOffset<ValueType ty, PatFrag kind, ImmLeaf lane_imm_t> {
  def : Pat<(kind (i32 I32:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx)),
            (!cast<NI>("STORE_LANE_"#ty#"_A32")
              0, 0, imm:$idx, I32:$addr, ty:$vec)>,
        Requires<[HasAddr32]>;
  def : Pat<(kind (i64 I64:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx)),
            (!cast<NI>("STORE_LANE_"#ty#"_A64")
              0, 0, imm:$idx, I64:$addr, ty:$vec)>,
        Requires<[HasAddr64]>;
}

defm : StoreLanePatNoOffset<v16i8, int_wasm_store8_lane, LaneIdx16>;
defm : StoreLanePatNoOffset<v8i16, int_wasm_store16_lane, LaneIdx8>;
defm : StoreLanePatNoOffset<v4i32, int_wasm_store32_lane, LaneIdx4>;
defm : StoreLanePatNoOffset<v2i64, int_wasm_store64_lane, LaneIdx2>;

// TODO: Also support the other store patterns for store_lane once the
// instructions are merged to the proposal.

//===----------------------------------------------------------------------===//
// Constructing SIMD values
//===----------------------------------------------------------------------===//

// Constant: v128.const
// `ops` is used as the input operand list of both instruction forms, `pat` is
// the build_vector to match, and `args` is the operand portion of the asm
// string. All variants share opcode 12 and are predicated on
// HasUnimplementedSIMD128.
multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
  let isMoveImm = 1, isReMaterializable = 1,
      Predicates = [HasUnimplementedSIMD128] in
  defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                  [(set V128:$dst, (vec_t pat))],
                                  "v128.const\t$dst, "#args,
                                  "v128.const\t"#args, 12>;
}

defm "" : ConstVec<v16i8,
                   (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
                        vec_i8imm_op:$i2, vec_i8imm_op:$i3,
                        vec_i8imm_op:$i4, vec_i8imm_op:$i5,
                        vec_i8imm_op:$i6, vec_i8imm_op:$i7,
                        vec_i8imm_op:$i8, vec_i8imm_op:$i9,
                        vec_i8imm_op:$iA, vec_i8imm_op:$iB,
                        vec_i8imm_op:$iC, vec_i8imm_op:$iD,
                        vec_i8imm_op:$iE, vec_i8imm_op:$iF),
                   (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
                                 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
                                 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
                                 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
                   !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
                              "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
defm "" : ConstVec<v8i16,
                   (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
                        vec_i16imm_op:$i2, vec_i16imm_op:$i3,
                        vec_i16imm_op:$i4, vec_i16imm_op:$i5,
                        vec_i16imm_op:$i6, vec_i16imm_op:$i7),
                   (build_vector
                     ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                     ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
// Only the v4i32 variant is marked IsCanonical.
let IsCanonical = 1 in
defm "" : ConstVec<v4i32,
                   (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                        vec_i32imm_op:$i2, vec_i32imm_op:$i3),
                   (build_vector (i32 imm:$i0), (i32 imm:$i1),
                                 (i32 imm:$i2), (i32 imm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2i64,
                   (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
                   (build_vector (i64 imm:$i0), (i64 imm:$i1)),
                   "$i0, $i1">;
defm "" : ConstVec<v4f32,
                   (ins f32imm_op:$i0, f32imm_op:$i1,
                        f32imm_op:$i2, f32imm_op:$i3),
                   (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
                                 (f32 fpimm:$i2), (f32 fpimm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2f64,
                   (ins f64imm_op:$i0, f64imm_op:$i1),
                   (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
                   "$i0, $i1">;

// Shuffle lanes: shuffle
// Two input vectors plus sixteen 8-bit immediates ($m0..$mF). The instruction
// itself carries no selection pattern; see the Pat definitions below.
defm SHUFFLE :
  SIMD_I<(outs V128:$dst),
         (ins V128:$x, V128:$y,
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         (outs),
         (ins
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         [],
         "v8x16.shuffle\t$dst, $x, $y, "#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         "v8x16.shuffle\t"#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         13>;

// Shuffles after custom lowering
// The node has 1 result and 18 operands: the two vectors followed by the 16
// mask immediates, each constrained to [0, 32) by LaneIdx32.
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
          (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
}

// Swizzle lanes: v8x16.swizzle
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
defm SWIZZLE :
  SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
         [(set (v16i8 V128:$dst),
           (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
         "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>;

// The swizzle intrinsic selects the same instruction as the custom node.
def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
          (SWIZZLE V128:$src, V128:$mask)>;

// Create vector with identical lanes: splat
// splatN matches a build_vector whose N operands are all the same node.
def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
def splat4 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x)>;
def splat8 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x,
                       node:$x, node:$x, node:$x, node:$x)>;
def splat16 : PatFrag<(ops node:$x), (build_vector
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x)>;

// Defines SPLAT_<vec_t>, which takes a scalar in `reg_t` and is selected from
// the `splat_pat` fragment matching the vector's lane count.
multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                 PatFrag splat_pat, bits<32> simdop> {
  defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
                             [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
                             vec#".splat\t$dst, $x", vec#".splat", simdop>;
}

defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>;
defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>;
defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>;
defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>;
defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>;
defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>;

// scalar_to_vector leaves high lanes undefined, so can be a splat
class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
                     WebAssemblyRegClass reg_t> :
  Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))),
      (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>;

def : ScalarSplatPat<v16i8, i32, I32>;
def : ScalarSplatPat<v8i16, i32, I32>;
def : ScalarSplatPat<v4i32, i32, I32>;
def : ScalarSplatPat<v2i64, i64, I64>;
def : ScalarSplatPat<v4f32, f32, F32>;
def : ScalarSplatPat<v2f64, f64, F64>;

//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//

// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
// `suffix` is appended to both the record name and the mnemonic. The
// instruction has no pattern of its own; selection is done by the Pat
// definitions that follow.
multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                       bits<32> simdop, string suffix = ""> {
  defm EXTRACT_LANE_#vec_t#suffix :
      SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
             (outs), (ins vec_i8imm_op:$idx), [],
             vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
             vec#".extract_lane"#suffix#"\t$idx", simdop>;
}

defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">;
defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">;
defm "" : ExtractLane<v4i32, "i32x4", I32, 27>;
defm "" : ExtractLane<v2i64, "i64x2", I64, 29>;
defm "" : ExtractLane<v4f32, "f32x4", F32, 31>;
defm "" : ExtractLane<v2f64, "f64x2", F64, 33>;

// A plain vector_extract of an i8/i16 lane selects the unsigned (_u) variant.
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
          (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
          (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;

// Fold an explicit sign extension (sext_inreg) or zero extension (and with
// 0xff / 0xffff) of an extracted i8/i16 lane into the _s / _u variant.
def : Pat<
  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
  (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
  (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<
  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
  (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
  (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;

// Replace lane value: replace_lane
// Selected from vector_insert with a lane index constrained by `imm_t`.
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
                       WebAssemblyRegClass reg_t, ValueType lane_t,
                       bits<32> simdop> {
  defm REPLACE_LANE_#vec_t :
      SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
             (outs), (ins vec_i8imm_op:$idx),
             [(set V128:$dst, (vector_insert
               (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
             vec#".replace_lane\t$dst, $vec, $idx, $x",
             vec#".replace_lane\t$idx", simdop>;
}

defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>;
defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>;
defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>;
defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>;
defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>;
defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>;

// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
          (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
          (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
          (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;

//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//

// One comparison instruction: operands have type `vec_t`, the result has type
// `out_t`, and it is selected from setcc with condition code `cond`.
multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
                         string name, CondCode cond, bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
           [(set (out_t V128:$dst),
             (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
           )],
           vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
}

// Integer comparisons: i8x16 at `baseInst`, i16x8 at +10, i32x4 at +20.
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
  defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
                          !add(baseInst, 10)>;
  defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
                          !add(baseInst, 20)>;
}

// Float comparisons: f32x4 at `baseInst`, f64x2 at +6. Results are the
// same-width integer vector types.
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
  defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
                          !add(baseInst, 6)>;
}

// Equality: eq
let isCommutable = 1 in {
defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
} // isCommutable = 1

// Non-equality: ne
let isCommutable = 1 in {
defm NE : SIMDConditionInt<"ne", SETNE, 36>;
defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
} // isCommutable = 1

// Less than: lt_s / lt_u / lt
defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
defm LT : SIMDConditionFP<"lt", SETOLT, 67>;

// Greater than: gt_s / gt_u / gt
defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
defm GT : SIMDConditionFP<"gt", SETOGT, 68>;

// Less than or equal: le_s / le_u / le
defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
defm LE : SIMDConditionFP<"le", SETOLE, 69>;

// Greater than or equal: ge_s / ge_u / ge
defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
defm GE : SIMDConditionFP<"ge", SETOGE, 70>;

// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated with nnan and in the
// target-independent expansion of unordered comparisons and ordered ne.
foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32],
                 [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in
def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;

foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64],
                 [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in
def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;


//===----------------------------------------------------------------------===//
// Bitwise operations
//===----------------------------------------------------------------------===//

// Generic two-operand vector instruction selected from `node` on `vec_t`.
multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
                      bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
                        (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (node (vec_t V128:$lhs), (vec_t V128:$rhs))
                        )],
                        vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
                        simdop>;
}

// Bitwise operations use the "v128" prefix and the same opcode for every
// integer vector type; one record per type is defined so each type has a
// selection pattern.
multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
  defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
}

// Generic one-operand vector instruction selected from `node` on `vec_t`.
multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (vec_t (node (vec_t V128:$vec)))
                        )],
                        vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
}

// Bitwise logic: v128.not
675foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in 676defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 77>; 677 678// Bitwise logic: v128.and / v128.or / v128.xor 679let isCommutable = 1 in { 680defm AND : SIMDBitwise<and, "and", 78>; 681defm OR : SIMDBitwise<or, "or", 80>; 682defm XOR : SIMDBitwise<xor, "xor", 81>; 683} // isCommutable = 1 684 685// Bitwise logic: v128.andnot 686def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; 687defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>; 688 689// Bitwise select: v128.bitselect 690foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in 691 defm BITSELECT_#vec_t : 692 SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), 693 [(set (vec_t V128:$dst), 694 (vec_t (int_wasm_bitselect 695 (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c) 696 )) 697 )], 698 "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>; 699 700// Bitselect is equivalent to (c & v1) | (~c & v2) 701foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in 702 def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)), 703 (and (vnot V128:$c), (vec_t V128:$v2)))), 704 (!cast<Instruction>("BITSELECT_"#vec_t) 705 V128:$v1, V128:$v2, V128:$c)>; 706 707// Also implement vselect in terms of bitselect 708foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64], 709 [v4f32, v4i32], [v2f64, v2i64]] in 710 def : Pat<(types[0] (vselect 711 (types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2) 712 )), 713 (!cast<Instruction>("BITSELECT_"#types[0]) 714 V128:$v1, V128:$v2, V128:$c 715 )>; 716 717// MVP select on v128 values 718foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { 719defm SELECT_#vec_t : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), 720 (outs), (ins), 721 [(set V128:$dst, 722 (select I32:$cond, 723 (vec_t V128:$lhs), (vec_t V128:$rhs) 724 ) 725 )], 726 "v128.select\t$dst, $lhs, $rhs, $cond", 727 "v128.select", 0x1b>; 728 729// ISD::SELECT requires 
its operand to conform to getBooleanContents, but 730// WebAssembly's select interprets any non-zero value as true, so we can fold 731// a setne with 0 into a select. 732def : Pat<(select 733 (i32 (setne I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs) 734 ), 735 (!cast<Instruction>("SELECT_"#vec_t) 736 V128:$lhs, V128:$rhs, I32:$cond 737 )>; 738 739// And again, this time with seteq instead of setne and the arms reversed. 740def : Pat<(select 741 (i32 (seteq I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs) 742 ), 743 (!cast<Instruction>("SELECT_"#vec_t) 744 V128:$rhs, V128:$lhs, I32:$cond 745 )>; 746} // foreach vec_t 747 748//===----------------------------------------------------------------------===// 749// Integer unary arithmetic 750//===----------------------------------------------------------------------===// 751 752multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> { 753 defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>; 754 defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>; 755 defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>; 756 defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>; 757} 758 759multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name, 760 bits<32> simdop> { 761 defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), 762 [(set I32:$dst, (i32 (op (vec_t V128:$vec))))], 763 vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>; 764} 765 766multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> { 767 defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>; 768 defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>; 769 defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>; 770 defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>; 771} 772 773// Integer vector negation 774def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; 775 776// Integer absolute 
// value: abs
defm ABS : SIMDUnaryInt<abs, "abs", 96>;

// Integer negation: neg
defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;

// Any lane true: any_true
defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>;

// All lanes true: all_true
defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>;

// Population count: popcnt (i8x16 only)
defm POPCNT : SIMDUnary<v16i8, "i8x16", int_wasm_popcnt, "popcnt", 124>;

// Reductions already return 0 or 1, so a following `and 1`, `setne 0` or
// `seteq 1` is redundant and is selected to the bare reduction instruction.
foreach reduce_op =
  [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
foreach int_vec_t = [v16i8, v8i16, v4i32, v2i64] in {
  // (reduce x) & 1
  def : Pat<(i32 (and
              (i32 (!cast<Intrinsic>(reduce_op[0]) (int_vec_t V128:$x))),
              (i32 1))),
            (i32 (!cast<NI>(reduce_op[1]#"_"#int_vec_t)
                   (int_vec_t V128:$x)))>;
  // (reduce x) != 0
  def : Pat<(i32 (setne
              (i32 (!cast<Intrinsic>(reduce_op[0]) (int_vec_t V128:$x))),
              (i32 0))),
            (i32 (!cast<NI>(reduce_op[1]#"_"#int_vec_t)
                   (int_vec_t V128:$x)))>;
  // (reduce x) == 1
  def : Pat<(i32 (seteq
              (i32 (!cast<Intrinsic>(reduce_op[0]) (int_vec_t V128:$x))),
              (i32 1))),
            (i32 (!cast<NI>(reduce_op[1]#"_"#int_vec_t)
                   (int_vec_t V128:$x)))>;
}

// Bitmask instruction for one vector type: V128 operand in, I32 result out,
// selected from the int_wasm_bitmask intrinsic.
multiclass SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs I32:$dst), (ins V128:$vec),
           (outs), (ins),
           [(set I32:$dst, (i32 (int_wasm_bitmask (vec_t V128:$vec))))],
           vec#".bitmask\t$dst, $vec",
           vec#".bitmask",
           simdop>;
}

defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>;
defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>;
defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>;

//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//

// Shift instruction for one vector type. The vector being shifted is a V128
// operand and the shift amount is a scalar I32 operand.
multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
           (outs), (ins),
           [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))],
           vec#"."#name#"\t$dst, $vec, $x",
           vec#"."#name,
           simdop>;
}

// Instantiates SIMDShift for every integer lane shape; the wider shapes sit
// at baseInst + 32, + 64 and + 96.
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// WebAssembly SIMD shifts are nonstandard in that the shift amount is
// an i32 rather than a vector, so they need custom nodes.
// Profile: one vector result, a first operand of the same type as the
// result, and an i32 second operand.
def wasm_shift_t :
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def wasm_shl   : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;

// Left shift by scalar: shl
defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;

// Right shift by scalar: shr_s / shr_u
defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;

//===----------------------------------------------------------------------===//
// Integer binary arithmetic
//===----------------------------------------------------------------------===//

// The helpers below instantiate SIMDBinary for different subsets of the
// integer lane shapes. In every helper `baseInst` is the opcode that the
// i8x16 form has (or would have); i16x8, i32x4 and i64x2 are offset by
// 32, 64 and 96.

// i16x8, i32x4 and i64x2 only.
multiclass SIMDBinaryIntNoI8x16<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// i8x16 and i16x8 only.
multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
}

// i8x16, i16x8 and i32x4.
multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
  defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
}

// All four integer lane shapes.
multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
  defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// Integer addition: add / add_saturate_s / add_saturate_u
let isCommutable = 1 in {
  defm ADD       : SIMDBinaryInt<add, "add", 110>;
  defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 111>;
  defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 112>;
} // isCommutable = 1

// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
defm SUB : SIMDBinaryInt<sub, "sub", 113>;
defm SUB_SAT_S :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 114>;
defm SUB_SAT_U :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 115>;

// The multiplication, min/max and rounding-average definitions are all
// commutable, so they share a single `let`.
let isCommutable = 1 in {
  // Integer multiplication: mul
  defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;

  // Integer min_s / min_u / max_s / max_u
  defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
  defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
  defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
  defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;

  // Integer unsigned rounding average: avgr_u
  defm AVGR_U :
    SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>;
  defm AVGR_U :
    SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>;
} // isCommutable = 1

// An `add` whose node flags include no-unsigned-wrap.
def add_nuw : PatFrag<(ops node:$lhs, node:$rhs),
                      (add node:$lhs, node:$rhs),
                      "return N->getFlags().hasNoUnsignedWrap();">;

// Select ((lhs + rhs) + splat(1)) >> 1, where both additions are add_nuw and
// the shift is wasm_shr_u, to the corresponding AVGR_U instruction.
foreach avgr_args = [[v16i8, splat16], [v8i16, splat8]] in
def : Pat<(wasm_shr_u
            (add_nuw
              (add_nuw (avgr_args[0] V128:$lhs), (avgr_args[0] V128:$rhs)),
              (avgr_args[1] (i32 1))),
            (i32 1)),
          (!cast<NI>("AVGR_U_"#avgr_args[0]) V128:$lhs, V128:$rhs)>;

// Widening dot product: i32x4.dot_i16x8_s
let isCommutable = 1 in {
  defm DOT :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
           (outs), (ins),
           [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
           "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs",
           "i32x4.dot_i16x8_s",
           186>;
}

//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates SIMDUnary for f32x4 at `baseInst` and f64x2 at baseInst + 12.
multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
  defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}

// Absolute value: abs
defm ABS : SIMDUnaryFP<fabs, "abs", 224>;

// Negation: neg
defm NEG : SIMDUnaryFP<fneg, "neg", 225>;

// Square root: sqrt
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;

// Rounding: ceil, floor, trunc, nearest
// These do not follow the + 12 spacing of SIMDUnaryFP (the f64x2 forms are
// at + 4), so each shape is spelled out explicitly.
defm CEIL    : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>;
defm FLOOR   : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>;
defm TRUNC   : SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>;
defm NEAREST : SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>;
defm CEIL    : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>;
defm FLOOR   : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>;
defm TRUNC   : SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>;
defm NEAREST : SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>;

//===----------------------------------------------------------------------===//
// Floating-point binary arithmetic
//===----------------------------------------------------------------------===//

// Instantiates SIMDBinary for f32x4 at `baseInst` and f64x2 at baseInst + 12.
multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
  defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}

// Addition: add
let isCommutable = 1 in {
  defm ADD : SIMDBinaryFP<fadd, "add", 228>;
}

// Subtraction: sub
defm SUB : SIMDBinaryFP<fsub, "sub", 229>;

// Multiplication: mul
let isCommutable = 1 in {
  defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
}

// Division: div
defm DIV : SIMDBinaryFP<fdiv, "div", 231>;

// NaN-propagating minimum: min
defm MIN : SIMDBinaryFP<fminimum, "min", 232>;

// NaN-propagating maximum: max
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;

// Pseudo-minimum: pmin
defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;

// Pseudo-maximum: pmax
defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;

//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//

// A V128 -> V128 conversion from `arg_t` to `vec_t` selected from `op`.
// The record is named "<op>_<vec_t>_<arg_t>", and `name` is the complete
// assembly mnemonic.
multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
                       string name, bits<32> simdop> {
  defm op#_#vec_t#_#arg_t :
    SIMD_I<(outs V128:$dst), (ins V128:$vec),
           (outs), (ins),
           [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
           name#"\t$dst, $vec",
           name,
           simdop>;
}

// Floating point to integer with saturation: trunc_sat
defm "" :
  SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>;
defm "" :
  SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>;

// Integer to floating point: convert
defm "" :
  SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>;
defm "" :
  SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>;

// Lower llvm.wasm.trunc.saturate.* to saturating instructions
def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;

// Widening operations
// Profile: one vector result and one vector operand.
def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def widen_low_s  : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>;
def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>;
def widen_low_u  : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>;
def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>;

// The four widening conversions from `arg_t` to `vec_t`, at consecutive
// opcodes baseInst .. baseInst + 3 in the order low_s, high_s, low_u, high_u.
multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
                     bits<32> baseInst> {
  defm "" : SIMDConvert<vec_t, arg_t, widen_low_s,
                        vec#".widen_low_"#arg#"_s",
                        baseInst>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_high_s,
                        vec#".widen_high_"#arg#"_s",
                        !add(baseInst, 1)>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_low_u,
                        vec#".widen_low_"#arg#"_u",
                        !add(baseInst, 2)>;
  defm "" : SIMDConvert<vec_t, arg_t, widen_high_u,
                        vec#".widen_high_"#arg#"_u",
                        !add(baseInst, 3)>;
}

defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>;
defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>;

// Narrowing operations
// Signed and unsigned narrowing of two `arg_t` vectors ($low, $high) into a
// single `vec_t` vector, at opcodes baseInst and baseInst + 1.
multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg,
                      bits<32> baseInst> {
  defm NARROW_S_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high),
           (outs), (ins),
           [(set (vec_t V128:$dst),
              (vec_t (int_wasm_narrow_signed
                (arg_t V128:$low), (arg_t V128:$high))))],
           vec#".narrow_"#arg#"_s\t$dst, $low, $high",
           vec#".narrow_"#arg#"_s",
           baseInst>;
  defm NARROW_U_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high),
           (outs), (ins),
           [(set (vec_t V128:$dst),
              (vec_t (int_wasm_narrow_unsigned
                (arg_t V128:$low), (arg_t V128:$high))))],
           vec#".narrow_"#arg#"_u\t$dst, $low, $high",
           vec#".narrow_"#arg#"_u",
           !add(baseInst, 1)>;
}

defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>;
defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>;

// Use narrowing operations for truncating stores. Since the narrowing
// operations are saturating instead of truncating, we need to mask
// the stored values first.
// TODO: Use consts instead of splats

// v8i16 -> v8i8: mask every 16-bit lane with 0x00ff, narrow, and take
// i64 lane 0 of the result as the value to store.
def store_v8i8_trunc_v8i16 :
  OutPatFrag<(ops node:$val),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v16i8
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x00ff00ff)), node:$val),
                 node:$val // Unused input
               ),
               0)>;

// v4i32 -> v4i16: mask every 32-bit lane with 0x0000ffff, narrow, and take
// i64 lane 0 of the result as the value to store.
def store_v4i16_trunc_v4i32 :
  OutPatFrag<(ops node:$val),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v8i16
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x0000ffff)), node:$val),
                 node:$val // Unused input
               ),
               0)>;

// Store patterns adapted from WebAssemblyInstrMemory.td
// Each multiclass emits one pattern for 32-bit addresses (STORE_I64_A32,
// HasAddr32) and one for 64-bit addresses (STORE_I64_A64, HasAddr64).

// Address held in a register, zero offset.
multiclass NarrowingStorePatNoOffset<ValueType ty, PatFrag node,
                                     OutPatFrag out> {
  def : Pat<(node ty:$val, I32:$addr),
            (STORE_I64_A32 0, 0, I32:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(node ty:$val, I64:$addr),
            (STORE_I64_A64 0, 0, I64:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatNoOffset<v8i16, truncstorevi8,
                                 store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatNoOffset<v4i32, truncstorevi16,
                                 store_v4i16_trunc_v4i32>;

// Address formed by `operand` from a register and an immediate; the
// immediate becomes the store's offset field.
multiclass NarrowingStorePatImmOff<ValueType ty, PatFrag kind,
                                   PatFrag operand, OutPatFrag out> {
  def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
            (STORE_I64_A32 0, imm:$off, I32:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)),
            (STORE_I64_A64 0, imm:$off, I64:$addr, (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, regPlusImm,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, regPlusImm,
                               store_v4i16_trunc_v4i32>;
defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, or_is_add,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, or_is_add,
                               store_v4i16_trunc_v4i32>;

// Address is a bare immediate: it is used as the offset field and the
// address operand is a zero constant.
multiclass NarrowingStorePatOffsetOnly<ValueType ty, PatFrag kind,
                                       OutPatFrag out> {
  def : Pat<(kind ty:$val, imm:$off),
            (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(kind ty:$val, imm:$off),
            (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatOffsetOnly<v8i16, truncstorevi8,
                                   store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatOffsetOnly<v4i32, truncstorevi16,
                                   store_v4i16_trunc_v4i32>;

// Address is a wrapped global address: it is used as the offset field and
// the address operand is a zero constant. Only enabled under IsNotPIC.
multiclass NarrowingStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind,
                                              OutPatFrag out> {
  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A32
              0, tglobaladdr:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
        Requires<[IsNotPIC, HasAddr32]>;
  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A64
              0, tglobaladdr:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
        Requires<[IsNotPIC, HasAddr64]>;
}

defm : NarrowingStorePatGlobalAddrOffOnly<v8i16, truncstorevi8,
                                          store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatGlobalAddrOffOnly<v4i32, truncstorevi16,
                                          store_v4i16_trunc_v4i32>;

// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types.
// The inner list is the full vector type list with the destination type
// filtered out, so only pairs of distinct types get a pattern.
foreach to_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
foreach from_t = !foldl(
    []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
    kept, cand,
    !if(!eq(!cast<string>(to_t), !cast<string>(cand)),
        kept,
        !listconcat(kept, [cand]))
) in
def : Pat<(to_t (bitconvert (from_t V128:$v))), (to_t V128:$v)>;

1170//===----------------------------------------------------------------------===// 1171// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS) 1172//===----------------------------------------------------------------------===// 1173 1174multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> simdopA, 1175 bits<32> simdopS> { 1176 defm QFMA_#vec_t : 1177 SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), 1178 (outs), (ins), 1179 [(set (vec_t V128:$dst), 1180 (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], 1181 vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", simdopA>; 1182 defm QFMS_#vec_t : 1183 SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), 1184 (outs), (ins), 1185 [(set (vec_t V128:$dst), 1186 (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], 1187 vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", simdopS>; 1188} 1189 1190defm "" : SIMDQFM<v4f32, "f32x4", 180, 212>; 1191defm "" : SIMDQFM<v2f64, "f64x2", 254, 255>; 1192 1193//===----------------------------------------------------------------------===// 1194// Saturating Rounding Q-Format Multiplication 1195//===----------------------------------------------------------------------===// 1196 1197defm Q15MULR_SAT_S : 1198 SIMDBinary<v8i16, "i16x8", int_wasm_q15mulr_saturate_signed, "q15mulr_sat_s", 1199 156>; 1200