// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WebAssembly SIMD operand code-gen constructs.
///
//===----------------------------------------------------------------------===//

// Instructions requiring HasSIMD128 and the simd128 prefix byte.
//
// Forwards all operand lists, the pattern and both assembly strings to the `I`
// multiclass unchanged. The opcode passed on is 0xfd00 OR'ed with the low
// eight bits of `simdop`, and every resulting record is predicated on
// HasSIMD128. With the default `simdop = -1` the forwarded opcode is 0xfdff.
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                  list<dag> pattern_r, string asmstr_r = "",
                  string asmstr_s = "", bits<32> simdop = -1> {
  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
              !or(0xfd00, !and(0xff, simdop))>,
            Requires<[HasSIMD128]>;
}

// Instantiate ARGUMENT (defined outside this file) once per 128-bit vector
// value type; all of them use the V128 register class.
defm "" : ARGUMENT<V128, v16i8>;
defm "" : ARGUMENT<V128, v8i16>;
defm "" : ARGUMENT<V128, v4i32>;
defm "" : ARGUMENT<V128, v2i64>;
defm "" : ARGUMENT<V128, v4f32>;
defm "" : ARGUMENT<V128, v2f64>;

// Constrained immediate argument types

// ImmI8 / ImmI16: match an i32 immediate only if masking it to its low SIZE
// bits leaves it unchanged, i.e. the value lies in [0, 2^SIZE). A negative Imm
// has high bits set after the uint64_t cast and is therefore rejected.
foreach SIZE = [8, 16] in
def ImmI#SIZE : ImmLeaf<i32,
  "return ((uint64_t)Imm & ((1UL << "#SIZE#") - 1)) == (uint64_t)Imm;"
>;
// LaneIdx<N>: match an i32 immediate in the half-open range [0, N).
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;

//===----------------------------------------------------------------------===//
// Constructing SIMD values
//===----------------------------------------------------------------------===//

// Constant: v128.const
//
// Defines CONST_V128_<vec_t> with SIMD opcode 0.
//   vec_t - vector value type the selection pattern produces
//   ops   - immediate input operands (used for both instruction forms)
//   pat   - DAG fragment matched as the constant, typed as vec_t
//   args  - operand list appended to the "v128.const" assembly strings
// The instruction is flagged isMoveImm and isReMaterializable.
multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
  let isMoveImm = 1, isReMaterializable = 1 in
  defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                  [(set V128:$dst, (vec_t pat))],
                                  "v128.const\t$dst, "#args,
                                  "v128.const\t"#args, 0>;
}

// v128.const instantiations, one per vector type. The i8 and i16 lanes are
// constrained with ImmI8 / ImmI16; the other types match any imm / fpimm of
// the lane type.
defm "" : ConstVec<v16i8,
                   (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
                        vec_i8imm_op:$i2, vec_i8imm_op:$i3,
                        vec_i8imm_op:$i4, vec_i8imm_op:$i5,
                        vec_i8imm_op:$i6, vec_i8imm_op:$i7,
                        vec_i8imm_op:$i8, vec_i8imm_op:$i9,
                        vec_i8imm_op:$iA, vec_i8imm_op:$iB,
                        vec_i8imm_op:$iC, vec_i8imm_op:$iD,
                        vec_i8imm_op:$iE, vec_i8imm_op:$iF),
                   (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
                                 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
                                 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
                                 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
                   !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
                              "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
defm "" : ConstVec<v8i16,
                   (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
                        vec_i16imm_op:$i2, vec_i16imm_op:$i3,
                        vec_i16imm_op:$i4, vec_i16imm_op:$i5,
                        vec_i16imm_op:$i6, vec_i16imm_op:$i7),
                   (build_vector
                     ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                     ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
defm "" : ConstVec<v4i32,
                   (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                        vec_i32imm_op:$i2, vec_i32imm_op:$i3),
                   (build_vector (i32 imm:$i0), (i32 imm:$i1),
                                 (i32 imm:$i2), (i32 imm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2i64,
                   (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
                   (build_vector (i64 imm:$i0), (i64 imm:$i1)),
                   "$i0, $i1">;
defm "" : ConstVec<v4f32,
                   (ins f32imm_op:$i0, f32imm_op:$i1,
                        f32imm_op:$i2, f32imm_op:$i3),
                   (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
                                 (f32 fpimm:$i2), (f32 fpimm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2f64,
                   (ins f64imm_op:$i0, f64imm_op:$i1),
                   (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
                   "$i0, $i1">;

// Create vector with identical lanes: splat
//
// splatN matches a build_vector whose N operands are all the same node $x.
def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
def splat4 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x)>;
def splat8 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x,
                       node:$x, node:$x, node:$x, node:$x)>;
def splat16 : PatFrag<(ops node:$x), (build_vector
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x)>;

// Defines SPLAT_<vec_t>: "<vec>.splat", taking one scalar in register class
// reg_t and producing a V128 matched through splat_pat.
//   vec_t     - resulting vector value type
//   vec       - mnemonic prefix (e.g. "i8x16")
//   reg_t     - register class of the scalar operand
//   splat_pat - splatN fragment with the lane count of vec_t
//   simdop    - SIMD opcode passed to SIMD_I
multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                 PatFrag splat_pat, bits<32> simdop> {
  // Prefer splats over v128.const for const splats (65 is lowest that works)
  let AddedComplexity = 65 in
  defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
                             [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
                             vec#".splat\t$dst, $x", vec#".splat", simdop>;
}

// Splat instantiations, SIMD opcodes 3 through 8. The i8x16 and i16x8 splats
// take their scalar from an I32 register.
defm "" : Splat<v16i8, "i8x16", I32, splat16, 3>;
defm "" : Splat<v8i16, "i16x8", I32, splat8, 4>;
defm "" : Splat<v4i32, "i32x4", I32, splat4, 5>;
defm "" : Splat<v2i64, "i64x2", I64, splat2, 6>;
defm "" : Splat<v4f32, "f32x4", F32, splat4, 7>;
defm "" : Splat<v2f64, "f64x2", F64, splat2, 8>;

//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//

// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
//
// Defines EXTRACT_LANE_<vec_t><suffix>.
//   vec_t   - vector value type being read
//   vec     - mnemonic prefix (e.g. "i8x16")
//   imm_t   - ImmLeaf constraining the lane index in the pattern
//   reg_t   - register class of the scalar result
//   simdop  - SIMD opcode passed to SIMD_I
//   suffix  - appended to both the record name and the mnemonic
//   extract - node matched for the extraction; defaults to vector_extract
multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
                       WebAssemblyRegClass reg_t, bits<32> simdop,
                       string suffix = "", SDNode extract = vector_extract> {
  defm EXTRACT_LANE_#vec_t#suffix :
      SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
             (outs), (ins vec_i8imm_op:$idx),
             [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
             vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
             vec#".extract_lane"#suffix#"\t$idx", simdop>;
}

// Pattern fragments for sign- and zero-extending lane extraction.
//   <name>_s - vector_extract wrapped in sext_inreg from lane_t
//   <name>_u - vector_extract AND'ed with the constant `mask`
multiclass ExtractPat<ValueType lane_t, int mask> {
  def _s : PatFrag<(ops node:$vec, node:$idx),
                   (i32 (sext_inreg
                     (i32 (vector_extract
                       node:$vec,
                       node:$idx
                     )),
                     lane_t
                   ))>;
  def _u : PatFrag<(ops node:$vec, node:$idx),
                   (i32 (and
                     (i32 (vector_extract
                       node:$vec,
                       node:$idx
                     )),
                     (i32 mask)
                   ))>;
}

// Creates extract_i8x16_s / _u and extract_i16x8_s / _u.
defm extract_i8x16 : ExtractPat<i8, 0xff>;
defm extract_i16x8 : ExtractPat<i16, 0xffff>;

// Defines the extending extract_lane variants for i8x16 and i16x8. `sign` is
// "_s" or "_u" and selects the matching extract_* fragment by name. The i16x8
// variant uses opcode baseInst + 2, so the "_s" (base 9) and "_u" (base 10)
// instantiations below yield opcodes 9/11 and 10/12 respectively.
multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
  defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
                        !cast<PatFrag>("extract_i8x16"#sign)>;
  defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 2), sign,
                        !cast<PatFrag>("extract_i16x8"#sign)>;
}

defm "" : ExtractLaneExtended<"_s", 9>;
defm "" : ExtractLaneExtended<"_u", 10>;
// Lane types that need no extension use plain vector_extract.
defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 14>;
defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 15>;
defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 16>;

// Follow convention of making implicit expansions unsigned: a bare
// vector_extract from v16i8 / v8i16 (no sext_inreg or mask around it) selects
// the "_u" instruction.
def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
          (EXTRACT_LANE_v16i8_u V128:$vec, (i32 LaneIdx16:$idx))>;
def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
          (EXTRACT_LANE_v8i16_u V128:$vec, (i32 LaneIdx8:$idx))>;

// Lower undef lane indices to zero
//
// Each pattern mirrors one extract form above, with the index replaced by
// undef on the input side and by lane 0 on the output side.
def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
          (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
          (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
          (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
          (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
          (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
          (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
def : Pat<(vector_extract (v4i32 V128:$vec), undef),
          (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
def : Pat<(vector_extract (v2i64 V128:$vec), undef),
          (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
def : Pat<(vector_extract (v4f32 V128:$vec), undef),
          (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
def : Pat<(vector_extract (v2f64 V128:$vec), undef),
          (EXTRACT_LANE_v2f64 V128:$vec, 0)>;

// Replace lane value: replace_lane
//
// Defines REPLACE_LANE_<vec_t>, matched from vector_insert.
//   vec_t  - vector value type being modified
//   vec    - mnemonic prefix (e.g. "i8x16")
//   imm_t  - ImmLeaf constraining the lane index in the pattern
//   reg_t  - register class of the scalar being inserted
//   lane_t - value type of the scalar being inserted
//   simdop - SIMD opcode passed to SIMD_I
// Note the instruction operand order is (vec, idx, x), while vector_insert in
// the pattern takes (vec, x, idx).
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
                       WebAssemblyRegClass reg_t, ValueType lane_t,
                       bits<32> simdop> {
  defm REPLACE_LANE_#vec_t :
      SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
             (outs), (ins vec_i8imm_op:$idx),
             [(set V128:$dst, (vector_insert
               (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
             vec#".replace_lane\t$dst, $vec, $idx, $x",
             vec#".replace_lane\t$idx", simdop>;
}

// ReplaceLane instantiations, SIMD opcodes 17 through 22. The i8x16 and i16x8
// variants take the new lane value as an i32 in an I32 register.
defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 17>;
defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 18>;
defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 19>;
defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 20>;
defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 21>;
defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 22>;

// Lower undef lane indices to zero: a vector_insert with an undef index is
// selected as a replace_lane of lane 0.
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
          (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
          (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
          (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;

// Arbitrary other BUILD_VECTOR patterns
//
// A build_vector of sixteen register operands is lowered to a splat of $x0
// (which fills lane 0) followed by one replace_lane per remaining lane. The
// innermost REPLACE_LANE sets lane 1 and the outermost sets lane 15.
def : Pat<(v16i8 (build_vector
            (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
            (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7),
            (i32 I32:$x8), (i32 I32:$x9), (i32 I32:$x10), (i32 I32:$x11),
            (i32 I32:$x12), (i32 I32:$x13), (i32 I32:$x14), (i32 I32:$x15)
          )),
          (v16i8 (REPLACE_LANE_v16i8
            (v16i8 (REPLACE_LANE_v16i8
              (v16i8 (REPLACE_LANE_v16i8
                (v16i8 (REPLACE_LANE_v16i8
                  (v16i8 (REPLACE_LANE_v16i8
                    (v16i8 (REPLACE_LANE_v16i8
                      (v16i8 (REPLACE_LANE_v16i8
                        (v16i8 (REPLACE_LANE_v16i8
                          (v16i8 (REPLACE_LANE_v16i8
                            (v16i8 (REPLACE_LANE_v16i8
                              (v16i8 (REPLACE_LANE_v16i8
                                (v16i8 (REPLACE_LANE_v16i8
                                  (v16i8 (REPLACE_LANE_v16i8
                                    (v16i8 (REPLACE_LANE_v16i8
                                      (v16i8 (REPLACE_LANE_v16i8
                                        (v16i8 (SPLAT_v16i8 (i32 I32:$x0))),
                                        1, I32:$x1
                                      )),
                                      2, I32:$x2
                                    )),
                                    3, I32:$x3
                                  )),
                                  4, I32:$x4
                                )),
                                5, I32:$x5
                              )),
                              6, I32:$x6
                            )),
                            7, I32:$x7
                          )),
                          8, I32:$x8
                        )),
                        9, I32:$x9
                      )),
                      10, I32:$x10
                    )),
                    11, I32:$x11
                  )),
                  12, I32:$x12
                )),
                13, I32:$x13
              )),
              14, I32:$x14
            )),
            15, I32:$x15
          ))>;
// v8i16 build_vector of eight register operands: splat $x0, then replace
// lanes 1 through 7 from the innermost to the outermost REPLACE_LANE.
def : Pat<(v8i16 (build_vector
            (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
            (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7)
          )),
          (v8i16 (REPLACE_LANE_v8i16
            (v8i16 (REPLACE_LANE_v8i16
              (v8i16 (REPLACE_LANE_v8i16
                (v8i16 (REPLACE_LANE_v8i16
                  (v8i16 (REPLACE_LANE_v8i16
                    (v8i16 (REPLACE_LANE_v8i16
                      (v8i16 (REPLACE_LANE_v8i16
                        (v8i16 (SPLAT_v8i16 (i32 I32:$x0))),
                        1, I32:$x1
                      )),
                      2, I32:$x2
                    )),
                    3, I32:$x3
                  )),
                  4, I32:$x4
                )),
                5, I32:$x5
              )),
              6, I32:$x6
            )),
            7, I32:$x7
          ))>;
// build_vector of register operands for the 4-lane and 2-lane types: splat
// $x0, then replace each remaining lane in increasing lane order.
def : Pat<(v4i32 (build_vector
            (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3)
          )),
          (v4i32 (REPLACE_LANE_v4i32
            (v4i32 (REPLACE_LANE_v4i32
              (v4i32 (REPLACE_LANE_v4i32
                (v4i32 (SPLAT_v4i32 (i32 I32:$x0))),
                1, I32:$x1
              )),
              2, I32:$x2
            )),
            3, I32:$x3
          ))>;
def : Pat<(v2i64 (build_vector (i64 I64:$x0), (i64 I64:$x1))),
          (v2i64 (REPLACE_LANE_v2i64
            (v2i64 (SPLAT_v2i64 (i64 I64:$x0))), 1, I64:$x1))>;
def : Pat<(v4f32 (build_vector
            (f32 F32:$x0), (f32 F32:$x1), (f32 F32:$x2), (f32 F32:$x3)
          )),
          (v4f32 (REPLACE_LANE_v4f32
            (v4f32 (REPLACE_LANE_v4f32
              (v4f32 (REPLACE_LANE_v4f32
                (v4f32 (SPLAT_v4f32 (f32 F32:$x0))),
                1, F32:$x1
              )),
              2, F32:$x2
            )),
            3, F32:$x3
          ))>;
def : Pat<(v2f64 (build_vector (f64 F64:$x0), (f64 F64:$x1))),
          (v2f64 (REPLACE_LANE_v2f64
            (v2f64 (SPLAT_v2f64 (f64 F64:$x0))), 1, F64:$x1))>;

// Shuffle lanes: shuffle
//
// v8x16.shuffle takes two V128 inputs and sixteen immediate operands
// ($m0..$mF), SIMD opcode 23. The instruction carries no selection pattern of
// its own; it is selected through the wasm_shuffle patterns in this file.
defm SHUFFLE_v16i8 :
  SIMD_I<(outs V128:$dst),
         (ins V128:$x, V128:$y,
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         (outs),
         (ins
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         [],
         "v8x16.shuffle\t$dst, $x, $y, "#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         "v8x16.shuffle\t"#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         23>;

// Shuffles after custom lowering
//
// WebAssemblyISD::SHUFFLE has one result and eighteen operands: the two input
// vectors followed by sixteen lane indices. For every input vector type the
// node is selected as SHUFFLE_v16i8, with each index constrained to [0, 32)
// by LaneIdx32.
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
def : Pat<(v16i8 (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
          (v16i8 (SHUFFLE_v16i8 (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
}

//===----------------------------------------------------------------------===//
// Integer arithmetic
//===----------------------------------------------------------------------===//

// Generic two-operand vector instruction. The record name is the enclosing
// defm name followed by "_<vec_t>"; the mnemonic is "<vec>.<name>".
//   vec_t  - value type of both inputs and of the result
//   vec    - mnemonic prefix (e.g. "i8x16" or "v128")
//   node   - DAG node matched for the operation
//   name   - mnemonic suffix
//   simdop - SIMD opcode passed to SIMD_I
multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
                      bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
                        (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (node (vec_t V128:$lhs), (vec_t V128:$rhs))
                        )],
                        vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
                        simdop>;
}

// Binary integer operation on i8x16, i16x8 and i32x4, using consecutive
// opcodes baseInst, baseInst + 1 and baseInst + 2.
multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 1)>;
  defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 2)>;
}

// As SIMDBinaryIntNoI64x2, plus the i64x2 variant at baseInst + 3.
multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
  defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 3)>;
}

// Integer vector negation: matched as a subtraction from the all-zeros vector
def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;

// Integer addition: add (opcodes 24-27)
let isCommutable = 1 in
defm ADD : SIMDBinaryInt<add, "add", 24>;

// Integer subtraction: sub (opcodes 28-31)
defm SUB : SIMDBinaryInt<sub, "sub", 28>;

// Integer multiplication: mul (opcodes 32-34; no i64x2 variant is defined)
defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 32>;

// Integer negation: neg
//
// Defines NEG_<vec_t>: a single-operand "<vec>.neg" instruction matched from
// the node `neg`. The multiclass itself does not depend on the lane kind; the
// instantiations below cover the integer types via ivneg.
multiclass SIMDNeg<ValueType vec_t, string vec, SDNode neg, bits<32> simdop> {
  defm NEG_#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
                           [(set (vec_t V128:$dst),
                             (vec_t (neg (vec_t V128:$vec)))
                           )],
                           vec#".neg\t$dst, $vec", vec#".neg", simdop>;
}

defm "" : SIMDNeg<v16i8, "i8x16", ivneg, 36>;
defm "" : SIMDNeg<v8i16, "i16x8", ivneg, 37>;
defm "" : SIMDNeg<v4i32, "i32x4", ivneg, 38>;
defm "" : SIMDNeg<v2i64, "i64x2", ivneg, 39>;

//===----------------------------------------------------------------------===//
// Saturating integer arithmetic
//===----------------------------------------------------------------------===//

// Saturating operations are defined for i8x16 and i16x8 only. The i16x8
// variant sits at baseInst + 2, so a signed/unsigned pair instantiated at
// adjacent bases occupies four consecutive opcodes without overlap.
multiclass SIMDBinarySat<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 2)>;
}

// Saturating integer addition: add_saturate_s / add_saturate_u
let isCommutable = 1 in {
defm ADD_SAT_S :
  SIMDBinarySat<int_wasm_add_saturate_signed, "add_saturate_s", 40>;
defm ADD_SAT_U :
  SIMDBinarySat<int_wasm_add_saturate_unsigned, "add_saturate_u", 41>;
} // isCommutable = 1

// Saturating integer subtraction: sub_saturate_s / sub_saturate_u
defm SUB_SAT_S :
  SIMDBinarySat<int_wasm_sub_saturate_signed, "sub_saturate_s", 44>;
defm SUB_SAT_U :
  SIMDBinarySat<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 45>;

//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//

// Vector shift by a scalar amount held in an I32 register ($x). The pattern
// matches `node` applied to the vector and `shift_vec`, which the callers
// build as a splat of $x.
multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec,
                     string name, bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
                        (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (node V128:$vec, (vec_t shift_vec)))],
                        vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
}

// Instantiates SIMDShift for all four integer vector types. When `skip` is
// nonzero the per-type opcode stride is 2 (baseInst, +2, +4, +6); otherwise it
// is 1 (baseInst, +1, +2, +3). For i64x2 the splatted amount is the
// zero-extension of $x to i64.
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst, int skip> {
  defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>;
  defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name,
                      !add(baseInst, !if(skip, 2, 1))>;
  defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name,
                      !add(baseInst, !if(skip, 4, 2))>;
  defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))),
                      name, !add(baseInst, !if(skip, 6, 3))>;
}

// Left shift by scalar: shl (opcodes 48-51)
defm SHL : SIMDShiftInt<shl, "shl", 48, 0>;

// Right shift by scalar: shr_s / shr_u
// Both use skip = 1, so shr_s takes 52/54/56/58 and shr_u takes 53/55/57/59.
defm SHR_S : SIMDShiftInt<sra, "shr_s", 52, 1>;
defm SHR_U : SIMDShiftInt<srl, "shr_u", 53, 1>;

// Truncate i64 shift operands to i32s: a v2i64 shift by a splatted I64 amount
// wraps the amount to i32 with I32_WRAP_I64 before the shift instruction.
foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
          (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;

// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping
//
// The custom nodes take a vector (same type as the result) and an i32 shift
// amount, and are selected directly as the corresponding v2i64 instruction.
def wasm_shift_t : SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
>;
def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
foreach shifts = [[wasm_shl, SHL_v2i64],
                  [wasm_shr_s, SHR_S_v2i64],
                  [wasm_shr_u, SHR_U_v2i64]] in
def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)),
          (v2i64 (shifts[1] (v2i64 V128:$vec), I32:$x))>;

//===----------------------------------------------------------------------===//
// Bitwise operations
//===----------------------------------------------------------------------===//

// Bitwise operations are lane-agnostic: all four integer vector types share
// the same "v128.<name>" mnemonic and the same opcode, and differ only in the
// value type used by the selection pattern.
multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
  defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
}

// Bitwise logic: v128.and / v128.or / v128.xor
let isCommutable = 1 in {
defm AND : SIMDBitwise<and, "and", 60>;
defm OR : SIMDBitwise<or, "or", 61>;
defm XOR : SIMDBitwise<xor, "xor", 62>;
} // isCommutable = 1

// Bitwise logic: v128.not
//
// Defines NOT_<vec_t>, matched from vnot, with opcode 63 for every type.
multiclass SIMDNot<ValueType vec_t> {
  defm NOT_#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
                           [(set (vec_t V128:$dst), (vec_t (vnot V128:$vec)))],
                           "v128.not\t$dst, $vec", "v128.not", 63>;
}

defm "" : SIMDNot<v16i8>;
defm "" : SIMDNot<v8i16>;
defm "" : SIMDNot<v4i32>;
defm "" : SIMDNot<v2i64>;

// Bitwise select: v128.bitselect
//
// Defines BITSELECT_<vec_t> with opcode 64, matched from int_wasm_bitselect.
// Note the operand order differs: the instruction takes (v1, v2, c), while
// the intrinsic in the pattern takes (c, v1, v2).
multiclass Bitselect<ValueType vec_t> {
  defm BITSELECT_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
           [(set (vec_t V128:$dst),
             (vec_t (int_wasm_bitselect
               (vec_t V128:$c), (vec_t V128:$v1), (vec_t V128:$v2)
             ))
           )],
           "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 64>;
}

foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
defm "" : Bitselect<vec_t>;

// Bitselect is equivalent to (c & v1) | (~c & v2)
// This and/or/vnot form is matched for the integer vector types only.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
  def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
              (and (vnot V128:$c), (vec_t V128:$v2)))),
            (!cast<Instruction>("BITSELECT_"#vec_t)
              V128:$v1, V128:$v2, V128:$c)>;

//===----------------------------------------------------------------------===//
// Boolean horizontal reductions
//===----------------------------------------------------------------------===//

// Reduction from a vector to an i32 result in an I32 register, matched from
// the node `op` applied to a vec_t operand.
multiclass SIMDReduceVec<ValueType vec_t, string vec, string name, SDNode op,
                         bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
                        [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
                        vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
}

// Instantiates SIMDReduceVec for the four integer vector types at consecutive
// opcodes baseInst through baseInst + 3.
multiclass SIMDReduce<string name, SDNode op, bits<32> baseInst> {
  defm "" : SIMDReduceVec<v16i8, "i8x16", name, op, baseInst>;
  defm "" : SIMDReduceVec<v8i16, "i16x8", name, op, !add(baseInst, 1)>;
  defm "" : SIMDReduceVec<v4i32, "i32x4", name, op, !add(baseInst, 2)>;
  defm "" : SIMDReduceVec<v2i64, "i64x2", name, op, !add(baseInst, 3)>;
}

// Any lane true: any_true
defm ANYTRUE : SIMDReduce<"any_true", int_wasm_anytrue, 65>;

// All lanes true: all_true
defm ALLTRUE : SIMDReduce<"all_true", int_wasm_alltrue, 69>;

//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//

// Lane-wise comparison matched from setcc with condition code `cond`.
//   vec_t  - value type of the two inputs
//   out_t  - value type of the result vector
//   vec    - mnemonic prefix
//   name   - mnemonic suffix
//   simdop - SIMD opcode passed to SIMD_I
multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
                         string name, CondCode cond, bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
           [(set (out_t V128:$dst),
             (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
           )],
           vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
}

// Integer comparisons on i8x16, i16x8 and i32x4; the result type equals the
// input type. Opcodes are baseInst, baseInst + step and baseInst + 2 * step.
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst,
                            int step = 1> {
  defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
  defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
                          !add(baseInst, step)>;
  defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
                          !add(!add(baseInst, step), step)>;
}

// Floating-point comparisons; the result is the integer vector type with the
// same lane count (v4i32 for v4f32, v2i64 for v2f64). Opcodes are baseInst and
// baseInst + 1.
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
  defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
                          !add(baseInst, 1)>;
}

// Comparison instantiations. The signed/unsigned integer pairs are given
// adjacent base opcodes and step = 2 so their opcodes interleave.

// Equality: eq
let isCommutable = 1 in {
defm EQ : SIMDConditionInt<"eq", SETEQ, 73>;
defm EQ : SIMDConditionFP<"eq", SETOEQ, 77>;
} // isCommutable = 1

// Non-equality: ne
let isCommutable = 1 in {
defm NE : SIMDConditionInt<"ne", SETNE, 79>;
defm NE : SIMDConditionFP<"ne", SETUNE, 83>;
} // isCommutable = 1

// Less than: lt_s / lt_u / lt
defm LT_S : SIMDConditionInt<"lt_s", SETLT, 85, 2>;
defm LT_U : SIMDConditionInt<"lt_u", SETULT, 86, 2>;
defm LT : SIMDConditionFP<"lt", SETOLT, 93>;

// Less than or equal: le_s / le_u / le
defm LE_S : SIMDConditionInt<"le_s", SETLE, 95, 2>;
defm LE_U : SIMDConditionInt<"le_u", SETULE, 96, 2>;
defm LE : SIMDConditionFP<"le", SETOLE, 103>;

// Greater than: gt_s / gt_u / gt
defm GT_S : SIMDConditionInt<"gt_s", SETGT, 105, 2>;
defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 106, 2>;
defm GT : SIMDConditionFP<"gt", SETOGT, 113>;

// Greater than or equal: ge_s / ge_u / ge
defm GE_S : SIMDConditionInt<"ge_s", SETGE, 115, 2>;
defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 116, 2>;
defm GE : SIMDConditionFP<"ge", SETOGE, 123>;

// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated in the target-independent
// expansion of unordered comparisons and ordered ne.
//
// seteq / setne on float vectors select the same EQ_* / NE_* instructions
// that the SETOEQ / SETUNE patterns use.
def : Pat<(v4i32 (seteq (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (v4i32 (EQ_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
def : Pat<(v4i32 (setne (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (v4i32 (NE_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
def : Pat<(v2i64 (seteq (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (v2i64 (EQ_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
def : Pat<(v2i64 (setne (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (v2i64 (NE_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;

//===----------------------------------------------------------------------===//
// Load and store
//===----------------------------------------------------------------------===//

// Load: v128.load
//
// Defines LOAD_<vec_t> with opcode 1 and mayLoad set. The instruction has no
// inline selection pattern; the Load* pattern classes instantiated below
// provide them.
multiclass SIMDLoad<ValueType vec_t> {
  let mayLoad = 1 in
  defm LOAD_#vec_t :
    SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$align, offset32_op:$off), [],
           "v128.load\t$dst, ${off}(${addr})$align",
           "v128.load\t$off$align", 1>;
}

foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm "" : SIMDLoad<vec_t>;

// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
def : LoadPatNoOffset<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatImmOff<vec_t, load, regPlusImm, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatImmOff<vec_t, load, or_is_add, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatGlobalAddr<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatExternalSym<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatOffsetOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatGlobalAddrOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
}

708// Store: v128.store
// Defines STORE_<vec_t>, a v128.store instruction record for one vector type.
//   vec_t - vector value type; within this multiclass it is only used to form
//           the record name.
// Neither form has an output operand. The first ins list and assembly string
// name $addr and the stored value $vec explicitly; the second ins list carries
// only the $align and $off immediates.
// (NOTE(review): these are presumably the register- and stack-based forms,
// going by SIMD_I's `_r` / `_s` parameter suffixes -- confirm against `I`.)
// The selection pattern list is left empty here. The trailing 2 is the simdop
// passed to SIMD_I, which combines its low byte with the 0xfd00 prefix.
709multiclass SIMDStore<ValueType vec_t> {
  // Flag the instruction as one that may write memory.
710  let mayStore = 1 in
711  defm STORE_#vec_t :
712    SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
713           (outs), (ins P2Align:$align, offset32_op:$off), [],
714           "v128.store\t${off}(${addr})$align, $vec",
715           "v128.store\t$off$align", 2>;
716}
717
// For each of the six vector value types, instantiate the STORE_<vec_t>
// instruction and attach the selection patterns that map a generic `store` of
// that type onto it.
718foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
719defm "" : SIMDStore<vec_t>;
720
721// Instantiate the store patterns from WebAssemblyInstrMemory.td for vector
// types. `!cast<NI>("STORE_"#vec_t)` looks up, by name, the record that the
// SIMDStore instantiation above produced for this type. StorePatImmOff is used
// twice, once with regPlusImm and once with or_is_add as the address operator.
// NOTE(review): the pattern classes and NI are defined outside this chunk;
// their exact matching rules are not visible here.
722def : StorePatNoOffset<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
723def : StorePatImmOff<vec_t, store, regPlusImm, !cast<NI>("STORE_"#vec_t)>;
724def : StorePatImmOff<vec_t, store, or_is_add, !cast<NI>("STORE_"#vec_t)>;
725def : StorePatGlobalAddr<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
726def : StorePatExternalSym<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
727def : StorePatOffsetOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
728def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
729def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
730}
731
732//===----------------------------------------------------------------------===//
733// Floating-point sign bit operations
734//===----------------------------------------------------------------------===//
735
736// Negation: neg
// Instantiates SIMDNeg with the fneg node for the two floating-point vector
// types, using simdop 125 for f32x4 and 126 for f64x2.
// NOTE(review): SIMDNeg is defined outside this chunk; the argument order
// (type, lane-shape prefix, node, simdop) is inferred from these call sites.
737defm "" : SIMDNeg<v4f32, "f32x4", fneg, 125>;
738defm "" : SIMDNeg<v2f64, "f64x2", fneg, 126>;
739
740// Absolute value: abs
// Defines ABS_<vec_t>, which selects for `fabs` on a vector of type vec_t.
//   vec_t  - vector value type of both the operand and the result.
//   vec    - lane-shape prefix used to build the mnemonic "<vec>.abs".
//   simdop - opcode number forwarded to SIMD_I.
// The first form takes $vec and produces $dst; the second form's operand
// lists are both empty.
741multiclass SIMDAbs<ValueType vec_t, string vec, bits<32> simdop> {
742  defm ABS_#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
743                           [(set (vec_t V128:$dst), (vec_t (fabs V128:$vec)))],
744                           vec#".abs\t$dst, $vec", vec#".abs", simdop>;
745}
746
// Instantiate abs for both floating-point vector types:
// ABS_v4f32 ("f32x4.abs", simdop 127) and ABS_v2f64 ("f64x2.abs", simdop 128).
747defm "" : SIMDAbs<v4f32, "f32x4", 127>;
748defm "" : SIMDAbs<v2f64, "f64x2", 128>;
749
750//===----------------------------------------------------------------------===//
751// Floating-point min and max
752//===----------------------------------------------------------------------===//
753
// Instantiates SIMDBinary for both floating-point vector types.
//   node     - selection DAG node to match.
//   name     - operation name passed through to SIMDBinary.
//   baseInst - opcode number used for the v4f32 variant; the v2f64 variant
//              uses baseInst + 1, so each use of this multiclass takes two
//              consecutive opcode numbers.
// NOTE(review): SIMDBinary is defined outside this chunk.
754multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
755  defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
756  defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 1)>;
757}
758
759// NaN-propagating minimum: min
// Matches the fminnan node. Per SIMDBinaryFP, 129 is used for the f32x4
// variant and 130 for the f64x2 variant.
760defm MIN : SIMDBinaryFP<fminnan, "min", 129>;
761
762// NaN-propagating maximum: max
// Matches the fmaxnan node. Per SIMDBinaryFP, 131 is used for the f32x4
// variant and 132 for the f64x2 variant.
763defm MAX : SIMDBinaryFP<fmaxnan, "max", 131>;
764
765//===----------------------------------------------------------------------===//
766// Floating-point arithmetic
767//===----------------------------------------------------------------------===//
768
// Each SIMDBinaryFP use below takes two consecutive opcode numbers: the given
// base for the f32x4 variant and base + 1 for the f64x2 variant.

769// Addition: add
// Marked commutable; opcode numbers 133 (f32x4) and 134 (f64x2).
770let isCommutable = 1 in
771defm ADD : SIMDBinaryFP<fadd, "add", 133>;
772
773// Subtraction: sub
// Opcode numbers 135 (f32x4) and 136 (f64x2).
774defm SUB : SIMDBinaryFP<fsub, "sub", 135>;
775
776// Division: div
// Opcode numbers 137 (f32x4) and 138 (f64x2).
777defm DIV : SIMDBinaryFP<fdiv, "div", 137>;
778
779// Multiplication: mul
// Marked commutable; opcode numbers 139 (f32x4) and 140 (f64x2).
780let isCommutable = 1 in
781defm MUL : SIMDBinaryFP<fmul, "mul", 139>;
782
783// Square root: sqrt
// Defines SQRT_<vec_t>, which selects for `fsqrt` on a vector of type vec_t.
//   vec_t  - vector value type of both the operand and the result.
//   vec    - lane-shape prefix used to build the mnemonic "<vec>.sqrt".
//   simdop - opcode number forwarded to SIMD_I.
// The first form takes $vec and produces $dst; the second form's operand
// lists are both empty.
784multiclass SIMDSqrt<ValueType vec_t, string vec, bits<32> simdop> {
785  defm SQRT_#vec_t :
786    SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
787           [(set (vec_t V128:$dst), (vec_t (fsqrt V128:$vec)))],
788           vec#".sqrt\t$dst, $vec", vec#".sqrt", simdop>;
789}
790
// Instantiate sqrt for both floating-point vector types:
// SQRT_v4f32 ("f32x4.sqrt", simdop 141) and SQRT_v2f64 ("f64x2.sqrt",
// simdop 142).
791defm "" : SIMDSqrt<v4f32, "f32x4", 141>;
792defm "" : SIMDSqrt<v2f64, "f64x2", 142>;
793
794//===----------------------------------------------------------------------===//
795// Conversions
796//===----------------------------------------------------------------------===//
797
// Defines one conversion instruction that selects for `op` applied to an
// arg_t vector and yielding a vec_t vector.
//   vec_t  - result vector value type.
//   arg_t  - operand vector value type.
//   op     - selection DAG node to match; its name also forms the first part
//            of the record name.
//   name   - full assembly mnemonic, used verbatim for both forms.
//   simdop - opcode number forwarded to SIMD_I.
// The record is named <op>_<vec_t>_<arg_t> (e.g. fp_to_sint_v4i32_v4f32), so
// other definitions can refer to it by that pasted name.
798multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
799                       string name, bits<32> simdop> {
800  defm op#_#vec_t#_#arg_t :
801    SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
802           [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
803           name#"\t$dst, $vec", name, simdop>;
804}
805
806// Integer to floating point: convert_s / convert_u
// sint_to_fp selects the signed (_s) mnemonic and uint_to_fp the unsigned
// (_u) one, for i32x4 -> f32x4 and i64x2 -> f64x2. Simdop numbers 143-146.
807defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_s/i32x4", 143>;
808defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_u/i32x4", 144>;
809defm "" : SIMDConvert<v2f64, v2i64, sint_to_fp, "f64x2.convert_s/i64x2", 145>;
810defm "" : SIMDConvert<v2f64, v2i64, uint_to_fp, "f64x2.convert_u/i64x2", 146>;
811
812// Floating point to integer with saturation: trunc_sat_s / trunc_sat_u
// The generic fp_to_sint / fp_to_uint nodes are selected to the trunc_sat
// mnemonics, for f32x4 -> i32x4 and f64x2 -> i64x2. Simdop numbers 147-150.
813defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_s/f32x4", 147>;
814defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_u/f32x4", 148>;
815defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_s/f64x2", 149>;
816defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_u/f64x2", 150>;
817
818// Lower llvm.wasm.trunc.saturate.* to saturating instructions
// The signed / unsigned intrinsic nodes are mapped onto the same instruction
// records that SIMDConvert names fp_to_sint_* / fp_to_uint_*, so the intrinsic
// and the generic fp-to-int nodes select the same instructions.
819def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
820          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
821def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
822          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
823def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))),
824          (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>;
825def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))),
826          (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>;
827
828// Bitcasts are nops
829// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
//
// The outer loop picks the destination type t1. The !foldl walks the same
// six-type list and accumulates every type whose name differs from t1's, so
// the inner loop visits each source type t2 != t1. For every such (t1, t2)
// pair, a bitconvert from t2 to t1 is replaced by its operand, re-typed as t1.
// Types are compared by their string names via !cast<string>.
830foreach t1 = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
831foreach t2 = !foldl(
832  []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
833  acc, cur, !if(!eq(!cast<string>(t1), !cast<string>(cur)),
834    acc, !listconcat(acc, [cur])
835  )
836) in
837def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
838