1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes=CHECK,UNIMP
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SIMD-VM
3
4; Test that the logic to choose between v128.const vector
5; initialization and splat vector initialization and to optimize the
6; choice of splat value works correctly.
7
8target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
9target triple = "wasm32-unknown-unknown"
10
11; CHECK-LABEL:  emulated_const_trivial_splat:
12; CHECK-NEXT:   .functype       emulated_const_trivial_splat () -> (v128)
13; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
14; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
15; SIMD-VM-NEXT: return  $pop1
16; UNIMP: v128.const
17define <4 x i32> @emulated_const_trivial_splat() {
18  ret <4 x i32> <i32 1, i32 2, i32 1, i32 2>
19}
20
21; CHECK-LABEL:  emulated_const_first_sufficient:
22; CHECK-NEXT:   .functype       emulated_const_first_sufficient () -> (v128)
23; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
24; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
25; SIMD-VM-NEXT: return  $pop1
26; UNIMP: v128.const
27define <4 x i32> @emulated_const_first_sufficient() {
28  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 2>
29}
30
31; CHECK-LABEL:  emulated_const_second_sufficient:
32; CHECK-NEXT:   .functype       emulated_const_second_sufficient () -> (v128)
33; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
34; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
35; SIMD-VM-NEXT: return  $pop1
36; UNIMP: v128.const
37define <4 x i32> @emulated_const_second_sufficient() {
38  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 2>
39}
40
41; CHECK-LABEL:  emulated_const_combined_sufficient:
42; CHECK-NEXT:   .functype       emulated_const_combined_sufficient () -> (v128)
43; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
44; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
45; SIMD-VM-NEXT: return  $pop1
46; UNIMP: v128.const
47define <4 x i32> @emulated_const_combined_sufficient() {
48  ret <4 x i32> <i32 1, i32 undef, i32 undef, i32 2>
49}
50
51; CHECK-LABEL:  emulated_const_either_sufficient:
52; CHECK-NEXT:   .functype       emulated_const_either_sufficient () -> (v128)
53; SIMD-VM-NEXT: i64.const       $push0=, 1
54; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
55; SIMD-VM-NEXT: return  $pop1
56; UNIMP: v128.const
57define <4 x i32> @emulated_const_either_sufficient() {
58  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 undef>
59}
60
61; CHECK-LABEL: emulated_const_neither_sufficient:
62; CHECK-NEXT:   .functype       emulated_const_neither_sufficient () -> (v128)
63; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
64; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
65; SIMD-VM-NEXT: i64.const       $push2=, 17179869184
66; SIMD-VM-NEXT: i64x2.replace_lane      $push3=, $pop1, 1, $pop2
67; SIMD-VM-NEXT: return  $pop3
68define <4 x i32> @emulated_const_neither_sufficient() {
69  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 4>
70}
71
72; CHECK-LABEL:  emulated_const_combined_sufficient_large:
73; CHECK-NEXT:   .functype       emulated_const_combined_sufficient_large () -> (v128)
74; SIMD-VM-NEXT: i64.const       $push0=, 506097522914230528
75; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
76; SIMD-VM-NEXT: return  $pop1
77define <16 x i8> @emulated_const_combined_sufficient_large() {
78  ret <16 x i8> <i8 0, i8 undef, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 undef,
79                 i8 undef, i8 1, i8 undef, i8 3, i8 undef, i8 5, i8 undef, i8 7>
80}
81
82; CHECK-LABEL: emulated_const_neither_sufficient_large:
83; CHECK-NEXT:   .functype       emulated_const_neither_sufficient_large () -> (v128)
84; SIMD-VM-NEXT: i64.const       $push0=, -70368726997663744
85; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
86; SIMD-VM-NEXT: i64.const       $push2=, 504408655873966336
87; SIMD-VM-NEXT: i64x2.replace_lane      $push3=, $pop1, 1, $pop2
88; SIMD-VM-NEXT: return  $pop3
89define <16 x i8> @emulated_const_neither_sufficient_large() {
90  ret <16 x i8> <i8 0, i8 undef, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 255,
91                 i8 undef, i8 1, i8 undef, i8 3, i8 undef, i8 5, i8 undef, i8 7>
92}
93
94; CHECK-LABEL: same_const_one_replaced_i16x8:
95; CHECK-NEXT:  .functype       same_const_one_replaced_i16x8 (i32) -> (v128)
96; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
97; UNIMP-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
98; UNIMP-NEXT:  return          $pop[[L1]]
99; SIMD-VM: i64x2.splat
100define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
101  %v = insertelement
102    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
103    i16 %x,
104    i32 5
105  ret <8 x i16> %v
106}
107
108; CHECK-LABEL: different_const_one_replaced_i16x8:
109; CHECK-NEXT:  .functype       different_const_one_replaced_i16x8 (i32) -> (v128)
110; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
111; UNIMP-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
112; UNIMP-NEXT:  return          $pop[[L1]]
113; SIMD-VM: i64x2.splat
114define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
115  %v = insertelement
116    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
117    i16 %x,
118    i32 5
119  ret <8 x i16> %v
120}
121
122; CHECK-LABEL: same_const_one_replaced_f32x4:
123; CHECK-NEXT:  .functype       same_const_one_replaced_f32x4 (f32) -> (v128)
124; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
125; UNIMP-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
126; UNIMP-NEXT:  return          $pop[[L1]]
127; SIMD-VM: f32x4.splat
128define <4 x float> @same_const_one_replaced_f32x4(float %x) {
129  %v = insertelement
130    <4 x float> <float 42., float 42., float 42., float 42.>,
131    float %x,
132    i32 2
133  ret <4 x float> %v
134}
135
136; CHECK-LABEL: different_const_one_replaced_f32x4:
137; CHECK-NEXT:  .functype       different_const_one_replaced_f32x4 (f32) -> (v128)
138; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
139; UNIMP-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
140; UNIMP-NEXT:  return          $pop[[L1]]
141; SIMD-VM: f32x4.splat
142define <4 x float> @different_const_one_replaced_f32x4(float %x) {
143  %v = insertelement
144    <4 x float> <float 1., float 2., float 3., float 4.>,
145    float %x,
146    i32 2
147  ret <4 x float> %v
148}
149
150; CHECK-LABEL: splat_common_const_i32x4:
151; CHECK-NEXT:  .functype       splat_common_const_i32x4 () -> (v128)
152; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0, 3, 3, 1
153; UNIMP-NEXT:  return          $pop[[L0]]
154; SIMD-VM: i64x2.splat
155define <4 x i32> @splat_common_const_i32x4() {
156  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
157}
158
159; CHECK-LABEL: splat_common_arg_i16x8:
160; CHECK-NEXT:  .functype       splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
161; CHECK-NEXT:  i16x8.splat     $push[[L0:[0-9]+]]=, $2
162; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
163; CHECK-NEXT:  i16x8.replace_lane      $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
164; CHECK-NEXT:  i16x8.replace_lane      $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
165; CHECK-NEXT:  i16x8.replace_lane      $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
166; CHECK-NEXT:  return          $pop[[L4]]
167define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
168  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
169  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
170  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
171  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
172  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
173  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
174  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
175  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
176  ret <8 x i16> %v7
177}
178
179; CHECK-LABEL: swizzle_one_i8x16:
180; CHECK-NEXT:  .functype       swizzle_one_i8x16 (v128, v128) -> (v128)
181; CHECK-NEXT:  i8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
182; CHECK-NEXT:  return          $pop[[L0]]
183define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
184  %m0 = extractelement <16 x i8> %mask, i32 0
185  %s0 = extractelement <16 x i8> %src, i8 %m0
186  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
187  ret <16 x i8> %v0
188}
189
190; CHECK-LABEL: swizzle_all_i8x16:
191; CHECK-NEXT:  .functype       swizzle_all_i8x16 (v128, v128) -> (v128)
192; CHECK-NEXT:  i8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
193; CHECK-NEXT:  return          $pop[[L0]]
194define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
195  %m0 = extractelement <16 x i8> %mask, i32 0
196  %s0 = extractelement <16 x i8> %src, i8 %m0
197  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
198  %m1 = extractelement <16 x i8> %mask, i32 1
199  %s1 = extractelement <16 x i8> %src, i8 %m1
200  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
201  %m2 = extractelement <16 x i8> %mask, i32 2
202  %s2 = extractelement <16 x i8> %src, i8 %m2
203  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
204  %m3 = extractelement <16 x i8> %mask, i32 3
205  %s3 = extractelement <16 x i8> %src, i8 %m3
206  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
207  %m4 = extractelement <16 x i8> %mask, i32 4
208  %s4 = extractelement <16 x i8> %src, i8 %m4
209  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
210  %m5 = extractelement <16 x i8> %mask, i32 5
211  %s5 = extractelement <16 x i8> %src, i8 %m5
212  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
213  %m6 = extractelement <16 x i8> %mask, i32 6
214  %s6 = extractelement <16 x i8> %src, i8 %m6
215  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
216  %m7 = extractelement <16 x i8> %mask, i32 7
217  %s7 = extractelement <16 x i8> %src, i8 %m7
218  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
219  %m8 = extractelement <16 x i8> %mask, i32 8
220  %s8 = extractelement <16 x i8> %src, i8 %m8
221  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
222  %m9 = extractelement <16 x i8> %mask, i32 9
223  %s9 = extractelement <16 x i8> %src, i8 %m9
224  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
225  %m10 = extractelement <16 x i8> %mask, i32 10
226  %s10 = extractelement <16 x i8> %src, i8 %m10
227  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
228  %m11 = extractelement <16 x i8> %mask, i32 11
229  %s11 = extractelement <16 x i8> %src, i8 %m11
230  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
231  %m12 = extractelement <16 x i8> %mask, i32 12
232  %s12 = extractelement <16 x i8> %src, i8 %m12
233  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
234  %m13 = extractelement <16 x i8> %mask, i32 13
235  %s13 = extractelement <16 x i8> %src, i8 %m13
236  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
237  %m14 = extractelement <16 x i8> %mask, i32 14
238  %s14 = extractelement <16 x i8> %src, i8 %m14
239  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
240  %m15 = extractelement <16 x i8> %mask, i32 15
241  %s15 = extractelement <16 x i8> %src, i8 %m15
242  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
243  ret <16 x i8> %v15
244}
245
246; CHECK-LABEL: swizzle_one_i16x8:
247; CHECK-NEXT:  .functype       swizzle_one_i16x8 (v128, v128) -> (v128)
248; CHECK-NOT:    swizzle
249; CHECK:        return
250define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
251  %m0 = extractelement <8 x i16> %mask, i32 0
252  %s0 = extractelement <8 x i16> %src, i16 %m0
253  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
254  ret <8 x i16> %v0
255}
256
257; CHECK-LABEL: mashup_swizzle_i8x16:
258; CHECK-NEXT:  .functype       mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
259; CHECK-NEXT:  i8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
260; CHECK:       i8x16.replace_lane
261; CHECK:       i8x16.replace_lane
262; CHECK:       i8x16.replace_lane
263; CHECK:       i8x16.replace_lane
264; CHECK:       return
265define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
266  ; swizzle 0
267  %m0 = extractelement <16 x i8> %mask, i32 0
268  %s0 = extractelement <16 x i8> %src, i8 %m0
269  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
270  ; swizzle 7
271  %m1 = extractelement <16 x i8> %mask, i32 7
272  %s1 = extractelement <16 x i8> %src, i8 %m1
273  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
274  ; splat 3
275  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
276  ; splat 12
277  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
278  ; const 4
279  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
280  ; const 14
281  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
282  ret <16 x i8> %v5
283}
284
285; CHECK-LABEL: mashup_const_i8x16:
286; CHECK-NEXT:  .functype       mashup_const_i8x16 (v128, v128, i32) -> (v128)
287; UNIMP:       v128.const      $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
288; UNIMP:       i8x16.replace_lane
289; UNIMP:       i8x16.replace_lane
290; UNIMP:       i8x16.replace_lane
291; UNIMP:       return
292; SIMD-VM: i64x2.splat
293define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
294  ; swizzle 0
295  %m0 = extractelement <16 x i8> %mask, i32 0
296  %s0 = extractelement <16 x i8> %src, i8 %m0
297  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
298  ; splat 3
299  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
300  ; splat 12
301  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
302  ; const 4
303  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
304  ; const 14
305  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
306  ret <16 x i8> %v4
307}
308
309; CHECK-LABEL: mashup_splat_i8x16:
310; CHECK-NEXT:  .functype       mashup_splat_i8x16 (v128, v128, i32) -> (v128)
311; CHECK:       i8x16.splat     $push[[L0:[0-9]+]]=, $2
312; CHECK:       i8x16.replace_lane
313; CHECK:       i8x16.replace_lane
314; CHECK:       return
315define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
316  ; swizzle 0
317  %m0 = extractelement <16 x i8> %mask, i32 0
318  %s0 = extractelement <16 x i8> %src, i8 %m0
319  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
320  ; splat 3
321  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
322  ; splat 12
323  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
324  ; const 4
325  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
326  ret <16 x i8> %v3
327}
328
329; CHECK-LABEL: undef_const_insert_f32x4:
330; CHECK-NEXT:  .functype       undef_const_insert_f32x4 () -> (v128)
331; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
332; UNIMP-NEXT:  return          $pop[[L0]]
333; SIMD-VM: f32x4.splat
334define <4 x float> @undef_const_insert_f32x4() {
335  %v = insertelement <4 x float> undef, float 42., i32 1
336  ret <4 x float> %v
337}
338
339; CHECK-LABEL: undef_arg_insert_i32x4:
340; CHECK-NEXT:  .functype       undef_arg_insert_i32x4 (i32) -> (v128)
341; CHECK-NEXT:  i32x4.splat     $push[[L0:[0-9]+]]=, $0
342; CHECK-NEXT:  return          $pop[[L0]]
343define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
344  %v = insertelement <4 x i32> undef, i32 %x, i32 3
345  ret <4 x i32> %v
346}
347
348; CHECK-LABEL: all_undef_i8x16:
349; CHECK-NEXT:  .functype       all_undef_i8x16 () -> (v128)
350; CHECK-NEXT:  return          $0
351define <16 x i8> @all_undef_i8x16() {
352  %v = insertelement <16 x i8> undef, i8 undef, i32 4
353  ret <16 x i8> %v
354}
355
356; CHECK-LABEL: all_undef_f64x2:
357; CHECK-NEXT:  .functype       all_undef_f64x2 () -> (v128)
358; CHECK-NEXT:  return          $0
359define <2 x double> @all_undef_f64x2() {
360  ret <2 x double> undef
361}
362