1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
2
3; Test that the logic to choose between v128.const vector
4; initialization and splat vector initialization and to optimize the
5; choice of splat value works correctly.
6
7target triple = "wasm32-unknown-unknown"
8
; Insert the non-constant argument %x into lane 5 of a vector whose constant
; lanes are all 42. The expected output materializes the constant lanes with
; one v128.const (lane 5 is emitted as 0) and then writes $0 into lane 5 with
; a single i16x8.replace_lane.
9; CHECK-LABEL: same_const_one_replaced_i16x8:
10; CHECK-NEXT:  .functype       same_const_one_replaced_i16x8 (i32) -> (v128)
11; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
12; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
13; CHECK-NEXT:  return          $pop[[L1]]
14define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
15  %v = insertelement
16    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
17    i16 %x,
18    i32 5
19  ret <8 x i16> %v
20}
21
; Insert the non-constant argument %x into lane 5 of a constant vector whose
; lanes all differ (1, -2, 3, -4, 5, -6, 7, -8). The expected v128.const keeps
; every constant lane except lane 5, which is emitted as 0 instead of -6, and
; a single i16x8.replace_lane then writes $0 into lane 5.
22; CHECK-LABEL: different_const_one_replaced_i16x8:
23; CHECK-NEXT:  .functype       different_const_one_replaced_i16x8 (i32) -> (v128)
24; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
25; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
26; CHECK-NEXT:  return          $pop[[L1]]
27define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
28  %v = insertelement
29    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
30    i16 %x,
31    i32 5
32  ret <8 x i16> %v
33}
34
; Floating-point variant: insert the argument %x into lane 2 of a vector whose
; constant lanes are all 42.0 (printed as 0x1.5p5). The expected output is one
; v128.const with lane 2 emitted as 0x0p0, followed by a single
; f32x4.replace_lane that writes $0 into lane 2.
35; CHECK-LABEL: same_const_one_replaced_f32x4:
36; CHECK-NEXT:  .functype       same_const_one_replaced_f32x4 (f32) -> (v128)
37; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
38; CHECK-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
39; CHECK-NEXT:  return          $pop[[L1]]
40define <4 x float> @same_const_one_replaced_f32x4(float %x) {
41  %v = insertelement
42    <4 x float> <float 42., float 42., float 42., float 42.>,
43    float %x,
44    i32 2
45  ret <4 x float> %v
46}
47
; Floating-point variant with distinct constants: insert the argument %x into
; lane 2 of <1.0, 2.0, 3.0, 4.0>. The expected v128.const keeps lanes 0, 1 and
; 3 (0x1p0, 0x1p1, 0x1p2) and emits lane 2 as 0x0p0 instead of 3.0; a single
; f32x4.replace_lane then writes $0 into lane 2.
48; CHECK-LABEL: different_const_one_replaced_f32x4:
49; CHECK-NEXT:  .functype       different_const_one_replaced_f32x4 (f32) -> (v128)
50; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
51; CHECK-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
52; CHECK-NEXT:  return          $pop[[L1]]
53define <4 x float> @different_const_one_replaced_f32x4(float %x) {
54  %v = insertelement
55    <4 x float> <float 1., float 2., float 3., float 4.>,
56    float %x,
57    i32 2
58  ret <4 x float> %v
59}
60
; Return a fully constant vector whose lane 0 is undef and whose most common
; value is 3. The expected output is a single v128.const with the undef lane
; emitted as 0; despite the function name, no splat instruction is expected
; between the function header and the return.
61; CHECK-LABEL: splat_common_const_i32x4:
62; CHECK-NEXT:  .functype       splat_common_const_i32x4 () -> (v128)
63; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0, 3, 3, 1
64; CHECK-NEXT:  return          $pop[[L0]]
65define <4 x i32> @splat_common_const_i32x4() {
66  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
67}
68
; Build a vector purely from arguments: lanes 0, 4, 7 hold %b, lane 2 holds
; %a, and lanes 1, 3, 5, 6 hold %c. %c is the most frequent value, and the
; expected output splats $2 (the third argument, %c) and then patches only the
; four remaining lanes (0, 2, 4, 7) with i16x8.replace_lane.
69; CHECK-LABEL: splat_common_arg_i16x8:
70; CHECK-NEXT:  .functype       splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
71; CHECK-NEXT:  i16x8.splat     $push[[L0:[0-9]+]]=, $2
72; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
73; CHECK-NEXT:  i16x8.replace_lane      $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
74; CHECK-NEXT:  i16x8.replace_lane      $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
75; CHECK-NEXT:  i16x8.replace_lane      $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
76; CHECK-NEXT:  return          $pop[[L4]]
77define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
78  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
79  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
80  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
81  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
82  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
83  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
84  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
85  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
86  ret <8 x i16> %v7
87}
88
; Only lane 0 is defined: it holds %src[%mask[0]], i.e. an element of %src
; selected by a dynamic index read from the same lane of %mask. All other
; lanes stay undef. The expected output is a single i8x16.swizzle of the two
; arguments ($0 = %src, $1 = %mask).
89; CHECK-LABEL: swizzle_one_i8x16:
90; CHECK-NEXT:  .functype       swizzle_one_i8x16 (v128, v128) -> (v128)
91; CHECK-NEXT:  i8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
92; CHECK-NEXT:  return          $pop[[L0]]
93define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
94  %m0 = extractelement <16 x i8> %mask, i32 0
95  %s0 = extractelement <16 x i8> %src, i8 %m0
96  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
97  ret <16 x i8> %v0
98}
99
; Every lane i (0 through 15) is defined as %src[%mask[i]]: the index is read
; from lane i of %mask and used to pick an element of %src, which is inserted
; back into lane i. The expected output is a single i8x16.swizzle of the two
; arguments ($0 = %src, $1 = %mask) with no per-lane instructions.
100; CHECK-LABEL: swizzle_all_i8x16:
101; CHECK-NEXT:  .functype       swizzle_all_i8x16 (v128, v128) -> (v128)
102; CHECK-NEXT:  i8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
103; CHECK-NEXT:  return          $pop[[L0]]
104define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
105  %m0 = extractelement <16 x i8> %mask, i32 0
106  %s0 = extractelement <16 x i8> %src, i8 %m0
107  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
108  %m1 = extractelement <16 x i8> %mask, i32 1
109  %s1 = extractelement <16 x i8> %src, i8 %m1
110  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
111  %m2 = extractelement <16 x i8> %mask, i32 2
112  %s2 = extractelement <16 x i8> %src, i8 %m2
113  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
114  %m3 = extractelement <16 x i8> %mask, i32 3
115  %s3 = extractelement <16 x i8> %src, i8 %m3
116  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
117  %m4 = extractelement <16 x i8> %mask, i32 4
118  %s4 = extractelement <16 x i8> %src, i8 %m4
119  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
120  %m5 = extractelement <16 x i8> %mask, i32 5
121  %s5 = extractelement <16 x i8> %src, i8 %m5
122  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
123  %m6 = extractelement <16 x i8> %mask, i32 6
124  %s6 = extractelement <16 x i8> %src, i8 %m6
125  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
126  %m7 = extractelement <16 x i8> %mask, i32 7
127  %s7 = extractelement <16 x i8> %src, i8 %m7
128  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
129  %m8 = extractelement <16 x i8> %mask, i32 8
130  %s8 = extractelement <16 x i8> %src, i8 %m8
131  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
132  %m9 = extractelement <16 x i8> %mask, i32 9
133  %s9 = extractelement <16 x i8> %src, i8 %m9
134  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
135  %m10 = extractelement <16 x i8> %mask, i32 10
136  %s10 = extractelement <16 x i8> %src, i8 %m10
137  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
138  %m11 = extractelement <16 x i8> %mask, i32 11
139  %s11 = extractelement <16 x i8> %src, i8 %m11
140  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
141  %m12 = extractelement <16 x i8> %mask, i32 12
142  %s12 = extractelement <16 x i8> %src, i8 %m12
143  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
144  %m13 = extractelement <16 x i8> %mask, i32 13
145  %s13 = extractelement <16 x i8> %src, i8 %m13
146  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
147  %m14 = extractelement <16 x i8> %mask, i32 14
148  %s14 = extractelement <16 x i8> %src, i8 %m14
149  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
150  %m15 = extractelement <16 x i8> %mask, i32 15
151  %s15 = extractelement <16 x i8> %src, i8 %m15
152  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
153  ret <16 x i8> %v15
154}
155
; Negative test: the same "element of %src selected by a lane of %mask"
; pattern as the i8x16 swizzle tests, but with 16-bit lanes. The directives
; below require that no text containing "swizzle" appears between the function
; header and the return.
156; CHECK-LABEL: swizzle_one_i16x8:
157; CHECK-NEXT:  .functype       swizzle_one_i16x8 (v128, v128) -> (v128)
158; CHECK-NOT:    swizzle
159; CHECK:        return
160define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
161  %m0 = extractelement <8 x i16> %mask, i32 0
162  %s0 = extractelement <8 x i16> %src, i16 %m0
163  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
164  ret <8 x i16> %v0
165}
166
; Half of the lanes come from %src at constant indices (lane 1 = %src[2],
; lane 2 = %src[0]); the other half are the constants 0 (lane 0) and 3
; (lane 3). The expected output starts from an i8x16.shuffle of %src with
; itself that places bytes 8-11 in lane 1 and bytes 0-3 in lane 2 (the other
; byte indices are emitted as 0), followed by two i32x4.replace_lane
; instructions.
167; CHECK-LABEL: half_shuffle_i32x4:
168; CHECK-NEXT: .functype        half_shuffle_i32x4 (v128) -> (v128)
169; CHECK:      i8x16.shuffle $push[[L0:[0-9]+]]=, $0, $0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 0, 0, 0
170; CHECK:      i32x4.replace_lane
171; CHECK:      i32x4.replace_lane
172; CHECK:      return
173define <4 x i32> @half_shuffle_i32x4(<4 x i32> %src) {
174  %s0 = extractelement <4 x i32> %src, i32 0
175  %s2 = extractelement <4 x i32> %src, i32 2
176  %v0 = insertelement <4 x i32> undef, i32 0, i32 0
177  %v1 = insertelement <4 x i32> %v0, i32 %s2, i32 1
178  %v2 = insertelement <4 x i32> %v1, i32 %s0, i32 2
179  %v3 = insertelement <4 x i32> %v2, i32 3, i32 3
180  ret <4 x i32> %v3
181}
182
; Mixed initialization with a three-way tie: two swizzle-style lanes (0, 7),
; two lanes holding the argument %splatted (3, 12), and two lanes holding the
; constant 42 (4, 14); the remaining lanes are undef. The expected output uses
; i8x16.swizzle as the base vector and then issues four i8x16.replace_lane
; instructions.
183; CHECK-LABEL: mashup_swizzle_i8x16:
184; CHECK-NEXT:  .functype       mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
185; CHECK-NEXT:  i8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
186; CHECK:       i8x16.replace_lane
187; CHECK:       i8x16.replace_lane
188; CHECK:       i8x16.replace_lane
189; CHECK:       i8x16.replace_lane
190; CHECK:       return
191define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
192  ; swizzle 0
193  %m0 = extractelement <16 x i8> %mask, i32 0
194  %s0 = extractelement <16 x i8> %src, i8 %m0
195  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
196  ; swizzle 7
197  %m1 = extractelement <16 x i8> %mask, i32 7
198  %s1 = extractelement <16 x i8> %src, i8 %m1
199  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
200  ; splat 3
201  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
202  ; splat 12
203  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
204  ; const 4
205  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
206  ; const 14
207  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
208  ret <16 x i8> %v5
209}
210
; Mixed initialization where constants are not outnumbered: one swizzle-style
; lane (0), two lanes holding the argument %splatted (3, 12), and two lanes
; holding the constant 42 (4, 14). The expected output uses a v128.const with
; 42 in lanes 4 and 14 (all other lanes 0) as the base, followed by one
; v128.load8_lane and two i8x16.replace_lane instructions.
; NOTE(review): the v128.load8_lane presumably fills lane 0 with the
; dynamically indexed element of %src; the directives do not pin its operands.
211; CHECK-LABEL: mashup_const_i8x16:
212; CHECK-NEXT:  .functype       mashup_const_i8x16 (v128, v128, i32) -> (v128)
213; CHECK:       v128.const      $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
214; CHECK:       v128.load8_lane
215; CHECK:       i8x16.replace_lane
216; CHECK:       i8x16.replace_lane
217; CHECK:       return
218define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
219  ; swizzle 0
220  %m0 = extractelement <16 x i8> %mask, i32 0
221  %s0 = extractelement <16 x i8> %src, i8 %m0
222  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
223  ; splat 3
224  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
225  ; splat 12
226  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
227  ; const 4
228  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
229  ; const 14
230  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
231  ret <16 x i8> %v4
232}
233
; Mixed initialization where the splatted argument is the most common source:
; one swizzle-style lane (0), two lanes holding %splatted (3, 12), and one
; lane holding the constant 42 (4). The expected output uses i8x16.splat of $2
; (the third argument, %splatted) as the base, followed by one v128.load8_lane
; and one i8x16.replace_lane.
; NOTE(review): the v128.load8_lane presumably fills lane 0 with the
; dynamically indexed element of %src; the directives do not pin its operands.
234; CHECK-LABEL: mashup_splat_i8x16:
235; CHECK-NEXT:  .functype       mashup_splat_i8x16 (v128, v128, i32) -> (v128)
236; CHECK:       i8x16.splat     $push[[L0:[0-9]+]]=, $2
237; CHECK:       v128.load8_lane
238; CHECK:       i8x16.replace_lane
239; CHECK:       return
240define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
241  ; swizzle 0
242  %m0 = extractelement <16 x i8> %mask, i32 0
243  %s0 = extractelement <16 x i8> %src, i8 %m0
244  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
245  ; splat 3
246  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
247  ; splat 12
248  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
249  ; const 4
250  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
251  ret <16 x i8> %v3
252}
253
; Insert the constant 42.0 into lane 1 of an otherwise-undef <4 x float>.
; The expected output is a single v128.const with 0x1.5p5 (42.0) in lane 1 and
; 0x0p0 in the undef lanes, followed directly by the return.
254; CHECK-LABEL: undef_const_insert_f32x4:
255; CHECK-NEXT:  .functype       undef_const_insert_f32x4 () -> (v128)
256; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
257; CHECK-NEXT:  return          $pop[[L0]]
259define <4 x float> @undef_const_insert_f32x4() {
260  %v = insertelement <4 x float> undef, float 42., i32 1
261  ret <4 x float> %v
262}
263
; Insert the argument %x into lane 3 of an otherwise-undef <4 x i32>. The
; expected output is a single i32x4.splat of $0 followed directly by the
; return, with no replace_lane.
264; CHECK-LABEL: undef_arg_insert_i32x4:
265; CHECK-NEXT:  .functype       undef_arg_insert_i32x4 (i32) -> (v128)
266; CHECK-NEXT:  i32x4.splat     $push[[L0:[0-9]+]]=, $0
267; CHECK-NEXT:  return          $pop[[L0]]
268define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
269  %v = insertelement <4 x i32> undef, i32 %x, i32 3
270  ret <4 x i32> %v
271}
272
; Insert an undef element into an undef vector, so every lane of the result
; is undef. The expected output contains no vector-building instruction: the
; function header is followed directly by "return $0".
273; CHECK-LABEL: all_undef_i8x16:
274; CHECK-NEXT:  .functype       all_undef_i8x16 () -> (v128)
275; CHECK-NEXT:  return          $0
276define <16 x i8> @all_undef_i8x16() {
277  %v = insertelement <16 x i8> undef, i8 undef, i32 4
278  ret <16 x i8> %v
279}
280
; Return a literal undef <2 x double>. As with the i8x16 all-undef case, the
; expected output has no vector-building instruction: the function header is
; followed directly by "return $0".
281; CHECK-LABEL: all_undef_f64x2:
282; CHECK-NEXT:  .functype       all_undef_f64x2 () -> (v128)
283; CHECK-NEXT:  return          $0
284define <2 x double> @all_undef_f64x2() {
285  ret <2 x double> undef
286}
287