1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
3
4; Test that the logic to choose between v128.const vector
5; initialization and splat vector initialization and to optimize the
6; choice of splat value works correctly.
7
8target triple = "wasm32-unknown-unknown"
9
; Inserts the argument %x into lane 5 of a constant vector whose eight i16
; lanes are all 42. The expected output materializes the constant with a
; single v128.const (lane 5 is emitted as 0, since it is overwritten anyway)
; and then writes %x into lane 5 with one i16x8.replace_lane.
10define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
11; CHECK-LABEL: same_const_one_replaced_i16x8:
12; CHECK:         .functype same_const_one_replaced_i16x8 (i32) -> (v128)
13; CHECK-NEXT:  # %bb.0:
14; CHECK-NEXT:    v128.const $push0=, 42, 42, 42, 42, 42, 0, 42, 42
15; CHECK-NEXT:    i16x8.replace_lane $push1=, $pop0, 5, $0
16; CHECK-NEXT:    return $pop1
17  %v = insertelement
18    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
19    i16 %x,
20    i32 5
21  ret <8 x i16> %v
22}
23
; Same shape as the all-42 case, but every lane of the constant vector holds a
; different value (1, -2, 3, -4, 5, -6, 7, -8). The expected output is still a
; single v128.const, with the overwritten lane 5 emitted as 0 instead of -6,
; followed by one i16x8.replace_lane that writes %x into lane 5.
24define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
25; CHECK-LABEL: different_const_one_replaced_i16x8:
26; CHECK:         .functype different_const_one_replaced_i16x8 (i32) -> (v128)
27; CHECK-NEXT:  # %bb.0:
28; CHECK-NEXT:    v128.const $push0=, 1, -2, 3, -4, 5, 0, 7, -8
29; CHECK-NEXT:    i16x8.replace_lane $push1=, $pop0, 5, $0
30; CHECK-NEXT:    return $pop1
31  %v = insertelement
32    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
33    i16 %x,
34    i32 5
35  ret <8 x i16> %v
36}
37
; Floating-point variant: inserts %x into lane 2 of a <4 x float> constant
; whose lanes are all 42.0 (printed as 0x1.5p5 in the expected output). The
; expected lowering is one v128.const with lane 2 emitted as 0x0p0, followed
; by one f32x4.replace_lane that writes %x into lane 2.
38define <4 x float> @same_const_one_replaced_f32x4(float %x) {
39; CHECK-LABEL: same_const_one_replaced_f32x4:
40; CHECK:         .functype same_const_one_replaced_f32x4 (f32) -> (v128)
41; CHECK-NEXT:  # %bb.0:
42; CHECK-NEXT:    v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
43; CHECK-NEXT:    f32x4.replace_lane $push1=, $pop0, 2, $0
44; CHECK-NEXT:    return $pop1
45  %v = insertelement
46    <4 x float> <float 42., float 42., float 42., float 42.>,
47    float %x,
48    i32 2
49  ret <4 x float> %v
50}
51
; Floating-point variant with distinct constant lanes (1.0, 2.0, 3.0, 4.0).
; %x replaces lane 2, so the expected v128.const emits 0x0p0 there instead of
; 3.0, and a single f32x4.replace_lane then writes %x into lane 2.
52define <4 x float> @different_const_one_replaced_f32x4(float %x) {
53; CHECK-LABEL: different_const_one_replaced_f32x4:
54; CHECK:         .functype different_const_one_replaced_f32x4 (f32) -> (v128)
55; CHECK-NEXT:  # %bb.0:
56; CHECK-NEXT:    v128.const $push0=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
57; CHECK-NEXT:    f32x4.replace_lane $push1=, $pop0, 2, $0
58; CHECK-NEXT:    return $pop1
59  %v = insertelement
60    <4 x float> <float 1., float 2., float 3., float 4.>,
61    float %x,
62    i32 2
63  ret <4 x float> %v
64}
65
; Returns the constant vector <undef, 3, 3, 1>. Although the value 3 appears
; twice, the expected output is a single v128.const (no splat and no
; replace_lane), with the undef lane 0 emitted as 0.
66define <4 x i32> @splat_common_const_i32x4() {
67; CHECK-LABEL: splat_common_const_i32x4:
68; CHECK:         .functype splat_common_const_i32x4 () -> (v128)
69; CHECK-NEXT:  # %bb.0:
70; CHECK-NEXT:    v128.const $push0=, 0, 3, 3, 1
71; CHECK-NEXT:    return $pop0
72  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
73}
74
; Builds an <8 x i16> entirely from the three arguments: %c fills lanes
; 1, 3, 5 and 6, %b fills lanes 0, 4 and 7, and %a fills lane 2. The expected
; output splats the most frequently used value (%c, i.e. $2) and then patches
; the remaining four lanes with i16x8.replace_lane.
75define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
76; CHECK-LABEL: splat_common_arg_i16x8:
77; CHECK:         .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
78; CHECK-NEXT:  # %bb.0:
79; CHECK-NEXT:    i16x8.splat $push0=, $2
80; CHECK-NEXT:    i16x8.replace_lane $push1=, $pop0, 0, $1
81; CHECK-NEXT:    i16x8.replace_lane $push2=, $pop1, 2, $0
82; CHECK-NEXT:    i16x8.replace_lane $push3=, $pop2, 4, $1
83; CHECK-NEXT:    i16x8.replace_lane $push4=, $pop3, 7, $1
84; CHECK-NEXT:    return $pop4
85  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
86  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
87  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
88  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
89  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
90  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
91  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
92  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
93  ret <8 x i16> %v7
94}
95
; Defines only lane 0 of the result: it is the byte of %src selected by the
; dynamic index held in lane 0 of %mask. All other lanes are undef. The
; expected output is a single i8x16.swizzle of %src by %mask.
96define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
97; CHECK-LABEL: swizzle_one_i8x16:
98; CHECK:         .functype swizzle_one_i8x16 (v128, v128) -> (v128)
99; CHECK-NEXT:  # %bb.0:
100; CHECK-NEXT:    i8x16.swizzle $push0=, $0, $1
101; CHECK-NEXT:    return $pop0
102  %m0 = extractelement <16 x i8> %mask, i32 0
103  %s0 = extractelement <16 x i8> %src, i8 %m0
104  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
105  ret <16 x i8> %v0
106}
107
; Fully spelled-out byte swizzle: for every lane i in 0..15, result lane i is
; the byte of %src selected by the dynamic index in lane i of %mask. The
; expected output collapses all 48 extract/insert instructions into a single
; i8x16.swizzle of %src by %mask.
108define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
109; CHECK-LABEL: swizzle_all_i8x16:
110; CHECK:         .functype swizzle_all_i8x16 (v128, v128) -> (v128)
111; CHECK-NEXT:  # %bb.0:
112; CHECK-NEXT:    i8x16.swizzle $push0=, $0, $1
113; CHECK-NEXT:    return $pop0
114  %m0 = extractelement <16 x i8> %mask, i32 0
115  %s0 = extractelement <16 x i8> %src, i8 %m0
116  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
117  %m1 = extractelement <16 x i8> %mask, i32 1
118  %s1 = extractelement <16 x i8> %src, i8 %m1
119  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
120  %m2 = extractelement <16 x i8> %mask, i32 2
121  %s2 = extractelement <16 x i8> %src, i8 %m2
122  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
123  %m3 = extractelement <16 x i8> %mask, i32 3
124  %s3 = extractelement <16 x i8> %src, i8 %m3
125  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
126  %m4 = extractelement <16 x i8> %mask, i32 4
127  %s4 = extractelement <16 x i8> %src, i8 %m4
128  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
129  %m5 = extractelement <16 x i8> %mask, i32 5
130  %s5 = extractelement <16 x i8> %src, i8 %m5
131  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
132  %m6 = extractelement <16 x i8> %mask, i32 6
133  %s6 = extractelement <16 x i8> %src, i8 %m6
134  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
135  %m7 = extractelement <16 x i8> %mask, i32 7
136  %s7 = extractelement <16 x i8> %src, i8 %m7
137  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
138  %m8 = extractelement <16 x i8> %mask, i32 8
139  %s8 = extractelement <16 x i8> %src, i8 %m8
140  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
141  %m9 = extractelement <16 x i8> %mask, i32 9
142  %s9 = extractelement <16 x i8> %src, i8 %m9
143  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
144  %m10 = extractelement <16 x i8> %mask, i32 10
145  %s10 = extractelement <16 x i8> %src, i8 %m10
146  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
147  %m11 = extractelement <16 x i8> %mask, i32 11
148  %s11 = extractelement <16 x i8> %src, i8 %m11
149  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
150  %m12 = extractelement <16 x i8> %mask, i32 12
151  %s12 = extractelement <16 x i8> %src, i8 %m12
152  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
153  %m13 = extractelement <16 x i8> %mask, i32 13
154  %s13 = extractelement <16 x i8> %src, i8 %m13
155  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
156  %m14 = extractelement <16 x i8> %mask, i32 14
157  %s14 = extractelement <16 x i8> %src, i8 %m14
158  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
159  %m15 = extractelement <16 x i8> %mask, i32 15
160  %s15 = extractelement <16 x i8> %src, i8 %m15
161  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
162  ret <16 x i8> %v15
163}
164
165; Ensure we don't use swizzle
; Same pattern as the single-lane i8x16 swizzle test, but with 16-bit lanes:
; lane 0 of the result is the i16 of %src selected by the dynamic index in
; lane 0 of %mask. The expected output contains no swizzle instruction.
; Instead %src is stored to memory at __stack_pointer - 16, the index is
; masked with 7 and shifted left by 1 to form a byte offset, and the selected
; element is loaded back with v128.load16_splat.
166define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
167; CHECK-LABEL: swizzle_one_i16x8:
168; CHECK:         .functype swizzle_one_i16x8 (v128, v128) -> (v128)
169; CHECK-NEXT:  # %bb.0:
170; CHECK-NEXT:    global.get $push7=, __stack_pointer
171; CHECK-NEXT:    i32.const $push8=, 16
172; CHECK-NEXT:    i32.sub $push10=, $pop7, $pop8
173; CHECK-NEXT:    local.tee $push9=, $2=, $pop10
174; CHECK-NEXT:    v128.store 0($pop9), $0
175; CHECK-NEXT:    i16x8.extract_lane_u $push0=, $1, 0
176; CHECK-NEXT:    i32.const $push1=, 7
177; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
178; CHECK-NEXT:    i32.const $push3=, 1
179; CHECK-NEXT:    i32.shl $push4=, $pop2, $pop3
180; CHECK-NEXT:    i32.or $push5=, $2, $pop4
181; CHECK-NEXT:    v128.load16_splat $push6=, 0($pop5)
182; CHECK-NEXT:    return $pop6
183  %m0 = extractelement <8 x i16> %mask, i32 0
184  %s0 = extractelement <8 x i16> %src, i16 %m0
185  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
186  ret <8 x i16> %v0
187}
188
; Builds a <4 x i32> in which two lanes come from %src at constant indices
; (result lane 1 = %src lane 2, result lane 2 = %src lane 0) and the other two
; lanes are the constants 0 (lane 0) and 3 (lane 3). The expected output uses
; one i8x16.shuffle of %src with itself to place bytes 8-11 and 0-3 into
; lanes 1 and 2, then sets the constant lanes with two i32x4.replace_lane
; instructions.
189define <4 x i32> @half_shuffle_i32x4(<4 x i32> %src) {
190; CHECK-LABEL: half_shuffle_i32x4:
191; CHECK:         .functype half_shuffle_i32x4 (v128) -> (v128)
192; CHECK-NEXT:  # %bb.0:
193; CHECK-NEXT:    i8x16.shuffle $push0=, $0, $0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 0, 0, 0
194; CHECK-NEXT:    i32.const $push1=, 0
195; CHECK-NEXT:    i32x4.replace_lane $push2=, $pop0, 0, $pop1
196; CHECK-NEXT:    i32.const $push3=, 3
197; CHECK-NEXT:    i32x4.replace_lane $push4=, $pop2, 3, $pop3
198; CHECK-NEXT:    return $pop4
199  %s0 = extractelement <4 x i32> %src, i32 0
200  %s2 = extractelement <4 x i32> %src, i32 2
201  %v0 = insertelement <4 x i32> undef, i32 0, i32 0
202  %v1 = insertelement <4 x i32> %v0, i32 %s2, i32 1
203  %v2 = insertelement <4 x i32> %v1, i32 %s0, i32 2
204  %v3 = insertelement <4 x i32> %v2, i32 3, i32 3
205  ret <4 x i32> %v3
206}
207
; Mixes three kinds of lanes in one <16 x i8>: two swizzle lanes (0 and 7,
; each taken from %src at the index in the same lane of %mask), two lanes
; holding the scalar argument %splatted (3 and 12), and two lanes holding the
; constant 42 (4 and 14). The remaining lanes are undef. The expected output
; uses i8x16.swizzle as the base vector and patches the four non-swizzle
; lanes with i8x16.replace_lane.
208define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
209  ; swizzle 0
210; CHECK-LABEL: mashup_swizzle_i8x16:
211; CHECK:         .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
212; CHECK-NEXT:  # %bb.0:
213; CHECK-NEXT:    i8x16.swizzle $push0=, $0, $1
214; CHECK-NEXT:    i8x16.replace_lane $push1=, $pop0, 3, $2
215; CHECK-NEXT:    i32.const $push2=, 42
216; CHECK-NEXT:    i8x16.replace_lane $push3=, $pop1, 4, $pop2
217; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop3, 12, $2
218; CHECK-NEXT:    i32.const $push6=, 42
219; CHECK-NEXT:    i8x16.replace_lane $push5=, $pop4, 14, $pop6
220; CHECK-NEXT:    return $pop5
221  %m0 = extractelement <16 x i8> %mask, i32 0
222  %s0 = extractelement <16 x i8> %src, i8 %m0
223  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
224  ; swizzle 7
225  %m1 = extractelement <16 x i8> %mask, i32 7
226  %s1 = extractelement <16 x i8> %src, i8 %m1
227  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
228  ; splat 3
229  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
230  ; splat 12
231  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
232  ; const 4
233  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
234  ; const 14
235  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
236  ret <16 x i8> %v5
237}
238
; Mixed-lane test with one swizzle lane (0), two lanes holding %splatted
; (3 and 12) and two lanes holding the constant 42 (4 and 14). The expected
; output uses a v128.const as the base vector, with 42 in lanes 4 and 14 and
; 0 elsewhere. The dynamically indexed byte for lane 0 is fetched through
; memory: %src is stored at __stack_pointer - 16, the index is masked with
; 15, and v128.load8_lane loads the byte into lane 0. The two %splatted lanes
; are then written with i8x16.replace_lane.
239define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
240  ; swizzle 0
241; CHECK-LABEL: mashup_const_i8x16:
242; CHECK:         .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
243; CHECK-NEXT:  # %bb.0:
244; CHECK-NEXT:    global.get $push8=, __stack_pointer
245; CHECK-NEXT:    i32.const $push9=, 16
246; CHECK-NEXT:    i32.sub $push11=, $pop8, $pop9
247; CHECK-NEXT:    local.tee $push10=, $3=, $pop11
248; CHECK-NEXT:    v128.store 0($pop10), $0
249; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
250; CHECK-NEXT:    i32.const $push1=, 15
251; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
252; CHECK-NEXT:    i32.or $push3=, $3, $pop2
253; CHECK-NEXT:    v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
254; CHECK-NEXT:    v128.load8_lane $push5=, 0($pop3), $pop4, 0
255; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop5, 3, $2
256; CHECK-NEXT:    i8x16.replace_lane $push7=, $pop6, 12, $2
257; CHECK-NEXT:    return $pop7
258  %m0 = extractelement <16 x i8> %mask, i32 0
259  %s0 = extractelement <16 x i8> %src, i8 %m0
260  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
261  ; splat 3
262  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
263  ; splat 12
264  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
265  ; const 4
266  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
267  ; const 14
268  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
269  ret <16 x i8> %v4
270}
271
; Mixed-lane test with one swizzle lane (0), two lanes holding %splatted
; (3 and 12) and a single constant lane (4, value 42). The expected output
; uses i8x16.splat of %splatted as the base vector. The dynamically indexed
; byte for lane 0 is fetched through memory: %src is stored at
; __stack_pointer - 16, the index is masked with 15, and v128.load8_lane
; loads the byte into lane 0. The constant is then written into lane 4 with
; one i8x16.replace_lane.
272define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
273  ; swizzle 0
274; CHECK-LABEL: mashup_splat_i8x16:
275; CHECK:         .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
276; CHECK-NEXT:  # %bb.0:
277; CHECK-NEXT:    global.get $push8=, __stack_pointer
278; CHECK-NEXT:    i32.const $push9=, 16
279; CHECK-NEXT:    i32.sub $push11=, $pop8, $pop9
280; CHECK-NEXT:    local.tee $push10=, $3=, $pop11
281; CHECK-NEXT:    v128.store 0($pop10), $0
282; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
283; CHECK-NEXT:    i32.const $push1=, 15
284; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
285; CHECK-NEXT:    i32.or $push3=, $3, $pop2
286; CHECK-NEXT:    i8x16.splat $push4=, $2
287; CHECK-NEXT:    v128.load8_lane $push5=, 0($pop3), $pop4, 0
288; CHECK-NEXT:    i32.const $push6=, 42
289; CHECK-NEXT:    i8x16.replace_lane $push7=, $pop5, 4, $pop6
290; CHECK-NEXT:    return $pop7
291  %m0 = extractelement <16 x i8> %mask, i32 0
292  %s0 = extractelement <16 x i8> %src, i8 %m0
293  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
294  ; splat 3
295  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
296  ; splat 12
297  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
298  ; const 4
299  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
300  ret <16 x i8> %v3
301}
302
; Inserts the constant 42.0 into lane 1 of an otherwise undef <4 x float>.
; The expected output is a single v128.const with 0x1.5p5 (42.0) in lane 1
; and 0x0p0 in the three undef lanes; no replace_lane is needed.
303define <4 x float> @undef_const_insert_f32x4() {
304; CHECK-LABEL: undef_const_insert_f32x4:
305; CHECK:         .functype undef_const_insert_f32x4 () -> (v128)
306; CHECK-NEXT:  # %bb.0:
307; CHECK-NEXT:    v128.const $push0=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
308; CHECK-NEXT:    return $pop0
309  %v = insertelement <4 x float> undef, float 42., i32 1
310  ret <4 x float> %v
311}
312
; Inserts the argument %x into lane 3 of an otherwise undef <4 x i32>. Since
; every other lane is undef, the expected output is just an i32x4.splat of
; %x, with no replace_lane.
313define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
314; CHECK-LABEL: undef_arg_insert_i32x4:
315; CHECK:         .functype undef_arg_insert_i32x4 (i32) -> (v128)
316; CHECK-NEXT:  # %bb.0:
317; CHECK-NEXT:    i32x4.splat $push0=, $0
318; CHECK-NEXT:    return $pop0
319  %v = insertelement <4 x i32> undef, i32 %x, i32 3
320  ret <4 x i32> %v
321}
322
; Inserts an undef element into an undef <16 x i8>, so every lane of the
; result is undef. The expected output contains only `return $0`: no
; instruction is emitted to initialize the vector.
323define <16 x i8> @all_undef_i8x16() {
324; CHECK-LABEL: all_undef_i8x16:
325; CHECK:         .functype all_undef_i8x16 () -> (v128)
326; CHECK-NEXT:  # %bb.0:
327; CHECK-NEXT:    return $0
328  %v = insertelement <16 x i8> undef, i8 undef, i32 4
329  ret <16 x i8> %v
330}
331
; Returns an undef <2 x double> directly. As in the i8x16 all-undef case,
; the expected output contains only `return $0`: no instruction is emitted
; to initialize the vector.
332define <2 x double> @all_undef_f64x2() {
333; CHECK-LABEL: all_undef_f64x2:
334; CHECK:         .functype all_undef_f64x2 () -> (v128)
335; CHECK-NEXT:  # %bb.0:
336; CHECK-NEXT:    return $0
337  ret <2 x double> undef
338}
339