; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s

; Test that the logic that chooses between v128.const vector
; initialization and splat vector initialization, and that optimizes
; the choice of splat value, works correctly.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; One variable lane inside an otherwise uniform constant vector (all 42).
; Expected lowering: materialize the constants with v128.const, with lane 5
; emitted as 0, then write %x into lane 5 with a single replace_lane.
; CHECK-LABEL: same_const_one_replaced_i16x8:
; CHECK-NEXT:  .functype       same_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; CHECK-NEXT:  return          $pop[[L1]]
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; One variable lane inside a constant vector whose lanes all differ.
; Expected lowering: v128.const carrying the original constants, except that
; lane 5 (-6 in the IR) is emitted as 0, followed by a single replace_lane
; that writes %x into lane 5.
; CHECK-LABEL: different_const_one_replaced_i16x8:
; CHECK-NEXT:  .functype       different_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; CHECK-NEXT:  return          $pop[[L1]]
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Floating-point variant of the uniform-constant case: all lanes are 42.0
; (printed as 0x1.5p5) and %x is inserted into lane 2.
; Expected lowering: v128.const with lane 2 emitted as 0x0p0, then a single
; f32x4.replace_lane for lane 2.
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK-NEXT:  .functype       same_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
; CHECK-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; CHECK-NEXT:  return          $pop[[L1]]
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 42., float 42., float 42., float 42.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Floating-point variant of the distinct-constant case: lanes are 1.0, 2.0,
; 3.0, 4.0 and %x is inserted into lane 2.
; Expected lowering: v128.const with lane 2 (3.0 in the IR) emitted as 0x0p0,
; then a single f32x4.replace_lane for lane 2.
; CHECK-LABEL: different_const_one_replaced_f32x4:
; CHECK-NEXT:  .functype       different_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
; CHECK-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; CHECK-NEXT:  return          $pop[[L1]]
define <4 x float> @different_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 1., float 2., float 3., float 4.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Fully constant vector with one undef lane and a repeated value (3).
; Expected lowering: a single v128.const with the undef lane emitted as 0;
; no splat or replace_lane is expected.
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK-NEXT:  .functype       splat_common_const_i32x4 () -> (v128)
; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0, 3, 3, 1
; CHECK-NEXT:  return          $pop[[L0]]
define <4 x i32> @splat_common_const_i32x4() {
  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}

; All lanes come from arguments: %c fills four lanes (1, 3, 5, 6), %b fills
; three (0, 4, 7) and %a fills one (2).
; Expected lowering: splat the most frequent value %c ($2), then patch the
; remaining four lanes with replace_lane (%b = $1, %a = $0).
; CHECK-LABEL: splat_common_arg_i16x8:
; CHECK-NEXT:  .functype       splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
; CHECK-NEXT:  i16x8.splat     $push[[L0:[0-9]+]]=, $2
; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
; CHECK-NEXT:  i16x8.replace_lane      $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
; CHECK-NEXT:  i16x8.replace_lane      $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
; CHECK-NEXT:  i16x8.replace_lane      $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
; CHECK-NEXT:  return          $pop[[L4]]
define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
  ret <8 x i16> %v7
}

; Lane 0 is src[mask[0]]; every other lane is left undef.
; Expected lowering: a single v8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK-NEXT:  .functype       swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT:  v8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT:  return          $pop[[L0]]
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ret <16 x i8> %v0
}

; Every lane i is src[mask[i]], spelled out as sixteen
; extract/extract/insert triples.
; Expected lowering: the whole chain collapses into one v8x16.swizzle.
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK-NEXT:  .functype       swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT:  v8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT:  return          $pop[[L0]]
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %m1 = extractelement <16 x i8> %mask, i32 1
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
  %m2 = extractelement <16 x i8> %mask, i32 2
  %s2 = extractelement <16 x i8> %src, i8 %m2
  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
  %m3 = extractelement <16 x i8> %mask, i32 3
  %s3 = extractelement <16 x i8> %src, i8 %m3
  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
  %m4 = extractelement <16 x i8> %mask, i32 4
  %s4 = extractelement <16 x i8> %src, i8 %m4
  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
  %m5 = extractelement <16 x i8> %mask, i32 5
  %s5 = extractelement <16 x i8> %src, i8 %m5
  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
  %m6 = extractelement <16 x i8> %mask, i32 6
  %s6 = extractelement <16 x i8> %src, i8 %m6
  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
  %m7 = extractelement <16 x i8> %mask, i32 7
  %s7 = extractelement <16 x i8> %src, i8 %m7
  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
  %m8 = extractelement <16 x i8> %mask, i32 8
  %s8 = extractelement <16 x i8> %src, i8 %m8
  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
  %m9 = extractelement <16 x i8> %mask, i32 9
  %s9 = extractelement <16 x i8> %src, i8 %m9
  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
  %m10 = extractelement <16 x i8> %mask, i32 10
  %s10 = extractelement <16 x i8> %src, i8 %m10
  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
  %m11 = extractelement <16 x i8> %mask, i32 11
  %s11 = extractelement <16 x i8> %src, i8 %m11
  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
  %m12 = extractelement <16 x i8> %mask, i32 12
  %s12 = extractelement <16 x i8> %src, i8 %m12
  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
  %m13 = extractelement <16 x i8> %mask, i32 13
  %s13 = extractelement <16 x i8> %src, i8 %m13
  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
  %m14 = extractelement <16 x i8> %mask, i32 14
  %s14 = extractelement <16 x i8> %src, i8 %m14
  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
  %m15 = extractelement <16 x i8> %mask, i32 15
  %s15 = extractelement <16 x i8> %src, i8 %m15
  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
  ret <16 x i8> %v15
}

; Same src[mask[0]] pattern as the i8x16 case, but with 16-bit lanes.
; Negative test: no swizzle instruction may appear between the function
; type and the return (presumably because the only swizzle exercised in
; this file is the byte-lane v8x16 form).
; CHECK-LABEL: swizzle_one_i16x8:
; CHECK-NEXT:  .functype       swizzle_one_i16x8 (v128, v128) -> (v128)
; CHECK-NOT:    swizzle
; CHECK:        return
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
  %m0 = extractelement <8 x i16> %mask, i32 0
  %s0 = extractelement <8 x i16> %src, i16 %m0
  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
  ret <8 x i16> %v0
}

; Mixed initializer with two swizzled lanes (0, 7), two lanes holding the
; %splatted argument (3, 12) and two constant lanes (4, 14).
; Expected lowering: start from v8x16.swizzle, then fix up the four
; non-swizzled lanes with replace_lane.
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK-NEXT:  .functype       mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT:  v8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       return
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; swizzle 7
  %m1 = extractelement <16 x i8> %mask, i32 7
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
  ; splat 3
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
  ; splat 12
  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
  ; const 4
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
  ; const 14
  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
  ret <16 x i8> %v5
}

; Mixed initializer with one swizzled lane (0), two lanes holding the
; %splatted argument (3, 12) and two constant lanes (4, 14).
; Expected lowering: start from v128.const (42 in lanes 4 and 14, 0
; elsewhere), then fill the three non-constant lanes with replace_lane.
; CHECK-LABEL: mashup_const_i8x16:
; CHECK-NEXT:  .functype       mashup_const_i8x16 (v128, v128, i32) -> (v128)
; CHECK:       v128.const      $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       return
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ; const 14
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
  ret <16 x i8> %v4
}

; Mixed initializer with one swizzled lane (0), two lanes holding the
; %splatted argument (3, 12) and one constant lane (4).
; Expected lowering: start from i8x16.splat of %splatted ($2), then fix up
; the two remaining lanes with replace_lane.
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK-NEXT:  .functype       mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK:       i8x16.splat     $push[[L0:[0-9]+]]=, $2
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       return
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ret <16 x i8> %v3
}

; A constant (42.0) inserted into lane 1 of an undef vector.
; Expected lowering: a single v128.const with the undef lanes emitted as
; 0x0p0; no replace_lane is expected.
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK-NEXT:  .functype       undef_const_insert_f32x4 () -> (v128)
; CHECK-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
; CHECK-NEXT:  return          $pop[[L0]]
define <4 x float> @undef_const_insert_f32x4() {
  %v = insertelement <4 x float> undef, float 42., i32 1
  ret <4 x float> %v
}

; An argument inserted into lane 3 of an undef vector.
; Expected lowering: a single i32x4.splat of the argument, with no
; replace_lane, since the other lanes are undef.
; CHECK-LABEL: undef_arg_insert_i32x4:
; CHECK-NEXT:  .functype       undef_arg_insert_i32x4 (i32) -> (v128)
; CHECK-NEXT:  i32x4.splat     $push[[L0:[0-9]+]]=, $0
; CHECK-NEXT:  return          $pop[[L0]]
define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
  %v = insertelement <4 x i32> undef, i32 %x, i32 3
  ret <4 x i32> %v
}

; An undef element inserted into an undef vector: the result is entirely
; undef. Expected lowering: no vector-building instruction at all, only a
; return of register $0.
; CHECK-LABEL: all_undef_i8x16:
; CHECK-NEXT:  .functype       all_undef_i8x16 () -> (v128)
; CHECK-NEXT:  return          $0
define <16 x i8> @all_undef_i8x16() {
  %v = insertelement <16 x i8> undef, i8 undef, i32 4
  ret <16 x i8> %v
}

; A literal undef vector returned directly. Expected lowering: no
; vector-building instruction, only a return of register $0.
; CHECK-LABEL: all_undef_f64x2:
; CHECK-NEXT:  .functype       all_undef_f64x2 () -> (v128)
; CHECK-NEXT:  return          $0
define <2 x double> @all_undef_f64x2() {
  ret <2 x double> undef
}