; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes=CHECK,UNIMP
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SIMD-VM

; Test that the logic to choose between v128.const vector
; initialization and splat vector initialization and to optimize the
; choice of splat value works correctly.
;
; The file is compiled twice, once with +unimplemented-simd128 and once
; with +simd128. Both invocations share the common checks. Where the
; expected code differs, the +unimplemented-simd128 invocation checks the
; full instruction sequence, while the +simd128 invocation only checks that
; a splat instruction is emitted.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; An all-42 <8 x i16> constant with lane 5 overwritten by the argument.
; With +unimplemented-simd128 the constant lanes are expected in a single
; v128.const (the replaced lane is emitted as 0) followed by exactly one
; replace_lane of lane 5. With +simd128 only a splat is checked for.
; CHECK-LABEL: same_const_one_replaced_i16x8:
; CHECK-NEXT:  .functype       same_const_one_replaced_i16x8 (i32) -> (v128)
; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
; UNIMP-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; UNIMP-NEXT:  return          $pop[[L1]]
; SIMD-VM: i16x8.splat
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Same shape as the previous test, but every constant lane holds a
; different value. Lane 5 (originally -6) is overwritten by the argument,
; so with +unimplemented-simd128 the v128.const is expected to carry 0 in
; that lane and a single replace_lane fills it in.
; CHECK-LABEL: different_const_one_replaced_i16x8:
; CHECK-NEXT:  .functype       different_const_one_replaced_i16x8 (i32) -> (v128)
; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
; UNIMP-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; UNIMP-NEXT:  return          $pop[[L1]]
; SIMD-VM: i16x8.splat
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Floating-point variant: an all-42.0 <4 x float> constant with lane 2
; overwritten by the argument. 0x1.5p5 is the hex-float spelling of 42.0;
; the replaced lane is expected as 0x0p0 in the v128.const.
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK-NEXT:  .functype       same_const_one_replaced_f32x4 (f32) -> (v128)
; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
; UNIMP-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; UNIMP-NEXT:  return          $pop[[L1]]
; SIMD-VM: f32x4.splat
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 42., float 42., float 42., float 42.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Floating-point variant with distinct constant lanes (1.0, 2.0, 3.0, 4.0).
; Lane 2 is overwritten by the argument, so the expected v128.const holds
; 0x0p0 there and one replace_lane supplies the real value.
; CHECK-LABEL: different_const_one_replaced_f32x4:
; CHECK-NEXT:  .functype       different_const_one_replaced_f32x4 (f32) -> (v128)
; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
; UNIMP-NEXT:  f32x4.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; UNIMP-NEXT:  return          $pop[[L1]]
; SIMD-VM: f32x4.splat
define <4 x float> @different_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 1., float 2., float 3., float 4.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; A fully constant vector whose first lane is undef. With
; +unimplemented-simd128 the whole value is expected as one v128.const,
; with the undef lane emitted as 0. With +simd128 a splat is expected.
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK-NEXT:  .functype       splat_common_const_i32x4 () -> (v128)
; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0, 3, 3, 1
; UNIMP-NEXT:  return          $pop[[L0]]
; SIMD-VM: i32x4.splat
define <4 x i32> @splat_common_const_i32x4() {
  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}

; All eight lanes come from arguments: %c fills lanes 1, 3, 5 and 6,
; %b fills lanes 0, 4 and 7, and %a fills lane 2. The most frequent value
; (%c, i.e. $2) is expected to be splatted, with the four remaining lanes
; patched by replace_lane. Both invocations check the same sequence.
; CHECK-LABEL: splat_common_arg_i16x8:
; CHECK-NEXT:  .functype       splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
; CHECK-NEXT:  i16x8.splat     $push[[L0:[0-9]+]]=, $2
; CHECK-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
; CHECK-NEXT:  i16x8.replace_lane      $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
; CHECK-NEXT:  i16x8.replace_lane      $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
; CHECK-NEXT:  i16x8.replace_lane      $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
; CHECK-NEXT:  return          $pop[[L4]]
define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
  ret <8 x i16> %v7
}

; Only lane 0 is defined, as %src indexed by lane 0 of %mask; the other
; fifteen lanes stay undef. The whole build is expected to collapse into a
; single v8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK-NEXT:  .functype       swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT:  v8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT:  return          $pop[[L0]]
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ret <16 x i8> %v0
}

; Every lane i of the result is %src indexed by lane i of %mask, written
; out as sixteen extract/extract/insert triples. The whole chain is
; expected to be recognized as one v8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK-NEXT:  .functype       swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT:  v8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT:  return          $pop[[L0]]
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %m1 = extractelement <16 x i8> %mask, i32 1
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
  %m2 = extractelement <16 x i8> %mask, i32 2
  %s2 = extractelement <16 x i8> %src, i8 %m2
  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
  %m3 = extractelement <16 x i8> %mask, i32 3
  %s3 = extractelement <16 x i8> %src, i8 %m3
  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
  %m4 = extractelement <16 x i8> %mask, i32 4
  %s4 = extractelement <16 x i8> %src, i8 %m4
  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
  %m5 = extractelement <16 x i8> %mask, i32 5
  %s5 = extractelement <16 x i8> %src, i8 %m5
  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
  %m6 = extractelement <16 x i8> %mask, i32 6
  %s6 = extractelement <16 x i8> %src, i8 %m6
  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
  %m7 = extractelement <16 x i8> %mask, i32 7
  %s7 = extractelement <16 x i8> %src, i8 %m7
  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
  %m8 = extractelement <16 x i8> %mask, i32 8
  %s8 = extractelement <16 x i8> %src, i8 %m8
  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
  %m9 = extractelement <16 x i8> %mask, i32 9
  %s9 = extractelement <16 x i8> %src, i8 %m9
  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
  %m10 = extractelement <16 x i8> %mask, i32 10
  %s10 = extractelement <16 x i8> %src, i8 %m10
  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
  %m11 = extractelement <16 x i8> %mask, i32 11
  %s11 = extractelement <16 x i8> %src, i8 %m11
  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
  %m12 = extractelement <16 x i8> %mask, i32 12
  %s12 = extractelement <16 x i8> %src, i8 %m12
  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
  %m13 = extractelement <16 x i8> %mask, i32 13
  %s13 = extractelement <16 x i8> %src, i8 %m13
  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
  %m14 = extractelement <16 x i8> %mask, i32 14
  %s14 = extractelement <16 x i8> %src, i8 %m14
  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
  %m15 = extractelement <16 x i8> %mask, i32 15
  %s15 = extractelement <16 x i8> %src, i8 %m15
  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
  ret <16 x i8> %v15
}

; Negative test: the same mask-indexed extract pattern as swizzle_one_i8x16
; but with 16-bit lanes. No swizzle instruction may appear before the
; return (presumably because v8x16.swizzle selects bytes, not 16-bit
; lanes -- confirm against the lowering code).
; CHECK-LABEL: swizzle_one_i16x8:
; CHECK-NEXT:  .functype       swizzle_one_i16x8 (v128, v128) -> (v128)
; CHECK-NOT:    swizzle
; CHECK:        return
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
  %m0 = extractelement <8 x i16> %mask, i32 0
  %s0 = extractelement <8 x i16> %src, i16 %m0
  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
  ret <8 x i16> %v0
}

; Mixed build with two swizzle lanes (0, 7), two lanes of the same
; argument (3, 12) and two lanes of the constant 42 (4, 14); all three
; kinds tie at two lanes each. The swizzle is expected to be chosen as the
; base instruction, followed by four replace_lane instructions for the
; remaining defined lanes.
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK-NEXT:  .functype       mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT:  v8x16.swizzle   $push[[L0:[0-9]+]]=, $0, $1
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       return
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; swizzle 7
  %m1 = extractelement <16 x i8> %mask, i32 7
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
  ; splat 3
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
  ; splat 12
  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
  ; const 4
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
  ; const 14
  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
  ret <16 x i8> %v5
}

; Mixed build with one swizzle lane (0), two lanes of the same argument
; (3, 12) and two lanes of the constant 42 (4, 14). With
; +unimplemented-simd128 the constant lanes are expected in a v128.const
; (42 in lanes 4 and 14, 0 elsewhere) followed by three replace_lane
; instructions. With +simd128 only a splat is checked for.
; CHECK-LABEL: mashup_const_i8x16:
; CHECK-NEXT:  .functype       mashup_const_i8x16 (v128, v128, i32) -> (v128)
; UNIMP:       v128.const      $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; UNIMP:       i8x16.replace_lane
; UNIMP:       i8x16.replace_lane
; UNIMP:       i8x16.replace_lane
; UNIMP:       return
; SIMD-VM: i8x16.splat
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ; const 14
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
  ret <16 x i8> %v4
}

; Mixed build with one swizzle lane (0), two lanes of the same argument
; (3, 12) and a single constant lane (4). The argument is the most common
; source, so a splat of $2 is expected as the base, followed by two
; replace_lane instructions. Both invocations check the same sequence.
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK-NEXT:  .functype       mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK:       i8x16.splat     $push[[L0:[0-9]+]]=, $2
; CHECK:       i8x16.replace_lane
; CHECK:       i8x16.replace_lane
; CHECK:       return
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ret <16 x i8> %v3
}

; A single constant (42.0) inserted into lane 1 of an otherwise undef
; vector. With +unimplemented-simd128 one v128.const is expected, with the
; undef lanes emitted as 0x0p0. With +simd128 a splat is expected.
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK-NEXT:  .functype       undef_const_insert_f32x4 () -> (v128)
; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
; UNIMP-NEXT:  return          $pop[[L0]]
; SIMD-VM: f32x4.splat
define <4 x float> @undef_const_insert_f32x4() {
  %v = insertelement <4 x float> undef, float 42., i32 1
  ret <4 x float> %v
}

; A single argument inserted into lane 3 of an otherwise undef vector.
; Since every other lane is undef, a bare splat of the argument with no
; replace_lane is expected in both invocations.
; CHECK-LABEL: undef_arg_insert_i32x4:
; CHECK-NEXT:  .functype       undef_arg_insert_i32x4 (i32) -> (v128)
; CHECK-NEXT:  i32x4.splat     $push[[L0:[0-9]+]]=, $0
; CHECK-NEXT:  return          $pop[[L0]]
define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
  %v = insertelement <4 x i32> undef, i32 %x, i32 3
  ret <4 x i32> %v
}

; Inserting undef into an undef vector leaves every lane undefined. No
; vector-building instruction is expected: the function returns a register
; directly.
; CHECK-LABEL: all_undef_i8x16:
; CHECK-NEXT:  .functype       all_undef_i8x16 () -> (v128)
; CHECK-NEXT:  return          $0
define <16 x i8> @all_undef_i8x16() {
  %v = insertelement <16 x i8> undef, i8 undef, i32 4
  ret <16 x i8> %v
}

; Returning a literal undef vector: as with all_undef_i8x16, no
; vector-building instruction is expected before the return.
; CHECK-LABEL: all_undef_f64x2:
; CHECK-NEXT:  .functype       all_undef_f64x2 () -> (v128)
; CHECK-NEXT:  return          $0
define <2 x double> @all_undef_f64x2() {
  ret <2 x double> undef
}
