1; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
3; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
4; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
5; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128
6; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
7
; Check that a non-test run (including the explicit locals pass) at least finishes.
9; RUN: llc < %s -O0 -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext
10; RUN: llc < %s -O2 -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext
11
12; Test that basic SIMD128 arithmetic operations assemble as expected.
13
14target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
15target triple = "wasm32-unknown-unknown"
16
17; ==============================================================================
18; 16 x i8
19; ==============================================================================
; Lane-wise add of two <16 x i8> vectors. With simd128 enabled a single
; i8x16.add is expected; with it disabled no i8x16 instruction may appear.
; CHECK-LABEL: add_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %sum = add <16 x i8> %x, %y
  ret <16 x i8> %sum
}
30
; Lane-wise subtract (%x - %y) on <16 x i8>; expected to select i8x16.sub.
; CHECK-LABEL: sub_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %diff = sub <16 x i8> %x, %y
  ret <16 x i8> %diff
}
41
; Lane-wise multiply on <16 x i8>; expected to select i8x16.mul.
; CHECK-LABEL: mul_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %prod = mul <16 x i8> %x, %y
  ret <16 x i8> %prod
}
52
; Negation written as (0 - %x) on <16 x i8>; the subtraction from an all-zero
; vector is expected to be recognized and emitted as i8x16.neg.
; CHECK-LABEL: neg_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @neg_v16i8(<16 x i8> %x) {
  %neg = sub <16 x i8> zeroinitializer, %x
  ret <16 x i8> %neg
}
65
; Shift-left of every lane by the same scalar amount. The amount is splatted
; with insertelement + shufflevector (all-zero mask); the expected output uses
; the scalar parameter directly as the i8x16.shl shift operand.
; CHECK-LABEL: shl_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  %ins = insertelement <16 x i8> undef, i8 %x, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef,
                         <16 x i32> zeroinitializer
  %shifted = shl <16 x i8> %v, %splat
  ret <16 x i8> %shifted
}
80
; Shift-left of every lane by the constant 5. The constant splat is expected to
; become a scalar i32.const that feeds i8x16.shl. The i32.const line is anchored
; at end-of-line so that a different constant starting with the digit 5 cannot
; satisfy it.
; CHECK-LABEL: shl_const_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
  %a = shl <16 x i8> %v,
    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %a
}
94
; Shift-left where each lane has its own shift amount. The expected output is
; scalarized: each lane of both operands is extracted, shifted with i32.shl,
; and the result vector is rebuilt with splat (lane 0) and replace_lane.
; Only the first and the last lane are verified.
; CHECK-LABEL: shl_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; The 14 middle lanes are not verified.
; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %shifted = shl <16 x i8> %v, %x
  ret <16 x i8> %shifted
}
113
; Arithmetic shift-right of every lane by one splatted scalar amount; expected
; to select i8x16.shr_s with the scalar parameter as shift operand.
; CHECK-LABEL: shr_s_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
  %ins = insertelement <16 x i8> undef, i8 %x, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef,
                         <16 x i32> zeroinitializer
  %shifted = ashr <16 x i8> %v, %splat
  ret <16 x i8> %shifted
}
128
; Arithmetic shift-right where each lane has its own shift amount. The expected
; output is scalarized: the value lane is extracted, shifted left and then
; arithmetically right by 24 (sign-extending the low 8 bits), shifted by the
; extracted amount lane with i32.shr_s, and the vector is rebuilt with splat
; (lane 0) and replace_lane. Only the first and the last lane are verified.
; Every capture has a distinct name so none shadows an earlier one.
; CHECK-LABEL: shr_s_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32.const $push[[L3:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L7:[0-9]+]]=, $pop[[L6]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_u $push[[L8:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i32.const $push[[L9:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shl $push[[L10:[0-9]+]]=, $pop[[L8]], $pop[[L9]]{{$}}
; SIMD128-NEXT: i32.const $push[[L11:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L13:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L14:[0-9]+]]=, $pop[[L12]], $pop[[L13]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L15:[0-9]+]], 15, $pop[[L14]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %a = ashr <16 x i8> %v, %x
  ret <16 x i8> %a
}
155
; Logical shift-right of every lane by one splatted scalar amount; expected to
; select i8x16.shr_u with the scalar parameter as shift operand.
; CHECK-LABEL: shr_u_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
  %ins = insertelement <16 x i8> undef, i8 %x, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef,
                         <16 x i32> zeroinitializer
  %shifted = lshr <16 x i8> %v, %splat
  ret <16 x i8> %shifted
}
170
; Logical shift-right where each lane has its own shift amount. The expected
; output is scalarized: extract both lanes, i32.shr_u, then rebuild the vector
; with splat (lane 0) and replace_lane. Only the first and the last lane are
; verified.
; CHECK-LABEL: shr_u_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; The 14 middle lanes are not verified.
; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %shifted = lshr <16 x i8> %v, %x
  ret <16 x i8> %shifted
}
189
; Bitwise AND of two <16 x i8> vectors; expected to select the lane-agnostic
; v128.and. With simd128 disabled no v128 may appear in the output.
; CHECK-LABEL: and_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = and <16 x i8> %x, %y
  ret <16 x i8> %bits
}
200
; Bitwise OR of two <16 x i8> vectors; expected to select v128.or.
; CHECK-LABEL: or_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = or <16 x i8> %x, %y
  ret <16 x i8> %bits
}
211
; Bitwise XOR of two <16 x i8> vectors; expected to select v128.xor.
; CHECK-LABEL: xor_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = xor <16 x i8> %x, %y
  ret <16 x i8> %bits
}
222
; Bitwise NOT written as XOR with an all-ones vector; expected to be emitted as
; a single v128.not.
; CHECK-LABEL: not_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @not_v16i8(<16 x i8> %x) {
  %inverted = xor <16 x i8> %x,
    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %inverted
}
236
; Mask-select idiom: (%c & %v1) | (~%c & %v2). The default instruction selector
; is expected to fold it into one v128.bitselect; the -fast-isel run is expected
; to emit the separate and/not/and/or instructions instead.
; CHECK-LABEL: bitselect_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.not
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.or
; SIMD128-FAST-NEXT: return
define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
  %keep_v1 = and <16 x i8> %c, %v1
  %not_c = xor <16 x i8> %c,
    <i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1>
  %keep_v2 = and <16 x i8> %not_c, %v2
  %sel = or <16 x i8> %keep_v1, %keep_v2
  ret <16 x i8> %sel
}
257
258; ==============================================================================
259; 8 x i16
260; ==============================================================================
; Lane-wise add of two <8 x i16> vectors; expected to select i16x8.add. With
; simd128 disabled no i16x8 instruction may appear.
; CHECK-LABEL: add_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %sum = add <8 x i16> %x, %y
  ret <8 x i16> %sum
}
271
; Lane-wise subtract (%x - %y) on <8 x i16>; expected to select i16x8.sub.
; CHECK-LABEL: sub_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %diff = sub <8 x i16> %x, %y
  ret <8 x i16> %diff
}
282
; Lane-wise multiply on <8 x i16>; expected to select i16x8.mul.
; CHECK-LABEL: mul_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %prod = mul <8 x i16> %x, %y
  ret <8 x i16> %prod
}
293
; Negation written as (0 - %x) on <8 x i16>; expected to be emitted as
; i16x8.neg.
; CHECK-LABEL: neg_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @neg_v8i16(<8 x i16> %x) {
  %neg = sub <8 x i16> zeroinitializer, %x
  ret <8 x i16> %neg
}
305
; Shift-left of every <8 x i16> lane by one splatted scalar amount; expected to
; select i16x8.shl with the scalar parameter as shift operand.
; CHECK-LABEL: shl_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
  %ins = insertelement <8 x i16> undef, i16 %x, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef,
                         <8 x i32> zeroinitializer
  %shifted = shl <8 x i16> %v, %splat
  ret <8 x i16> %shifted
}
319
; Shift-left of every <8 x i16> lane by the constant 5. The constant splat is
; expected to become a scalar i32.const that feeds i16x8.shl. The i32.const line
; is anchored at end-of-line so that a different constant starting with the
; digit 5 cannot satisfy it.
; CHECK-LABEL: shl_const_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
  %a = shl <8 x i16> %v,
    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %a
}
332
; Shift-left where each <8 x i16> lane has its own shift amount. The expected
; output is scalarized: extract both lanes, i32.shl, then rebuild the vector
; with splat (lane 0) and replace_lane. Only the first and the last lane are
; verified.
; CHECK-LABEL: shl_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; The 6 middle lanes are not verified.
; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = shl <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
351
; Arithmetic shift-right of every <8 x i16> lane by one splatted scalar amount;
; expected to select i16x8.shr_s with the scalar parameter as shift operand.
; CHECK-LABEL: shr_s_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
  %ins = insertelement <8 x i16> undef, i16 %x, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef,
                         <8 x i32> zeroinitializer
  %shifted = ashr <8 x i16> %v, %splat
  ret <8 x i16> %shifted
}
365
; Arithmetic shift-right where each <8 x i16> lane has its own shift amount.
; The expected output is scalarized: the value lane is extracted, shifted left
; and then arithmetically right by 16 (sign-extending the low 16 bits), shifted
; by the extracted amount lane with i32.shr_s, and the vector is rebuilt with
; splat (lane 0) and replace_lane. Only the first and the last lane are
; verified. Every capture has a distinct name so none shadows an earlier one.
; CHECK-LABEL: shr_s_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32.const $push[[L3:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L7:[0-9]+]]=, $pop[[L6]]{{$}}
; Skip 6 lanes
; SIMD128:      i16x8.extract_lane_u $push[[L8:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i32.const $push[[L9:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shl $push[[L10:[0-9]+]]=, $pop[[L8]], $pop[[L9]]{{$}}
; SIMD128-NEXT: i32.const $push[[L11:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L13:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L14:[0-9]+]]=, $pop[[L12]], $pop[[L13]]{{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L15:[0-9]+]], 7, $pop[[L14]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %a = ashr <8 x i16> %v, %x
  ret <8 x i16> %a
}
392
; Logical shift-right of every <8 x i16> lane by one splatted scalar amount;
; expected to select i16x8.shr_u with the scalar parameter as shift operand.
; CHECK-LABEL: shr_u_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
  %ins = insertelement <8 x i16> undef, i16 %x, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef,
                         <8 x i32> zeroinitializer
  %shifted = lshr <8 x i16> %v, %splat
  ret <8 x i16> %shifted
}
406
; Logical shift-right where each <8 x i16> lane has its own shift amount. The
; expected output is scalarized: extract both lanes, i32.shr_u, then rebuild
; the vector with splat (lane 0) and replace_lane. Only the first and the last
; lane are verified.
; CHECK-LABEL: shr_u_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; The 6 middle lanes are not verified.
; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = lshr <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
425
; Bitwise AND of two <8 x i16> vectors; expected to select v128.and.
; CHECK-LABEL: and_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = and <8 x i16> %x, %y
  ret <8 x i16> %bits
}
436
; Bitwise OR of two <8 x i16> vectors; expected to select v128.or.
; CHECK-LABEL: or_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = or <8 x i16> %x, %y
  ret <8 x i16> %bits
}
447
; Bitwise XOR of two <8 x i16> vectors; expected to select v128.xor.
; CHECK-LABEL: xor_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = xor <8 x i16> %x, %y
  ret <8 x i16> %bits
}
458
; Bitwise NOT written as XOR with an all-ones <8 x i16> vector; expected to be
; emitted as a single v128.not.
; CHECK-LABEL: not_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @not_v8i16(<8 x i16> %x) {
  %inverted = xor <8 x i16> %x,
    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %inverted
}
470
; Mask-select idiom (%v1 & %c) | (%v2 & ~%c), written with the mask as the
; right-hand operand of both ANDs and of the XOR. The default selector is
; expected to fold it into one v128.bitselect; the -fast-isel run is expected
; to emit the separate and/not/and/or instructions.
; CHECK-LABEL: bitselect_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.not
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.or
; SIMD128-FAST-NEXT: return
define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
  %keep_v1 = and <8 x i16> %v1, %c
  %not_c = xor <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1,
                          i16 -1, i16 -1, i16 -1, i16 -1>, %c
  %keep_v2 = and <8 x i16> %v2, %not_c
  %sel = or <8 x i16> %keep_v1, %keep_v2
  ret <8 x i16> %sel
}
491
492; ==============================================================================
493; 4 x i32
494; ==============================================================================
; Lane-wise add of two <4 x i32> vectors; expected to select i32x4.add. With
; simd128 disabled no i32x4 instruction may appear.
; CHECK-LABEL: add_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %sum = add <4 x i32> %x, %y
  ret <4 x i32> %sum
}
505
; Lane-wise subtract (%x - %y) on <4 x i32>; expected to select i32x4.sub.
; CHECK-LABEL: sub_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %diff = sub <4 x i32> %x, %y
  ret <4 x i32> %diff
}
516
; Lane-wise multiply on <4 x i32>; expected to select i32x4.mul.
; CHECK-LABEL: mul_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %prod = mul <4 x i32> %x, %y
  ret <4 x i32> %prod
}
527
; Negation written as (0 - %x) on <4 x i32>; expected to be emitted as
; i32x4.neg.
; CHECK-LABEL: neg_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @neg_v4i32(<4 x i32> %x) {
  %neg = sub <4 x i32> zeroinitializer, %x
  ret <4 x i32> %neg
}
538
; Shift-left of every <4 x i32> lane by one splatted scalar amount; expected to
; select i32x4.shl with the scalar parameter as shift operand.
; CHECK-LABEL: shl_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
  %ins = insertelement <4 x i32> undef, i32 %x, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %shifted = shl <4 x i32> %v, %splat
  ret <4 x i32> %shifted
}
552
; Shift-left of every <4 x i32> lane by the constant 5. The constant splat is
; expected to become a scalar i32.const that feeds i32x4.shl. The i32.const line
; is anchored at end-of-line so that a different constant starting with the
; digit 5 cannot satisfy it.
; CHECK-LABEL: shl_const_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %a
}
564
; Shift-left where each <4 x i32> lane has its own shift amount. The expected
; output is scalarized: extract both lanes, i32.shl, then rebuild the vector
; with splat (lane 0) and replace_lane. Only the first and the last lane are
; verified.
; CHECK-LABEL: shl_vec_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; The 2 middle lanes are not verified.
; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = shl <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
583
; Arithmetic shift-right of every <4 x i32> lane by one splatted scalar amount;
; expected to select i32x4.shr_s with the scalar parameter as shift operand.
; CHECK-LABEL: shr_s_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
  %ins = insertelement <4 x i32> undef, i32 %x, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %shifted = ashr <4 x i32> %v, %splat
  ret <4 x i32> %shifted
}
597
; Arithmetic shift-right where each <4 x i32> lane has its own shift amount.
; The expected output is scalarized: extract both lanes, i32.shr_s, then
; rebuild the vector with splat (lane 0) and replace_lane. Only the first and
; the last lane are verified.
; CHECK-LABEL: shr_s_vec_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; The 2 middle lanes are not verified.
; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = ashr <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
616
; Logical shift-right of every <4 x i32> lane by one splatted scalar amount;
; expected to select i32x4.shr_u with the scalar parameter as shift operand.
; CHECK-LABEL: shr_u_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
  %ins = insertelement <4 x i32> undef, i32 %x, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %shifted = lshr <4 x i32> %v, %splat
  ret <4 x i32> %shifted
}
630
; Logical shift-right where each <4 x i32> lane has its own shift amount. The
; expected output is scalarized: extract both lanes, i32.shr_u, then rebuild
; the vector with splat (lane 0) and replace_lane. Only the first and the last
; lane are verified.
; CHECK-LABEL: shr_u_vec_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; The 2 middle lanes are not verified.
; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = lshr <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
649
; Bitwise AND of two <4 x i32> vectors; expected to select v128.and.
; CHECK-LABEL: and_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %bits = and <4 x i32> %x, %y
  ret <4 x i32> %bits
}
660
; Bitwise OR of two <4 x i32> vectors; expected to select v128.or.
; CHECK-LABEL: or_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %bits = or <4 x i32> %x, %y
  ret <4 x i32> %bits
}
671
; Bitwise XOR of two <4 x i32> vectors; expected to select v128.xor.
; CHECK-LABEL: xor_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %bits = xor <4 x i32> %x, %y
  ret <4 x i32> %bits
}
682
; Bitwise NOT written as XOR with an all-ones <4 x i32> vector; expected to be
; emitted as a single v128.not.
; CHECK-LABEL: not_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @not_v4i32(<4 x i32> %x) {
  %inverted = xor <4 x i32> %x,
    <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %inverted
}
693
; Mask-select idiom (~%c & %v2) | (%c & %v1), with the inverted-mask term as
; the left operand of the final OR. The default selector is expected to fold it
; into one v128.bitselect; the -fast-isel run is expected to emit the separate
; instructions, here in the order not/and/and/or.
; CHECK-LABEL: bitselect_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
; SIMD128-FAST-NEXT: v128.not
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.or
; SIMD128-FAST-NEXT: return
define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
  %keep_v1 = and <4 x i32> %c, %v1
  %not_c = xor <4 x i32>
    <i32 -1, i32 -1, i32 -1, i32 -1>,
    %c
  %keep_v2 = and <4 x i32> %not_c, %v2
  %sel = or <4 x i32> %keep_v2, %keep_v1
  ret <4 x i32> %sel
}
712
713; ==============================================================================
714; 2 x i64
715; ==============================================================================
; Lane-wise add of two <2 x i64> vectors; expected to select i64x2.add. No
; i64x2 instruction may appear when simd128 is disabled, nor in the runs that
; enable simd128 without -wasm-enable-unimplemented-simd.
; CHECK-LABEL: add_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %sum = add <2 x i64> %x, %y
  ret <2 x i64> %sum
}
727
; Lane-wise subtract (%x - %y) on <2 x i64>; expected to select i64x2.sub. No
; i64x2 instruction may appear when simd128 is disabled, nor in the runs that
; enable simd128 without -wasm-enable-unimplemented-simd.
; CHECK-LABEL: sub_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .param v128, v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %diff = sub <2 x i64> %x, %y
  ret <2 x i64> %diff
}
739
; There is no i64x2.mul instruction in the spec, so a <2 x i64> multiply must
; not select one; the expected output extracts lanes and multiplies them with
; the scalar i64.mul instead.
; CHECK-LABEL: mul_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NOT: i64x2.mul
; SIMD128: i64x2.extract_lane
; SIMD128: i64.mul
define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %prod = mul <2 x i64> %x, %y
  ret <2 x i64> %prod
}
751
; Negation written as (0 - %x) on <2 x i64>; expected to be emitted as
; i64x2.neg.
; CHECK-LABEL: neg_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @neg_v2i64(<2 x i64> %x) {
  %neg = sub <2 x i64> zeroinitializer, %x
  ret <2 x i64> %neg
}
762
; Shift-left of every <2 x i64> lane by an i32 amount that is zero-extended to
; i64 and splatted. The expected output passes the i32 parameter straight to
; i64x2.shl, with no extension instruction in between.
; CHECK-LABEL: shl_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
  %wide = zext i32 %x to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef,
                         <2 x i32> zeroinitializer
  %shifted = shl <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
776
; Shift-left of every <2 x i64> lane by a splatted i64 amount (no zext in the
; IR). The expected output narrows the amount with i32.wrap/i64 before feeding
; it to i64x2.shl.
; CHECK-LABEL: shl_nozext_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-NEXT: .param v128, i64{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_nozext_v2i64(<2 x i64> %v, i64 %x) {
  %ins = insertelement <2 x i64> undef, i64 %x, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef,
                         <2 x i32> zeroinitializer
  %shifted = shl <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
790
791; CHECK-LABEL: shl_const_v2i64:
792; NO-SIMD128-NOT: i64x2
793; SIMD128-NEXT: .param v128{{$}}
794; SIMD128-NEXT: .result v128{{$}}
795; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
796; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
797; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift of both lanes by the constant splat amount 5.
798define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
799  %shifted = shl <2 x i64> %v, <i64 5, i64 5>
800  ret <2 x i64> %shifted
801}
802
803; CHECK-LABEL: shl_vec_v2i64:
804; NO-SIMD128-NOT: i64x2
805; SIMD128-NEXT: .param v128, v128{{$}}
806; SIMD128-NEXT: .result v128{{$}}
807; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
808; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
809; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
810; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
811; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
812; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
813; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
814; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
815; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift where each lane of %v has its own shift amount taken from the
; matching lane of %x (the amount is not a splat).
816define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
817  %shifted = shl <2 x i64> %v, %x
818  ret <2 x i64> %shifted
819}
820
821; CHECK-LABEL: shr_s_v2i64:
822; NO-SIMD128-NOT: i64x2
823; SIMD128-NEXT: .param v128, i32{{$}}
824; SIMD128-NEXT: .result v128{{$}}
825; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
826; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift of both lanes by one i32 amount: the amount is
; zero-extended to i64 and broadcast to every lane before the shift.
827define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
828  %amt = zext i32 %x to i64
829  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
830  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
831  %shifted = ashr <2 x i64> %v, %splat
832  ret <2 x i64> %shifted
833}
834
835; CHECK-LABEL: shr_s_nozext_v2i64:
836; NO-SIMD128-NOT: i64x2
837; SIMD128-NEXT: .param v128, i64{{$}}
838; SIMD128-NEXT: .result v128{{$}}
839; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
840; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
841; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift of both lanes by an i64 amount that is broadcast
; directly, with no preceding zero-extension.
842define <2 x i64> @shr_s_nozext_v2i64(<2 x i64> %v, i64 %x) {
843  %lane0 = insertelement <2 x i64> undef, i64 %x, i32 0
844  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
845  %shifted = ashr <2 x i64> %v, %splat
846  ret <2 x i64> %shifted
847}
848
849; CHECK-LABEL: shr_s_const_v2i64:
850; NO-SIMD128-NOT: i64x2
851; SIMD128-NEXT: .param v128{{$}}
852; SIMD128-NEXT: .result v128{{$}}
853; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
854; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
855; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift of both lanes by the constant splat amount 5.
856define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) {
857  %shifted = ashr <2 x i64> %v, <i64 5, i64 5>
858  ret <2 x i64> %shifted
859}
860
861; CHECK-LABEL: shr_s_vec_v2i64:
862; NO-SIMD128-NOT: i64x2
863; SIMD128-NEXT: .param v128, v128{{$}}
864; SIMD128-NEXT: .result v128{{$}}
865; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
866; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
867; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
868; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
869; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
870; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
871; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
872; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
873; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift where each lane of %v has its own shift amount taken
; from the matching lane of %x (the amount is not a splat).
874define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
875  %shifted = ashr <2 x i64> %v, %x
876  ret <2 x i64> %shifted
877}
878
879; CHECK-LABEL: shr_u_v2i64:
880; NO-SIMD128-NOT: i64x2
881; SIMD128-NEXT: .param v128, i32{{$}}
882; SIMD128-NEXT: .result v128{{$}}
883; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
884; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift of both lanes by one i32 amount: the amount is
; zero-extended to i64 and broadcast to every lane before the shift.
885define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
886  %amt = zext i32 %x to i64
887  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
888  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
889  %shifted = lshr <2 x i64> %v, %splat
890  ret <2 x i64> %shifted
891}
892
893; CHECK-LABEL: shr_u_nozext_v2i64:
894; NO-SIMD128-NOT: i64x2
895; SIMD128-NEXT: .param v128, i64{{$}}
896; SIMD128-NEXT: .result v128{{$}}
897; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
898; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
899; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift of both lanes by an i64 amount that is broadcast
; directly, with no preceding zero-extension.
900define <2 x i64> @shr_u_nozext_v2i64(<2 x i64> %v, i64 %x) {
901  %lane0 = insertelement <2 x i64> undef, i64 %x, i32 0
902  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
903  %shifted = lshr <2 x i64> %v, %splat
904  ret <2 x i64> %shifted
905}
906
907; CHECK-LABEL: shr_u_const_v2i64:
908; NO-SIMD128-NOT: i64x2
909; SIMD128-NEXT: .param v128{{$}}
910; SIMD128-NEXT: .result v128{{$}}
911; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
912; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
913; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift of both lanes by the constant splat amount 5.
914define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) {
915  %shifted = lshr <2 x i64> %v, <i64 5, i64 5>
916  ret <2 x i64> %shifted
917}
918
919; CHECK-LABEL: shr_u_vec_v2i64:
920; NO-SIMD128-NOT: i64x2
921; SIMD128-NEXT: .param v128, v128{{$}}
922; SIMD128-NEXT: .result v128{{$}}
923; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
924; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
925; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
926; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
927; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
928; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
929; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
930; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
931; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift where each lane of %v has its own shift amount taken
; from the matching lane of %x (the amount is not a splat).
932define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
933  %shifted = lshr <2 x i64> %v, %x
934  ret <2 x i64> %shifted
935}
936
937; CHECK-LABEL: and_v2i64:
938; NO-SIMD128-NOT: v128
939; SIMD128-VM-NOT: v128
940; SIMD128-NEXT: .param v128, v128{{$}}
941; SIMD128-NEXT: .result v128{{$}}
942; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
943; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <2 x i64> vectors.
944define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) {
945  %conj = and <2 x i64> %x, %y
946  ret <2 x i64> %conj
947}
948
949; CHECK-LABEL: or_v2i64:
950; NO-SIMD128-NOT: v128
951; SIMD128-VM-NOT: v128
952; SIMD128-NEXT: .param v128, v128{{$}}
953; SIMD128-NEXT: .result v128{{$}}
954; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
955; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <2 x i64> vectors.
956define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) {
957  %disj = or <2 x i64> %x, %y
958  ret <2 x i64> %disj
959}
960
961; CHECK-LABEL: xor_v2i64:
962; NO-SIMD128-NOT: v128
963; SIMD128-VM-NOT: v128
964; SIMD128-NEXT: .param v128, v128{{$}}
965; SIMD128-NEXT: .result v128{{$}}
966; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
967; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <2 x i64> vectors.
968define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) {
969  %flipped = xor <2 x i64> %x, %y
970  ret <2 x i64> %flipped
971}
972
973; CHECK-LABEL: not_v2i64:
974; NO-SIMD128-NOT: v128
975; SIMD128-VM-NOT: v128
976; SIMD128-NEXT: .param v128{{$}}
977; SIMD128-NEXT: .result v128{{$}}
978; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
979; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT written as an XOR with an all-ones vector.
980define <2 x i64> @not_v2i64(<2 x i64> %x) {
981  %inverted = xor <2 x i64> %x, <i64 -1, i64 -1>
982  ret <2 x i64> %inverted
983}
984
985; CHECK-LABEL: bitselect_v2i64:
986; NO-SIMD128-NOT: v128
987; SIMD128-VM-NOT: v128
988; SIMD128-NEXT: .param v128, v128, v128{{$}}
989; SIMD128-NEXT: .result v128{{$}}
990; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
991; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
992; SIMD128-FAST-NEXT: v128.not
993; SIMD128-FAST-NEXT: v128.and
994; SIMD128-FAST-NEXT: v128.and
995; SIMD128-FAST-NEXT: v128.or
996; SIMD128-FAST-NEXT: return
; Bit-level select: bits of %v1 where the mask %c is set, bits of %v2 where it
; is clear, combined as (%v2 & ~%c) | (%v1 & %c). Instruction order and
; operand order are kept exactly as they were; only value names differ.
997define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
998  %keep_v1 = and <2 x i64> %v1, %c
999  %not_c = xor <2 x i64> <i64 -1, i64 -1>, %c
1000  %keep_v2 = and <2 x i64> %v2, %not_c
1001  %merged = or <2 x i64> %keep_v2, %keep_v1
1002  ret <2 x i64> %merged
1003}
1004
1005; ==============================================================================
1006; 4 x float
1007; ==============================================================================
1008; CHECK-LABEL: neg_v4f32:
1009; NO-SIMD128-NOT: f32x4
1010; SIMD128-NEXT: .param v128{{$}}
1011; SIMD128-NEXT: .result v128{{$}}
1012; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
1013; SIMD128-NEXT: return $pop[[R]]{{$}}
; Float negation written as a subtraction from a +0.0 splat.
1014define <4 x float> @neg_v4f32(<4 x float> %x) {
1015  ; the nsz flag lets 0.0 - x be treated as a plain sign-bit flip
1016  %negated = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
1017  ret <4 x float> %negated
1018}
1019
1020; CHECK-LABEL: abs_v4f32:
1021; NO-SIMD128-NOT: f32x4
1022; SIMD128-NEXT: .param v128{{$}}
1023; SIMD128-NEXT: .result v128{{$}}
1024; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
1025; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise absolute value through the llvm.fabs intrinsic.
1026declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
1027define <4 x float> @abs_v4f32(<4 x float> %x) {
1028  %magnitude = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
1029  ret <4 x float> %magnitude
1030}
1031
; Select-based minimum of %x against a 5.0 splat using an unordered (ule)
; compare. Every instruction directive is anchored at end of line, matching
; the other tests in this file, so trailing text cannot slip through.
1032; CHECK-LABEL: min_unordered_v4f32:
1033; NO-SIMD128-NOT: f32x4
1034; SIMD128-NEXT: .param v128{{$}}
1035; SIMD128-NEXT: .result v128{{$}}
1036; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1037; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1038; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1039; SIMD128-NEXT: return $pop[[R]]{{$}}
1040define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
1041  %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
1042  %a = select <4 x i1> %cmps, <4 x float> %x,
1043    <4 x float> <float 5., float 5., float 5., float 5.>
1044  ret <4 x float> %a
1045}
1046
; Select-based maximum of %x against a 5.0 splat using an unordered (uge)
; compare. Every instruction directive is anchored at end of line, matching
; the other tests in this file, so trailing text cannot slip through.
1047; CHECK-LABEL: max_unordered_v4f32:
1048; NO-SIMD128-NOT: f32x4
1049; SIMD128-NEXT: .param v128{{$}}
1050; SIMD128-NEXT: .result v128{{$}}
1051; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1052; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1053; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1054; SIMD128-NEXT: return $pop[[R]]{{$}}
1055define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
1056  %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
1057  %a = select <4 x i1> %cmps, <4 x float> %x,
1058    <4 x float> <float 5., float 5., float 5., float 5.>
1059  ret <4 x float> %a
1060}
1061
; Select-based minimum of a 5.0 splat against %x using an ordered (ole)
; compare with the constant on the left. Every instruction directive is
; anchored at end of line, matching the other tests in this file.
1062; CHECK-LABEL: min_ordered_v4f32:
1063; NO-SIMD128-NOT: f32x4
1064; SIMD128-NEXT: .param v128{{$}}
1065; SIMD128-NEXT: .result v128{{$}}
1066; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1067; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1068; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1069; SIMD128-NEXT: return $pop[[R]]{{$}}
1070define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
1071  %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
1072  %a = select <4 x i1> %cmps,
1073    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1074  ret <4 x float> %a
1075}
1076
; Select-based maximum of a 5.0 splat against %x using an ordered (oge)
; compare with the constant on the left. Every instruction directive is
; anchored at end of line, matching the other tests in this file.
1077; CHECK-LABEL: max_ordered_v4f32:
1078; NO-SIMD128-NOT: f32x4
1079; SIMD128-NEXT: .param v128{{$}}
1080; SIMD128-NEXT: .result v128{{$}}
1081; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1082; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1083; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1084; SIMD128-NEXT: return $pop[[R]]{{$}}
1085define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
1086  %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
1087  %a = select <4 x i1> %cmps,
1088    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1089  ret <4 x float> %a
1090}
1091
1092; CHECK-LABEL: min_intrinsic_v4f32:
1093; NO-SIMD128-NOT: f32x4
1094; SIMD128-NEXT: .param v128, v128{{$}}
1095; SIMD128-NEXT: .result v128{{$}}
1096; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1097; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise minimum of %x and %y through the llvm.minimum intrinsic.
1098declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1099define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1100  %lowest = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
1101  ret <4 x float> %lowest
1102}
1103
1104; CHECK-LABEL: max_intrinsic_v4f32:
1105; NO-SIMD128-NOT: f32x4
1106; SIMD128-NEXT: .param v128, v128{{$}}
1107; SIMD128-NEXT: .result v128{{$}}
1108; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1109; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise maximum of %x and %y through the llvm.maximum intrinsic.
1110declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1111define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1112  %highest = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
1113  ret <4 x float> %highest
1114}
1115
1116; CHECK-LABEL: min_const_intrinsic_v4f32:
1117; NO-SIMD128-NOT: f32x4
1118; SIMD128-NEXT: .result v128{{$}}
1119; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1120; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1121; SIMD128-NEXT: return $pop[[R]]{{$}}
; llvm.minimum applied to two constant splats, 42.0 and 5.0; takes no
; arguments, so the whole result is a compile-time constant.
1122define <4 x float> @min_const_intrinsic_v4f32() {
1123  %folded = call <4 x float> @llvm.minimum.v4f32(
1124    <4 x float> <float 42., float 42., float 42., float 42.>,
1125    <4 x float> <float 5., float 5., float 5., float 5.>
1126  )
1127  ret <4 x float> %folded
1128}
1129
1130; CHECK-LABEL: max_const_intrinsic_v4f32:
1131; NO-SIMD128-NOT: f32x4
1132; SIMD128-NEXT: .result v128{{$}}
1133; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1134; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1135; SIMD128-NEXT: return $pop[[R]]{{$}}
; llvm.maximum applied to two constant splats, 42.0 and 5.0; takes no
; arguments, so the whole result is a compile-time constant.
1136define <4 x float> @max_const_intrinsic_v4f32() {
1137  %folded = call <4 x float> @llvm.maximum.v4f32(
1138    <4 x float> <float 42., float 42., float 42., float 42.>,
1139    <4 x float> <float 5., float 5., float 5., float 5.>
1140  )
1141  ret <4 x float> %folded
1142}
1143
1144; CHECK-LABEL: add_v4f32:
1145; NO-SIMD128-NOT: f32x4
1146; SIMD128-NEXT: .param v128, v128{{$}}
1147; SIMD128-NEXT: .result v128{{$}}
1148; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1149; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise single-precision addition of %x and %y.
1150define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
1151  %sum = fadd <4 x float> %x, %y
1152  ret <4 x float> %sum
1153}
1154
1155; CHECK-LABEL: sub_v4f32:
1156; NO-SIMD128-NOT: f32x4
1157; SIMD128-NEXT: .param v128, v128{{$}}
1158; SIMD128-NEXT: .result v128{{$}}
1159; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1160; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise single-precision subtraction of %y from %x.
1161define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
1162  %diff = fsub <4 x float> %x, %y
1163  ret <4 x float> %diff
1164}
1165
1166; CHECK-LABEL: div_v4f32:
1167; NO-SIMD128-NOT: f32x4
1168; SIMD128-NEXT: .param v128, v128{{$}}
1169; SIMD128-NEXT: .result v128{{$}}
1170; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1171; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise single-precision division of %x by %y.
1172define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
1173  %quot = fdiv <4 x float> %x, %y
1174  ret <4 x float> %quot
1175}
1176
1177; CHECK-LABEL: mul_v4f32:
1178; NO-SIMD128-NOT: f32x4
1179; SIMD128-NEXT: .param v128, v128{{$}}
1180; SIMD128-NEXT: .result v128{{$}}
1181; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1182; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise single-precision multiplication of %x and %y.
1183define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
1184  %prod = fmul <4 x float> %x, %y
1185  ret <4 x float> %prod
1186}
1187
1188; CHECK-LABEL: sqrt_v4f32:
1189; NO-SIMD128-NOT: f32x4
1190; SIMD128-NEXT: .param v128{{$}}
1191; SIMD128-NEXT: .result v128{{$}}
1192; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1193; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise square root through the llvm.sqrt intrinsic.
1194declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1195define <4 x float> @sqrt_v4f32(<4 x float> %x) {
1196  %root = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1197  ret <4 x float> %root
1198}
1199
1200; ==============================================================================
1201; 2 x double
1202; ==============================================================================
1203; CHECK-LABEL: neg_v2f64:
1204; NO-SIMD128-NOT: f64x2
1205; SIMD128-NEXT: .param v128{{$}}
1206; SIMD128-NEXT: .result v128{{$}}
1207; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1208; SIMD128-NEXT: return $pop[[R]]{{$}}
; Double negation written as a subtraction from a +0.0 splat.
1209define <2 x double> @neg_v2f64(<2 x double> %x) {
1210  ; the nsz flag lets 0.0 - x be treated as a plain sign-bit flip
1211  %negated = fsub nsz <2 x double> <double 0., double 0.>, %x
1212  ret <2 x double> %negated
1213}
1214
1215; CHECK-LABEL: abs_v2f64:
1216; NO-SIMD128-NOT: f64x2
1217; SIMD128-NEXT: .param v128{{$}}
1218; SIMD128-NEXT: .result v128{{$}}
1219; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1220; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise absolute value through the llvm.fabs intrinsic.
1221declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
1222define <2 x double> @abs_v2f64(<2 x double> %x) {
1223  %magnitude = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
1224  ret <2 x double> %magnitude
1225}
1226
; Select-based minimum of %x against a 5.0 splat using an unordered (ule)
; compare. Every instruction directive is anchored at end of line, matching
; the other tests in this file, so trailing text cannot slip through.
1227; CHECK-LABEL: min_unordered_v2f64:
1228; NO-SIMD128-NOT: f64x2
1229; SIMD128-NEXT: .param v128{{$}}
1230; SIMD128-NEXT: .result v128{{$}}
1231; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1232; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1233; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1234; SIMD128-NEXT: return $pop[[R]]{{$}}
1235define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
1236  %cmps = fcmp ule <2 x double> %x, <double 5., double 5.>
1237  %a = select <2 x i1> %cmps, <2 x double> %x,
1238    <2 x double> <double 5., double 5.>
1239  ret <2 x double> %a
1240}
1241
; Select-based maximum of %x against a 5.0 splat using an unordered (uge)
; compare. Every instruction directive is anchored at end of line, matching
; the other tests in this file, so trailing text cannot slip through.
1242; CHECK-LABEL: max_unordered_v2f64:
1243; NO-SIMD128-NOT: f64x2
1244; SIMD128-NEXT: .param v128{{$}}
1245; SIMD128-NEXT: .result v128{{$}}
1246; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1247; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1248; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1249; SIMD128-NEXT: return $pop[[R]]{{$}}
1250define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
1251  %cmps = fcmp uge <2 x double> %x, <double 5., double 5.>
1252  %a = select <2 x i1> %cmps, <2 x double> %x,
1253    <2 x double> <double 5., double 5.>
1254  ret <2 x double> %a
1255}
1256
; Select-based minimum of a 5.0 splat against %x using an ordered (ole)
; compare with the constant on the left. Every instruction directive is
; anchored at end of line, matching the other tests in this file.
1257; CHECK-LABEL: min_ordered_v2f64:
1258; NO-SIMD128-NOT: f64x2
1259; SIMD128-NEXT: .param v128{{$}}
1260; SIMD128-NEXT: .result v128{{$}}
1261; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1262; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1263; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1264; SIMD128-NEXT: return $pop[[R]]{{$}}
1265define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
1266  %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x
1267  %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1268    <2 x double> %x
1269  ret <2 x double> %a
1270}
1271
; Select-based maximum of a 5.0 splat against %x using an ordered (oge)
; compare with the constant on the left. Every instruction directive is
; anchored at end of line, matching the other tests in this file.
1272; CHECK-LABEL: max_ordered_v2f64:
1273; NO-SIMD128-NOT: f64x2
1274; SIMD128-NEXT: .param v128{{$}}
1275; SIMD128-NEXT: .result v128{{$}}
1276; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1277; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1278; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1279; SIMD128-NEXT: return $pop[[R]]{{$}}
1280define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
1281  %cmps = fcmp oge <2 x double> <double 5., double 5.>, %x
1282  %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1283    <2 x double> %x
1284  ret <2 x double> %a
1285}
1286
1287; CHECK-LABEL: min_intrinsic_v2f64:
1288; NO-SIMD128-NOT: f64x2
1289; SIMD128-NEXT: .param v128, v128{{$}}
1290; SIMD128-NEXT: .result v128{{$}}
1291; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1292; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise minimum of %x and %y through the llvm.minimum intrinsic.
1293declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1294define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1295  %lowest = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
1296  ret <2 x double> %lowest
1297}
1298
1299; CHECK-LABEL: max_intrinsic_v2f64:
1300; NO-SIMD128-NOT: f64x2
1301; SIMD128-NEXT: .param v128, v128{{$}}
1302; SIMD128-NEXT: .result v128{{$}}
1303; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1304; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise maximum of %x and %y through the llvm.maximum intrinsic.
1305declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1306define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1307  %highest = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y)
1308  ret <2 x double> %highest
1309}
1310
1311; CHECK-LABEL: min_const_intrinsic_v2f64:
1312; NO-SIMD128-NOT: f64x2
1313; SIMD128-NEXT: .result v128{{$}}
1314; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1315; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1316; SIMD128-NEXT: return $pop[[R]]{{$}}
; llvm.minimum applied to two constant splats, 42.0 and 5.0; takes no
; arguments, so the whole result is a compile-time constant.
1317define <2 x double> @min_const_intrinsic_v2f64() {
1318  %folded = call <2 x double> @llvm.minimum.v2f64(
1319    <2 x double> <double 42., double 42.>,
1320    <2 x double> <double 5., double 5.>
1321  )
1322  ret <2 x double> %folded
1323}
1324
1325; CHECK-LABEL: max_const_intrinsic_v2f64:
1326; NO-SIMD128-NOT: f64x2
1327; SIMD128-NEXT: .result v128{{$}}
1328; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1329; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1330; SIMD128-NEXT: return $pop[[R]]{{$}}
; llvm.maximum applied to two constant splats, 42.0 and 5.0; takes no
; arguments, so the whole result is a compile-time constant.
1331define <2 x double> @max_const_intrinsic_v2f64() {
1332  %folded = call <2 x double> @llvm.maximum.v2f64(
1333    <2 x double> <double 42., double 42.>,
1334    <2 x double> <double 5., double 5.>
1335  )
1336  ret <2 x double> %folded
1337}
1338
; Lane-wise double-precision addition. The negative directive for the runs
; without unimplemented-SIMD support now names the real f64x2 prefix; the
; previous spelling (f62x2) could never match, so it verified nothing.
1339; CHECK-LABEL: add_v2f64:
1340; NO-SIMD128-NOT: f64x2
1341; SIMD128-VM-NOT: f64x2
1342; SIMD128-NEXT: .param v128, v128{{$}}
1343; SIMD128-NEXT: .result v128{{$}}
1344; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1345; SIMD128-NEXT: return $pop[[R]]{{$}}
1346define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
1347  %a = fadd <2 x double> %x, %y
1348  ret <2 x double> %a
1349}
1350
; Lane-wise double-precision subtraction. The negative directive for the runs
; without unimplemented-SIMD support now names the real f64x2 prefix; the
; previous spelling (f62x2) could never match, so it verified nothing.
1351; CHECK-LABEL: sub_v2f64:
1352; NO-SIMD128-NOT: f64x2
1353; SIMD128-VM-NOT: f64x2
1354; SIMD128-NEXT: .param v128, v128{{$}}
1355; SIMD128-NEXT: .result v128{{$}}
1356; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1357; SIMD128-NEXT: return $pop[[R]]{{$}}
1358define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
1359  %a = fsub <2 x double> %x, %y
1360  ret <2 x double> %a
1361}
1362
; Lane-wise double-precision division. The negative directive for the runs
; without unimplemented-SIMD support now names the real f64x2 prefix; the
; previous spelling (f62x2) could never match, so it verified nothing.
1363; CHECK-LABEL: div_v2f64:
1364; NO-SIMD128-NOT: f64x2
1365; SIMD128-VM-NOT: f64x2
1366; SIMD128-NEXT: .param v128, v128{{$}}
1367; SIMD128-NEXT: .result v128{{$}}
1368; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1369; SIMD128-NEXT: return $pop[[R]]{{$}}
1370define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
1371  %a = fdiv <2 x double> %x, %y
1372  ret <2 x double> %a
1373}
1374
; Lane-wise double-precision multiplication. The negative directive for the
; runs without unimplemented-SIMD support now names the real f64x2 prefix; the
; previous spelling (f62x2) could never match, so it verified nothing.
1375; CHECK-LABEL: mul_v2f64:
1376; NO-SIMD128-NOT: f64x2
1377; SIMD128-VM-NOT: f64x2
1378; SIMD128-NEXT: .param v128, v128{{$}}
1379; SIMD128-NEXT: .result v128{{$}}
1380; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1381; SIMD128-NEXT: return $pop[[R]]{{$}}
1382define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
1383  %a = fmul <2 x double> %x, %y
1384  ret <2 x double> %a
1385}
1386
1387; CHECK-LABEL: sqrt_v2f64:
1388; NO-SIMD128-NOT: f64x2
1389; SIMD128-NEXT: .param v128{{$}}
1390; SIMD128-NEXT: .result v128{{$}}
1391; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1392; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise square root through the llvm.sqrt intrinsic.
1393declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1394define <2 x double> @sqrt_v2f64(<2 x double> %x) {
1395  %root = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1396  ret <2 x double> %root
1397}
1398