1; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
3; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
4; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
5; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128
6; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
7
8; check that a non-test run (including explicit locals pass) at least finishes
9; RUN: llc < %s -O0 -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext
10; RUN: llc < %s -O2 -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext
11
12; Test that basic SIMD128 arithmetic operations assemble as expected.
13
14target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
15target triple = "wasm32-unknown-unknown"
16
17; ==============================================================================
18; 16 x i8
19; ==============================================================================
20; CHECK-LABEL: add_v16i8:
21; NO-SIMD128-NOT: i8x16
22; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
23; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
24; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer addition of two <16 x i8> vectors.
define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %sum = add <16 x i8> %x, %y
  ret <16 x i8> %sum
}
29
30; CHECK-LABEL: sub_v16i8:
31; NO-SIMD128-NOT: i8x16
32; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
33; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
34; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer subtraction (%x - %y) of two <16 x i8> vectors.
define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %diff = sub <16 x i8> %x, %y
  ret <16 x i8> %diff
}
39
40; CHECK-LABEL: mul_v16i8:
41; NO-SIMD128-NOT: i8x16
42; SIMD128-NEXT: .functype mul_v16i8 (v128, v128) -> (v128){{$}}
43; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
44; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer multiplication of two <16 x i8> vectors.
define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %prod = mul <16 x i8> %x, %y
  ret <16 x i8> %prod
}
49
50; CHECK-LABEL: neg_v16i8:
51; NO-SIMD128-NOT: i8x16
52; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
53; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
54; SIMD128-NEXT: return $pop[[R]]{{$}}
; Negation written as (0 - %x); the all-zero vector is the left operand.
define <16 x i8> @neg_v16i8(<16 x i8> %x) {
  %negated = sub <16 x i8> zeroinitializer, %x
  ret <16 x i8> %negated
}
61
62; CHECK-LABEL: shl_v16i8:
63; NO-SIMD128-NOT: i8x16
64; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
65; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
66; SIMD128-NEXT: return $pop[[R]]{{$}}
; Shift every lane of %v left by the scalar %x. The scalar is broadcast with
; the insertelement + all-zero-mask shufflevector idiom.
define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  %lane0 = insertelement <16 x i8> undef, i8 %x, i32 0
  %amounts = shufflevector <16 x i8> %lane0, <16 x i8> undef,
                           <16 x i32> zeroinitializer
  %shifted = shl <16 x i8> %v, %amounts
  ret <16 x i8> %shifted
}
75
; Left shift of every lane by the constant 5. The expected output materializes
; the amount as a scalar i32.const; the immediate is anchored at end of line so
; that a longer literal such as 50 cannot satisfy the pattern.
; CHECK-LABEL: shl_const_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
  %a = shl <16 x i8> %v,
    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %a
}
88
89; CHECK-LABEL: shl_vec_v16i8:
90; NO-SIMD128-NOT: i8x16
91; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
92; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
93; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
94; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
95; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
96; Skip 14 lanes
97; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
98; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
99; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
100; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
101; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift where each lane of %v has its own amount taken from %x.
define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %shifted = shl <16 x i8> %v, %x
  ret <16 x i8> %shifted
}
106
107; CHECK-LABEL: shr_s_v16i8:
108; NO-SIMD128-NOT: i8x16
109; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
110; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
111; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic (sign-preserving) right shift of every lane of %v by the scalar
; %x, which is first broadcast to all sixteen lanes.
define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
  %lane0 = insertelement <16 x i8> undef, i8 %x, i32 0
  %amounts = shufflevector <16 x i8> %lane0, <16 x i8> undef,
                           <16 x i32> zeroinitializer
  %shifted = ashr <16 x i8> %v, %amounts
  ret <16 x i8> %shifted
}
120
; Arithmetic right shift with a per-lane amount. The expected output is a
; lane-by-lane expansion: each extracted byte is sign-extended with a
; shl/shr_s pair by 24 before the scalar shift. Pattern variables are numbered
; uniquely so that no binding shadows an earlier one.
; CHECK-LABEL: shr_s_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32.const $push[[L3:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L7:[0-9]+]]=, $pop[[L6]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_u $push[[L8:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i32.const $push[[L9:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shl $push[[L10:[0-9]+]]=, $pop[[L8]], $pop[[L9]]{{$}}
; SIMD128-NEXT: i32.const $push[[L11:[0-9]+]]=, 24{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L13:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L14:[0-9]+]]=, $pop[[L12]], $pop[[L13]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L15:[0-9]+]], 15, $pop[[L14]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %a = ashr <16 x i8> %v, %x
  ret <16 x i8> %a
}
146
147; CHECK-LABEL: shr_u_v16i8:
148; NO-SIMD128-NOT: i8x16
149; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
150; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
151; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical (zero-filling) right shift of every lane of %v by the scalar %x,
; which is first broadcast to all sixteen lanes.
define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
  %lane0 = insertelement <16 x i8> undef, i8 %x, i32 0
  %amounts = shufflevector <16 x i8> %lane0, <16 x i8> undef,
                           <16 x i32> zeroinitializer
  %shifted = lshr <16 x i8> %v, %amounts
  ret <16 x i8> %shifted
}
160
161; CHECK-LABEL: shr_u_vec_v16i8:
162; NO-SIMD128-NOT: i8x16
163; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
164; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
165; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
166; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
167; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
168; Skip 14 lanes
169; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
170; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
171; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
172; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
173; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift where each lane of %v has its own amount taken from %x.
define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %shifted = lshr <16 x i8> %v, %x
  ret <16 x i8> %shifted
}
178
179; CHECK-LABEL: and_v16i8:
180; NO-SIMD128-NOT: v128
181; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
182; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
183; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <16 x i8> vectors.
define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = and <16 x i8> %x, %y
  ret <16 x i8> %bits
}
188
189; CHECK-LABEL: or_v16i8:
190; NO-SIMD128-NOT: v128
191; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
192; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
193; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <16 x i8> vectors.
define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = or <16 x i8> %x, %y
  ret <16 x i8> %bits
}
198
199; CHECK-LABEL: xor_v16i8:
200; NO-SIMD128-NOT: v128
201; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
202; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
203; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <16 x i8> vectors.
define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = xor <16 x i8> %x, %y
  ret <16 x i8> %bits
}
208
209; CHECK-LABEL: not_v16i8:
210; NO-SIMD128-NOT: v128
211; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
212; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
213; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT, expressed as XOR of %x with an all-ones vector.
define <16 x i8> @not_v16i8(<16 x i8> %x) {
  %inverted = xor <16 x i8> %x,
    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %inverted
}
221
222; CHECK-LABEL: bitselect_v16i8:
223; NO-SIMD128-NOT: v128
224; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
225; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
226; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
227; SIMD128-FAST-NEXT: v128.and
228; SIMD128-FAST-NEXT: v128.not
229; SIMD128-FAST-NEXT: v128.and
230; SIMD128-FAST-NEXT: v128.or
231; SIMD128-FAST-NEXT: return
; Bitwise select: (%c & %v1) | (~%c & %v2).
; Instruction and operand order are kept exactly, since the fast-isel
; expectations match the emitted sequence line by line.
define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
  %from_v1 = and <16 x i8> %c, %v1
  %not_c = xor <16 x i8> %c, <i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1>
  %from_v2 = and <16 x i8> %not_c, %v2
  %merged = or <16 x i8> %from_v1, %from_v2
  ret <16 x i8> %merged
}
241
242; ==============================================================================
243; 8 x i16
244; ==============================================================================
245; CHECK-LABEL: add_v8i16:
246; NO-SIMD128-NOT: i16x8
247; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
248; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
249; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer addition of two <8 x i16> vectors.
define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %sum = add <8 x i16> %x, %y
  ret <8 x i16> %sum
}
254
255; CHECK-LABEL: sub_v8i16:
256; NO-SIMD128-NOT: i16x8
257; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
258; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
259; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer subtraction (%x - %y) of two <8 x i16> vectors.
define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %diff = sub <8 x i16> %x, %y
  ret <8 x i16> %diff
}
264
265; CHECK-LABEL: mul_v8i16:
266; NO-SIMD128-NOT: i16x8
267; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
268; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
269; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer multiplication of two <8 x i16> vectors.
define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %prod = mul <8 x i16> %x, %y
  ret <8 x i16> %prod
}
274
275; CHECK-LABEL: neg_v8i16:
276; NO-SIMD128-NOT: i16x8
277; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
278; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
279; SIMD128-NEXT: return $pop[[R]]{{$}}
; Negation written as (0 - %x); the all-zero vector is the left operand.
define <8 x i16> @neg_v8i16(<8 x i16> %x) {
  %negated = sub <8 x i16> zeroinitializer, %x
  ret <8 x i16> %negated
}
285
286; CHECK-LABEL: shl_v8i16:
287; NO-SIMD128-NOT: i16x8
288; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
289; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
290; SIMD128-NEXT: return $pop[[R]]{{$}}
; Shift every lane of %v left by the scalar %x, broadcast to all eight lanes
; with the insertelement + all-zero-mask shufflevector idiom.
define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
  %lane0 = insertelement <8 x i16> undef, i16 %x, i32 0
  %amounts = shufflevector <8 x i16> %lane0, <8 x i16> undef,
                           <8 x i32> zeroinitializer
  %shifted = shl <8 x i16> %v, %amounts
  ret <8 x i16> %shifted
}
298
; Left shift of every lane by the constant 5. The expected output materializes
; the amount as a scalar i32.const; the immediate is anchored at end of line so
; that a longer literal such as 50 cannot satisfy the pattern.
; CHECK-LABEL: shl_const_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
  %a = shl <8 x i16> %v,
    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %a
}
310
311; CHECK-LABEL: shl_vec_v8i16:
312; NO-SIMD128-NOT: i16x8
313; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
314; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
315; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
316; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
317; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
318; Skip 6 lanes
319; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
320; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
321; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
322; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
323; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift where each lane of %v has its own amount taken from %x.
define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = shl <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
328
329; CHECK-LABEL: shr_s_v8i16:
330; NO-SIMD128-NOT: i16x8
331; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
332; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
333; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic (sign-preserving) right shift of every lane of %v by the scalar
; %x, which is first broadcast to all eight lanes.
define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
  %lane0 = insertelement <8 x i16> undef, i16 %x, i32 0
  %amounts = shufflevector <8 x i16> %lane0, <8 x i16> undef,
                           <8 x i32> zeroinitializer
  %shifted = ashr <8 x i16> %v, %amounts
  ret <8 x i16> %shifted
}
341
; Arithmetic right shift with a per-lane amount. The expected output is a
; lane-by-lane expansion: each extracted halfword is sign-extended with a
; shl/shr_s pair by 16 before the scalar shift. Pattern variables are numbered
; uniquely so that no binding shadows an earlier one.
; CHECK-LABEL: shr_s_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32.const $push[[L3:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L7:[0-9]+]]=, $pop[[L6]]{{$}}
; Skip 6 lanes
; SIMD128:      i16x8.extract_lane_u $push[[L8:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i32.const $push[[L9:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shl $push[[L10:[0-9]+]]=, $pop[[L8]], $pop[[L9]]{{$}}
; SIMD128-NEXT: i32.const $push[[L11:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L13:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L14:[0-9]+]]=, $pop[[L12]], $pop[[L13]]{{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L15:[0-9]+]], 7, $pop[[L14]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %a = ashr <8 x i16> %v, %x
  ret <8 x i16> %a
}
367
368; CHECK-LABEL: shr_u_v8i16:
369; NO-SIMD128-NOT: i16x8
370; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
371; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
372; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical (zero-filling) right shift of every lane of %v by the scalar %x,
; which is first broadcast to all eight lanes.
define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
  %lane0 = insertelement <8 x i16> undef, i16 %x, i32 0
  %amounts = shufflevector <8 x i16> %lane0, <8 x i16> undef,
                           <8 x i32> zeroinitializer
  %shifted = lshr <8 x i16> %v, %amounts
  ret <8 x i16> %shifted
}
380
381; CHECK-LABEL: shr_u_vec_v8i16:
382; NO-SIMD128-NOT: i16x8
383; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
384; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
385; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
386; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
387; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
388; Skip 6 lanes
389; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
390; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
391; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
392; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
393; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift where each lane of %v has its own amount taken from %x.
define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = lshr <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
398
399; CHECK-LABEL: and_v8i16:
400; NO-SIMD128-NOT: v128
401; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
402; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
403; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <8 x i16> vectors.
define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = and <8 x i16> %x, %y
  ret <8 x i16> %bits
}
408
409; CHECK-LABEL: or_v8i16:
410; NO-SIMD128-NOT: v128
411; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
412; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
413; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <8 x i16> vectors.
define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = or <8 x i16> %x, %y
  ret <8 x i16> %bits
}
418
419; CHECK-LABEL: xor_v8i16:
420; NO-SIMD128-NOT: v128
421; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
422; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
423; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <8 x i16> vectors.
define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = xor <8 x i16> %x, %y
  ret <8 x i16> %bits
}
428
429; CHECK-LABEL: not_v8i16:
430; NO-SIMD128-NOT: v128
431; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
432; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
433; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT, expressed as XOR of %x with an all-ones vector.
define <8 x i16> @not_v8i16(<8 x i16> %x) {
  %inverted = xor <8 x i16> %x,
    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %inverted
}
439
440; CHECK-LABEL: bitselect_v8i16:
441; NO-SIMD128-NOT: v128
442; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
443; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
444; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
445; SIMD128-FAST-NEXT: v128.and
446; SIMD128-FAST-NEXT: v128.not
447; SIMD128-FAST-NEXT: v128.and
448; SIMD128-FAST-NEXT: v128.or
449; SIMD128-FAST-NEXT: return
; Bitwise select: (%v1 & %c) | (%v2 & ~%c), with the mask as the right-hand
; operand of each AND. Instruction and operand order are kept exactly, since
; the fast-isel expectations match the emitted sequence line by line.
define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
  %from_v1 = and <8 x i16> %v1, %c
  %not_c = xor <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1,
                          i16 -1, i16 -1, i16 -1, i16 -1>, %c
  %from_v2 = and <8 x i16> %v2, %not_c
  %merged = or <8 x i16> %from_v1, %from_v2
  ret <8 x i16> %merged
}
459
460; ==============================================================================
461; 4 x i32
462; ==============================================================================
463; CHECK-LABEL: add_v4i32:
464; NO-SIMD128-NOT: i32x4
465; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
466; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
467; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer addition of two <4 x i32> vectors.
define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %sum = add <4 x i32> %x, %y
  ret <4 x i32> %sum
}
472
473; CHECK-LABEL: sub_v4i32:
474; NO-SIMD128-NOT: i32x4
475; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
476; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
477; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer subtraction (%x - %y) of two <4 x i32> vectors.
define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %diff = sub <4 x i32> %x, %y
  ret <4 x i32> %diff
}
482
483; CHECK-LABEL: mul_v4i32:
484; NO-SIMD128-NOT: i32x4
485; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
486; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
487; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer multiplication of two <4 x i32> vectors.
define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %prod = mul <4 x i32> %x, %y
  ret <4 x i32> %prod
}
492
493; CHECK-LABEL: neg_v4i32:
494; NO-SIMD128-NOT: i32x4
495; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
496; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
497; SIMD128-NEXT: return $pop[[R]]{{$}}
; Negation written as (0 - %x); the all-zero vector is the left operand.
define <4 x i32> @neg_v4i32(<4 x i32> %x) {
  %negated = sub <4 x i32> zeroinitializer, %x
  ret <4 x i32> %negated
}
502
503; CHECK-LABEL: shl_v4i32:
504; NO-SIMD128-NOT: i32x4
505; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
506; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
507; SIMD128-NEXT: return $pop[[R]]{{$}}
; Shift every lane of %v left by the scalar %x, broadcast to all four lanes
; with the insertelement + all-zero-mask shufflevector idiom.
define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
  %lane0 = insertelement <4 x i32> undef, i32 %x, i32 0
  %amounts = shufflevector <4 x i32> %lane0, <4 x i32> undef,
                           <4 x i32> zeroinitializer
  %shifted = shl <4 x i32> %v, %amounts
  ret <4 x i32> %shifted
}
515
; Left shift of every lane by the constant 5. The expected output materializes
; the amount as a scalar i32.const; the immediate is anchored at end of line so
; that a longer literal such as 50 cannot satisfy the pattern.
; CHECK-LABEL: shl_const_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %a
}
526
527; CHECK-LABEL: shl_vec_v4i32:
528; NO-SIMD128-NOT: i32x4
529; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
530; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
531; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
532; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
533; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
534; Skip 2 lanes
535; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
536; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
537; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
538; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
539; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift where each lane of %v has its own amount taken from %x.
define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = shl <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
544
545; CHECK-LABEL: shr_s_v4i32:
546; NO-SIMD128-NOT: i32x4
547; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
548; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
549; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic (sign-preserving) right shift of every lane of %v by the scalar
; %x, which is first broadcast to all four lanes.
define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
  %lane0 = insertelement <4 x i32> undef, i32 %x, i32 0
  %amounts = shufflevector <4 x i32> %lane0, <4 x i32> undef,
                           <4 x i32> zeroinitializer
  %shifted = ashr <4 x i32> %v, %amounts
  ret <4 x i32> %shifted
}
557
558; CHECK-LABEL: shr_s_vec_v4i32:
559; NO-SIMD128-NOT: i32x4
560; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
561; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
562; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
563; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
564; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
565; Skip 2 lanes
566; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
567; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
568; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
569; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
570; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift where each lane of %v has its own amount from %x.
define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = ashr <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
575
576; CHECK-LABEL: shr_u_v4i32:
577; NO-SIMD128-NOT: i32x4
578; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
579; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
580; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical (zero-filling) right shift of every lane of %v by the scalar %x,
; which is first broadcast to all four lanes.
define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
  %lane0 = insertelement <4 x i32> undef, i32 %x, i32 0
  %amounts = shufflevector <4 x i32> %lane0, <4 x i32> undef,
                           <4 x i32> zeroinitializer
  %shifted = lshr <4 x i32> %v, %amounts
  ret <4 x i32> %shifted
}
588
589; CHECK-LABEL: shr_u_vec_v4i32:
590; NO-SIMD128-NOT: i32x4
591; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
592; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
593; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
594; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
595; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
596; Skip 2 lanes
597; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
598; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
599; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
600; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
601; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift where each lane of %v has its own amount taken from %x.
define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = lshr <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
606
607; CHECK-LABEL: and_v4i32:
608; NO-SIMD128-NOT: v128
609; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
610; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
611; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <4 x i32> vectors.
define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %bits = and <4 x i32> %x, %y
  ret <4 x i32> %bits
}
616
617; CHECK-LABEL: or_v4i32:
618; NO-SIMD128-NOT: v128
619; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
620; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
621; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <4 x i32> vectors.
define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %bits = or <4 x i32> %x, %y
  ret <4 x i32> %bits
}
626
627; CHECK-LABEL: xor_v4i32:
628; NO-SIMD128-NOT: v128
629; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
630; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
631; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <4 x i32> vectors.
define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %bits = xor <4 x i32> %x, %y
  ret <4 x i32> %bits
}
636
637; CHECK-LABEL: not_v4i32:
638; NO-SIMD128-NOT: v128
639; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
640; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
641; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT, expressed as XOR of %x with an all-ones vector.
define <4 x i32> @not_v4i32(<4 x i32> %x) {
  %inverted = xor <4 x i32> %x,
    <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %inverted
}
646
647; CHECK-LABEL: bitselect_v4i32:
648; NO-SIMD128-NOT: v128
649; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
650; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
651; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
652; SIMD128-FAST-NEXT: v128.not
653; SIMD128-FAST-NEXT: v128.and
654; SIMD128-FAST-NEXT: v128.and
655; SIMD128-FAST-NEXT: v128.or
656; SIMD128-FAST-NEXT: return
; Bitwise select: (~%c & %v2) | (%c & %v1), with the OR operands in that
; order. Instruction and operand order are kept exactly, since the fast-isel
; expectations match the emitted sequence line by line.
define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
  %from_v1 = and <4 x i32> %c, %v1
  %not_c = xor <4 x i32>
    <i32 -1, i32 -1, i32 -1, i32 -1>,
    %c
  %from_v2 = and <4 x i32> %not_c, %v2
  %merged = or <4 x i32> %from_v2, %from_v1
  ret <4 x i32> %merged
}
664
665; ==============================================================================
666; 2 x i64
667; ==============================================================================
668; CHECK-LABEL: add_v2i64:
669; NO-SIMD128-NOT: i64x2
670; SIMD128-VM-NOT: i64x2
671; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
672; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
673; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer addition of two <2 x i64> vectors.
define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %sum = add <2 x i64> %x, %y
  ret <2 x i64> %sum
}
678
679; CHECK-LABEL: sub_v2i64:
680; NO-SIMD128-NOT: i64x2
681; SIMD128-VM-NOT: i64x2
682; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
683; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
684; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise integer subtraction (%x - %y) of two <2 x i64> vectors.
define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %diff = sub <2 x i64> %x, %y
  ret <2 x i64> %diff
}
689
; i64x2.mul is not in the spec, so a v2i64 multiply is expected to be scalarized
691; CHECK-LABEL: mul_v2i64:
692; NO-SIMD128-NOT: i64x2
693; SIMD128-VM-NOT: i64x2
694; SIMD128-NOT: i64x2.mul
695; SIMD128: i64x2.extract_lane
696; SIMD128: i64.mul
; Lane-wise integer multiplication of two <2 x i64> vectors.
define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %prod = mul <2 x i64> %x, %y
  ret <2 x i64> %prod
}
701
; Negation of a <2 x i64> vector, written as (0 - %x). As with the add/sub/mul
; tests for this element type, the runs without the unimplemented-SIMD flag
; are required to emit no i64x2 instruction at all.
; CHECK-LABEL: neg_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @neg_v2i64(<2 x i64> %x) {
  %a = sub <2 x i64> <i64 0, i64 0>, %x
  ret <2 x i64> %a
}
711
; Left shift of both lanes by an i32 amount that is zero-extended to i64 and
; broadcast. As with the add/sub/mul tests for this element type, the runs
; without the unimplemented-SIMD flag are required to emit no i64x2
; instruction at all.
; CHECK-LABEL: shl_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
  %x2 = zext i32 %x to i64
  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %a = shl <2 x i64> %v, %s
  ret <2 x i64> %a
}
724
; Left shift of both lanes by an i64 amount passed directly (no zext in the
; IR); the expected output wraps it to i32 before the vector shift. As with
; the add/sub/mul tests for this element type, the runs without the
; unimplemented-SIMD flag are required to emit no i64x2 instruction at all.
; CHECK-LABEL: shl_nozext_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype shl_nozext_v2i64 (v128, i64) -> (v128){{$}}
; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_nozext_v2i64(<2 x i64> %v, i64 %x) {
  %t = insertelement <2 x i64> undef, i64 %x, i32 0
  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %a = shl <2 x i64> %v, %s
  ret <2 x i64> %a
}
737
; Left shift of both lanes by the constant 5, which the expected output
; materializes as a scalar i32.const. As with the add/sub/mul tests for this
; element type, the runs without the unimplemented-SIMD flag are required to
; emit no i64x2 instruction at all.
; CHECK-LABEL: shl_const_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
  %a = shl <2 x i64> %v, <i64 5, i64 5>
  ret <2 x i64> %a
}
748
749; CHECK-LABEL: shl_vec_v2i64:
750; NO-SIMD128-NOT: i64x2
751; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
752; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
753; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
754; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
755; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
756; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
757; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
758; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
759; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
760; SIMD128-NEXT: return $pop[[R]]{{$}}
761define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
  ; Left shift where each lane has its own count taken from %x (not a splat).
762  %shifted = shl <2 x i64> %v, %x
763  ret <2 x i64> %shifted
764}
765
766; CHECK-LABEL: shr_s_v2i64:
767; NO-SIMD128-NOT: i64x2
768; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
769; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
770; SIMD128-NEXT: return $pop[[R]]{{$}}
771define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
  ; Zero-extend the i32 shift count, splat it to both lanes, then shift right
  ; arithmetically.
772  %amt = zext i32 %x to i64
773  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
774  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
775  %shifted = ashr <2 x i64> %v, %splat
776  ret <2 x i64> %shifted
777}
778
779; CHECK-LABEL: shr_s_nozext_v2i64:
780; NO-SIMD128-NOT: i64x2
781; SIMD128-NEXT: .functype shr_s_nozext_v2i64 (v128, i64) -> (v128){{$}}
782; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
783; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
784; SIMD128-NEXT: return $pop[[R]]{{$}}
785define <2 x i64> @shr_s_nozext_v2i64(<2 x i64> %v, i64 %x) {
  ; The shift count is already i64 (no zext): splat it and shift right
  ; arithmetically.
786  %lane0 = insertelement <2 x i64> undef, i64 %x, i32 0
787  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
788  %shifted = ashr <2 x i64> %v, %splat
789  ret <2 x i64> %shifted
790}
791
792; CHECK-LABEL: shr_s_const_v2i64:
793; NO-SIMD128-NOT: i64x2
794; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
795; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
796; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
797; SIMD128-NEXT: return $pop[[R]]{{$}}
798define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) {
  ; Arithmetic right shift of both lanes by the same constant count (5).
799  %shifted = ashr <2 x i64> %v, <i64 5, i64 5>
800  ret <2 x i64> %shifted
801}
802
803; CHECK-LABEL: shr_s_vec_v2i64:
804; NO-SIMD128-NOT: i64x2
805; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
806; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
807; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
808; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
809; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
810; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
811; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
812; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
813; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
814; SIMD128-NEXT: return $pop[[R]]{{$}}
815define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
  ; Arithmetic right shift where each lane has its own count taken from %x.
816  %shifted = ashr <2 x i64> %v, %x
817  ret <2 x i64> %shifted
818}
819
820; CHECK-LABEL: shr_u_v2i64:
821; NO-SIMD128-NOT: i64x2
822; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
823; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
824; SIMD128-NEXT: return $pop[[R]]{{$}}
825define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
  ; Zero-extend the i32 shift count, splat it to both lanes, then shift right
  ; logically.
826  %amt = zext i32 %x to i64
827  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
828  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
829  %shifted = lshr <2 x i64> %v, %splat
830  ret <2 x i64> %shifted
831}
832
833; CHECK-LABEL: shr_u_nozext_v2i64:
834; NO-SIMD128-NOT: i64x2
835; SIMD128-NEXT: .functype shr_u_nozext_v2i64 (v128, i64) -> (v128){{$}}
836; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
837; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
838; SIMD128-NEXT: return $pop[[R]]{{$}}
839define <2 x i64> @shr_u_nozext_v2i64(<2 x i64> %v, i64 %x) {
  ; The shift count is already i64 (no zext): splat it and shift right
  ; logically.
840  %lane0 = insertelement <2 x i64> undef, i64 %x, i32 0
841  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
842  %shifted = lshr <2 x i64> %v, %splat
843  ret <2 x i64> %shifted
844}
845
846; CHECK-LABEL: shr_u_const_v2i64:
847; NO-SIMD128-NOT: i64x2
848; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
849; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
850; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
851; SIMD128-NEXT: return $pop[[R]]{{$}}
852define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) {
  ; Logical right shift of both lanes by the same constant count (5).
853  %shifted = lshr <2 x i64> %v, <i64 5, i64 5>
854  ret <2 x i64> %shifted
855}
856
857; CHECK-LABEL: shr_u_vec_v2i64:
858; NO-SIMD128-NOT: i64x2
859; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
860; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
861; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
862; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
863; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
864; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
865; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
866; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
867; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
868; SIMD128-NEXT: return $pop[[R]]{{$}}
869define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
  ; Logical right shift where each lane has its own count taken from %x.
870  %shifted = lshr <2 x i64> %v, %x
871  ret <2 x i64> %shifted
872}
873
874; CHECK-LABEL: and_v2i64:
875; NO-SIMD128-NOT: v128
876; SIMD128-VM-NOT: v128
877; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
878; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
879; SIMD128-NEXT: return $pop[[R]]{{$}}
880define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) {
  ; Bitwise AND of two <2 x i64> vectors.
881  %both = and <2 x i64> %x, %y
882  ret <2 x i64> %both
883}
884
885; CHECK-LABEL: or_v2i64:
886; NO-SIMD128-NOT: v128
887; SIMD128-VM-NOT: v128
888; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
889; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
890; SIMD128-NEXT: return $pop[[R]]{{$}}
891define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) {
  ; Bitwise OR of two <2 x i64> vectors.
892  %either = or <2 x i64> %x, %y
893  ret <2 x i64> %either
894}
895
896; CHECK-LABEL: xor_v2i64:
897; NO-SIMD128-NOT: v128
898; SIMD128-VM-NOT: v128
899; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
900; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
901; SIMD128-NEXT: return $pop[[R]]{{$}}
902define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) {
  ; Bitwise XOR of two <2 x i64> vectors.
903  %diff = xor <2 x i64> %x, %y
904  ret <2 x i64> %diff
905}
906
907; CHECK-LABEL: not_v2i64:
908; NO-SIMD128-NOT: v128
909; SIMD128-VM-NOT: v128
910; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
911; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
912; SIMD128-NEXT: return $pop[[R]]{{$}}
913define <2 x i64> @not_v2i64(<2 x i64> %x) {
  ; Bitwise NOT written as XOR with an all-ones vector.
914  %inverted = xor <2 x i64> %x, <i64 -1, i64 -1>
915  ret <2 x i64> %inverted
916}
917
918; CHECK-LABEL: bitselect_v2i64:
919; NO-SIMD128-NOT: v128
920; SIMD128-VM-NOT: v128
921; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
922; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
923; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
924; SIMD128-FAST-NEXT: v128.not
925; SIMD128-FAST-NEXT: v128.and
926; SIMD128-FAST-NEXT: v128.and
927; SIMD128-FAST-NEXT: v128.or
928; SIMD128-FAST-NEXT: return
929define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
  ; (v1 & c) | (v2 & ~c): bits come from %v1 where the mask %c is set and from
  ; %v2 where it is clear.
930  %from_v1 = and <2 x i64> %v1, %c
931  %not_c = xor <2 x i64> <i64 -1, i64 -1>, %c
932  %from_v2 = and <2 x i64> %v2, %not_c
933  %merged = or <2 x i64> %from_v2, %from_v1
934  ret <2 x i64> %merged
935}
936
937; ==============================================================================
938; 4 x float
939; ==============================================================================
940; CHECK-LABEL: neg_v4f32:
941; NO-SIMD128-NOT: f32x4
942; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
943; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
944; SIMD128-NEXT: return $pop[[R]]{{$}}
945define <4 x float> @neg_v4f32(<4 x float> %x) {
946  ; Negation spelled as (0.0 - x); the nsz flag makes it equivalent to a sign-bit flip
947  %negated = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
948  ret <4 x float> %negated
949}
950
951; CHECK-LABEL: abs_v4f32:
952; NO-SIMD128-NOT: f32x4
953; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
954; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
955; SIMD128-NEXT: return $pop[[R]]{{$}}
956declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
957define <4 x float> @abs_v4f32(<4 x float> %x) {
  ; Lane-wise absolute value through the llvm.fabs intrinsic declared above.
958  %magnitude = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
959  ret <4 x float> %magnitude
960}
961
962; CHECK-LABEL: min_unordered_v4f32:
963; NO-SIMD128-NOT: f32x4
964; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
965; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
966; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
967; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
968; SIMD128-NEXT: return $pop[[R]]{{$}}
969define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
  ; select(x ule 5.0, x, 5.0): compare-and-select minimum against a constant
  ; splat, using an unordered predicate.
970  %is_le = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
971  %lo = select <4 x i1> %is_le, <4 x float> %x,
972    <4 x float> <float 5., float 5., float 5., float 5.>
973  ret <4 x float> %lo
974}
975
976; CHECK-LABEL: max_unordered_v4f32:
977; NO-SIMD128-NOT: f32x4
978; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
979; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
980; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
981; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
982; SIMD128-NEXT: return $pop[[R]]{{$}}
983define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
  ; select(x uge 5.0, x, 5.0): compare-and-select maximum against a constant
  ; splat, using an unordered predicate.
984  %is_ge = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
985  %hi = select <4 x i1> %is_ge, <4 x float> %x,
986    <4 x float> <float 5., float 5., float 5., float 5.>
987  ret <4 x float> %hi
988}
989
990; CHECK-LABEL: min_ordered_v4f32:
991; NO-SIMD128-NOT: f32x4
992; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
993; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
994; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
995; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
996; SIMD128-NEXT: return $pop[[R]]{{$}}
997define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
  ; select(5.0 ole x, 5.0, x): compare-and-select minimum with the constant
  ; splat as the first compare operand, using an ordered predicate.
998  %is_le = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
999  %lo = select <4 x i1> %is_le,
1000    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1001  ret <4 x float> %lo
1002}
1003
1004; CHECK-LABEL: max_ordered_v4f32:
1005; NO-SIMD128-NOT: f32x4
1006; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1007; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1008; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1009; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1010; SIMD128-NEXT: return $pop[[R]]{{$}}
1011define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
  ; select(5.0 oge x, 5.0, x): compare-and-select maximum with the constant
  ; splat as the first compare operand, using an ordered predicate.
1012  %is_ge = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
1013  %hi = select <4 x i1> %is_ge,
1014    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1015  ret <4 x float> %hi
1016}
1017
1018; CHECK-LABEL: min_intrinsic_v4f32:
1019; NO-SIMD128-NOT: f32x4
1020; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1021; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1022; SIMD128-NEXT: return $pop[[R]]{{$}}
1023declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1024define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise minimum of two variable vectors through the llvm.minimum intrinsic.
1025  %lo = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
1026  ret <4 x float> %lo
1027}
1028
1029; CHECK-LABEL: max_intrinsic_v4f32:
1030; NO-SIMD128-NOT: f32x4
1031; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1032; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1033; SIMD128-NEXT: return $pop[[R]]{{$}}
1034declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1035define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise maximum of two variable vectors through the llvm.maximum intrinsic.
1036  %hi = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
1037  ret <4 x float> %hi
1038}
1039
1040; CHECK-LABEL: min_const_intrinsic_v4f32:
1041; NO-SIMD128-NOT: f32x4
1042; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1043; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1044; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1045; SIMD128-NEXT: return $pop[[R]]{{$}}
1046define <4 x float> @min_const_intrinsic_v4f32() {
  ; llvm.minimum on two constant splats (42.0 and 5.0); the directives above
  ; expect a plain splat of 0x1.4p2 (= 5.0) instead of a min instruction.
1047  %folded = call <4 x float> @llvm.minimum.v4f32(
1048    <4 x float> <float 42., float 42., float 42., float 42.>,
1049    <4 x float> <float 5., float 5., float 5., float 5.>
1050  )
1051  ret <4 x float> %folded
1052}
1053
1054; CHECK-LABEL: max_const_intrinsic_v4f32:
1055; NO-SIMD128-NOT: f32x4
1056; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1057; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1058; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1059; SIMD128-NEXT: return $pop[[R]]{{$}}
1060define <4 x float> @max_const_intrinsic_v4f32() {
  ; llvm.maximum on two constant splats (42.0 and 5.0); the directives above
  ; expect a plain splat of 0x1.5p5 (= 42.0) instead of a max instruction.
1061  %folded = call <4 x float> @llvm.maximum.v4f32(
1062    <4 x float> <float 42., float 42., float 42., float 42.>,
1063    <4 x float> <float 5., float 5., float 5., float 5.>
1064  )
1065  ret <4 x float> %folded
1066}
1067
1068; CHECK-LABEL: add_v4f32:
1069; NO-SIMD128-NOT: f32x4
1070; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1071; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1072; SIMD128-NEXT: return $pop[[R]]{{$}}
1073define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise single-precision addition.
1074  %sum = fadd <4 x float> %x, %y
1075  ret <4 x float> %sum
1076}
1077
1078; CHECK-LABEL: sub_v4f32:
1079; NO-SIMD128-NOT: f32x4
1080; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1081; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1082; SIMD128-NEXT: return $pop[[R]]{{$}}
1083define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise single-precision subtraction (x - y).
1084  %difference = fsub <4 x float> %x, %y
1085  ret <4 x float> %difference
1086}
1087
1088; CHECK-LABEL: div_v4f32:
1089; NO-SIMD128-NOT: f32x4
1090; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1091; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1092; SIMD128-NEXT: return $pop[[R]]{{$}}
1093define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise single-precision division (x / y).
1094  %quotient = fdiv <4 x float> %x, %y
1095  ret <4 x float> %quotient
1096}
1097
1098; CHECK-LABEL: mul_v4f32:
1099; NO-SIMD128-NOT: f32x4
1100; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1101; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1102; SIMD128-NEXT: return $pop[[R]]{{$}}
1103define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise single-precision multiplication.
1104  %product = fmul <4 x float> %x, %y
1105  ret <4 x float> %product
1106}
1107
1108; CHECK-LABEL: sqrt_v4f32:
1109; NO-SIMD128-NOT: f32x4
1110; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1111; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1112; SIMD128-NEXT: return $pop[[R]]{{$}}
1113declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1114define <4 x float> @sqrt_v4f32(<4 x float> %x) {
  ; Lane-wise square root through the llvm.sqrt intrinsic declared above.
1115  %root = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1116  ret <4 x float> %root
1117}
1118
1119; ==============================================================================
1120; 2 x double
1121; ==============================================================================
1122; CHECK-LABEL: neg_v2f64:
1123; NO-SIMD128-NOT: f64x2
1124; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1125; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1126; SIMD128-NEXT: return $pop[[R]]{{$}}
1127define <2 x double> @neg_v2f64(<2 x double> %x) {
1128  ; Negation spelled as (0.0 - x); the nsz flag makes it equivalent to a sign-bit flip
1129  %negated = fsub nsz <2 x double> <double 0., double 0.>, %x
1130  ret <2 x double> %negated
1131}
1132
1133; CHECK-LABEL: abs_v2f64:
1134; NO-SIMD128-NOT: f64x2
1135; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1136; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1137; SIMD128-NEXT: return $pop[[R]]{{$}}
1138declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
1139define <2 x double> @abs_v2f64(<2 x double> %x) {
  ; Lane-wise absolute value through the llvm.fabs intrinsic declared above.
1140  %magnitude = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
1141  ret <2 x double> %magnitude
1142}
1143
1144; CHECK-LABEL: min_unordered_v2f64:
1145; NO-SIMD128-NOT: f64x2
1146; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1147; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1148; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1149; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1150; SIMD128-NEXT: return $pop[[R]]{{$}}
1151define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
  ; select(x ule 5.0, x, 5.0): compare-and-select minimum against a constant
  ; splat, using an unordered predicate.
1152  %is_le = fcmp ule <2 x double> %x, <double 5., double 5.>
1153  %lo = select <2 x i1> %is_le, <2 x double> %x,
1154    <2 x double> <double 5., double 5.>
1155  ret <2 x double> %lo
1156}
1157
1158; CHECK-LABEL: max_unordered_v2f64:
1159; NO-SIMD128-NOT: f64x2
1160; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1161; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1162; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1163; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1164; SIMD128-NEXT: return $pop[[R]]{{$}}
1165define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
  ; select(x uge 5.0, x, 5.0): compare-and-select maximum against a constant
  ; splat, using an unordered predicate.
1166  %is_ge = fcmp uge <2 x double> %x, <double 5., double 5.>
1167  %hi = select <2 x i1> %is_ge, <2 x double> %x,
1168    <2 x double> <double 5., double 5.>
1169  ret <2 x double> %hi
1170}
1171
1172; CHECK-LABEL: min_ordered_v2f64:
1173; NO-SIMD128-NOT: f64x2
1174; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1175; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1176; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1177; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1178; SIMD128-NEXT: return $pop[[R]]{{$}}
1179define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
  ; select(5.0 ole x, 5.0, x): compare-and-select minimum with the constant
  ; splat as the first compare operand, using an ordered predicate.
1180  %is_le = fcmp ole <2 x double> <double 5., double 5.>, %x
1181  %lo = select <2 x i1> %is_le, <2 x double> <double 5., double 5.>,
1182    <2 x double> %x
1183  ret <2 x double> %lo
1184}
1185
1186; CHECK-LABEL: max_ordered_v2f64:
1187; NO-SIMD128-NOT: f64x2
1188; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
1189; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1190; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1191; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1192; SIMD128-NEXT: return $pop[[R]]{{$}}
1193define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
  ; select(5.0 oge x, 5.0, x): compare-and-select maximum with the constant
  ; splat as the first compare operand, using an ordered predicate.
1194  %is_ge = fcmp oge <2 x double> <double 5., double 5.>, %x
1195  %hi = select <2 x i1> %is_ge, <2 x double> <double 5., double 5.>,
1196    <2 x double> %x
1197  ret <2 x double> %hi
1198}
1199
1200; CHECK-LABEL: min_intrinsic_v2f64:
1201; NO-SIMD128-NOT: f64x2
1202; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1203; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1204; SIMD128-NEXT: return $pop[[R]]{{$}}
1205declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1206define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise minimum of two variable vectors through the llvm.minimum intrinsic.
1207  %lo = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
1208  ret <2 x double> %lo
1209}
1210
1211; CHECK-LABEL: max_intrinsic_v2f64:
1212; NO-SIMD128-NOT: f64x2
1213; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1214; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1215; SIMD128-NEXT: return $pop[[R]]{{$}}
1216declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1217define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise maximum of two variable vectors through the llvm.maximum intrinsic.
1218  %hi = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y)
1219  ret <2 x double> %hi
1220}
1221
1222; CHECK-LABEL: min_const_intrinsic_v2f64:
1223; NO-SIMD128-NOT: f64x2
1224; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
1225; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1226; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1227; SIMD128-NEXT: return $pop[[R]]{{$}}
1228define <2 x double> @min_const_intrinsic_v2f64() {
  ; llvm.minimum on two constant splats (42.0 and 5.0); the directives above
  ; expect a plain splat of 0x1.4p2 (= 5.0) instead of a min instruction.
1229  %folded = call <2 x double> @llvm.minimum.v2f64(
1230    <2 x double> <double 42., double 42.>,
1231    <2 x double> <double 5., double 5.>
1232  )
1233  ret <2 x double> %folded
1234}
1235
1236; CHECK-LABEL: max_const_intrinsic_v2f64:
1237; NO-SIMD128-NOT: f64x2
1238; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
1239; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1240; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1241; SIMD128-NEXT: return $pop[[R]]{{$}}
1242define <2 x double> @max_const_intrinsic_v2f64() {
  ; llvm.maximum on two constant splats (42.0 and 5.0); the directives above
  ; expect a plain splat of 0x1.5p5 (= 42.0) instead of a max instruction.
1243  %folded = call <2 x double> @llvm.maximum.v2f64(
1244    <2 x double> <double 42., double 42.>,
1245    <2 x double> <double 5., double 5.>
1246  )
1247  ret <2 x double> %folded
1248}
1249
1250; CHECK-LABEL: add_v2f64:
1251; NO-SIMD128-NOT: f64x2
1252; SIMD128-VM-NOT: f64x2
1253; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
1254; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1255; SIMD128-NEXT: return $pop[[R]]{{$}}
1256define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise double-precision addition. Per the directives above, f64x2
  ; instructions are expected only in the runs that enable unimplemented SIMD;
  ; the other configurations must not emit any f64x2 instruction here.
1257  %a = fadd <2 x double> %x, %y
1258  ret <2 x double> %a
1259}
1260
1261; CHECK-LABEL: sub_v2f64:
1262; NO-SIMD128-NOT: f64x2
1263; SIMD128-VM-NOT: f64x2
1264; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
1265; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1266; SIMD128-NEXT: return $pop[[R]]{{$}}
1267define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise double-precision subtraction (x - y). Per the directives above,
  ; f64x2 instructions are expected only in the runs that enable unimplemented
  ; SIMD; the other configurations must not emit any f64x2 instruction here.
1268  %a = fsub <2 x double> %x, %y
1269  ret <2 x double> %a
1270}
1271
1272; CHECK-LABEL: div_v2f64:
1273; NO-SIMD128-NOT: f64x2
1274; SIMD128-VM-NOT: f64x2
1275; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
1276; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1277; SIMD128-NEXT: return $pop[[R]]{{$}}
1278define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise double-precision division (x / y). Per the directives above,
  ; f64x2 instructions are expected only in the runs that enable unimplemented
  ; SIMD; the other configurations must not emit any f64x2 instruction here.
1279  %a = fdiv <2 x double> %x, %y
1280  ret <2 x double> %a
1281}
1282
1283; CHECK-LABEL: mul_v2f64:
1284; NO-SIMD128-NOT: f64x2
1285; SIMD128-VM-NOT: f64x2
1286; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
1287; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1288; SIMD128-NEXT: return $pop[[R]]{{$}}
1289define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise double-precision multiplication. Per the directives above, f64x2
  ; instructions are expected only in the runs that enable unimplemented SIMD;
  ; the other configurations must not emit any f64x2 instruction here.
1290  %a = fmul <2 x double> %x, %y
1291  ret <2 x double> %a
1292}
1293
1294; CHECK-LABEL: sqrt_v2f64:
1295; NO-SIMD128-NOT: f64x2
1296; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
1297; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1298; SIMD128-NEXT: return $pop[[R]]{{$}}
1299declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1300define <2 x double> @sqrt_v2f64(<2 x double> %x) {
  ; Lane-wise square root through the llvm.sqrt intrinsic declared above.
1301  %root = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1302  ret <2 x double> %root
1303}
1304