1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
3; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
4; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
5; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128
6; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
7
; Check that a non-test run (including the explicit locals pass) at least finishes.
9; RUN: llc < %s -O0 -mattr=+unimplemented-simd128
10; RUN: llc < %s -O2 -mattr=+unimplemented-simd128
11
12; Test that basic SIMD128 arithmetic operations assemble as expected.
13
; Module-level target configuration: every function below is compiled for the
; wasm32 triple named here.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
16
17; ==============================================================================
18; 16 x i8
19; ==============================================================================
; Lane-wise addition of two <16 x i8> vectors; the SIMD-enabled checks expect a
; single i8x16.add.
; CHECK-LABEL: add_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %sum = add <16 x i8> %x, %y
  ret <16 x i8> %sum
}
29
; Lane-wise subtraction (%x - %y) of <16 x i8> vectors; the SIMD-enabled checks
; expect a single i8x16.sub.
; CHECK-LABEL: sub_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %diff = sub <16 x i8> %x, %y
  ret <16 x i8> %diff
}
39
; Lane-wise multiplication of two <16 x i8> vectors; the SIMD-enabled checks
; expect a single i8x16.mul.
; CHECK-LABEL: mul_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype mul_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %prod = mul <16 x i8> %x, %y
  ret <16 x i8> %prod
}
49
; Negation written as (0 - %x); the SIMD-enabled checks expect it to be
; selected as a single i8x16.neg.
; CHECK-LABEL: neg_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @neg_v16i8(<16 x i8> %x) {
  ; zeroinitializer is the all-zero <16 x i8> constant.
  %negated = sub <16 x i8> zeroinitializer, %x
  ret <16 x i8> %negated
}
61
; Left shift of every lane by the same scalar amount. The amount is splatted
; with insertelement + shufflevector; the SIMD-enabled checks expect the splat
; to fold into a single i8x16.shl that takes the scalar directly.
; CHECK-LABEL: shl_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  %lane0 = insertelement <16 x i8> undef, i8 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <16 x i8> %lane0, <16 x i8> undef,
                       <16 x i32> zeroinitializer
  %shifted = shl <16 x i8> %v, %amt
  ret <16 x i8> %shifted
}
75
; Left shift of every lane by the constant 5. The SIMD-enabled checks expect
; the splat constant to be materialized as a scalar i32.const feeding i8x16.shl.
; The i32.const check is anchored at end of line so that only the value 5
; (and not, e.g., 50) can match.
; CHECK-LABEL: shl_const_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
  %a = shl <16 x i8> %v,
    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %a
}
88
; Left shift with a per-lane (non-splat) shift amount. The SIMD-enabled checks
; expect the shift amounts to be masked with 7 and the operation to be unrolled
; into scalar i32.shl instructions; only the first and last lanes are verified.
; CHECK-LABEL: shl_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %shifted = shl <16 x i8> %v, %x
  ret <16 x i8> %shifted
}
110
; Arithmetic right shift of every lane by the same scalar amount; the
; SIMD-enabled checks expect the splat to fold into a single i8x16.shr_s.
; CHECK-LABEL: shr_s_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
  %lane0 = insertelement <16 x i8> undef, i8 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <16 x i8> %lane0, <16 x i8> undef,
                       <16 x i32> zeroinitializer
  %shifted = ashr <16 x i8> %v, %amt
  ret <16 x i8> %shifted
}
124
; Arithmetic right shift with a per-lane shift amount. The SIMD-enabled checks
; expect the amounts to be masked with 7 and the shift to be unrolled into
; scalar i32.shr_s instructions; only the first and last lanes are verified.
; CHECK-LABEL: shr_s_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop{{[0-9]+}}, 15, $pop[[L2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %shifted = ashr <16 x i8> %v, %x
  ret <16 x i8> %shifted
}
146
; Logical right shift of every lane by the same scalar amount; the
; SIMD-enabled checks expect the splat to fold into a single i8x16.shr_u.
; CHECK-LABEL: shr_u_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
  %lane0 = insertelement <16 x i8> undef, i8 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <16 x i8> %lane0, <16 x i8> undef,
                       <16 x i32> zeroinitializer
  %shifted = lshr <16 x i8> %v, %amt
  ret <16 x i8> %shifted
}
160
; Logical right shift with a per-lane shift amount. The SIMD-enabled checks
; expect the amounts to be masked with 7 and the shift to be unrolled into
; scalar i32.shr_u instructions; only the first and last lanes are verified.
; CHECK-LABEL: shr_u_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %shifted = lshr <16 x i8> %v, %x
  ret <16 x i8> %shifted
}
182
; Bitwise AND of two <16 x i8> vectors; the SIMD-enabled checks expect a single
; v128.and.
; CHECK-LABEL: and_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %both = and <16 x i8> %x, %y
  ret <16 x i8> %both
}
192
; Bitwise OR of two <16 x i8> vectors; the SIMD-enabled checks expect a single
; v128.or.
; CHECK-LABEL: or_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %either = or <16 x i8> %x, %y
  ret <16 x i8> %either
}
202
; Bitwise XOR of two <16 x i8> vectors; the SIMD-enabled checks expect a single
; v128.xor.
; CHECK-LABEL: xor_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %flipped = xor <16 x i8> %x, %y
  ret <16 x i8> %flipped
}
212
; Bitwise NOT written as XOR with an all-ones vector; the SIMD-enabled checks
; expect it to be selected as a single v128.not.
; CHECK-LABEL: not_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @not_v16i8(<16 x i8> %x) {
  %inverted = xor <16 x i8> %x,
    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %inverted
}
225
; Bitwise select written as (%c & %v1) | (~%c & %v2). The SelectionDAG
; configuration is expected to fold it into one v128.bitselect, while the
; fast-isel configuration is expected to keep the separate and/not/and/or.
; CHECK-LABEL: bitselect_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.not
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.or
; SIMD128-FAST-NEXT: return
define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
  ; Bits of %v1 where the mask is set.
  %from_v1 = and <16 x i8> %c, %v1
  ; Inverted mask (%c xor all-ones).
  %not_c = xor <16 x i8> %c,
    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; Bits of %v2 where the mask is clear.
  %from_v2 = and <16 x i8> %not_c, %v2
  %merged = or <16 x i8> %from_v1, %from_v2
  ret <16 x i8> %merged
}
245
246; ==============================================================================
247; 8 x i16
248; ==============================================================================
; Lane-wise addition of two <8 x i16> vectors; the SIMD-enabled checks expect a
; single i16x8.add.
; CHECK-LABEL: add_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %sum = add <8 x i16> %x, %y
  ret <8 x i16> %sum
}
258
; Lane-wise subtraction (%x - %y) of <8 x i16> vectors; the SIMD-enabled checks
; expect a single i16x8.sub.
; CHECK-LABEL: sub_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %diff = sub <8 x i16> %x, %y
  ret <8 x i16> %diff
}
268
; Lane-wise multiplication of two <8 x i16> vectors; the SIMD-enabled checks
; expect a single i16x8.mul.
; CHECK-LABEL: mul_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %prod = mul <8 x i16> %x, %y
  ret <8 x i16> %prod
}
278
; Negation written as (0 - %x); the SIMD-enabled checks expect it to be
; selected as a single i16x8.neg.
; CHECK-LABEL: neg_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @neg_v8i16(<8 x i16> %x) {
  ; zeroinitializer is the all-zero <8 x i16> constant.
  %negated = sub <8 x i16> zeroinitializer, %x
  ret <8 x i16> %negated
}
289
; Left shift of every lane by the same scalar amount; the SIMD-enabled checks
; expect the splat to fold into a single i16x8.shl that takes the scalar.
; CHECK-LABEL: shl_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
  %lane0 = insertelement <8 x i16> undef, i16 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <8 x i16> %lane0, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  %shifted = shl <8 x i16> %v, %amt
  ret <8 x i16> %shifted
}
302
; Left shift of every lane by the constant 5. The SIMD-enabled checks expect
; the splat constant to be materialized as a scalar i32.const feeding i16x8.shl.
; The i32.const check is anchored at end of line so that only the value 5
; (and not, e.g., 50) can match.
; CHECK-LABEL: shl_const_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
  %a = shl <8 x i16> %v,
    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %a
}
314
; Left shift with a per-lane shift amount. The SIMD-enabled checks expect the
; amounts to be masked with 15 and the shift to be unrolled into scalar i32.shl
; instructions; only the first and last lanes are verified.
; CHECK-LABEL: shl_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 6 lanes
; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = shl <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
336
; Arithmetic right shift of every lane by the same scalar amount; the
; SIMD-enabled checks expect the splat to fold into a single i16x8.shr_s.
; CHECK-LABEL: shr_s_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
  %lane0 = insertelement <8 x i16> undef, i16 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <8 x i16> %lane0, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  %shifted = ashr <8 x i16> %v, %amt
  ret <8 x i16> %shifted
}
349
; Arithmetic right shift with a per-lane shift amount. The SIMD-enabled checks
; expect the amounts to be masked with 15 and the shift to be unrolled into
; scalar i32.shr_s instructions; only the first and last lanes are verified.
; CHECK-LABEL: shr_s_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 6 lanes
; SIMD128:      i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop{{[0-9]+}}, 7, $pop[[L2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = ashr <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
371
; Logical right shift of every lane by the same scalar amount; the
; SIMD-enabled checks expect the splat to fold into a single i16x8.shr_u.
; CHECK-LABEL: shr_u_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
  %lane0 = insertelement <8 x i16> undef, i16 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <8 x i16> %lane0, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  %shifted = lshr <8 x i16> %v, %amt
  ret <8 x i16> %shifted
}
384
; Logical right shift with a per-lane shift amount. The SIMD-enabled checks
; expect the amounts to be masked with 15 and the shift to be unrolled into
; scalar i32.shr_u instructions; only the first and last lanes are verified.
; CHECK-LABEL: shr_u_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 6 lanes
; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = lshr <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
406
; Bitwise AND of two <8 x i16> vectors; the SIMD-enabled checks expect a single
; v128.and.
; CHECK-LABEL: and_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %both = and <8 x i16> %x, %y
  ret <8 x i16> %both
}
416
; Bitwise OR of two <8 x i16> vectors; the SIMD-enabled checks expect a single
; v128.or.
; CHECK-LABEL: or_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %either = or <8 x i16> %x, %y
  ret <8 x i16> %either
}
426
; Bitwise XOR of two <8 x i16> vectors; the SIMD-enabled checks expect a single
; v128.xor.
; CHECK-LABEL: xor_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %flipped = xor <8 x i16> %x, %y
  ret <8 x i16> %flipped
}
436
; Bitwise NOT written as XOR with an all-ones vector; the SIMD-enabled checks
; expect it to be selected as a single v128.not.
; CHECK-LABEL: not_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @not_v8i16(<8 x i16> %x) {
  %inverted = xor <8 x i16> %x,
    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %inverted
}
447
; Bitwise select written as (%v1 & %c) | (%v2 & ~%c), with the operands
; commuted relative to the v16i8 variant. The SelectionDAG configuration is
; expected to fold it into one v128.bitselect, while the fast-isel
; configuration is expected to keep the separate and/not/and/or.
; CHECK-LABEL: bitselect_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.not
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.or
; SIMD128-FAST-NEXT: return
define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
  ; Bits of %v1 where the mask is set.
  %from_v1 = and <8 x i16> %v1, %c
  ; Inverted mask (all-ones xor %c).
  %not_c = xor <8 x i16>
    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, %c
  ; Bits of %v2 where the mask is clear.
  %from_v2 = and <8 x i16> %v2, %not_c
  %merged = or <8 x i16> %from_v1, %from_v2
  ret <8 x i16> %merged
}
467
468; ==============================================================================
469; 4 x i32
470; ==============================================================================
; Lane-wise addition of two <4 x i32> vectors; the SIMD-enabled checks expect a
; single i32x4.add.
; CHECK-LABEL: add_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %sum = add <4 x i32> %x, %y
  ret <4 x i32> %sum
}
480
; Lane-wise subtraction (%x - %y) of <4 x i32> vectors; the SIMD-enabled checks
; expect a single i32x4.sub.
; CHECK-LABEL: sub_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %diff = sub <4 x i32> %x, %y
  ret <4 x i32> %diff
}
490
; Lane-wise multiplication of two <4 x i32> vectors; the SIMD-enabled checks
; expect a single i32x4.mul.
; CHECK-LABEL: mul_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %prod = mul <4 x i32> %x, %y
  ret <4 x i32> %prod
}
500
; Negation written as (0 - %x); the SIMD-enabled checks expect it to be
; selected as a single i32x4.neg.
; CHECK-LABEL: neg_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @neg_v4i32(<4 x i32> %x) {
  ; zeroinitializer is the all-zero <4 x i32> constant.
  %negated = sub <4 x i32> zeroinitializer, %x
  ret <4 x i32> %negated
}
510
; Left shift of every lane by the same scalar amount; the SIMD-enabled checks
; expect the splat to fold into a single i32x4.shl that takes the scalar.
; CHECK-LABEL: shl_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
  %lane0 = insertelement <4 x i32> undef, i32 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <4 x i32> %lane0, <4 x i32> undef,
                       <4 x i32> zeroinitializer
  %shifted = shl <4 x i32> %v, %amt
  ret <4 x i32> %shifted
}
523
; Left shift of every lane by the constant 5. The SIMD-enabled checks expect
; the splat constant to be materialized as a scalar i32.const feeding i32x4.shl.
; The i32.const check is anchored at end of line so that only the value 5
; (and not, e.g., 50) can match.
; CHECK-LABEL: shl_const_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %a
}
534
; Left shift with a per-lane shift amount. The SIMD-enabled checks expect the
; shift to be unrolled into scalar i32.shl instructions; only the first and
; last lanes are verified.
; CHECK-LABEL: shl_vec_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 2 lanes
; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = shl <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
552
; Arithmetic right shift of every lane by the same scalar amount; the
; SIMD-enabled checks expect the splat to fold into a single i32x4.shr_s.
; CHECK-LABEL: shr_s_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
  %lane0 = insertelement <4 x i32> undef, i32 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <4 x i32> %lane0, <4 x i32> undef,
                       <4 x i32> zeroinitializer
  %shifted = ashr <4 x i32> %v, %amt
  ret <4 x i32> %shifted
}
565
; Arithmetic right shift with a per-lane shift amount. The SIMD-enabled checks
; expect the shift to be unrolled into scalar i32.shr_s instructions; only the
; first and last lanes are verified.
; CHECK-LABEL: shr_s_vec_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 2 lanes
; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = ashr <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
583
; Logical right shift of every lane by the same scalar amount; the
; SIMD-enabled checks expect the splat to fold into a single i32x4.shr_u.
; CHECK-LABEL: shr_u_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
  %lane0 = insertelement <4 x i32> undef, i32 %x, i32 0
  ; An all-zero shuffle mask broadcasts lane 0 to every lane.
  %amt = shufflevector <4 x i32> %lane0, <4 x i32> undef,
                       <4 x i32> zeroinitializer
  %shifted = lshr <4 x i32> %v, %amt
  ret <4 x i32> %shifted
}
596
; Logical right shift with a per-lane shift amount. The SIMD-enabled checks
; expect the shift to be unrolled into scalar i32.shr_u instructions; only the
; first and last lanes are verified.
; CHECK-LABEL: shr_u_vec_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 2 lanes
; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
  %shifted = lshr <4 x i32> %v, %x
  ret <4 x i32> %shifted
}
614
; Bitwise AND of two <4 x i32> vectors; the SIMD-enabled checks expect a single
; v128.and.
; CHECK-LABEL: and_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %both = and <4 x i32> %x, %y
  ret <4 x i32> %both
}
624
; Bitwise OR of two <4 x i32> vectors; the SIMD-enabled checks expect a single
; v128.or.
; CHECK-LABEL: or_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %either = or <4 x i32> %x, %y
  ret <4 x i32> %either
}
634
; Bitwise XOR of two <4 x i32> vectors; the SIMD-enabled checks expect a single
; v128.xor.
; CHECK-LABEL: xor_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %flipped = xor <4 x i32> %x, %y
  ret <4 x i32> %flipped
}
644
; Bitwise NOT written as XOR with an all-ones vector; the SIMD-enabled checks
; expect it to be selected as a single v128.not.
; CHECK-LABEL: not_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @not_v4i32(<4 x i32> %x) {
  %inverted = xor <4 x i32> %x,
    <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %inverted
}
654
; Bitwise select written as (~%c & %v2) | (%c & %v1); note the final `or` takes
; the inverted-mask term first. The SelectionDAG configuration is expected to
; fold it into one v128.bitselect, while the fast-isel configuration is
; expected to keep the separate not/and/and/or.
; CHECK-LABEL: bitselect_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
; SIMD128-FAST-NEXT: v128.not
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.and
; SIMD128-FAST-NEXT: v128.or
; SIMD128-FAST-NEXT: return
define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
  ; Bits of %v1 where the mask is set.
  %from_v1 = and <4 x i32> %c, %v1
  ; Inverted mask (all-ones xor %c).
  %not_c = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
  ; Bits of %v2 where the mask is clear.
  %from_v2 = and <4 x i32> %not_c, %v2
  %merged = or <4 x i32> %from_v2, %from_v1
  ret <4 x i32> %merged
}
672
673; ==============================================================================
674; 2 x i64
675; ==============================================================================
; Lane-wise addition of two <2 x i64> vectors. Only the unimplemented-simd128
; configuration is expected to emit i64x2.add; the plain simd128 and no-SIMD
; configurations must not emit any i64x2 instruction.
; CHECK-LABEL: add_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %sum = add <2 x i64> %x, %y
  ret <2 x i64> %sum
}
686
; Lane-wise subtraction (%x - %y) of <2 x i64> vectors. Only the
; unimplemented-simd128 configuration is expected to emit i64x2.sub; the plain
; simd128 and no-SIMD configurations must not emit any i64x2 instruction.
; CHECK-LABEL: sub_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %diff = sub <2 x i64> %x, %y
  ret <2 x i64> %diff
}
697
; i64x2.mul is not in the spec, so a <2 x i64> multiply must never be selected
; as that instruction; the unimplemented-simd128 checks instead expect lane
; extraction followed by a scalar i64.mul.
; CHECK-LABEL: mul_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NOT: i64x2.mul
; SIMD128: i64x2.extract_lane
; SIMD128: i64.mul
define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) {
  %prod = mul <2 x i64> %x, %y
  ret <2 x i64> %prod
}
709
; Negation written as (0 - %x); the unimplemented-simd128 checks expect it to
; be selected as a single i64x2.neg. As in the add/sub/mul v2i64 tests, the
; plain simd128 configuration is also required to emit no i64x2 instruction.
; CHECK-LABEL: neg_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @neg_v2i64(<2 x i64> %x) {
  %a = sub <2 x i64> <i64 0, i64 0>, %x
  ret <2 x i64> %a
}
719
; Left shift of both lanes by an i32 amount that is zero-extended to i64 and
; splatted. The unimplemented-simd128 checks expect the extension and splat to
; disappear so the i32 argument feeds i64x2.shl directly. As in the
; add/sub/mul v2i64 tests, the plain simd128 configuration is also required to
; emit no i64x2 instruction.
; CHECK-LABEL: shl_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
  %x2 = zext i32 %x to i64
  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %a = shl <2 x i64> %v, %s
  ret <2 x i64> %a
}
732
; Same as the zero-extended variant, but the i32 shift amount is sign-extended
; to i64 before being splatted. The unimplemented-simd128 checks still expect
; the i32 argument to feed i64x2.shl directly. As in the add/sub/mul v2i64
; tests, the plain simd128 configuration is also required to emit no i64x2
; instruction.
; CHECK-LABEL: shl_sext_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
; SIMD128-NEXT: .functype shl_sext_v2i64 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) {
  %x2 = sext i32 %x to i64
  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %a = shl <2 x i64> %v, %s
  ret <2 x i64> %a
}
745
746; CHECK-LABEL: shl_noext_v2i64:
747; NO-SIMD128-NOT: i64x2
748; SIMD128-NEXT: .functype shl_noext_v2i64 (v128, i64) -> (v128){{$}}
749; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
750; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
751; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_noext_v2i64(<2 x i64> %v, i64 %x) {
  ; The amount is already i64: broadcast it to both lanes and shift left.
  %lane0 = insertelement <2 x i64> undef, i64 %x, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shifted = shl <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
758
759; CHECK-LABEL: shl_const_v2i64:
760; NO-SIMD128-NOT: i64x2
761; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
762; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
763; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
764; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
  ; Shift both lanes left by the constant 5.
  %shifted = shl <2 x i64> %v, <i64 5, i64 5>
  ret <2 x i64> %shifted
}
769
770; CHECK-LABEL: shl_vec_v2i64:
771; NO-SIMD128-NOT: i64x2
772; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
773; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
774; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
775; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
776; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
777; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
778; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
779; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
780; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
781; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
  ; Each lane of %v is shifted left by the matching lane of %x.
  %shifted = shl <2 x i64> %v, %x
  ret <2 x i64> %shifted
}
786
787; CHECK-LABEL: shr_s_v2i64:
788; NO-SIMD128-NOT: i64x2
789; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
790; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
791; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
  ; Zero-extend the i32 amount, broadcast it, then arithmetic shift right.
  %amt = zext i32 %x to i64
  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shifted = ashr <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
799
800; CHECK-LABEL: shr_s_sext_v2i64:
801; NO-SIMD128-NOT: i64x2
802; SIMD128-NEXT: .functype shr_s_sext_v2i64 (v128, i32) -> (v128){{$}}
803; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
804; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) {
  ; Sign-extend the i32 amount, broadcast it, then arithmetic shift right.
  %amt = sext i32 %x to i64
  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shifted = ashr <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
812
813; CHECK-LABEL: shr_s_noext_v2i64:
814; NO-SIMD128-NOT: i64x2
815; SIMD128-NEXT: .functype shr_s_noext_v2i64 (v128, i64) -> (v128){{$}}
816; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
817; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
818; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_s_noext_v2i64(<2 x i64> %v, i64 %x) {
  ; The amount is already i64: broadcast it and arithmetic shift right.
  %lane0 = insertelement <2 x i64> undef, i64 %x, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shifted = ashr <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
825
826; CHECK-LABEL: shr_s_const_v2i64:
827; NO-SIMD128-NOT: i64x2
828; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
829; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
830; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
831; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) {
  ; Arithmetic shift right of both lanes by the constant 5.
  %shifted = ashr <2 x i64> %v, <i64 5, i64 5>
  ret <2 x i64> %shifted
}
836
837; CHECK-LABEL: shr_s_vec_v2i64:
838; NO-SIMD128-NOT: i64x2
839; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
840; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
841; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
842; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
843; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
844; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
845; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
846; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
847; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
848; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
  ; Each lane of %v is arithmetically shifted right by the matching lane of %x.
  %shifted = ashr <2 x i64> %v, %x
  ret <2 x i64> %shifted
}
853
854; CHECK-LABEL: shr_u_v2i64:
855; NO-SIMD128-NOT: i64x2
856; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
857; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
858; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
  ; Zero-extend the i32 amount, broadcast it, then logical shift right.
  %amt = zext i32 %x to i64
  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shifted = lshr <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
866
867; CHECK-LABEL: shr_u_sext_v2i64:
868; NO-SIMD128-NOT: i64x2
869; SIMD128-NEXT: .functype shr_u_sext_v2i64 (v128, i32) -> (v128){{$}}
870; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
871; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) {
  ; Sign-extend the i32 amount, broadcast it, then logical shift right.
  %amt = sext i32 %x to i64
  %lane0 = insertelement <2 x i64> undef, i64 %amt, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shifted = lshr <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
879
880; CHECK-LABEL: shr_u_noext_v2i64:
881; NO-SIMD128-NOT: i64x2
882; SIMD128-NEXT: .functype shr_u_noext_v2i64 (v128, i64) -> (v128){{$}}
883; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
884; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
885; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_u_noext_v2i64(<2 x i64> %v, i64 %x) {
  ; The amount is already i64: broadcast it and logical shift right.
  %lane0 = insertelement <2 x i64> undef, i64 %x, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shifted = lshr <2 x i64> %v, %splat
  ret <2 x i64> %shifted
}
892
893; CHECK-LABEL: shr_u_const_v2i64:
894; NO-SIMD128-NOT: i64x2
895; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
896; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
897; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
898; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) {
  ; Logical shift right of both lanes by the constant 5.
  %shifted = lshr <2 x i64> %v, <i64 5, i64 5>
  ret <2 x i64> %shifted
}
903
904; CHECK-LABEL: shr_u_vec_v2i64:
905; NO-SIMD128-NOT: i64x2
906; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
907; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
908; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
909; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
910; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
911; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
912; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
913; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
914; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
915; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
  ; Each lane of %v is logically shifted right by the matching lane of %x.
  %shifted = lshr <2 x i64> %v, %x
  ret <2 x i64> %shifted
}
920
921; CHECK-LABEL: and_v2i64:
922; NO-SIMD128-NOT: v128
923; SIMD128-VM-NOT: v128
924; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
925; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
926; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) {
  ; Bitwise AND of the two vector arguments.
  %conj = and <2 x i64> %x, %y
  ret <2 x i64> %conj
}
931
932; CHECK-LABEL: or_v2i64:
933; NO-SIMD128-NOT: v128
934; SIMD128-VM-NOT: v128
935; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
936; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
937; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) {
  ; Bitwise OR of the two vector arguments.
  %disj = or <2 x i64> %x, %y
  ret <2 x i64> %disj
}
942
943; CHECK-LABEL: xor_v2i64:
944; NO-SIMD128-NOT: v128
945; SIMD128-VM-NOT: v128
946; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
947; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
948; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) {
  ; Bitwise XOR of the two vector arguments.
  %mixed = xor <2 x i64> %x, %y
  ret <2 x i64> %mixed
}
953
954; CHECK-LABEL: not_v2i64:
955; NO-SIMD128-NOT: v128
956; SIMD128-VM-NOT: v128
957; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
958; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
959; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @not_v2i64(<2 x i64> %x) {
  ; Bitwise NOT spelled as x ^ -1 in every lane.
  %inverted = xor <2 x i64> %x, <i64 -1, i64 -1>
  ret <2 x i64> %inverted
}
964
965; CHECK-LABEL: bitselect_v2i64:
966; NO-SIMD128-NOT: v128
967; SIMD128-VM-NOT: v128
968; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
969; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
970; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
971; SIMD128-FAST-NEXT: v128.not
972; SIMD128-FAST-NEXT: v128.and
973; SIMD128-FAST-NEXT: v128.and
974; SIMD128-FAST-NEXT: v128.or
975; SIMD128-FAST-NEXT: return
define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
  ; Bitwise select: (v2 & ~c) | (v1 & c), with ~c spelled as -1 ^ c.
  %keep_v1 = and <2 x i64> %v1, %c
  %not_c = xor <2 x i64> <i64 -1, i64 -1>, %c
  %keep_v2 = and <2 x i64> %v2, %not_c
  %sel = or <2 x i64> %keep_v2, %keep_v1
  ret <2 x i64> %sel
}
983
984; ==============================================================================
985; 4 x float
986; ==============================================================================
987; CHECK-LABEL: neg_v4f32:
988; NO-SIMD128-NOT: f32x4
989; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
990; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
991; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @neg_v4f32(<4 x float> %x) {
  ; Because of nsz, 0.0 - x has the same meaning as flipping the sign bit.
  %negated = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
  ret <4 x float> %negated
}
997
998; CHECK-LABEL: abs_v4f32:
999; NO-SIMD128-NOT: f32x4
1000; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
1001; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
1002; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
define <4 x float> @abs_v4f32(<4 x float> %x) {
  ; Absolute value through the llvm.fabs intrinsic.
  %magnitude = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  ret <4 x float> %magnitude
}
1008
1009; CHECK-LABEL: min_unordered_v4f32:
1010; NO-SIMD128-NOT: f32x4
1011; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
1012; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1013; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1014; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1015; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
  ; Per lane: select(x ule 5.0, x, 5.0).
  %is_le = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
  %picked = select <4 x i1> %is_le, <4 x float> %x,
    <4 x float> <float 5., float 5., float 5., float 5.>
  ret <4 x float> %picked
}
1022
1023; CHECK-LABEL: max_unordered_v4f32:
1024; NO-SIMD128-NOT: f32x4
1025; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
1026; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1027; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1028; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1029; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
  ; Per lane: select(x uge 5.0, x, 5.0).
  %is_ge = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
  %picked = select <4 x i1> %is_ge, <4 x float> %x,
    <4 x float> <float 5., float 5., float 5., float 5.>
  ret <4 x float> %picked
}
1036
1037; CHECK-LABEL: min_ordered_v4f32:
1038; NO-SIMD128-NOT: f32x4
1039; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
1040; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1041; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1042; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1043; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
  ; Per lane: select(5.0 ole x, 5.0, x).
  %is_le = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
  %picked = select <4 x i1> %is_le,
    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
  ret <4 x float> %picked
}
1050
1051; CHECK-LABEL: max_ordered_v4f32:
1052; NO-SIMD128-NOT: f32x4
1053; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1054; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1055; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1056; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1057; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
  ; Per lane: select(5.0 oge x, 5.0, x).
  %is_ge = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
  %picked = select <4 x i1> %is_ge,
    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
  ret <4 x float> %picked
}
1064
1065; CHECK-LABEL: min_intrinsic_v4f32:
1066; NO-SIMD128-NOT: f32x4
1067; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1068; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1069; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Minimum of the two arguments through the llvm.minimum intrinsic.
  %lowest = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %lowest
}
1075
1076; CHECK-LABEL: minnum_intrinsic_v4f32:
1077; NO-SIMD128-NOT: f32x4
1078; SIMD128-NEXT: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1079; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1080; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
  ; llvm.minnum called with the nnan fast-math flag.
  %lowest = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %lowest
}
1086
1087; CHECK-LABEL: max_intrinsic_v4f32:
1088; NO-SIMD128-NOT: f32x4
1089; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1090; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1091; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Maximum of the two arguments through the llvm.maximum intrinsic.
  %highest = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %highest
}
1097
1098; CHECK-LABEL: maxnum_intrinsic_v4f32:
1099; NO-SIMD128-NOT: f32x4
1100; SIMD128-NEXT: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1101; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1102; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
  ; llvm.maxnum called with the nnan fast-math flag.
  %highest = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %highest
}
1108
1109; CHECK-LABEL: min_const_intrinsic_v4f32:
1110; NO-SIMD128-NOT: f32x4
1111; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1112; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1113; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1114; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @min_const_intrinsic_v4f32() {
  ; llvm.minimum applied to two constant splats, 42.0 and 5.0.
  %lowest = call <4 x float> @llvm.minimum.v4f32(
    <4 x float> <float 42., float 42., float 42., float 42.>,
    <4 x float> <float 5., float 5., float 5., float 5.>
  )
  ret <4 x float> %lowest
}
1122
1123; CHECK-LABEL: max_const_intrinsic_v4f32:
1124; NO-SIMD128-NOT: f32x4
1125; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1126; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1127; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1128; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @max_const_intrinsic_v4f32() {
  ; llvm.maximum applied to two constant splats, 42.0 and 5.0.
  %highest = call <4 x float> @llvm.maximum.v4f32(
    <4 x float> <float 42., float 42., float 42., float 42.>,
    <4 x float> <float 5., float 5., float 5., float 5.>
  )
  ret <4 x float> %highest
}
1136
1137; CHECK-LABEL: add_v4f32:
1138; NO-SIMD128-NOT: f32x4
1139; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1140; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1141; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise floating-point addition of the two vector arguments.
  %sum = fadd <4 x float> %x, %y
  ret <4 x float> %sum
}
1146
1147; CHECK-LABEL: sub_v4f32:
1148; NO-SIMD128-NOT: f32x4
1149; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1150; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1151; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise floating-point subtraction: x - y.
  %diff = fsub <4 x float> %x, %y
  ret <4 x float> %diff
}
1156
1157; CHECK-LABEL: div_v4f32:
1158; NO-SIMD128-NOT: f32x4
1159; SIMD128-VM-NOT: f32x4.div
1160; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1161; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1162; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise floating-point division: x / y.
  %quot = fdiv <4 x float> %x, %y
  ret <4 x float> %quot
}
1167
1168; CHECK-LABEL: mul_v4f32:
1169; NO-SIMD128-NOT: f32x4
1170; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1171; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1172; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
  ; Lane-wise floating-point multiplication of the two vector arguments.
  %prod = fmul <4 x float> %x, %y
  ret <4 x float> %prod
}
1177
1178; CHECK-LABEL: sqrt_v4f32:
1179; NO-SIMD128-NOT: f32x4
1180; SIMD128-VM-NOT: f32x4.sqrt
1181; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1182; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1183; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
define <4 x float> @sqrt_v4f32(<4 x float> %x) {
  ; Square root through the llvm.sqrt intrinsic.
  %root = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
  ret <4 x float> %root
}
1189
1190; ==============================================================================
1191; 2 x double
1192; ==============================================================================
1193; CHECK-LABEL: neg_v2f64:
1194; NO-SIMD128-NOT: f64x2
1195; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1196; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1197; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @neg_v2f64(<2 x double> %x) {
  ; Because of nsz, 0.0 - x has the same meaning as flipping the sign bit.
  %negated = fsub nsz <2 x double> <double 0., double 0.>, %x
  ret <2 x double> %negated
}
1203
1204; CHECK-LABEL: abs_v2f64:
1205; NO-SIMD128-NOT: f64x2
1206; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1207; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1208; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
define <2 x double> @abs_v2f64(<2 x double> %x) {
  ; Absolute value through the llvm.fabs intrinsic.
  %magnitude = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
  ret <2 x double> %magnitude
}
1214
1215; CHECK-LABEL: min_unordered_v2f64:
1216; NO-SIMD128-NOT: f64x2
1217; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1218; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1219; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1220; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1221; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
  ; Per lane: select(x ule 5.0, x, 5.0).
  %is_le = fcmp ule <2 x double> %x, <double 5., double 5.>
  %picked = select <2 x i1> %is_le, <2 x double> %x,
    <2 x double> <double 5., double 5.>
  ret <2 x double> %picked
}
1228
1229; CHECK-LABEL: max_unordered_v2f64:
1230; NO-SIMD128-NOT: f64x2
1231; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1232; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1233; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1234; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1235; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
  ; Per lane: select(x uge 5.0, x, 5.0).
  %is_ge = fcmp uge <2 x double> %x, <double 5., double 5.>
  %picked = select <2 x i1> %is_ge, <2 x double> %x,
    <2 x double> <double 5., double 5.>
  ret <2 x double> %picked
}
1242
1243; CHECK-LABEL: min_ordered_v2f64:
1244; NO-SIMD128-NOT: f64x2
1245; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1246; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1247; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1248; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1249; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
  ; Per lane: select(5.0 ole x, 5.0, x).
  %is_le = fcmp ole <2 x double> <double 5., double 5.>, %x
  %picked = select <2 x i1> %is_le, <2 x double> <double 5., double 5.>,
    <2 x double> %x
  ret <2 x double> %picked
}
1256
1257; CHECK-LABEL: max_ordered_v2f64:
1258; NO-SIMD128-NOT: f64x2
1259; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
1260; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1261; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1262; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1263; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
  ; Per lane: select(5.0 oge x, 5.0, x).
  %is_ge = fcmp oge <2 x double> <double 5., double 5.>, %x
  %picked = select <2 x i1> %is_ge, <2 x double> <double 5., double 5.>,
    <2 x double> %x
  ret <2 x double> %picked
}
1270
1271; CHECK-LABEL: min_intrinsic_v2f64:
1272; NO-SIMD128-NOT: f64x2
1273; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1274; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1275; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Minimum of the two arguments through the llvm.minimum intrinsic.
  %lowest = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %lowest
}
1281
1282; CHECK-LABEL: max_intrinsic_v2f64:
1283; NO-SIMD128-NOT: f64x2
1284; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1285; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1286; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Maximum of the two arguments through the llvm.maximum intrinsic.
  %highest = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %highest
}
1292
1293; CHECK-LABEL: min_const_intrinsic_v2f64:
1294; NO-SIMD128-NOT: f64x2
1295; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
1296; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1297; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1298; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @min_const_intrinsic_v2f64() {
  ; llvm.minimum applied to two constant splats, 42.0 and 5.0.
  %lowest = call <2 x double> @llvm.minimum.v2f64(
    <2 x double> <double 42., double 42.>,
    <2 x double> <double 5., double 5.>
  )
  ret <2 x double> %lowest
}
1306
1307; CHECK-LABEL: max_const_intrinsic_v2f64:
1308; NO-SIMD128-NOT: f64x2
1309; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
1310; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1311; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1312; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @max_const_intrinsic_v2f64() {
  ; llvm.maximum applied to two constant splats, 42.0 and 5.0.
  %highest = call <2 x double> @llvm.maximum.v2f64(
    <2 x double> <double 42., double 42.>,
    <2 x double> <double 5., double 5.>
  )
  ret <2 x double> %highest
}
1320
; CHECK-LABEL: add_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-VM-NOT: f64x2
; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise floating-point addition of the two vector arguments.
  ; The runs using only +simd128 must not emit any f64x2 instruction here.
  %a = fadd <2 x double> %x, %y
  ret <2 x double> %a
}
1331
; CHECK-LABEL: sub_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-VM-NOT: f64x2
; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise floating-point subtraction: x - y.
  ; The runs using only +simd128 must not emit any f64x2 instruction here.
  %a = fsub <2 x double> %x, %y
  ret <2 x double> %a
}
1342
; CHECK-LABEL: div_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-VM-NOT: f64x2
; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise floating-point division: x / y.
  ; The runs using only +simd128 must not emit any f64x2 instruction here.
  %a = fdiv <2 x double> %x, %y
  ret <2 x double> %a
}
1353
; CHECK-LABEL: mul_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-VM-NOT: f64x2
; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Lane-wise floating-point multiplication of the two vector arguments.
  ; The runs using only +simd128 must not emit any f64x2 instruction here.
  %a = fmul <2 x double> %x, %y
  ret <2 x double> %a
}
1364
1365; CHECK-LABEL: sqrt_v2f64:
1366; NO-SIMD128-NOT: f64x2
1367; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
1368; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1369; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
define <2 x double> @sqrt_v2f64(<2 x double> %x) {
  ; Square root through the llvm.sqrt intrinsic.
  %root = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
  ret <2 x double> %root
}
1375