; The four FileCheck-driven invocations below cover simd128 enabled/disabled,
; each with the default instruction selector and with -fast-isel. The two
; simd128 invocations share the SIMD128 prefix and differ only in the
; SIMD128-SLOW / SIMD128-FAST prefix; both non-simd128 invocations use the
; NO-SIMD128 prefix.
1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
2
3; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
4
5; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128
6
7; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
8
9; Check that a non-test run (including the explicit locals pass) at least finishes.
10; RUN: llc < %s -O0 -mattr=+simd128
11; RUN: llc < %s -O2 -mattr=+simd128
12
13; Test that basic SIMD128 arithmetic operations assemble as expected.
14
; All functions in this file are compiled for the 32-bit WebAssembly target.
15target triple = "wasm32-unknown-unknown"
16
17; ==============================================================================
18; 16 x i8
19; ==============================================================================
20; CHECK-LABEL: add_v16i8:
21; NO-SIMD128-NOT: i8x16
22; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
23; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
24; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise add of two <16 x i8> vectors. The checks above expect the
; simd128 runs to emit a single i8x16.add.
25define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
26  %a = add <16 x i8> %x, %y
27  ret <16 x i8> %a
28}
29
30; CHECK-LABEL: sub_v16i8:
31; NO-SIMD128-NOT: i8x16
32; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
33; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
34; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise subtract (%x - %y) of two <16 x i8> vectors. The checks above
; expect the simd128 runs to emit a single i8x16.sub.
35define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
36  %a = sub <16 x i8> %x, %y
37  ret <16 x i8> %a
38}
39
40; i8x16.mul is not in spec
41; CHECK-LABEL: mul_v16i8:
42; NO-SIMD128-NOT: i8x16
43; SIMD128-NOT: i8x16.mul
44; SIMD128: i8x16.extract_lane_u
45; SIMD128: i32.mul
; Lane-wise multiply of two <16 x i8> vectors. The checks above require that
; no i8x16.mul is emitted and that i8x16.extract_lane_u and i32.mul appear
; instead, i.e. the multiply is performed on extracted lanes.
46define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
47  %a = mul <16 x i8> %x, %y
48  ret <16 x i8> %a
49}
50
51; CHECK-LABEL: min_s_v16i8:
52; NO-SIMD128-NOT: i8x16
53; SIMD128-NEXT: .functype min_s_v16i8 (v128, v128) -> (v128){{$}}
54; SIMD128-NEXT: i8x16.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
55; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed minimum spelled as icmp slt + select. The checks above expect the
; simd128 runs to emit a single i8x16.min_s.
56define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
57  %c = icmp slt <16 x i8> %x, %y
58  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
59  ret <16 x i8> %a
60}
61
62; CHECK-LABEL: min_u_v16i8:
63; NO-SIMD128-NOT: i8x16
64; SIMD128-NEXT: .functype min_u_v16i8 (v128, v128) -> (v128){{$}}
65; SIMD128-NEXT: i8x16.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
66; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned minimum spelled as icmp ult + select. The checks above expect the
; simd128 runs to emit a single i8x16.min_u.
67define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
68  %c = icmp ult <16 x i8> %x, %y
69  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
70  ret <16 x i8> %a
71}
72
73; CHECK-LABEL: max_s_v16i8:
74; NO-SIMD128-NOT: i8x16
75; SIMD128-NEXT: .functype max_s_v16i8 (v128, v128) -> (v128){{$}}
76; SIMD128-NEXT: i8x16.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
77; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed maximum spelled as icmp sgt + select. The checks above expect the
; simd128 runs to emit a single i8x16.max_s.
78define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
79  %c = icmp sgt <16 x i8> %x, %y
80  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
81  ret <16 x i8> %a
82}
83
84; CHECK-LABEL: max_u_v16i8:
85; NO-SIMD128-NOT: i8x16
86; SIMD128-NEXT: .functype max_u_v16i8 (v128, v128) -> (v128){{$}}
87; SIMD128-NEXT: i8x16.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
88; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned maximum spelled as icmp ugt + select. The checks above expect the
; simd128 runs to emit a single i8x16.max_u.
89define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
90  %c = icmp ugt <16 x i8> %x, %y
91  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
92  ret <16 x i8> %a
93}
94
95; CHECK-LABEL: avgr_u_v16i8:
96; NO-SIMD128-NOT: i8x16
97; SIMD128-NEXT: .functype avgr_u_v16i8 (v128, v128) -> (v128){{$}}
98; SIMD128-NEXT: i8x16.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
99; SIMD128-NEXT: return $pop[[R]]{{$}}
; Rounding unsigned average written as (x + y + 1) / 2 with nuw on both adds.
; The checks above expect the simd128 runs to fold this into one i8x16.avgr_u.
100define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
101  %a = add nuw <16 x i8> %x, %y
  ; Add the rounding bias of 1 to every lane.
102  %b = add nuw <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
103                              i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; Unsigned divide of every lane by 2.
104  %c = udiv <16 x i8> %b, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
105                           i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
106  ret <16 x i8> %c
107}
108
109; CHECK-LABEL: avgr_u_v16i8_wrap:
110; NO-SIMD128-NOT: i8x16
111; SIMD128-NEXT: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128){{$}}
112; SIMD128-NOT: i8x16.avgr_u
; Same (x + y + 1) / 2 shape as avgr_u_v16i8 but without nuw on the adds.
; The checks above require that no i8x16.avgr_u is emitted for this form
; (presumably because the adds are allowed to wrap - confirm against the
; backend pattern).
113define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) {
114  %a = add <16 x i8> %x, %y
115  %b = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
116                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
117  %c = udiv <16 x i8> %b, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
118                           i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
119  ret <16 x i8> %c
120}
121
122; CHECK-LABEL: abs_v16i8:
123; NO-SIMD128-NOT: i8x16
124; SIMD128-NEXT: .functype abs_v16i8 (v128) -> (v128){{$}}
125; SIMD128-NEXT: i8x16.abs $push[[R:[0-9]+]]=, $0{{$}}
126; SIMD128-NEXT: return $pop[[R]]{{$}}
; Absolute value spelled as select(x < 0, 0 - x, x). The checks above expect
; the simd128 runs to emit a single i8x16.abs.
127define <16 x i8> @abs_v16i8(<16 x i8> %x) {
128  %a = sub <16 x i8> zeroinitializer, %x
129  %b = icmp slt <16 x i8> %x, zeroinitializer
130  %c = select <16 x i1> %b, <16 x i8> %a, <16 x i8> %x
131  ret <16 x i8> %c
132}
133
134; CHECK-LABEL: neg_v16i8:
135; NO-SIMD128-NOT: i8x16
136; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
137; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
138; SIMD128-NEXT: return $pop[[R]]{{$}}
; Negation spelled as (0 - x) with an explicit all-zero vector constant. The
; checks above expect the simd128 runs to emit a single i8x16.neg.
139define <16 x i8> @neg_v16i8(<16 x i8> %x) {
140  %a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
141                      i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
142                     %x
143  ret <16 x i8> %a
144}
145
146; CHECK-LABEL: shl_v16i8:
147; NO-SIMD128-NOT: i8x16
148; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
149; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
150; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift of every lane by the same scalar amount %x. The checks above
; expect the simd128 runs to emit i8x16.shl taking the scalar argument directly.
151define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  ; Broadcast %x: insert it into lane 0, then shuffle with an all-zero mask.
152  %t = insertelement <16 x i8> undef, i8 %x, i32 0
153  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
154    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
155                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
156  %a = shl <16 x i8> %v, %s
157  ret <16 x i8> %a
158}
159
; Left shift of every <16 x i8> lane by the constant 5. The simd128 runs are
; expected to materialise the amount with i32.const and feed it to i8x16.shl.
; Every expected line is anchored with an end-of-line match so that a longer
; immediate (e.g. 50) cannot satisfy the i32.const pattern.
160; CHECK-LABEL: shl_const_v16i8:
161; NO-SIMD128-NOT: i8x16
162; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
163; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
164; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
165; SIMD128-NEXT: return $pop[[R]]{{$}}
166define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
167  %a = shl <16 x i8> %v,
168    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
169     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
170  ret <16 x i8> %a
171}
172
; Left shift with a separate amount per lane. The simd128 runs are expected to
; scalarise it: each lane of both operands is extracted, the amount is masked
; with 7, the shift is done as a scalar i32.shl, and the result is written
; back (splat for lane 0, replace_lane for the others). Only lanes 0 and 15
; are pinned here. The lane-0 shift and splat lines are anchored with an
; end-of-line match, like the rest of the sequence, so that $pop[[M1]] /
; $pop[[M2]] cannot match a longer register number by prefix.
173; CHECK-LABEL: shl_vec_v16i8:
174; NO-SIMD128-NOT: i8x16
175; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
176; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
177; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
178; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
179; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
180; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
181; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
182; Skip 14 lanes
183; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
184; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
185; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
186; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
187; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
188; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
189; SIMD128-NEXT: return $pop[[R]]{{$}}
190define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
191  %a = shl <16 x i8> %v, %x
192  ret <16 x i8> %a
193}
194
195; CHECK-LABEL: shr_s_v16i8:
196; NO-SIMD128-NOT: i8x16
197; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
198; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
199; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift of every lane by the same scalar amount %x. The checks
; above expect the simd128 runs to emit i8x16.shr_s taking the scalar directly.
200define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
  ; Broadcast %x: insert it into lane 0, then shuffle with an all-zero mask.
201  %t = insertelement <16 x i8> undef, i8 %x, i32 0
202  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
203    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
204                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
205  %a = ashr <16 x i8> %v, %s
206  ret <16 x i8> %a
207}
208
; Arithmetic right shift with a separate amount per lane. The simd128 runs are
; expected to scalarise it: the value lane is extracted with extract_lane_s,
; the amount lane with extract_lane_u and masked with 7, the shift is done as
; a scalar i32.shr_s, and the result is written back (splat for lane 0,
; replace_lane for the others). Only lanes 0 and 15 are pinned here. The
; lane-0 shift and splat lines are anchored with an end-of-line match, like
; the rest of the sequence, so that $pop[[M1]] / $pop[[M2]] cannot match a
; longer register number by prefix.
209; CHECK-LABEL: shr_s_vec_v16i8:
210; NO-SIMD128-NOT: i8x16
211; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
212; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
213; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
214; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
215; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
216; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
217; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
218; Skip 14 lanes
219; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
220; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
221; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
222; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
223; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
224; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
225; SIMD128-NEXT: return $pop[[R]]{{$}}
226define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
227  %a = ashr <16 x i8> %v, %x
228  ret <16 x i8> %a
229}
230
231; CHECK-LABEL: shr_u_v16i8:
232; NO-SIMD128-NOT: i8x16
233; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
234; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
235; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift of every lane by the same scalar amount %x. The checks
; above expect the simd128 runs to emit i8x16.shr_u taking the scalar directly.
236define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
  ; Broadcast %x: insert it into lane 0, then shuffle with an all-zero mask.
237  %t = insertelement <16 x i8> undef, i8 %x, i32 0
238  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
239    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
240                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
241  %a = lshr <16 x i8> %v, %s
242  ret <16 x i8> %a
243}
244
; Logical right shift with a separate amount per lane. The simd128 runs are
; expected to scalarise it: each lane of both operands is extracted with
; extract_lane_u, the amount is masked with 7, the shift is done as a scalar
; i32.shr_u, and the result is written back (splat for lane 0, replace_lane
; for the others). Only lanes 0 and 15 are pinned here. The lane-0 shift and
; splat lines are anchored with an end-of-line match, like the rest of the
; sequence, so that $pop[[M1]] / $pop[[M2]] cannot match a longer register
; number by prefix.
245; CHECK-LABEL: shr_u_vec_v16i8:
246; NO-SIMD128-NOT: i8x16
247; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
248; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
249; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
250; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
251; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
252; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
253; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
254; Skip 14 lanes
255; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
256; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
257; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
258; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
259; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
260; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
261; SIMD128-NEXT: return $pop[[R]]{{$}}
262define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
263  %a = lshr <16 x i8> %v, %x
264  ret <16 x i8> %a
265}
266
267; CHECK-LABEL: and_v16i8:
268; NO-SIMD128-NOT: v128
269; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
270; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
271; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <16 x i8> vectors. The checks above expect the simd128
; runs to emit a single v128.and.
272define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
273  %a = and <16 x i8> %x, %y
274  ret <16 x i8> %a
275}
276
277; CHECK-LABEL: or_v16i8:
278; NO-SIMD128-NOT: v128
279; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
280; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
281; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <16 x i8> vectors. The checks above expect the simd128
; runs to emit a single v128.or.
282define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
283  %a = or <16 x i8> %x, %y
284  ret <16 x i8> %a
285}
286
287; CHECK-LABEL: xor_v16i8:
288; NO-SIMD128-NOT: v128
289; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
290; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
291; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <16 x i8> vectors. The checks above expect the simd128
; runs to emit a single v128.xor.
292define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
293  %a = xor <16 x i8> %x, %y
294  ret <16 x i8> %a
295}
296
297; CHECK-LABEL: not_v16i8:
298; NO-SIMD128-NOT: v128
299; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
300; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
301; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT spelled as XOR with an all-ones vector. The checks above expect
; the simd128 runs to emit a single v128.not.
302define <16 x i8> @not_v16i8(<16 x i8> %x) {
303  %a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1,
304                          i8 -1, i8 -1, i8 -1, i8 -1,
305                          i8 -1, i8 -1, i8 -1, i8 -1,
306                          i8 -1, i8 -1, i8 -1, i8 -1>
307  ret <16 x i8> %a
308}
309
310; CHECK-LABEL: andnot_v16i8:
311; NO-SIMD128-NOT: v128
312; SIMD128-NEXT: .functype andnot_v16i8 (v128, v128) -> (v128){{$}}
313; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
314; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
315; SIMD128-FAST-NEXT: v128.not
316; SIMD128-FAST-NEXT: v128.and
317; SIMD128-FAST-NEXT: return
; Computes x & ~y, with ~y spelled as XOR with all-ones. Per the checks above,
; the simd128 run without -fast-isel expects one v128.andnot, while the
; -fast-isel run expects the unfused v128.not followed by v128.and.
318define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) {
319 %inv_y = xor <16 x i8> %y,
320   <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
321    i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
322 %a = and <16 x i8> %x, %inv_y
323 ret <16 x i8> %a
324}
325
326; CHECK-LABEL: bitselect_v16i8:
327; NO-SIMD128-NOT: v128
328; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
329; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
330; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
331; SIMD128-FAST-NEXT: v128.and
332; SIMD128-FAST-NEXT: v128.not
333; SIMD128-FAST-NEXT: v128.and
334; SIMD128-FAST-NEXT: v128.or
335; SIMD128-FAST-NEXT: return
; Bitwise select spelled as (c & v1) | (~c & v2), with the mask %c as the
; first operand of each AND/XOR. Per the checks above, the simd128 run without
; -fast-isel expects one v128.bitselect with operands ($1, $2, $0), while the
; -fast-isel run expects the unfused and/not/and/or sequence.
336define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
337  %masked_v1 = and <16 x i8> %c, %v1
338  %inv_mask = xor <16 x i8> %c,
339    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
340     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
341  %masked_v2 = and <16 x i8> %inv_mask, %v2
342  %a = or <16 x i8> %masked_v1, %masked_v2
343  ret <16 x i8> %a
344}
345
346; ==============================================================================
347; 8 x i16
348; ==============================================================================
349; CHECK-LABEL: add_v8i16:
350; NO-SIMD128-NOT: i16x8
351; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
352; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
353; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise add of two <8 x i16> vectors. The checks above expect the
; simd128 runs to emit a single i16x8.add.
354define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
355  %a = add <8 x i16> %x, %y
356  ret <8 x i16> %a
357}
358
359; CHECK-LABEL: sub_v8i16:
360; NO-SIMD128-NOT: i16x8
361; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
362; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
363; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise subtract (%x - %y) of two <8 x i16> vectors. The checks above
; expect the simd128 runs to emit a single i16x8.sub.
364define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
365  %a = sub <8 x i16> %x, %y
366  ret <8 x i16> %a
367}
368
369; CHECK-LABEL: mul_v8i16:
370; NO-SIMD128-NOT: i16x8
371; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
372; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
373; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise multiply of two <8 x i16> vectors. Unlike the 16 x i8 case, the
; checks above expect the simd128 runs to emit a single i16x8.mul.
374define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
375  %a = mul <8 x i16> %x, %y
376  ret <8 x i16> %a
377}
378
379; CHECK-LABEL: min_s_v8i16:
380; NO-SIMD128-NOT: i16x8
381; SIMD128-NEXT: .functype min_s_v8i16 (v128, v128) -> (v128){{$}}
382; SIMD128-NEXT: i16x8.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
383; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed minimum spelled as icmp slt + select. The checks above expect the
; simd128 runs to emit a single i16x8.min_s.
384define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
385  %c = icmp slt <8 x i16> %x, %y
386  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
387  ret <8 x i16> %a
388}
389
390; CHECK-LABEL: min_u_v8i16:
391; NO-SIMD128-NOT: i16x8
392; SIMD128-NEXT: .functype min_u_v8i16 (v128, v128) -> (v128){{$}}
393; SIMD128-NEXT: i16x8.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
394; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned minimum spelled as icmp ult + select. The checks above expect the
; simd128 runs to emit a single i16x8.min_u.
395define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
396  %c = icmp ult <8 x i16> %x, %y
397  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
398  ret <8 x i16> %a
399}
400
401; CHECK-LABEL: max_s_v8i16:
402; NO-SIMD128-NOT: i16x8
403; SIMD128-NEXT: .functype max_s_v8i16 (v128, v128) -> (v128){{$}}
404; SIMD128-NEXT: i16x8.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
405; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed maximum spelled as icmp sgt + select. The checks above expect the
; simd128 runs to emit a single i16x8.max_s.
406define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
407  %c = icmp sgt <8 x i16> %x, %y
408  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
409  ret <8 x i16> %a
410}
411
412; CHECK-LABEL: max_u_v8i16:
413; NO-SIMD128-NOT: i16x8
414; SIMD128-NEXT: .functype max_u_v8i16 (v128, v128) -> (v128){{$}}
415; SIMD128-NEXT: i16x8.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
416; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned maximum spelled as icmp ugt + select. The checks above expect the
; simd128 runs to emit a single i16x8.max_u.
417define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
418  %c = icmp ugt <8 x i16> %x, %y
419  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
420  ret <8 x i16> %a
421}
422
423; CHECK-LABEL: avgr_u_v8i16:
424; NO-SIMD128-NOT: i16x8
425; SIMD128-NEXT: .functype avgr_u_v8i16 (v128, v128) -> (v128){{$}}
426; SIMD128-NEXT: i16x8.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
427; SIMD128-NEXT: return $pop[[R]]{{$}}
; Rounding unsigned average written as (x + y + 1) / 2 with nuw on both adds.
; The checks above expect the simd128 runs to fold this into one i16x8.avgr_u.
428define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
429  %a = add nuw <8 x i16> %x, %y
  ; Add the rounding bias of 1 to every lane.
430  %b = add nuw <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; Unsigned divide of every lane by 2.
431  %c = udiv <8 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
432  ret <8 x i16> %c
433}
434
435; CHECK-LABEL: avgr_u_v8i16_wrap:
436; NO-SIMD128-NOT: i16x8
437; SIMD128-NEXT: .functype avgr_u_v8i16_wrap (v128, v128) -> (v128){{$}}
438; SIMD128-NOT: i16x8.avgr_u
; Same (x + y + 1) / 2 shape as avgr_u_v8i16 but without nuw on the adds.
; The checks above require that no i16x8.avgr_u is emitted for this form
; (presumably because the adds are allowed to wrap - confirm against the
; backend pattern).
439define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) {
440  %a = add <8 x i16> %x, %y
441  %b = add <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
442  %c = udiv <8 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
443  ret <8 x i16> %c
444}
445
446; CHECK-LABEL: abs_v8i16:
447; NO-SIMD128-NOT: i16x8
448; SIMD128-NEXT: .functype abs_v8i16 (v128) -> (v128){{$}}
449; SIMD128-NEXT: i16x8.abs $push[[R:[0-9]+]]=, $0{{$}}
450; SIMD128-NEXT: return $pop[[R]]{{$}}
; Absolute value spelled as select(x < 0, 0 - x, x). The checks above expect
; the simd128 runs to emit a single i16x8.abs.
451define <8 x i16> @abs_v8i16(<8 x i16> %x) {
452  %a = sub <8 x i16> zeroinitializer, %x
453  %b = icmp slt <8 x i16> %x, zeroinitializer
454  %c = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %x
455  ret <8 x i16> %c
456}
457
458; CHECK-LABEL: neg_v8i16:
459; NO-SIMD128-NOT: i16x8
460; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
461; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
462; SIMD128-NEXT: return $pop[[R]]{{$}}
; Negation spelled as (0 - x) with an explicit all-zero vector constant. The
; checks above expect the simd128 runs to emit a single i16x8.neg.
463define <8 x i16> @neg_v8i16(<8 x i16> %x) {
464  %a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
465                     %x
466  ret <8 x i16> %a
467}
468
469; CHECK-LABEL: shl_v8i16:
470; NO-SIMD128-NOT: i16x8
471; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
472; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
473; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift of every lane by the same scalar amount %x. The checks above
; expect the simd128 runs to emit i16x8.shl taking the scalar argument directly.
474define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
  ; Broadcast %x: insert it into lane 0, then shuffle with an all-zero mask.
475  %t = insertelement <8 x i16> undef, i16 %x, i32 0
476  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
477    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
478  %a = shl <8 x i16> %v, %s
479  ret <8 x i16> %a
480}
481
; Left shift of every <8 x i16> lane by the constant 5. The simd128 runs are
; expected to materialise the amount with i32.const and feed it to i16x8.shl.
; Every expected line is anchored with an end-of-line match so that a longer
; immediate (e.g. 50) cannot satisfy the i32.const pattern.
482; CHECK-LABEL: shl_const_v8i16:
483; NO-SIMD128-NOT: i16x8
484; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
485; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
486; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
487; SIMD128-NEXT: return $pop[[R]]{{$}}
488define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
489  %a = shl <8 x i16> %v,
490    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
491  ret <8 x i16> %a
492}
493
494; CHECK-LABEL: shl_vec_v8i16:
495; NO-SIMD128-NOT: i16x8
496; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
497; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
498; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
499; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
500; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
501; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
502; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
503; Skip 6 lanes
504; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
505; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
506; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
507; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
508; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
509; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
510; SIMD128-NEXT: return $pop[[R]]{{$}}
; Left shift with a separate amount per lane. The checks above pin lanes 0 and
; 7 of the simd128 output: both operands' lanes are extracted, the amount is
; masked with 15, the shift is a scalar i32.shl, and the result is written
; back with splat (lane 0) or replace_lane (lane 7).
511define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
512  %a = shl <8 x i16> %v, %x
513  ret <8 x i16> %a
514}
515
516; CHECK-LABEL: shr_s_v8i16:
517; NO-SIMD128-NOT: i16x8
518; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
519; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
520; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift of every lane by the same scalar amount %x. The checks
; above expect the simd128 runs to emit i16x8.shr_s taking the scalar directly.
521define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
  ; Broadcast %x: insert it into lane 0, then shuffle with an all-zero mask.
522  %t = insertelement <8 x i16> undef, i16 %x, i32 0
523  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
524    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
525  %a = ashr <8 x i16> %v, %s
526  ret <8 x i16> %a
527}
528
529; CHECK-LABEL: shr_s_vec_v8i16:
530; NO-SIMD128-NOT: i16x8
531; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
532; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
533; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
534; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
535; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
536; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
537; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
538; Skip 6 lanes
539; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
540; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
541; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
542; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
543; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
544; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
545; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift with a separate amount per lane. The checks above pin
; lanes 0 and 7 of the simd128 output: the value lane is extracted with
; extract_lane_s, the amount lane with extract_lane_u and masked with 15, the
; shift is a scalar i32.shr_s, and the result is written back with splat
; (lane 0) or replace_lane (lane 7).
546define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
547  %a = ashr <8 x i16> %v, %x
548  ret <8 x i16> %a
549}
550
551; CHECK-LABEL: shr_u_v8i16:
552; NO-SIMD128-NOT: i16x8
553; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
554; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
555; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift of every lane by the same scalar amount %x. The checks
; above expect the simd128 runs to emit i16x8.shr_u taking the scalar directly.
556define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
  ; Broadcast %x: insert it into lane 0, then shuffle with an all-zero mask.
557  %t = insertelement <8 x i16> undef, i16 %x, i32 0
558  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
559    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
560  %a = lshr <8 x i16> %v, %s
561  ret <8 x i16> %a
562}
563
564; CHECK-LABEL: shr_u_vec_v8i16:
565; NO-SIMD128-NOT: i16x8
566; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
567; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
568; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
569; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
570; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
571; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
572; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
573; Skip 6 lanes
574; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
575; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
576; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
577; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
578; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
579; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
580; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift with a separate amount per lane. The checks above pin
; lanes 0 and 7 of the simd128 output: both operands' lanes are extracted with
; extract_lane_u, the amount is masked with 15, the shift is a scalar
; i32.shr_u, and the result is written back with splat (lane 0) or
; replace_lane (lane 7).
581define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
582  %a = lshr <8 x i16> %v, %x
583  ret <8 x i16> %a
584}
585
586; CHECK-LABEL: and_v8i16:
587; NO-SIMD128-NOT: v128
588; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
589; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
590; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <8 x i16> vectors. The checks above expect the simd128
; runs to emit a single v128.and.
591define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
592  %a = and <8 x i16> %x, %y
593  ret <8 x i16> %a
594}
595
596; CHECK-LABEL: or_v8i16:
597; NO-SIMD128-NOT: v128
598; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
599; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
600; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <8 x i16> vectors. The checks above expect the simd128
; runs to emit a single v128.or.
601define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
602  %a = or <8 x i16> %x, %y
603  ret <8 x i16> %a
604}
605
606; CHECK-LABEL: xor_v8i16:
607; NO-SIMD128-NOT: v128
608; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
609; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
610; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <8 x i16> vectors. The checks above expect the simd128
; runs to emit a single v128.xor.
611define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
612  %a = xor <8 x i16> %x, %y
613  ret <8 x i16> %a
614}
615
616; CHECK-LABEL: not_v8i16:
617; NO-SIMD128-NOT: v128
618; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
619; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
620; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT spelled as XOR with an all-ones vector. The checks above expect
; the simd128 runs to emit a single v128.not.
621define <8 x i16> @not_v8i16(<8 x i16> %x) {
622  %a = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1,
623                          i16 -1, i16 -1, i16 -1, i16 -1>
624  ret <8 x i16> %a
625}
626
627; CHECK-LABEL: andnot_v8i16:
628; NO-SIMD128-NOT: v128
629; SIMD128-NEXT: .functype andnot_v8i16 (v128, v128) -> (v128){{$}}
630; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
631; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
632; SIMD128-FAST-NEXT: v128.not
633; SIMD128-FAST-NEXT: v128.and
634; SIMD128-FAST-NEXT: return
; Computes x & ~y, with ~y spelled as XOR with all-ones. Per the checks above,
; the simd128 run without -fast-isel expects one v128.andnot, while the
; -fast-isel run expects the unfused v128.not followed by v128.and.
635define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) {
636 %inv_y = xor <8 x i16> %y,
637   <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
638 %a = and <8 x i16> %x, %inv_y
639 ret <8 x i16> %a
640}
641
642; CHECK-LABEL: bitselect_v8i16:
643; NO-SIMD128-NOT: v128
644; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
645; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
646; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
647; SIMD128-FAST-NEXT: v128.and
648; SIMD128-FAST-NEXT: v128.not
649; SIMD128-FAST-NEXT: v128.and
650; SIMD128-FAST-NEXT: v128.or
651; SIMD128-FAST-NEXT: return
; Bitwise select spelled as (v1 & c) | (v2 & ~c). Here the mask %c is the
; second operand of each AND and of the XOR with all-ones, the commuted form
; of the 16 x i8 variant. Per the checks above, the simd128 run without
; -fast-isel still expects one v128.bitselect with operands ($1, $2, $0),
; while the -fast-isel run expects the unfused and/not/and/or sequence.
652define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
653  %masked_v1 = and <8 x i16> %v1, %c
654  %inv_mask = xor <8 x i16>
655    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>,
656    %c
657  %masked_v2 = and <8 x i16> %v2, %inv_mask
658  %a = or <8 x i16> %masked_v1, %masked_v2
659  ret <8 x i16> %a
660}
661
662; ==============================================================================
663; 4 x i32
664; ==============================================================================
665; CHECK-LABEL: add_v4i32:
666; NO-SIMD128-NOT: i32x4
667; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
668; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
669; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise add of two <4 x i32> vectors. The checks above expect the
; simd128 runs to emit a single i32x4.add.
670define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
671  %a = add <4 x i32> %x, %y
672  ret <4 x i32> %a
673}
674
675; CHECK-LABEL: sub_v4i32:
676; NO-SIMD128-NOT: i32x4
677; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
678; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
679; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise subtract (%x - %y) of two <4 x i32> vectors. The checks above
; expect the simd128 runs to emit a single i32x4.sub.
680define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
681  %a = sub <4 x i32> %x, %y
682  ret <4 x i32> %a
683}
684
685; CHECK-LABEL: mul_v4i32:
686; NO-SIMD128-NOT: i32x4
687; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
688; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
689; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise multiply of two <4 x i32> vectors. The checks above expect the
; simd128 runs to emit a single i32x4.mul.
690define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
691  %a = mul <4 x i32> %x, %y
692  ret <4 x i32> %a
693}
694
695; CHECK-LABEL: min_s_v4i32:
696; NO-SIMD128-NOT: i32x4
697; SIMD128-NEXT: .functype min_s_v4i32 (v128, v128) -> (v128){{$}}
698; SIMD128-NEXT: i32x4.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
699; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed minimum spelled as icmp slt + select. The checks above expect the
; simd128 runs to emit a single i32x4.min_s.
700define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
701  %c = icmp slt <4 x i32> %x, %y
702  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
703  ret <4 x i32> %a
704}
705
706; CHECK-LABEL: min_u_v4i32:
707; NO-SIMD128-NOT: i32x4
708; SIMD128-NEXT: .functype min_u_v4i32 (v128, v128) -> (v128){{$}}
709; SIMD128-NEXT: i32x4.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
710; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned minimum spelled as icmp ult + select. The checks above expect the
; simd128 runs to emit a single i32x4.min_u.
711define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
712  %c = icmp ult <4 x i32> %x, %y
713  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
714  ret <4 x i32> %a
715}
716
717; CHECK-LABEL: max_s_v4i32:
718; NO-SIMD128-NOT: i32x4
719; SIMD128-NEXT: .functype max_s_v4i32 (v128, v128) -> (v128){{$}}
720; SIMD128-NEXT: i32x4.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
721; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed maximum spelled as icmp sgt + select. The checks above expect the
; simd128 runs to emit a single i32x4.max_s.
722define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
723  %c = icmp sgt <4 x i32> %x, %y
724  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
725  ret <4 x i32> %a
726}
727
728; CHECK-LABEL: max_u_v4i32:
729; NO-SIMD128-NOT: i32x4
730; SIMD128-NEXT: .functype max_u_v4i32 (v128, v128) -> (v128){{$}}
731; SIMD128-NEXT: i32x4.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
732; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned maximum spelled as icmp ugt + select. The checks above expect the
; simd128 runs to emit a single i32x4.max_u.
733define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
734  %c = icmp ugt <4 x i32> %x, %y
735  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
736  ret <4 x i32> %a
737}
738
739; CHECK-LABEL: abs_v4i32:
740; NO-SIMD128-NOT: i32x4
741; SIMD128-NEXT: .functype abs_v4i32 (v128) -> (v128){{$}}
742; SIMD128-NEXT: i32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
743; SIMD128-NEXT: return $pop[[R]]{{$}}
; Absolute value spelled as select(x < 0, 0 - x, x). The checks above expect
; the simd128 runs to emit a single i32x4.abs.
744define <4 x i32> @abs_v4i32(<4 x i32> %x) {
745  %a = sub <4 x i32> zeroinitializer, %x
746  %b = icmp slt <4 x i32> %x, zeroinitializer
747  %c = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %x
748  ret <4 x i32> %c
749}
750
751; CHECK-LABEL: neg_v4i32:
752; NO-SIMD128-NOT: i32x4
753; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
754; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
755; SIMD128-NEXT: return $pop[[R]]{{$}}
; Negation spelled as (0 - x) with an explicit all-zero vector constant. The
; checks above expect the simd128 runs to emit a single i32x4.neg.
756define <4 x i32> @neg_v4i32(<4 x i32> %x) {
757  %a = sub <4 x i32> <i32 0, i32 0, i32 0, i32 0>, %x
758  ret <4 x i32> %a
759}
760
; Left shift of every lane by the same scalar amount: %x is splatted and used as
; the shift vector. With simd128 enabled the checks require i32x4.shl consuming
; the scalar argument $1 directly, with no splat instruction emitted.
761; CHECK-LABEL: shl_v4i32:
762; NO-SIMD128-NOT: i32x4
763; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
764; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
765; SIMD128-NEXT: return $pop[[R]]{{$}}
766define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
  ; Broadcast %x into all four lanes (all-zero shuffle mask picks lane 0 of %t).
767  %t = insertelement <4 x i32> undef, i32 %x, i32 0
768  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
769    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
770  %a = shl <4 x i32> %v, %s
771  ret <4 x i32> %a
772}
773
; Left shift of every lane by the constant splat 5. With simd128 enabled the
; checks require the amount to be materialized as a scalar i32.const and fed to
; i32x4.shl; without simd128 no i32x4 instruction may appear.
; The i32.const line is anchored at end-of-line so that a longer literal that
; merely starts with 5 (for example 50) cannot satisfy it.
774; CHECK-LABEL: shl_const_v4i32:
775; NO-SIMD128-NOT: i32x4
776; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
777; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
778; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
779; SIMD128-NEXT: return $pop[[R]]{{$}}
780define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
781  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
782  ret <4 x i32> %a
783}
784
; Left shift by an arbitrary (non-splat) vector of amounts. The checks expect
; the operation to be scalarized per lane: extract a lane from each operand,
; do a scalar i32.shl, then splat (lane 0) or replace_lane (later lanes).
; Only lanes 0 and 3 are spelled out; the one directive without the -NEXT
; suffix lets the matcher skip over lanes 1 and 2. The vector operand of the
; final replace_lane is matched with a fresh capture (L7) rather than L3.
785; CHECK-LABEL: shl_vec_v4i32:
786; NO-SIMD128-NOT: i32x4
787; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
788; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
789; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
790; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
791; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
792; Skip 2 lanes
793; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
794; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
795; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
796; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
797; SIMD128-NEXT: return $pop[[R]]{{$}}
798define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
799  %a = shl <4 x i32> %v, %x
800  ret <4 x i32> %a
801}
802
; Arithmetic right shift of every lane by the same scalar amount (splat of %x).
; With simd128 enabled the checks require i32x4.shr_s consuming the scalar
; argument $1 directly.
803; CHECK-LABEL: shr_s_v4i32:
804; NO-SIMD128-NOT: i32x4
805; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
806; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
807; SIMD128-NEXT: return $pop[[R]]{{$}}
808define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
  ; Broadcast %x into all four lanes (all-zero shuffle mask picks lane 0 of %t).
809  %t = insertelement <4 x i32> undef, i32 %x, i32 0
810  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
811    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
812  %a = ashr <4 x i32> %v, %s
813  ret <4 x i32> %a
814}
815
; Arithmetic right shift by an arbitrary (non-splat) vector of amounts. The
; checks expect per-lane scalarization: extract_lane from both operands, scalar
; i32.shr_s, then splat (lane 0) or replace_lane (later lanes). Only lanes 0 and
; 3 are spelled out; the directive without the -NEXT suffix skips lanes 1 and 2.
816; CHECK-LABEL: shr_s_vec_v4i32:
817; NO-SIMD128-NOT: i32x4
818; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
819; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
820; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
821; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
822; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
823; Skip 2 lanes
824; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
825; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
826; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
827; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
828; SIMD128-NEXT: return $pop[[R]]{{$}}
829define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
830  %a = ashr <4 x i32> %v, %x
831  ret <4 x i32> %a
832}
833
; Logical right shift of every lane by the same scalar amount (splat of %x).
; With simd128 enabled the checks require i32x4.shr_u consuming the scalar
; argument $1 directly.
834; CHECK-LABEL: shr_u_v4i32:
835; NO-SIMD128-NOT: i32x4
836; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
837; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
838; SIMD128-NEXT: return $pop[[R]]{{$}}
839define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
  ; Broadcast %x into all four lanes (all-zero shuffle mask picks lane 0 of %t).
840  %t = insertelement <4 x i32> undef, i32 %x, i32 0
841  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
842    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
843  %a = lshr <4 x i32> %v, %s
844  ret <4 x i32> %a
845}
846
; Logical right shift by an arbitrary (non-splat) vector of amounts. The checks
; expect per-lane scalarization: extract_lane from both operands, scalar
; i32.shr_u, then splat (lane 0) or replace_lane (later lanes). Only lanes 0 and
; 3 are spelled out; the directive without the -NEXT suffix skips lanes 1 and 2.
847; CHECK-LABEL: shr_u_vec_v4i32:
848; NO-SIMD128-NOT: i32x4
849; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
850; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
851; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
852; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
853; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
854; Skip 2 lanes
855; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
856; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
857; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
858; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
859; SIMD128-NEXT: return $pop[[R]]{{$}}
860define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
861  %a = lshr <4 x i32> %v, %x
862  ret <4 x i32> %a
863}
864
; Bitwise AND of two <4 x i32>. With simd128 enabled the checks require the
; lane-agnostic v128.and; without simd128 no v128 instruction may appear.
865; CHECK-LABEL: and_v4i32:
866; NO-SIMD128-NOT: v128
867; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
868; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
869; SIMD128-NEXT: return $pop[[R]]{{$}}
870define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
871  %a = and <4 x i32> %x, %y
872  ret <4 x i32> %a
873}
874
; Bitwise OR of two <4 x i32>. With simd128 enabled the checks require the
; lane-agnostic v128.or; without simd128 no v128 instruction may appear.
875; CHECK-LABEL: or_v4i32:
876; NO-SIMD128-NOT: v128
877; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
878; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
879; SIMD128-NEXT: return $pop[[R]]{{$}}
880define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
881  %a = or <4 x i32> %x, %y
882  ret <4 x i32> %a
883}
884
; Bitwise XOR of two <4 x i32>. With simd128 enabled the checks require the
; lane-agnostic v128.xor; without simd128 no v128 instruction may appear.
885; CHECK-LABEL: xor_v4i32:
886; NO-SIMD128-NOT: v128
887; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
888; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
889; SIMD128-NEXT: return $pop[[R]]{{$}}
890define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
891  %a = xor <4 x i32> %x, %y
892  ret <4 x i32> %a
893}
894
; Bitwise NOT written as XOR with an all-ones vector. With simd128 enabled the
; checks require a single v128.not instead of a constant plus v128.xor.
895; CHECK-LABEL: not_v4i32:
896; NO-SIMD128-NOT: v128
897; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
898; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
899; SIMD128-NEXT: return $pop[[R]]{{$}}
900define <4 x i32> @not_v4i32(<4 x i32> %x) {
901  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
902  ret <4 x i32> %a
903}
904
; x & ~y on <4 x i32>. The expected output differs by instruction selector:
; the default-isel simd128 run must fuse the pair into v128.andnot $0, $1,
; while the -fast-isel simd128 run is expected to emit a separate v128.not
; followed by v128.and.
905; CHECK-LABEL: andnot_v4i32:
906; NO-SIMD128-NOT: v128
907; SIMD128-NEXT: .functype andnot_v4i32 (v128, v128) -> (v128){{$}}
908; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
909; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
910; SIMD128-FAST-NEXT: v128.not
911; SIMD128-FAST-NEXT: v128.and
912; SIMD128-FAST-NEXT: return
913define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; ~y spelled as y ^ all-ones.
914 %inv_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
915 %a = and <4 x i32> %x, %inv_y
916 ret <4 x i32> %a
917}
918
; Bitwise select (c & v1) | (~c & v2) on <4 x i32>. The default-isel simd128
; run must fuse the whole expression into one v128.bitselect whose operands are
; v1 ($1), v2 ($2) and the mask c ($0), in that order. The -fast-isel simd128
; run is expected to emit the unfused not / and / and / or sequence.
; Here the mask is the left-hand operand of both `and` instructions.
919; CHECK-LABEL: bitselect_v4i32:
920; NO-SIMD128-NOT: v128
921; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
922; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
923; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
924; SIMD128-FAST-NEXT: v128.not
925; SIMD128-FAST-NEXT: v128.and
926; SIMD128-FAST-NEXT: v128.and
927; SIMD128-FAST-NEXT: v128.or
928; SIMD128-FAST-NEXT: return
929define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
930  %masked_v1 = and <4 x i32> %c, %v1
931  %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
932  %masked_v2 = and <4 x i32> %inv_mask, %v2
933  %a = or <4 x i32> %masked_v2, %masked_v1
934  ret <4 x i32> %a
935}
936
937; ==============================================================================
938; 2 x i64
939; ==============================================================================
; Lane-wise add of two <2 x i64>. With simd128 enabled the checks require a
; single i64x2.add; without simd128 no i64x2 instruction may appear.
940; CHECK-LABEL: add_v2i64:
941; NO-SIMD128-NOT: i64x2
942; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
943; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
944; SIMD128-NEXT: return $pop[[R]]{{$}}
945define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
946  %a = add <2 x i64> %x, %y
947  ret <2 x i64> %a
948}
949
; Lane-wise subtract of two <2 x i64>. With simd128 enabled the checks require
; a single i64x2.sub; without simd128 no i64x2 instruction may appear.
950; CHECK-LABEL: sub_v2i64:
951; NO-SIMD128-NOT: i64x2
952; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
953; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
954; SIMD128-NEXT: return $pop[[R]]{{$}}
955define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
956  %a = sub <2 x i64> %x, %y
957  ret <2 x i64> %a
958}
959
; Lane-wise multiply of two <2 x i64>. With simd128 enabled the checks require
; a single i64x2.mul on the two arguments; without simd128 no i64x2 instruction
; may appear.
; The multiply is matched on the line immediately after the .functype line,
; like every sibling test, so that any unexpected instruction emitted before it
; (for example a scalarized expansion) fails the test instead of being skipped.
960; CHECK-LABEL: mul_v2i64:
961; NO-SIMD128-NOT: i64x2
962; SIMD128-NEXT: .functype mul_v2i64 (v128, v128) -> (v128){{$}}
963; SIMD128-NEXT: i64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
964; SIMD128-NEXT: return $pop[[R]]{{$}}
965define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) {
966  %a = mul <2 x i64> %x, %y
967  ret <2 x i64> %a
968}
969
; Integer absolute value spelled as negate / compare-with-zero / select on
; <2 x i64>. With simd128 enabled the idiom must collapse into one i64x2.abs.
; The negative check for the runs without simd128 looks for the bare opcode
; prefix i64x2 (no trailing colon): the previous pattern included a colon,
; which never occurs after an opcode prefix, so the check could not fail.
970; CHECK-LABEL: abs_v2i64:
971; NO-SIMD128-NOT: i64x2
972; SIMD128-NEXT: .functype abs_v2i64 (v128) -> (v128){{$}}
973; SIMD128-NEXT: i64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
974; SIMD128-NEXT: return $pop[[R]]{{$}}
975define <2 x i64> @abs_v2i64(<2 x i64> %x) {
  ; %a = -x, %b = (x < 0), %c = %b ? -x : x
976  %a = sub <2 x i64> zeroinitializer, %x
977  %b = icmp slt <2 x i64> %x, zeroinitializer
978  %c = select <2 x i1> %b, <2 x i64> %a, <2 x i64> %x
979  ret <2 x i64> %c
980}
981
; Negation written as subtraction from an all-zero vector. With simd128 enabled
; the checks require a single i64x2.neg.
982; CHECK-LABEL: neg_v2i64:
983; NO-SIMD128-NOT: i64x2
984; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
985; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
986; SIMD128-NEXT: return $pop[[R]]{{$}}
987define <2 x i64> @neg_v2i64(<2 x i64> %x) {
988  %a = sub <2 x i64> <i64 0, i64 0>, %x
989  ret <2 x i64> %a
990}
991
; Left shift of both i64 lanes by an i32 amount that is zero-extended to i64
; and splatted. With simd128 enabled the checks require i64x2.shl to take the
; i32 argument $1 directly: no extend, wrap or splat instruction is expected.
992; CHECK-LABEL: shl_v2i64:
993; NO-SIMD128-NOT: i64x2
994; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
995; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
996; SIMD128-NEXT: return $pop[[R]]{{$}}
997define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
998  %x2 = zext i32 %x to i64
  ; Broadcast the extended amount into both lanes (all-zero shuffle mask).
999  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1000  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1001  %a = shl <2 x i64> %v, %s
1002  ret <2 x i64> %a
1003}
1004
; Same as the zero-extended splat shift, but the i32 amount is sign-extended to
; i64 before being splatted. The expected output is identical: i64x2.shl taking
; the i32 argument $1 directly, with no extension instruction.
1005; CHECK-LABEL: shl_sext_v2i64:
1006; NO-SIMD128-NOT: i64x2
1007; SIMD128-NEXT: .functype shl_sext_v2i64 (v128, i32) -> (v128){{$}}
1008; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
1009; SIMD128-NEXT: return $pop[[R]]{{$}}
1010define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) {
1011  %x2 = sext i32 %x to i64
1012  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1013  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1014  %a = shl <2 x i64> %v, %s
1015  ret <2 x i64> %a
1016}
1017
; Left shift of both i64 lanes by a splatted amount that is already an i64
; argument. With simd128 enabled the checks require the amount to be narrowed
; with i32.wrap_i64 and then passed to i64x2.shl.
1018; CHECK-LABEL: shl_noext_v2i64:
1019; NO-SIMD128-NOT: i64x2
1020; SIMD128-NEXT: .functype shl_noext_v2i64 (v128, i64) -> (v128){{$}}
1021; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1022; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1023; SIMD128-NEXT: return $pop[[R]]{{$}}
1024define <2 x i64> @shl_noext_v2i64(<2 x i64> %v, i64 %x) {
1025  %t = insertelement <2 x i64> undef, i64 %x, i32 0
1026  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1027  %a = shl <2 x i64> %v, %s
1028  ret <2 x i64> %a
1029}
1030
; Left shift of both i64 lanes by the constant splat 5. With simd128 enabled
; the checks require the amount to be materialized as a scalar i32.const (not
; an i64 constant) and fed to i64x2.shl.
1031; CHECK-LABEL: shl_const_v2i64:
1032; NO-SIMD128-NOT: i64x2
1033; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
1034; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1035; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1036; SIMD128-NEXT: return $pop[[R]]{{$}}
1037define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
1038  %a = shl <2 x i64> %v, <i64 5, i64 5>
1039  ret <2 x i64> %a
1040}
1041
; Left shift by an arbitrary (non-splat) <2 x i64> of amounts. The checks expect
; full scalarization and spell out both lanes: lane 0 is extracted, shifted
; with scalar i64.shl and splatted; lane 1 is extracted, shifted and written
; into that splat (L3) with replace_lane.
1042; CHECK-LABEL: shl_vec_v2i64:
1043; NO-SIMD128-NOT: i64x2
1044; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
1045; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1046; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1047; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1048; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1049; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1050; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1051; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1052; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1053; SIMD128-NEXT: return $pop[[R]]{{$}}
1054define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
1055  %a = shl <2 x i64> %v, %x
1056  ret <2 x i64> %a
1057}
1058
; Arithmetic right shift of both i64 lanes by an i32 amount that is
; zero-extended to i64 and splatted. With simd128 enabled the checks require
; i64x2.shr_s taking the i32 argument $1 directly.
1059; CHECK-LABEL: shr_s_v2i64:
1060; NO-SIMD128-NOT: i64x2
1061; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
1062; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
1063; SIMD128-NEXT: return $pop[[R]]{{$}}
1064define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
1065  %x2 = zext i32 %x to i64
1066  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1067  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1068  %a = ashr <2 x i64> %v, %s
1069  ret <2 x i64> %a
1070}
1071
; Arithmetic right shift of both i64 lanes by an i32 amount that is
; sign-extended to i64 and splatted. The expected output matches the
; zero-extended variant: i64x2.shr_s taking the i32 argument $1 directly.
1072; CHECK-LABEL: shr_s_sext_v2i64:
1073; NO-SIMD128-NOT: i64x2
1074; SIMD128-NEXT: .functype shr_s_sext_v2i64 (v128, i32) -> (v128){{$}}
1075; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
1076; SIMD128-NEXT: return $pop[[R]]{{$}}
1077define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) {
1078  %x2 = sext i32 %x to i64
1079  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1080  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1081  %a = ashr <2 x i64> %v, %s
1082  ret <2 x i64> %a
1083}
1084
; Arithmetic right shift of both i64 lanes by a splatted amount that is already
; an i64 argument. With simd128 enabled the checks require the amount to be
; narrowed with i32.wrap_i64 and then passed to i64x2.shr_s.
1085; CHECK-LABEL: shr_s_noext_v2i64:
1086; NO-SIMD128-NOT: i64x2
1087; SIMD128-NEXT: .functype shr_s_noext_v2i64 (v128, i64) -> (v128){{$}}
1088; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1089; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1090; SIMD128-NEXT: return $pop[[R]]{{$}}
1091define <2 x i64> @shr_s_noext_v2i64(<2 x i64> %v, i64 %x) {
1092  %t = insertelement <2 x i64> undef, i64 %x, i32 0
1093  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1094  %a = ashr <2 x i64> %v, %s
1095  ret <2 x i64> %a
1096}
1097
; Arithmetic right shift of both i64 lanes by the constant splat 5. With
; simd128 enabled the checks require the amount to be materialized as a scalar
; i32.const and fed to i64x2.shr_s.
1098; CHECK-LABEL: shr_s_const_v2i64:
1099; NO-SIMD128-NOT: i64x2
1100; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
1101; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1102; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1103; SIMD128-NEXT: return $pop[[R]]{{$}}
1104define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) {
1105  %a = ashr <2 x i64> %v, <i64 5, i64 5>
1106  ret <2 x i64> %a
1107}
1108
; Arithmetic right shift by an arbitrary (non-splat) <2 x i64> of amounts. The
; checks expect full scalarization and spell out both lanes: lane 0 is
; extracted, shifted with scalar i64.shr_s and splatted; lane 1 is extracted,
; shifted and written into that splat (L3) with replace_lane.
1109; CHECK-LABEL: shr_s_vec_v2i64:
1110; NO-SIMD128-NOT: i64x2
1111; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
1112; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1113; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1114; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1115; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1116; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1117; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1118; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1119; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1120; SIMD128-NEXT: return $pop[[R]]{{$}}
1121define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
1122  %a = ashr <2 x i64> %v, %x
1123  ret <2 x i64> %a
1124}
1125
; Logical right shift of both i64 lanes by an i32 amount that is zero-extended
; to i64 and splatted. With simd128 enabled the checks require i64x2.shr_u
; taking the i32 argument $1 directly.
1126; CHECK-LABEL: shr_u_v2i64:
1127; NO-SIMD128-NOT: i64x2
1128; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
1129; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
1130; SIMD128-NEXT: return $pop[[R]]{{$}}
1131define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
1132  %x2 = zext i32 %x to i64
1133  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1134  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1135  %a = lshr <2 x i64> %v, %s
1136  ret <2 x i64> %a
1137}
1138
; Logical right shift of both i64 lanes by an i32 amount that is sign-extended
; to i64 and splatted. The expected output matches the zero-extended variant:
; i64x2.shr_u taking the i32 argument $1 directly.
1139; CHECK-LABEL: shr_u_sext_v2i64:
1140; NO-SIMD128-NOT: i64x2
1141; SIMD128-NEXT: .functype shr_u_sext_v2i64 (v128, i32) -> (v128){{$}}
1142; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
1143; SIMD128-NEXT: return $pop[[R]]{{$}}
1144define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) {
1145  %x2 = sext i32 %x to i64
1146  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1147  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1148  %a = lshr <2 x i64> %v, %s
1149  ret <2 x i64> %a
1150}
1151
; Logical right shift of both i64 lanes by a splatted amount that is already an
; i64 argument. With simd128 enabled the checks require the amount to be
; narrowed with i32.wrap_i64 and then passed to i64x2.shr_u.
1152; CHECK-LABEL: shr_u_noext_v2i64:
1153; NO-SIMD128-NOT: i64x2
1154; SIMD128-NEXT: .functype shr_u_noext_v2i64 (v128, i64) -> (v128){{$}}
1155; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1156; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1157; SIMD128-NEXT: return $pop[[R]]{{$}}
1158define <2 x i64> @shr_u_noext_v2i64(<2 x i64> %v, i64 %x) {
1159  %t = insertelement <2 x i64> undef, i64 %x, i32 0
1160  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1161  %a = lshr <2 x i64> %v, %s
1162  ret <2 x i64> %a
1163}
1164
; Logical right shift of both i64 lanes by the constant splat 5. With simd128
; enabled the checks require the amount to be materialized as a scalar
; i32.const and fed to i64x2.shr_u.
1165; CHECK-LABEL: shr_u_const_v2i64:
1166; NO-SIMD128-NOT: i64x2
1167; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
1168; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1169; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1170; SIMD128-NEXT: return $pop[[R]]{{$}}
1171define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) {
1172  %a = lshr <2 x i64> %v, <i64 5, i64 5>
1173  ret <2 x i64> %a
1174}
1175
; Logical right shift by an arbitrary (non-splat) <2 x i64> of amounts. The
; checks expect full scalarization and spell out both lanes: lane 0 is
; extracted, shifted with scalar i64.shr_u and splatted; lane 1 is extracted,
; shifted and written into that splat (L3) with replace_lane.
1176; CHECK-LABEL: shr_u_vec_v2i64:
1177; NO-SIMD128-NOT: i64x2
1178; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
1179; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1180; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1181; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1182; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1183; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1184; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1185; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1186; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1187; SIMD128-NEXT: return $pop[[R]]{{$}}
1188define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
1189  %a = lshr <2 x i64> %v, %x
1190  ret <2 x i64> %a
1191}
1192
; Bitwise AND of two <2 x i64>. With simd128 enabled the checks require the
; lane-agnostic v128.and; without simd128 no v128 instruction may appear.
1193; CHECK-LABEL: and_v2i64:
1194; NO-SIMD128-NOT: v128
1195; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
1196; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
1197; SIMD128-NEXT: return $pop[[R]]{{$}}
1198define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) {
1199  %a = and <2 x i64> %x, %y
1200  ret <2 x i64> %a
1201}
1202
; Bitwise OR of two <2 x i64>. With simd128 enabled the checks require the
; lane-agnostic v128.or; without simd128 no v128 instruction may appear.
1203; CHECK-LABEL: or_v2i64:
1204; NO-SIMD128-NOT: v128
1205; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
1206; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
1207; SIMD128-NEXT: return $pop[[R]]{{$}}
1208define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) {
1209  %a = or <2 x i64> %x, %y
1210  ret <2 x i64> %a
1211}
1212
; Bitwise XOR of two <2 x i64>. With simd128 enabled the checks require the
; lane-agnostic v128.xor; without simd128 no v128 instruction may appear.
1213; CHECK-LABEL: xor_v2i64:
1214; NO-SIMD128-NOT: v128
1215; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
1216; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
1217; SIMD128-NEXT: return $pop[[R]]{{$}}
1218define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) {
1219  %a = xor <2 x i64> %x, %y
1220  ret <2 x i64> %a
1221}
1222
; Bitwise NOT written as XOR with an all-ones vector. With simd128 enabled the
; checks require a single v128.not instead of a constant plus v128.xor.
1223; CHECK-LABEL: not_v2i64:
1224; NO-SIMD128-NOT: v128
1225; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
1226; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
1227; SIMD128-NEXT: return $pop[[R]]{{$}}
1228define <2 x i64> @not_v2i64(<2 x i64> %x) {
1229  %a = xor <2 x i64> %x, <i64 -1, i64 -1>
1230  ret <2 x i64> %a
1231}
1232
; x & ~y on <2 x i64>. The expected output differs by instruction selector:
; the default-isel simd128 run must fuse the pair into v128.andnot $0, $1,
; while the -fast-isel simd128 run is expected to emit a separate v128.not
; followed by v128.and.
1233; CHECK-LABEL: andnot_v2i64:
1234; NO-SIMD128-NOT: v128
1235; SIMD128-NEXT: .functype andnot_v2i64 (v128, v128) -> (v128){{$}}
1236; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
1237; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1238; SIMD128-FAST-NEXT: v128.not
1239; SIMD128-FAST-NEXT: v128.and
1240; SIMD128-FAST-NEXT: return
1241define <2 x i64> @andnot_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; ~y spelled as y ^ all-ones.
1242 %inv_y = xor <2 x i64> %y, <i64 -1, i64 -1>
1243 %a = and <2 x i64> %x, %inv_y
1244 ret <2 x i64> %a
1245}
1246
; Bitwise select (v1 & c) | (v2 & ~c) on <2 x i64>. The default-isel simd128
; run must fuse the whole expression into one v128.bitselect whose operands are
; v1 ($1), v2 ($2) and the mask c ($0), in that order. The -fast-isel simd128
; run is expected to emit the unfused not / and / and / or sequence.
; Here the mask is the right-hand operand of both `and` instructions.
1247; CHECK-LABEL: bitselect_v2i64:
1248; NO-SIMD128-NOT: v128
1249; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
1250; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
1251; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1252; SIMD128-FAST-NEXT: v128.not
1253; SIMD128-FAST-NEXT: v128.and
1254; SIMD128-FAST-NEXT: v128.and
1255; SIMD128-FAST-NEXT: v128.or
1256; SIMD128-FAST-NEXT: return
1257define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
1258  %masked_v1 = and <2 x i64> %v1, %c
1259  %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c
1260  %masked_v2 = and <2 x i64> %v2, %inv_mask
1261  %a = or <2 x i64> %masked_v2, %masked_v1
1262  ret <2 x i64> %a
1263}
1264
1265; ==============================================================================
1266; 4 x float
1267; ==============================================================================
; Float negation written as `fsub nsz 0.0, x`. With simd128 enabled the checks
; require a single f32x4.neg; without simd128 no f32x4 instruction may appear.
1268; CHECK-LABEL: neg_v4f32:
1269; NO-SIMD128-NOT: f32x4
1270; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
1271; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
1272; SIMD128-NEXT: return $pop[[R]]{{$}}
1273define <4 x float> @neg_v4f32(<4 x float> %x) {
1274  ; nsz makes this semantically equivalent to flipping sign bit
1275  %a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
1276  ret <4 x float> %a
1277}
1278
; Float absolute value through the llvm.fabs.v4f32 intrinsic. With simd128
; enabled the checks require a single f32x4.abs.
1279; CHECK-LABEL: abs_v4f32:
1280; NO-SIMD128-NOT: f32x4
1281; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
1282; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
1283; SIMD128-NEXT: return $pop[[R]]{{$}}
1284declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
1285define <4 x float> @abs_v4f32(<4 x float> %x) {
1286  %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
1287  ret <4 x float> %a
1288}
1289
; Minimum against the constant splat 5.0 written as an unordered compare
; (`fcmp ule x, 5.0`) followed by `select x, 5.0`. With simd128 enabled the
; checks require a v128.const holding 0x1.4p2 (= 5.0) in every lane and a
; single f32x4.min of the argument with that constant.
1290; CHECK-LABEL: min_unordered_v4f32:
1291; NO-SIMD128-NOT: f32x4
1292; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
1293; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1294; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1295; SIMD128-NEXT: return $pop[[R]]{{$}}
1296define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
1297  %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
1298  %a = select <4 x i1> %cmps, <4 x float> %x,
1299    <4 x float> <float 5., float 5., float 5., float 5.>
1300  ret <4 x float> %a
1301}
1302
; Maximum against the constant splat 5.0 written as an unordered compare
; (`fcmp uge x, 5.0`) followed by `select x, 5.0`. With simd128 enabled the
; checks require a v128.const holding 0x1.4p2 (= 5.0) in every lane and a
; single f32x4.max of the argument with that constant.
; The v128.const line is anchored at end-of-line, as in the sibling min/max
; tests, so trailing operands or a longer final literal cannot slip through.
1303; CHECK-LABEL: max_unordered_v4f32:
1304; NO-SIMD128-NOT: f32x4
1305; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
1306; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1307; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1308; SIMD128-NEXT: return $pop[[R]]{{$}}
1309define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
1310  %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
1311  %a = select <4 x i1> %cmps, <4 x float> %x,
1312    <4 x float> <float 5., float 5., float 5., float 5.>
1313  ret <4 x float> %a
1314}
1315
; Minimum against the constant splat 5.0 written as an ordered compare with the
; constant on the left (`fcmp ole 5.0, x`) followed by `select 5.0, x`. With
; simd128 enabled the checks require a v128.const of 0x1.4p2 (= 5.0) and a
; single f32x4.min whose first operand is the argument $0.
1316; CHECK-LABEL: min_ordered_v4f32:
1317; NO-SIMD128-NOT: f32x4
1318; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
1319; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1320; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1321; SIMD128-NEXT: return $pop[[R]]{{$}}
1322define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
1323  %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
1324  %a = select <4 x i1> %cmps,
1325    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1326  ret <4 x float> %a
1327}
1328
; Maximum against the constant splat 5.0 written as an ordered compare with the
; constant on the left (`fcmp oge 5.0, x`) followed by `select 5.0, x`. With
; simd128 enabled the checks require a v128.const of 0x1.4p2 (= 5.0) and a
; single f32x4.max whose first operand is the argument $0.
1329; CHECK-LABEL: max_ordered_v4f32:
1330; NO-SIMD128-NOT: f32x4
1331; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1332; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1333; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1334; SIMD128-NEXT: return $pop[[R]]{{$}}
1335define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
1336  %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
1337  %a = select <4 x i1> %cmps,
1338    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1339  ret <4 x float> %a
1340}
1341
; Minimum through the llvm.minimum.v4f32 intrinsic. With simd128 enabled the
; checks require a single f32x4.min on the two arguments.
1342; CHECK-LABEL: min_intrinsic_v4f32:
1343; NO-SIMD128-NOT: f32x4
1344; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1345; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1346; SIMD128-NEXT: return $pop[[R]]{{$}}
1347declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1348define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1349  %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
1350  ret <4 x float> %a
1351}
1352
; Minimum through the llvm.minnum.v4f32 intrinsic, called with the `nnan`
; fast-math flag. With simd128 enabled the checks require the same single
; f32x4.min that the llvm.minimum form produces.
1353; CHECK-LABEL: minnum_intrinsic_v4f32:
1354; NO-SIMD128-NOT: f32x4
1355; SIMD128-NEXT: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1356; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1357; SIMD128-NEXT: return $pop[[R]]{{$}}
1358declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
1359define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1360  %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
1361  ret <4 x float> %a
1362}
1363
; Maximum through the llvm.maximum.v4f32 intrinsic. With simd128 enabled the
; checks require a single f32x4.max on the two arguments.
1364; CHECK-LABEL: max_intrinsic_v4f32:
1365; NO-SIMD128-NOT: f32x4
1366; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1367; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1368; SIMD128-NEXT: return $pop[[R]]{{$}}
1369declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1370define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1371  %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
1372  ret <4 x float> %a
1373}
1374
; Maximum through the llvm.maxnum.v4f32 intrinsic, called with the `nnan`
; fast-math flag. With simd128 enabled the checks require the same single
; f32x4.max that the llvm.maximum form produces.
1375; CHECK-LABEL: maxnum_intrinsic_v4f32:
1376; NO-SIMD128-NOT: f32x4
1377; SIMD128-NEXT: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1378; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1379; SIMD128-NEXT: return $pop[[R]]{{$}}
1380declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
1381define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1382  %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
1383  ret <4 x float> %a
1384}
1385
; llvm.minimum.v4f32 applied to two constant splats (42.0 and 5.0). With
; simd128 enabled the checks require the call to be folded away: the body is
; just a v128.const of 0x1.4p2 (= 5.0) followed by the return, with no min
; instruction in between.
1386; CHECK-LABEL: min_const_intrinsic_v4f32:
1387; NO-SIMD128-NOT: f32x4
1388; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1389; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1390; SIMD128-NEXT: return $pop[[R]]{{$}}
1391define <4 x float> @min_const_intrinsic_v4f32() {
1392  %a = call <4 x float> @llvm.minimum.v4f32(
1393    <4 x float> <float 42., float 42., float 42., float 42.>,
1394    <4 x float> <float 5., float 5., float 5., float 5.>
1395  )
1396  ret <4 x float> %a
1397}
1398
; llvm.maximum.v4f32 applied to two constant splats (42.0 and 5.0). With
; simd128 enabled the checks require the call to be folded away: the body is
; just a v128.const of 0x1.5p5 (= 42.0) followed by the return, with no max
; instruction in between.
1399; CHECK-LABEL: max_const_intrinsic_v4f32:
1400; NO-SIMD128-NOT: f32x4
1401; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1402; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}}
1403; SIMD128-NEXT: return $pop[[R]]{{$}}
1404define <4 x float> @max_const_intrinsic_v4f32() {
1405  %a = call <4 x float> @llvm.maximum.v4f32(
1406    <4 x float> <float 42., float 42., float 42., float 42.>,
1407    <4 x float> <float 5., float 5., float 5., float 5.>
1408  )
1409  ret <4 x float> %a
1410}
1411
; Lane-wise float add. With simd128 enabled the checks require a single
; f32x4.add; without simd128 no f32x4 instruction may appear.
1412; CHECK-LABEL: add_v4f32:
1413; NO-SIMD128-NOT: f32x4
1414; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1415; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1416; SIMD128-NEXT: return $pop[[R]]{{$}}
1417define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
1418  %a = fadd <4 x float> %x, %y
1419  ret <4 x float> %a
1420}
1421
; Lane-wise float subtract. With simd128 enabled the checks require a single
; f32x4.sub; without simd128 no f32x4 instruction may appear.
1422; CHECK-LABEL: sub_v4f32:
1423; NO-SIMD128-NOT: f32x4
1424; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1425; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1426; SIMD128-NEXT: return $pop[[R]]{{$}}
1427define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
1428  %a = fsub <4 x float> %x, %y
1429  ret <4 x float> %a
1430}
1431
; Lane-wise float divide. With simd128 enabled the checks require a single
; f32x4.div; without simd128 no f32x4 instruction may appear.
1432; CHECK-LABEL: div_v4f32:
1433; NO-SIMD128-NOT: f32x4
1434; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1435; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1436; SIMD128-NEXT: return $pop[[R]]{{$}}
1437define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
1438  %a = fdiv <4 x float> %x, %y
1439  ret <4 x float> %a
1440}
1441
; Lane-wise float multiply. With simd128 enabled the checks require a single
; f32x4.mul; without simd128 no f32x4 instruction may appear.
1442; CHECK-LABEL: mul_v4f32:
1443; NO-SIMD128-NOT: f32x4
1444; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1445; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1446; SIMD128-NEXT: return $pop[[R]]{{$}}
1447define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
1448  %a = fmul <4 x float> %x, %y
1449  ret <4 x float> %a
1450}
1451
; Square root through the llvm.sqrt.v4f32 intrinsic. With simd128 enabled the
; checks require a single f32x4.sqrt.
1452; CHECK-LABEL: sqrt_v4f32:
1453; NO-SIMD128-NOT: f32x4
1454; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1455; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1456; SIMD128-NEXT: return $pop[[R]]{{$}}
1457declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1458define <4 x float> @sqrt_v4f32(<4 x float> %x) {
1459  %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1460  ret <4 x float> %a
1461}
1462
1463; ==============================================================================
1464; 2 x double
1465; ==============================================================================
; Double negation written as `fsub nsz 0.0, x`. With simd128 enabled the checks
; require a single f64x2.neg; without simd128 no f64x2 instruction may appear.
1466; CHECK-LABEL: neg_v2f64:
1467; NO-SIMD128-NOT: f64x2
1468; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1469; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1470; SIMD128-NEXT: return $pop[[R]]{{$}}
1471define <2 x double> @neg_v2f64(<2 x double> %x) {
1472  ; nsz makes this semantically equivalent to flipping sign bit
1473  %a = fsub nsz <2 x double> <double 0., double 0.>, %x
1474  ret <2 x double> %a
1475}
1476
; Double absolute value through the llvm.fabs.v2f64 intrinsic. With simd128
; enabled the checks require a single f64x2.abs.
1477; CHECK-LABEL: abs_v2f64:
1478; NO-SIMD128-NOT: f64x2
1479; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1480; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1481; SIMD128-NEXT: return $pop[[R]]{{$}}
1482declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
1483define <2 x double> @abs_v2f64(<2 x double> %x) {
1484  %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
1485  ret <2 x double> %a
1486}
1487
; Minimum against the constant splat 5.0 written as an unordered compare
; (`fcmp ule x, 5.0`) followed by `select x, 5.0`. With simd128 enabled the
; checks require a two-lane v128.const of 0x1.4p2 (= 5.0) and a single
; f64x2.min of the argument with that constant.
1488; CHECK-LABEL: min_unordered_v2f64:
1489; NO-SIMD128-NOT: f64x2
1490; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1491; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1492; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1493; SIMD128-NEXT: return $pop[[R]]{{$}}
1494define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
1495  %cmps = fcmp ule <2 x double> %x, <double 5., double 5.>
1496  %a = select <2 x i1> %cmps, <2 x double> %x,
1497    <2 x double> <double 5., double 5.>
1498  ret <2 x double> %a
1499}
1500
; Maximum against the constant splat 5.0 written as an unordered compare
; (`fcmp uge x, 5.0`) followed by `select x, 5.0`. With simd128 enabled the
; checks require a two-lane v128.const of 0x1.4p2 (= 5.0) and a single
; f64x2.max of the argument with that constant.
1501; CHECK-LABEL: max_unordered_v2f64:
1502; NO-SIMD128-NOT: f64x2
1503; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1504; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1505; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1506; SIMD128-NEXT: return $pop[[R]]{{$}}
1507define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
1508  %cmps = fcmp uge <2 x double> %x, <double 5., double 5.>
1509  %a = select <2 x i1> %cmps, <2 x double> %x,
1510    <2 x double> <double 5., double 5.>
1511  ret <2 x double> %a
1512}
1513
; Minimum against the constant splat 5.0 written as an ordered compare with the
; constant on the left (`fcmp ole 5.0, x`) followed by `select 5.0, x`. With
; simd128 enabled the checks require a two-lane v128.const of 0x1.4p2 (= 5.0)
; and a single f64x2.min whose first operand is the argument $0.
1514; CHECK-LABEL: min_ordered_v2f64:
1515; NO-SIMD128-NOT: f64x2
1516; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1517; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1518; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1519; SIMD128-NEXT: return $pop[[R]]{{$}}
1520define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
1521  %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x
1522  %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1523    <2 x double> %x
1524  ret <2 x double> %a
1525}
1526
; select(5.0 oge x, 5.0, x): the ordered form, with the constant splat as the
; first compare operand; still expected to be emitted as one f64x2.max.
; CHECK-LABEL: max_ordered_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
; SIMD128-NEXT: v128.const $push[[FIVE:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
; SIMD128-NEXT: f64x2.max $push[[MAX:[0-9]+]]=, $0, $pop[[FIVE]]{{$}}
; SIMD128-NEXT: return $pop[[MAX]]{{$}}
define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
  %five_oge_x = fcmp oge <2 x double> <double 5.0, double 5.0>, %x
  %max = select <2 x i1> %five_oge_x,
                <2 x double> <double 5.0, double 5.0>,
                <2 x double> %x
  ret <2 x double> %max
}
1539
; A direct call to llvm.minimum on two vector arguments; with simd128 enabled
; the expected output is a single f64x2.min of the two parameters.
; CHECK-LABEL: min_intrinsic_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.min $push[[MIN:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[MIN]]{{$}}
declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  %min = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x,
                                               <2 x double> %y)
  ret <2 x double> %min
}
1550
; A direct call to llvm.maximum on two vector arguments; with simd128 enabled
; the expected output is a single f64x2.max of the two parameters.
; CHECK-LABEL: max_intrinsic_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.max $push[[MAX:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[MAX]]{{$}}
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  %max = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x,
                                               <2 x double> %y)
  ret <2 x double> %max
}
1561
; llvm.minimum applied to two constant splats (42.0 and 5.0). The expected
; output contains no min instruction, only a v128.const holding 5.0 (0x1.4p2)
; in both lanes.
; CHECK-LABEL: min_const_intrinsic_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
; SIMD128-NEXT: v128.const $push[[FOLDED:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
; SIMD128-NEXT: return $pop[[FOLDED]]{{$}}
define <2 x double> @min_const_intrinsic_v2f64() {
  %min = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 42.0, double 42.0>,
                                               <2 x double> <double 5.0, double 5.0>)
  ret <2 x double> %min
}
1574
; llvm.maximum applied to two constant splats (42.0 and 5.0). The expected
; output contains no max instruction, only a v128.const holding 42.0
; (0x1.5p5) in both lanes.
; CHECK-LABEL: max_const_intrinsic_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
; SIMD128-NEXT: v128.const $push[[FOLDED:[0-9]+]]=, 0x1.5p5, 0x1.5p5{{$}}
; SIMD128-NEXT: return $pop[[FOLDED]]{{$}}
define <2 x double> @max_const_intrinsic_v2f64() {
  %max = call <2 x double> @llvm.maximum.v2f64(<2 x double> <double 42.0, double 42.0>,
                                               <2 x double> <double 5.0, double 5.0>)
  ret <2 x double> %max
}
1587
; Plain vector fadd; with simd128 enabled the expected output is f64x2.add.
; CHECK-LABEL: add_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.add $push[[SUM:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[SUM]]{{$}}
define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
  %sum = fadd <2 x double> %x, %y
  ret <2 x double> %sum
}
1597
; Plain vector fsub; with simd128 enabled the expected output is f64x2.sub.
; CHECK-LABEL: sub_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.sub $push[[DIFF:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[DIFF]]{{$}}
define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
  %diff = fsub <2 x double> %x, %y
  ret <2 x double> %diff
}
1607
; Plain vector fdiv; with simd128 enabled the expected output is f64x2.div.
; CHECK-LABEL: div_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.div $push[[QUOT:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[QUOT]]{{$}}
define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
  %quot = fdiv <2 x double> %x, %y
  ret <2 x double> %quot
}
1617
; Plain vector fmul; with simd128 enabled the expected output is f64x2.mul.
; CHECK-LABEL: mul_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.mul $push[[PROD:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[PROD]]{{$}}
define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
  %prod = fmul <2 x double> %x, %y
  ret <2 x double> %prod
}
1627
; Lane-wise square root through the llvm.sqrt intrinsic; with simd128 enabled
; the expected output is a single f64x2.sqrt.
; CHECK-LABEL: sqrt_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.sqrt $push[[ROOT:[0-9]+]]=, $0{{$}}
; SIMD128-NEXT: return $pop[[ROOT]]{{$}}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
define <2 x double> @sqrt_v2f64(<2 x double> %x) {
  %root = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
  ret <2 x double> %root
}
1638