1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
2
3; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
4
5; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128
6
7; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
8
; Check that a non-test run (including the explicit locals pass) at least finishes.
10; RUN: llc < %s -O0 -mattr=+simd128
11; RUN: llc < %s -O2 -mattr=+simd128
12
13; Test that basic SIMD128 arithmetic operations assemble as expected.
14
15target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
16target triple = "wasm32-unknown-unknown"
17
18; ==============================================================================
19; 16 x i8
20; ==============================================================================
21; CHECK-LABEL: add_v16i8:
22; NO-SIMD128-NOT: i8x16
23; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
24; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
25; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise add of two <16 x i8> vectors.
define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %sum = add <16 x i8> %x, %y
  ret <16 x i8> %sum
}
30
31; CHECK-LABEL: sub_v16i8:
32; NO-SIMD128-NOT: i8x16
33; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
34; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
35; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise subtract (%x - %y) of two <16 x i8> vectors.
define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %diff = sub <16 x i8> %x, %y
  ret <16 x i8> %diff
}
40
; i8x16.mul is not in the spec, so the multiply is expected to be scalarized.
42; CHECK-LABEL: mul_v16i8:
43; NO-SIMD128-NOT: i8x16
44; SIMD128-NOT: i8x16.mul
45; SIMD128: i8x16.extract_lane_u
46; SIMD128: i32.mul
; Lane-wise multiply of two <16 x i8> vectors.
define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %prod = mul <16 x i8> %x, %y
  ret <16 x i8> %prod
}
51
52; CHECK-LABEL: min_s_v16i8:
53; NO-SIMD128-NOT: i8x16
54; SIMD128-NEXT: .functype min_s_v16i8 (v128, v128) -> (v128){{$}}
55; SIMD128-NEXT: i8x16.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
56; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed lane-wise minimum, written as compare (slt) + select.
define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %x.lt.y = icmp slt <16 x i8> %x, %y
  %min = select <16 x i1> %x.lt.y, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %min
}
62
63; CHECK-LABEL: min_u_v16i8:
64; NO-SIMD128-NOT: i8x16
65; SIMD128-NEXT: .functype min_u_v16i8 (v128, v128) -> (v128){{$}}
66; SIMD128-NEXT: i8x16.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
67; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned lane-wise minimum, written as compare (ult) + select.
define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %x.lt.y = icmp ult <16 x i8> %x, %y
  %min = select <16 x i1> %x.lt.y, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %min
}
73
74; CHECK-LABEL: max_s_v16i8:
75; NO-SIMD128-NOT: i8x16
76; SIMD128-NEXT: .functype max_s_v16i8 (v128, v128) -> (v128){{$}}
77; SIMD128-NEXT: i8x16.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
78; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed lane-wise maximum, written as compare (sgt) + select.
define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %x.gt.y = icmp sgt <16 x i8> %x, %y
  %max = select <16 x i1> %x.gt.y, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %max
}
84
85; CHECK-LABEL: max_u_v16i8:
86; NO-SIMD128-NOT: i8x16
87; SIMD128-NEXT: .functype max_u_v16i8 (v128, v128) -> (v128){{$}}
88; SIMD128-NEXT: i8x16.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
89; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned lane-wise maximum, written as compare (ugt) + select.
define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %x.gt.y = icmp ugt <16 x i8> %x, %y
  %max = select <16 x i1> %x.gt.y, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %max
}
95
96; CHECK-LABEL: avgr_u_v16i8:
97; NO-SIMD128-NOT: i8x16
98; SIMD128-NEXT: .functype avgr_u_v16i8 (v128, v128) -> (v128){{$}}
99; SIMD128-NEXT: i8x16.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
100; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned rounding average, (x + y + 1) / 2, where both adds carry nuw.
define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %sum = add nuw <16 x i8> %x, %y
  %rounded = add nuw <16 x i8> %sum,
    <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
     i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %avg = udiv <16 x i8> %rounded,
    <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
     i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %avg
}
109
110; CHECK-LABEL: avgr_u_v16i8_wrap:
111; NO-SIMD128-NOT: i8x16
112; SIMD128-NEXT: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128){{$}}
113; SIMD128-NOT: i8x16.avgr_u
; Same (x + y + 1) / 2 shape as the rounding average, but the adds have no nuw
; flag and may wrap, so the avgr_u instruction must not be selected.
define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) {
  %sum = add <16 x i8> %x, %y
  %rounded = add <16 x i8> %sum,
    <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
     i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %avg = udiv <16 x i8> %rounded,
    <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
     i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %avg
}
122
123; CHECK-LABEL: abs_v16i8:
124; NO-SIMD128-NOT: i8x16
125; SIMD128-NEXT: .functype abs_v16i8 (v128) -> (v128){{$}}
126; SIMD128-NEXT: i8x16.abs $push[[R:[0-9]+]]=, $0{{$}}
127; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise absolute value, written as select(x < 0, 0 - x, x).
define <16 x i8> @abs_v16i8(<16 x i8> %x) {
  %negated = sub <16 x i8> zeroinitializer, %x
  %is.neg = icmp slt <16 x i8> %x, zeroinitializer
  %abs = select <16 x i1> %is.neg, <16 x i8> %negated, <16 x i8> %x
  ret <16 x i8> %abs
}
134
135; CHECK-LABEL: neg_v16i8:
136; NO-SIMD128-NOT: i8x16
137; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
138; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
139; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise negation, written as a subtraction from the all-zero vector.
define <16 x i8> @neg_v16i8(<16 x i8> %x) {
  %negated = sub <16 x i8> zeroinitializer, %x
  ret <16 x i8> %negated
}
146
147; CHECK-LABEL: shl_v16i8:
148; NO-SIMD128-NOT: i8x16
149; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
150; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
151; SIMD128-NEXT: return $pop[[R]]{{$}}
; Shift every lane of %v left by the scalar %x, which is splatted across all
; lanes with insertelement + shufflevector.
define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  %head = insertelement <16 x i8> undef, i8 %x, i32 0
  %splat = shufflevector <16 x i8> %head, <16 x i8> undef,
                         <16 x i32> zeroinitializer
  %shifted = shl <16 x i8> %v, %splat
  ret <16 x i8> %shifted
}
160
; Shift every lane left by the constant 5. The shift amount is expected to be
; materialized as a scalar i32.const feeding i8x16.shl.
; CHECK-LABEL: shl_const_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
  %a = shl <16 x i8> %v,
    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %a
}
173
; Shift each lane of %v left by the matching lane of %x. The checks expect the
; operation to be scalarized: both operands are extracted per lane, the shift
; amount is masked with 7, the shift is done as i32, and the result vector is
; rebuilt (splat for lane 0, replace_lane for the final lane 15).
; CHECK-LABEL: shl_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %a = shl <16 x i8> %v, %x
  ret <16 x i8> %a
}
195
196; CHECK-LABEL: shr_s_v16i8:
197; NO-SIMD128-NOT: i8x16
198; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
199; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
200; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic (sign-preserving) right shift of every lane of %v by the scalar
; %x, which is splatted across all lanes first.
define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
  %head = insertelement <16 x i8> undef, i8 %x, i32 0
  %splat = shufflevector <16 x i8> %head, <16 x i8> undef,
                         <16 x i32> zeroinitializer
  %shifted = ashr <16 x i8> %v, %splat
  ret <16 x i8> %shifted
}
209
; Arithmetic right shift of each lane of %v by the matching lane of %x. The
; checks expect scalarization: the value is extracted sign-extended
; (extract_lane_s), the shift amount zero-extended and masked with 7, the shift
; is done as i32, and the vector is rebuilt (splat for lane 0, replace_lane for
; the final lane 15).
; CHECK-LABEL: shr_s_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %a = ashr <16 x i8> %v, %x
  ret <16 x i8> %a
}
231
232; CHECK-LABEL: shr_u_v16i8:
233; NO-SIMD128-NOT: i8x16
234; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
235; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
236; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical (zero-filling) right shift of every lane of %v by the scalar %x,
; which is splatted across all lanes first.
define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
  %head = insertelement <16 x i8> undef, i8 %x, i32 0
  %splat = shufflevector <16 x i8> %head, <16 x i8> undef,
                         <16 x i32> zeroinitializer
  %shifted = lshr <16 x i8> %v, %splat
  ret <16 x i8> %shifted
}
245
; Logical right shift of each lane of %v by the matching lane of %x. The checks
; expect scalarization: both operands are extracted zero-extended, the shift
; amount is masked with 7, the shift is done as i32, and the vector is rebuilt
; (splat for lane 0, replace_lane for the final lane 15).
; CHECK-LABEL: shr_u_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
; Skip 14 lanes
; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
  %a = lshr <16 x i8> %v, %x
  ret <16 x i8> %a
}
267
268; CHECK-LABEL: and_v16i8:
269; NO-SIMD128-NOT: v128
270; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
271; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
272; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <16 x i8> vectors.
define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = and <16 x i8> %x, %y
  ret <16 x i8> %bits
}
277
278; CHECK-LABEL: or_v16i8:
279; NO-SIMD128-NOT: v128
280; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
281; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
282; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <16 x i8> vectors.
define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = or <16 x i8> %x, %y
  ret <16 x i8> %bits
}
287
288; CHECK-LABEL: xor_v16i8:
289; NO-SIMD128-NOT: v128
290; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
291; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
292; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <16 x i8> vectors.
define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %bits = xor <16 x i8> %x, %y
  ret <16 x i8> %bits
}
297
298; CHECK-LABEL: not_v16i8:
299; NO-SIMD128-NOT: v128
300; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
301; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
302; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT, written as XOR with the all-ones vector.
define <16 x i8> @not_v16i8(<16 x i8> %x) {
  %inverted = xor <16 x i8> %x,
    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %inverted
}
310
311; CHECK-LABEL: andnot_v16i8:
312; NO-SIMD128-NOT: v128
313; SIMD128-NEXT: .functype andnot_v16i8 (v128, v128) -> (v128){{$}}
314; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
315; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
316; SIMD128-FAST-NEXT: v128.not
317; SIMD128-FAST-NEXT: v128.and
318; SIMD128-FAST-NEXT: return
; Computes %x & ~%y, with the inversion spelled as XOR with all-ones.
define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) {
  %not.y = xor <16 x i8> %y,
    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %masked = and <16 x i8> %x, %not.y
  ret <16 x i8> %masked
}
326
327; CHECK-LABEL: bitselect_v16i8:
328; NO-SIMD128-NOT: v128
329; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
330; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
331; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
332; SIMD128-FAST-NEXT: v128.and
333; SIMD128-FAST-NEXT: v128.not
334; SIMD128-FAST-NEXT: v128.and
335; SIMD128-FAST-NEXT: v128.or
336; SIMD128-FAST-NEXT: return
; Bitwise select: (%c & %v1) | (~%c & %v2), i.e. bits of %v1 where the mask %c
; is set and bits of %v2 elsewhere.
define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
  %from.v1 = and <16 x i8> %c, %v1
  %not.c = xor <16 x i8> %c,
    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %from.v2 = and <16 x i8> %not.c, %v2
  %merged = or <16 x i8> %from.v1, %from.v2
  ret <16 x i8> %merged
}
346
347; ==============================================================================
348; 8 x i16
349; ==============================================================================
350; CHECK-LABEL: add_v8i16:
351; NO-SIMD128-NOT: i16x8
352; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
353; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
354; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise add of two <8 x i16> vectors.
define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %sum = add <8 x i16> %x, %y
  ret <8 x i16> %sum
}
359
360; CHECK-LABEL: sub_v8i16:
361; NO-SIMD128-NOT: i16x8
362; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
363; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
364; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise subtract (%x - %y) of two <8 x i16> vectors.
define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %diff = sub <8 x i16> %x, %y
  ret <8 x i16> %diff
}
369
370; CHECK-LABEL: mul_v8i16:
371; NO-SIMD128-NOT: i16x8
372; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
373; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
374; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise multiply of two <8 x i16> vectors.
define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %prod = mul <8 x i16> %x, %y
  ret <8 x i16> %prod
}
379
380; CHECK-LABEL: min_s_v8i16:
381; NO-SIMD128-NOT: i16x8
382; SIMD128-NEXT: .functype min_s_v8i16 (v128, v128) -> (v128){{$}}
383; SIMD128-NEXT: i16x8.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
384; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed lane-wise minimum, written as compare (slt) + select.
define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %x.lt.y = icmp slt <8 x i16> %x, %y
  %min = select <8 x i1> %x.lt.y, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %min
}
390
391; CHECK-LABEL: min_u_v8i16:
392; NO-SIMD128-NOT: i16x8
393; SIMD128-NEXT: .functype min_u_v8i16 (v128, v128) -> (v128){{$}}
394; SIMD128-NEXT: i16x8.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
395; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned lane-wise minimum, written as compare (ult) + select.
define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %x.lt.y = icmp ult <8 x i16> %x, %y
  %min = select <8 x i1> %x.lt.y, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %min
}
401
402; CHECK-LABEL: max_s_v8i16:
403; NO-SIMD128-NOT: i16x8
404; SIMD128-NEXT: .functype max_s_v8i16 (v128, v128) -> (v128){{$}}
405; SIMD128-NEXT: i16x8.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
406; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed lane-wise maximum, written as compare (sgt) + select.
define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %x.gt.y = icmp sgt <8 x i16> %x, %y
  %max = select <8 x i1> %x.gt.y, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %max
}
412
413; CHECK-LABEL: max_u_v8i16:
414; NO-SIMD128-NOT: i16x8
415; SIMD128-NEXT: .functype max_u_v8i16 (v128, v128) -> (v128){{$}}
416; SIMD128-NEXT: i16x8.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
417; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned lane-wise maximum, written as compare (ugt) + select.
define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %x.gt.y = icmp ugt <8 x i16> %x, %y
  %max = select <8 x i1> %x.gt.y, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %max
}
423
424; CHECK-LABEL: avgr_u_v8i16:
425; NO-SIMD128-NOT: i16x8
426; SIMD128-NEXT: .functype avgr_u_v8i16 (v128, v128) -> (v128){{$}}
427; SIMD128-NEXT: i16x8.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
428; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned rounding average, (x + y + 1) / 2, where both adds carry nuw.
define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %sum = add nuw <8 x i16> %x, %y
  %rounded = add nuw <8 x i16> %sum,
    <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = udiv <8 x i16> %rounded,
    <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %avg
}
435
436; CHECK-LABEL: avgr_u_v8i16_wrap:
437; NO-SIMD128-NOT: i16x8
438; SIMD128-NEXT: .functype avgr_u_v8i16_wrap (v128, v128) -> (v128){{$}}
439; SIMD128-NOT: i16x8.avgr_u
; Same (x + y + 1) / 2 shape as the rounding average, but the adds have no nuw
; flag and may wrap, so the avgr_u instruction must not be selected.
define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) {
  %sum = add <8 x i16> %x, %y
  %rounded = add <8 x i16> %sum,
    <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = udiv <8 x i16> %rounded,
    <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %avg
}
446
447; CHECK-LABEL: abs_v8i16:
448; NO-SIMD128-NOT: i16x8
449; SIMD128-NEXT: .functype abs_v8i16 (v128) -> (v128){{$}}
450; SIMD128-NEXT: i16x8.abs $push[[R:[0-9]+]]=, $0{{$}}
451; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise absolute value, written as select(x < 0, 0 - x, x).
define <8 x i16> @abs_v8i16(<8 x i16> %x) {
  %negated = sub <8 x i16> zeroinitializer, %x
  %is.neg = icmp slt <8 x i16> %x, zeroinitializer
  %abs = select <8 x i1> %is.neg, <8 x i16> %negated, <8 x i16> %x
  ret <8 x i16> %abs
}
458
459; CHECK-LABEL: neg_v8i16:
460; NO-SIMD128-NOT: i16x8
461; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
462; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
463; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise negation, written as a subtraction from the all-zero vector.
define <8 x i16> @neg_v8i16(<8 x i16> %x) {
  %negated = sub <8 x i16> zeroinitializer, %x
  ret <8 x i16> %negated
}
469
470; CHECK-LABEL: shl_v8i16:
471; NO-SIMD128-NOT: i16x8
472; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
473; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
474; SIMD128-NEXT: return $pop[[R]]{{$}}
; Shift every lane of %v left by the scalar %x, which is splatted across all
; lanes with insertelement + shufflevector.
define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
  %head = insertelement <8 x i16> undef, i16 %x, i32 0
  %splat = shufflevector <8 x i16> %head, <8 x i16> undef,
                         <8 x i32> zeroinitializer
  %shifted = shl <8 x i16> %v, %splat
  ret <8 x i16> %shifted
}
482
; Shift every lane left by the constant 5. The shift amount is expected to be
; materialized as a scalar i32.const feeding i16x8.shl.
; CHECK-LABEL: shl_const_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
  %a = shl <8 x i16> %v,
    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %a
}
494
495; CHECK-LABEL: shl_vec_v8i16:
496; NO-SIMD128-NOT: i16x8
497; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
498; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
499; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
500; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
501; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
502; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
503; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
504; Skip 6 lanes
505; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
506; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
507; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
508; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
509; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
510; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
511; SIMD128-NEXT: return $pop[[R]]{{$}}
; Shift each lane of %v left by the matching lane of %x (non-splat amount).
define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = shl <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
516
517; CHECK-LABEL: shr_s_v8i16:
518; NO-SIMD128-NOT: i16x8
519; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
520; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
521; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic (sign-preserving) right shift of every lane of %v by the scalar
; %x, which is splatted across all lanes first.
define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
  %head = insertelement <8 x i16> undef, i16 %x, i32 0
  %splat = shufflevector <8 x i16> %head, <8 x i16> undef,
                         <8 x i32> zeroinitializer
  %shifted = ashr <8 x i16> %v, %splat
  ret <8 x i16> %shifted
}
529
530; CHECK-LABEL: shr_s_vec_v8i16:
531; NO-SIMD128-NOT: i16x8
532; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
533; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
534; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
535; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
536; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
537; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
538; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
539; Skip 6 lanes
540; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
541; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
542; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
543; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
544; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
545; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
546; SIMD128-NEXT: return $pop[[R]]{{$}}
; Arithmetic right shift of each lane of %v by the matching lane of %x.
define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = ashr <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
551
552; CHECK-LABEL: shr_u_v8i16:
553; NO-SIMD128-NOT: i16x8
554; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
555; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
556; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical (zero-filling) right shift of every lane of %v by the scalar %x,
; which is splatted across all lanes first.
define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
  %head = insertelement <8 x i16> undef, i16 %x, i32 0
  %splat = shufflevector <8 x i16> %head, <8 x i16> undef,
                         <8 x i32> zeroinitializer
  %shifted = lshr <8 x i16> %v, %splat
  ret <8 x i16> %shifted
}
564
565; CHECK-LABEL: shr_u_vec_v8i16:
566; NO-SIMD128-NOT: i16x8
567; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
568; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
569; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
570; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
571; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
572; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
573; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
574; Skip 6 lanes
575; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
576; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
577; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
578; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
579; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
580; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
581; SIMD128-NEXT: return $pop[[R]]{{$}}
; Logical right shift of each lane of %v by the matching lane of %x.
define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
  %shifted = lshr <8 x i16> %v, %x
  ret <8 x i16> %shifted
}
586
587; CHECK-LABEL: and_v8i16:
588; NO-SIMD128-NOT: v128
589; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
590; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
591; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise AND of two <8 x i16> vectors.
define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = and <8 x i16> %x, %y
  ret <8 x i16> %bits
}
596
597; CHECK-LABEL: or_v8i16:
598; NO-SIMD128-NOT: v128
599; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
600; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
601; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise OR of two <8 x i16> vectors.
define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = or <8 x i16> %x, %y
  ret <8 x i16> %bits
}
606
607; CHECK-LABEL: xor_v8i16:
608; NO-SIMD128-NOT: v128
609; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
610; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
611; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise XOR of two <8 x i16> vectors.
define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %bits = xor <8 x i16> %x, %y
  ret <8 x i16> %bits
}
616
617; CHECK-LABEL: not_v8i16:
618; NO-SIMD128-NOT: v128
619; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
620; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
621; SIMD128-NEXT: return $pop[[R]]{{$}}
; Bitwise NOT, written as XOR with the all-ones vector.
define <8 x i16> @not_v8i16(<8 x i16> %x) {
  %inverted = xor <8 x i16> %x,
    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %inverted
}
627
628; CHECK-LABEL: andnot_v8i16:
629; NO-SIMD128-NOT: v128
630; SIMD128-NEXT: .functype andnot_v8i16 (v128, v128) -> (v128){{$}}
631; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
632; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
633; SIMD128-FAST-NEXT: v128.not
634; SIMD128-FAST-NEXT: v128.and
635; SIMD128-FAST-NEXT: return
; Computes %x & ~%y, with the inversion spelled as XOR with all-ones.
define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) {
  %not.y = xor <8 x i16> %y,
    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %masked = and <8 x i16> %x, %not.y
  ret <8 x i16> %masked
}
642
643; CHECK-LABEL: bitselect_v8i16:
644; NO-SIMD128-NOT: v128
645; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
646; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
647; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
648; SIMD128-FAST-NEXT: v128.and
649; SIMD128-FAST-NEXT: v128.not
650; SIMD128-FAST-NEXT: v128.and
651; SIMD128-FAST-NEXT: v128.or
652; SIMD128-FAST-NEXT: return
; Bitwise select: (%v1 & %c) | (%v2 & ~%c). Operands are deliberately written
; in the commuted order (value first, all-ones constant first in the XOR).
define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
  %from.v1 = and <8 x i16> %v1, %c
  %not.c = xor <8 x i16>
    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, %c
  %from.v2 = and <8 x i16> %v2, %not.c
  %merged = or <8 x i16> %from.v1, %from.v2
  ret <8 x i16> %merged
}
662
663; ==============================================================================
664; 4 x i32
665; ==============================================================================
666; CHECK-LABEL: add_v4i32:
667; NO-SIMD128-NOT: i32x4
668; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
669; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
670; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise add of two <4 x i32> vectors.
define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %sum = add <4 x i32> %x, %y
  ret <4 x i32> %sum
}
675
676; CHECK-LABEL: sub_v4i32:
677; NO-SIMD128-NOT: i32x4
678; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
679; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
680; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise subtract (%x - %y) of two <4 x i32> vectors.
define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %diff = sub <4 x i32> %x, %y
  ret <4 x i32> %diff
}
685
686; CHECK-LABEL: mul_v4i32:
687; NO-SIMD128-NOT: i32x4
688; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
689; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
690; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise multiply of two <4 x i32> vectors.
define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %prod = mul <4 x i32> %x, %y
  ret <4 x i32> %prod
}
695
696; CHECK-LABEL: min_s_v4i32:
697; NO-SIMD128-NOT: i32x4
698; SIMD128-NEXT: .functype min_s_v4i32 (v128, v128) -> (v128){{$}}
699; SIMD128-NEXT: i32x4.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
700; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed lane-wise minimum, written as compare (slt) + select.
define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %x.lt.y = icmp slt <4 x i32> %x, %y
  %min = select <4 x i1> %x.lt.y, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %min
}
706
707; CHECK-LABEL: min_u_v4i32:
708; NO-SIMD128-NOT: i32x4
709; SIMD128-NEXT: .functype min_u_v4i32 (v128, v128) -> (v128){{$}}
710; SIMD128-NEXT: i32x4.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
711; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned lane-wise minimum, written as compare (ult) + select.
define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %x.lt.y = icmp ult <4 x i32> %x, %y
  %min = select <4 x i1> %x.lt.y, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %min
}
717
718; CHECK-LABEL: max_s_v4i32:
719; NO-SIMD128-NOT: i32x4
720; SIMD128-NEXT: .functype max_s_v4i32 (v128, v128) -> (v128){{$}}
721; SIMD128-NEXT: i32x4.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
722; SIMD128-NEXT: return $pop[[R]]{{$}}
; Signed lane-wise maximum, written as compare (sgt) + select.
define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %x.gt.y = icmp sgt <4 x i32> %x, %y
  %max = select <4 x i1> %x.gt.y, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %max
}
728
729; CHECK-LABEL: max_u_v4i32:
730; NO-SIMD128-NOT: i32x4
731; SIMD128-NEXT: .functype max_u_v4i32 (v128, v128) -> (v128){{$}}
732; SIMD128-NEXT: i32x4.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
733; SIMD128-NEXT: return $pop[[R]]{{$}}
; Unsigned lane-wise maximum, written as compare (ugt) + select.
define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %x.gt.y = icmp ugt <4 x i32> %x, %y
  %max = select <4 x i1> %x.gt.y, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %max
}
739
740; CHECK-LABEL: abs_v4i32:
741; NO-SIMD128-NOT: i32x4
742; SIMD128-NEXT: .functype abs_v4i32 (v128) -> (v128){{$}}
743; SIMD128-NEXT: i32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
744; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise absolute value, written as select(x < 0, 0 - x, x).
define <4 x i32> @abs_v4i32(<4 x i32> %x) {
  %negated = sub <4 x i32> zeroinitializer, %x
  %is.neg = icmp slt <4 x i32> %x, zeroinitializer
  %abs = select <4 x i1> %is.neg, <4 x i32> %negated, <4 x i32> %x
  ret <4 x i32> %abs
}
751
752; CHECK-LABEL: neg_v4i32:
753; NO-SIMD128-NOT: i32x4
754; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
755; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
756; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise negation, written as a subtraction from the all-zero vector.
define <4 x i32> @neg_v4i32(<4 x i32> %x) {
  %negated = sub <4 x i32> zeroinitializer, %x
  ret <4 x i32> %negated
}
761
762; CHECK-LABEL: shl_v4i32:
763; NO-SIMD128-NOT: i32x4
764; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
765; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
766; SIMD128-NEXT: return $pop[[R]]{{$}}
767define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { ; shift every lane of %v left by the same scalar amount %x
768  %t = insertelement <4 x i32> undef, i32 %x, i32 0 ; put the shift amount in lane 0
769  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
770    <4 x i32> <i32 0, i32 0, i32 0, i32 0> ; all-zero mask broadcasts lane 0 to all four lanes
771  %a = shl <4 x i32> %v, %s
772  ret <4 x i32> %a
773}
774
775; CHECK-LABEL: shl_const_v4i32:
776; NO-SIMD128-NOT: i32x4
777; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
778; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
779; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
780; SIMD128-NEXT: return $pop[[R]]{{$}}
781define <4 x i32> @shl_const_v4i32(<4 x i32> %v) { ; shift every lane left by a constant splat
782  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; uniform amount of 5 in all lanes
783  ret <4 x i32> %a
784}
785
786; CHECK-LABEL: shl_vec_v4i32:
787; NO-SIMD128-NOT: i32x4
788; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
789; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
790; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
791; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
792; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
793; Skip 2 lanes
794; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
795; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
796; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
797; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
798; SIMD128-NEXT: return $pop[[R]]{{$}}
799define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; left shift where each lane has its own amount
800  %a = shl <4 x i32> %v, %x ; non-uniform amounts; the directives above expect a lane-by-lane extract / scalar shift / reinsert expansion
801  ret <4 x i32> %a
802}
803
804; CHECK-LABEL: shr_s_v4i32:
805; NO-SIMD128-NOT: i32x4
806; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
807; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
808; SIMD128-NEXT: return $pop[[R]]{{$}}
809define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { ; arithmetic right shift of every lane by the same scalar amount %x
810  %t = insertelement <4 x i32> undef, i32 %x, i32 0 ; put the shift amount in lane 0
811  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
812    <4 x i32> <i32 0, i32 0, i32 0, i32 0> ; all-zero mask broadcasts lane 0 to all four lanes
813  %a = ashr <4 x i32> %v, %s
814  ret <4 x i32> %a
815}
816
817; CHECK-LABEL: shr_s_vec_v4i32:
818; NO-SIMD128-NOT: i32x4
819; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
820; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
821; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
822; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
823; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
824; Skip 2 lanes
825; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
826; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
827; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
828; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
829; SIMD128-NEXT: return $pop[[R]]{{$}}
830define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; arithmetic right shift where each lane has its own amount
831  %a = ashr <4 x i32> %v, %x ; non-uniform amounts; the directives above expect a lane-by-lane extract / scalar shift / reinsert expansion
832  ret <4 x i32> %a
833}
834
835; CHECK-LABEL: shr_u_v4i32:
836; NO-SIMD128-NOT: i32x4
837; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
838; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
839; SIMD128-NEXT: return $pop[[R]]{{$}}
840define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { ; logical right shift of every lane by the same scalar amount %x
841  %t = insertelement <4 x i32> undef, i32 %x, i32 0 ; put the shift amount in lane 0
842  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
843    <4 x i32> <i32 0, i32 0, i32 0, i32 0> ; all-zero mask broadcasts lane 0 to all four lanes
844  %a = lshr <4 x i32> %v, %s
845  ret <4 x i32> %a
846}
847
848; CHECK-LABEL: shr_u_vec_v4i32:
849; NO-SIMD128-NOT: i32x4
850; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
851; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
852; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
853; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
854; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
855; Skip 2 lanes
856; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
857; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
858; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
859; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
860; SIMD128-NEXT: return $pop[[R]]{{$}}
861define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; logical right shift where each lane has its own amount
862  %a = lshr <4 x i32> %v, %x ; non-uniform amounts; the directives above expect a lane-by-lane extract / scalar shift / reinsert expansion
863  ret <4 x i32> %a
864}
865
866; CHECK-LABEL: and_v4i32:
867; NO-SIMD128-NOT: v128
868; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
869; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
870; SIMD128-NEXT: return $pop[[R]]{{$}}
871define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { ; bitwise AND of two <4 x i32> vectors
872  %a = and <4 x i32> %x, %y
873  ret <4 x i32> %a
874}
875
876; CHECK-LABEL: or_v4i32:
877; NO-SIMD128-NOT: v128
878; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
879; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
880; SIMD128-NEXT: return $pop[[R]]{{$}}
881define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { ; bitwise OR of two <4 x i32> vectors
882  %a = or <4 x i32> %x, %y
883  ret <4 x i32> %a
884}
885
886; CHECK-LABEL: xor_v4i32:
887; NO-SIMD128-NOT: v128
888; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
889; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
890; SIMD128-NEXT: return $pop[[R]]{{$}}
891define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { ; bitwise XOR of two <4 x i32> vectors
892  %a = xor <4 x i32> %x, %y
893  ret <4 x i32> %a
894}
895
896; CHECK-LABEL: not_v4i32:
897; NO-SIMD128-NOT: v128
898; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
899; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
900; SIMD128-NEXT: return $pop[[R]]{{$}}
901define <4 x i32> @not_v4i32(<4 x i32> %x) { ; bitwise NOT of a <4 x i32> vector
902  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; xor with all-ones flips every bit
903  ret <4 x i32> %a
904}
905
906; CHECK-LABEL: andnot_v4i32:
907; NO-SIMD128-NOT: v128
908; SIMD128-NEXT: .functype andnot_v4i32 (v128, v128) -> (v128){{$}}
909; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
910; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
911; SIMD128-FAST-NEXT: v128.not
912; SIMD128-FAST-NEXT: v128.and
913; SIMD128-FAST-NEXT: return
914define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { ; computes %x & ~%y
915  %inv_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> ; xor with all-ones is the bitwise NOT of %y
916  %a = and <4 x i32> %x, %inv_y
917  ret <4 x i32> %a
918}
919
920; CHECK-LABEL: bitselect_v4i32:
921; NO-SIMD128-NOT: v128
922; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
923; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
924; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
925; SIMD128-FAST-NEXT: v128.not
926; SIMD128-FAST-NEXT: v128.and
927; SIMD128-FAST-NEXT: v128.and
928; SIMD128-FAST-NEXT: v128.or
929; SIMD128-FAST-NEXT: return
930define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ; bitwise select: (%c & %v1) | (~%c & %v2)
931  %masked_v1 = and <4 x i32> %c, %v1 ; bits of %v1 where the mask %c is set
932  %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c ; ~%c
933  %masked_v2 = and <4 x i32> %inv_mask, %v2 ; bits of %v2 where the mask %c is clear
934  %a = or <4 x i32> %masked_v2, %masked_v1
935  ret <4 x i32> %a
936}
937
938; ==============================================================================
939; 2 x i64
940; ==============================================================================
941; CHECK-LABEL: add_v2i64:
942; NO-SIMD128-NOT: i64x2
943; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
944; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
945; SIMD128-NEXT: return $pop[[R]]{{$}}
946define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) { ; per-lane integer addition of two <2 x i64> vectors
947  %a = add <2 x i64> %x, %y
948  ret <2 x i64> %a
949}
950
951; CHECK-LABEL: sub_v2i64:
952; NO-SIMD128-NOT: i64x2
953; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
954; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
955; SIMD128-NEXT: return $pop[[R]]{{$}}
956define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) { ; per-lane integer subtraction %x - %y
957  %a = sub <2 x i64> %x, %y
958  ret <2 x i64> %a
959}
960
961; CHECK-LABEL: mul_v2i64:
962; NO-SIMD128-NOT: i64x2
963; SIMD128-NEXT: .functype mul_v2i64 (v128, v128) -> (v128){{$}}
964; SIMD128-NEXT: i64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
965; SIMD128-NEXT: return $pop[[R]]{{$}}
966define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) { ; per-lane integer multiplication of two <2 x i64> vectors
967  %a = mul <2 x i64> %x, %y
968  ret <2 x i64> %a
969}
970
971; CHECK-LABEL: abs_v2i64:
972; NO-SIMD128-NOT: i64x2
973; SIMD128-NEXT: .functype abs_v2i64 (v128) -> (v128){{$}}
974; SIMD128-NEXT: i64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
975; SIMD128-NEXT: return $pop[[R]]{{$}}
976define <2 x i64> @abs_v2i64(<2 x i64> %x) { ; per-lane integer absolute value in negate / compare / select form
977  %a = sub <2 x i64> zeroinitializer, %x ; 0 - %x, the negated input
978  %b = icmp slt <2 x i64> %x, zeroinitializer ; mask of lanes where %x is negative
979  %c = select <2 x i1> %b, <2 x i64> %a, <2 x i64> %x ; negated value in negative lanes, original value elsewhere
980  ret <2 x i64> %c
981}
982
983; CHECK-LABEL: neg_v2i64:
984; NO-SIMD128-NOT: i64x2
985; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
986; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
987; SIMD128-NEXT: return $pop[[R]]{{$}}
988define <2 x i64> @neg_v2i64(<2 x i64> %x) { ; per-lane integer negation
989  %a = sub <2 x i64> <i64 0, i64 0>, %x ; negation expressed as 0 - %x
990  ret <2 x i64> %a
991}
992
993; CHECK-LABEL: shl_v2i64:
994; NO-SIMD128-NOT: i64x2
995; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
996; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
997; SIMD128-NEXT: return $pop[[R]]{{$}}
998define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) { ; shift both lanes left by one i32 amount, zero-extended to the lane width
999  %x2 = zext i32 %x to i64 ; widen the amount; the directives above expect the i32 argument to be used directly
1000  %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; put the widened amount in lane 0
1001  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1002  %a = shl <2 x i64> %v, %s
1003  ret <2 x i64> %a
1004}
1005
1006; CHECK-LABEL: shl_sext_v2i64:
1007; NO-SIMD128-NOT: i64x2
1008; SIMD128-NEXT: .functype shl_sext_v2i64 (v128, i32) -> (v128){{$}}
1009; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
1010; SIMD128-NEXT: return $pop[[R]]{{$}}
1011define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) { ; shift both lanes left by one i32 amount, sign-extended to the lane width
1012  %x2 = sext i32 %x to i64 ; widen the amount; the directives above expect the i32 argument to be used directly
1013  %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; put the widened amount in lane 0
1014  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1015  %a = shl <2 x i64> %v, %s
1016  ret <2 x i64> %a
1017}
1018
1019; CHECK-LABEL: shl_noext_v2i64:
1020; NO-SIMD128-NOT: i64x2
1021; SIMD128-NEXT: .functype shl_noext_v2i64 (v128, i64) -> (v128){{$}}
1022; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1023; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1024; SIMD128-NEXT: return $pop[[R]]{{$}}
1025define <2 x i64> @shl_noext_v2i64(<2 x i64> %v, i64 %x) { ; shift both lanes left by an i64 amount (no extension in the IR)
1026  %t = insertelement <2 x i64> undef, i64 %x, i32 0 ; put the amount in lane 0; the directives above expect an i32.wrap_i64 of it
1027  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1028  %a = shl <2 x i64> %v, %s
1029  ret <2 x i64> %a
1030}
1031
1032; CHECK-LABEL: shl_const_v2i64:
1033; NO-SIMD128-NOT: i64x2
1034; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
1035; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1036; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1037; SIMD128-NEXT: return $pop[[R]]{{$}}
1038define <2 x i64> @shl_const_v2i64(<2 x i64> %v) { ; shift both lanes left by a constant splat
1039  %a = shl <2 x i64> %v, <i64 5, i64 5> ; uniform amount of 5 in both lanes
1040  ret <2 x i64> %a
1041}
1042
1043; CHECK-LABEL: shl_vec_v2i64:
1044; NO-SIMD128-NOT: i64x2
1045; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
1046; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1047; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1048; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1049; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1050; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1051; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1052; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1053; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1054; SIMD128-NEXT: return $pop[[R]]{{$}}
1055define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { ; left shift where each lane has its own amount
1056  %a = shl <2 x i64> %v, %x ; non-uniform amounts; the directives above expect a lane-by-lane extract / scalar shift / reinsert expansion
1057  ret <2 x i64> %a
1058}
1059
1060; CHECK-LABEL: shr_s_v2i64:
1061; NO-SIMD128-NOT: i64x2
1062; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
1063; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
1064; SIMD128-NEXT: return $pop[[R]]{{$}}
1065define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) { ; arithmetic right shift of both lanes by one i32 amount, zero-extended
1066  %x2 = zext i32 %x to i64 ; widen the amount; the directives above expect the i32 argument to be used directly
1067  %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; put the widened amount in lane 0
1068  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1069  %a = ashr <2 x i64> %v, %s
1070  ret <2 x i64> %a
1071}
1072
1073; CHECK-LABEL: shr_s_sext_v2i64:
1074; NO-SIMD128-NOT: i64x2
1075; SIMD128-NEXT: .functype shr_s_sext_v2i64 (v128, i32) -> (v128){{$}}
1076; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
1077; SIMD128-NEXT: return $pop[[R]]{{$}}
1078define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) { ; arithmetic right shift of both lanes by one i32 amount, sign-extended
1079  %x2 = sext i32 %x to i64 ; widen the amount; the directives above expect the i32 argument to be used directly
1080  %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; put the widened amount in lane 0
1081  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1082  %a = ashr <2 x i64> %v, %s
1083  ret <2 x i64> %a
1084}
1085
1086; CHECK-LABEL: shr_s_noext_v2i64:
1087; NO-SIMD128-NOT: i64x2
1088; SIMD128-NEXT: .functype shr_s_noext_v2i64 (v128, i64) -> (v128){{$}}
1089; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1090; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1091; SIMD128-NEXT: return $pop[[R]]{{$}}
1092define <2 x i64> @shr_s_noext_v2i64(<2 x i64> %v, i64 %x) { ; arithmetic right shift of both lanes by an i64 amount (no extension in the IR)
1093  %t = insertelement <2 x i64> undef, i64 %x, i32 0 ; put the amount in lane 0; the directives above expect an i32.wrap_i64 of it
1094  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1095  %a = ashr <2 x i64> %v, %s
1096  ret <2 x i64> %a
1097}
1098
1099; CHECK-LABEL: shr_s_const_v2i64:
1100; NO-SIMD128-NOT: i64x2
1101; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
1102; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1103; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1104; SIMD128-NEXT: return $pop[[R]]{{$}}
1105define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) { ; arithmetic right shift of both lanes by a constant splat
1106  %a = ashr <2 x i64> %v, <i64 5, i64 5> ; uniform amount of 5 in both lanes
1107  ret <2 x i64> %a
1108}
1109
1110; CHECK-LABEL: shr_s_vec_v2i64:
1111; NO-SIMD128-NOT: i64x2
1112; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
1113; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1114; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1115; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1116; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1117; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1118; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1119; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1120; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1121; SIMD128-NEXT: return $pop[[R]]{{$}}
1122define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { ; arithmetic right shift where each lane has its own amount
1123  %a = ashr <2 x i64> %v, %x ; non-uniform amounts; the directives above expect a lane-by-lane extract / scalar shift / reinsert expansion
1124  ret <2 x i64> %a
1125}
1126
1127; CHECK-LABEL: shr_u_v2i64:
1128; NO-SIMD128-NOT: i64x2
1129; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
1130; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
1131; SIMD128-NEXT: return $pop[[R]]{{$}}
1132define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) { ; logical right shift of both lanes by one i32 amount, zero-extended
1133  %x2 = zext i32 %x to i64 ; widen the amount; the directives above expect the i32 argument to be used directly
1134  %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; put the widened amount in lane 0
1135  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1136  %a = lshr <2 x i64> %v, %s
1137  ret <2 x i64> %a
1138}
1139
1140; CHECK-LABEL: shr_u_sext_v2i64:
1141; NO-SIMD128-NOT: i64x2
1142; SIMD128-NEXT: .functype shr_u_sext_v2i64 (v128, i32) -> (v128){{$}}
1143; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
1144; SIMD128-NEXT: return $pop[[R]]{{$}}
1145define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) { ; logical right shift of both lanes by one i32 amount, sign-extended
1146  %x2 = sext i32 %x to i64 ; widen the amount; the directives above expect the i32 argument to be used directly
1147  %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; put the widened amount in lane 0
1148  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1149  %a = lshr <2 x i64> %v, %s
1150  ret <2 x i64> %a
1151}
1152
1153; CHECK-LABEL: shr_u_noext_v2i64:
1154; NO-SIMD128-NOT: i64x2
1155; SIMD128-NEXT: .functype shr_u_noext_v2i64 (v128, i64) -> (v128){{$}}
1156; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1157; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1158; SIMD128-NEXT: return $pop[[R]]{{$}}
1159define <2 x i64> @shr_u_noext_v2i64(<2 x i64> %v, i64 %x) { ; logical right shift of both lanes by an i64 amount (no extension in the IR)
1160  %t = insertelement <2 x i64> undef, i64 %x, i32 0 ; put the amount in lane 0; the directives above expect an i32.wrap_i64 of it
1161  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; broadcast lane 0 to both lanes
1162  %a = lshr <2 x i64> %v, %s
1163  ret <2 x i64> %a
1164}
1165
1166; CHECK-LABEL: shr_u_const_v2i64:
1167; NO-SIMD128-NOT: i64x2
1168; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
1169; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1170; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1171; SIMD128-NEXT: return $pop[[R]]{{$}}
1172define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) { ; logical right shift of both lanes by a constant splat
1173  %a = lshr <2 x i64> %v, <i64 5, i64 5> ; uniform amount of 5 in both lanes
1174  ret <2 x i64> %a
1175}
1176
1177; CHECK-LABEL: shr_u_vec_v2i64:
1178; NO-SIMD128-NOT: i64x2
1179; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
1180; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1181; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1182; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1183; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1184; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1185; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1186; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1187; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1188; SIMD128-NEXT: return $pop[[R]]{{$}}
1189define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { ; logical right shift where each lane has its own amount
1190  %a = lshr <2 x i64> %v, %x ; non-uniform amounts; the directives above expect a lane-by-lane extract / scalar shift / reinsert expansion
1191  ret <2 x i64> %a
1192}
1193
1194; CHECK-LABEL: and_v2i64:
1195; NO-SIMD128-NOT: v128
1196; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
1197; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
1198; SIMD128-NEXT: return $pop[[R]]{{$}}
1199define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) { ; bitwise AND of two <2 x i64> vectors
1200  %a = and <2 x i64> %x, %y
1201  ret <2 x i64> %a
1202}
1203
1204; CHECK-LABEL: or_v2i64:
1205; NO-SIMD128-NOT: v128
1206; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
1207; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
1208; SIMD128-NEXT: return $pop[[R]]{{$}}
1209define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) { ; bitwise OR of two <2 x i64> vectors
1210  %a = or <2 x i64> %x, %y
1211  ret <2 x i64> %a
1212}
1213
1214; CHECK-LABEL: xor_v2i64:
1215; NO-SIMD128-NOT: v128
1216; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
1217; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
1218; SIMD128-NEXT: return $pop[[R]]{{$}}
1219define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) { ; bitwise XOR of two <2 x i64> vectors
1220  %a = xor <2 x i64> %x, %y
1221  ret <2 x i64> %a
1222}
1223
1224; CHECK-LABEL: not_v2i64:
1225; NO-SIMD128-NOT: v128
1226; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
1227; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
1228; SIMD128-NEXT: return $pop[[R]]{{$}}
1229define <2 x i64> @not_v2i64(<2 x i64> %x) { ; bitwise NOT of a <2 x i64> vector
1230  %a = xor <2 x i64> %x, <i64 -1, i64 -1> ; xor with all-ones flips every bit
1231  ret <2 x i64> %a
1232}
1233
1234; CHECK-LABEL: andnot_v2i64:
1235; NO-SIMD128-NOT: v128
1236; SIMD128-NEXT: .functype andnot_v2i64 (v128, v128) -> (v128){{$}}
1237; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
1238; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1239; SIMD128-FAST-NEXT: v128.not
1240; SIMD128-FAST-NEXT: v128.and
1241; SIMD128-FAST-NEXT: return
1242define <2 x i64> @andnot_v2i64(<2 x i64> %x, <2 x i64> %y) { ; computes %x & ~%y
1243  %inv_y = xor <2 x i64> %y, <i64 -1, i64 -1> ; xor with all-ones is the bitwise NOT of %y
1244  %a = and <2 x i64> %x, %inv_y
1245  ret <2 x i64> %a
1246}
1247
1248; CHECK-LABEL: bitselect_v2i64:
1249; NO-SIMD128-NOT: v128
1250; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
1251; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
1252; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1253; SIMD128-FAST-NEXT: v128.not
1254; SIMD128-FAST-NEXT: v128.and
1255; SIMD128-FAST-NEXT: v128.and
1256; SIMD128-FAST-NEXT: v128.or
1257; SIMD128-FAST-NEXT: return
1258define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { ; bitwise select: (%v1 & %c) | (%v2 & ~%c)
1259  %masked_v1 = and <2 x i64> %v1, %c ; bits of %v1 where the mask %c is set
1260  %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c ; ~%c
1261  %masked_v2 = and <2 x i64> %v2, %inv_mask ; bits of %v2 where the mask %c is clear
1262  %a = or <2 x i64> %masked_v2, %masked_v1
1263  ret <2 x i64> %a
1264}
1265
1266; ==============================================================================
1267; 4 x float
1268; ==============================================================================
1269; CHECK-LABEL: neg_v4f32:
1270; NO-SIMD128-NOT: f32x4
1271; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
1272; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
1273; SIMD128-NEXT: return $pop[[R]]{{$}}
1274define <4 x float> @neg_v4f32(<4 x float> %x) { ; per-lane float negation written as 0.0 - %x
1275  ; nsz makes this semantically equivalent to flipping sign bit
1276  %a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
1277  ret <4 x float> %a
1278}
1279
1280; CHECK-LABEL: abs_v4f32:
1281; NO-SIMD128-NOT: f32x4
1282; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
1283; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
1284; SIMD128-NEXT: return $pop[[R]]{{$}}
1285declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
1286define <4 x float> @abs_v4f32(<4 x float> %x) { ; per-lane float absolute value through the llvm.fabs intrinsic
1287  %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
1288  ret <4 x float> %a
1289}
1290
1291; CHECK-LABEL: min_unordered_v4f32:
1292; NO-SIMD128-NOT: f32x4
1293; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
1294; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1295; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1296; SIMD128-NEXT: return $pop[[R]]{{$}}
1297define <4 x float> @min_unordered_v4f32(<4 x float> %x) { ; minimum of %x and a 5.0 splat using an unordered compare + select
1298  %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.> ; ule: true when %x <= 5.0 or either operand is NaN
1299  %a = select <4 x i1> %cmps, <4 x float> %x,
1300    <4 x float> <float 5., float 5., float 5., float 5.> ; %x where the compare holds, otherwise 5.0
1301  ret <4 x float> %a
1302}
1303
1304; CHECK-LABEL: max_unordered_v4f32:
1305; NO-SIMD128-NOT: f32x4
1306; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
1307; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1308; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1309; SIMD128-NEXT: return $pop[[R]]{{$}}
1310define <4 x float> @max_unordered_v4f32(<4 x float> %x) { ; maximum of %x and a 5.0 splat using an unordered compare + select
1311  %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.> ; uge: true when %x >= 5.0 or either operand is NaN
1312  %a = select <4 x i1> %cmps, <4 x float> %x,
1313    <4 x float> <float 5., float 5., float 5., float 5.> ; %x where the compare holds, otherwise 5.0
1314  ret <4 x float> %a
1315}
1316
1317; CHECK-LABEL: min_ordered_v4f32:
1318; NO-SIMD128-NOT: f32x4
1319; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
1320; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1321; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1322; SIMD128-NEXT: return $pop[[R]]{{$}}
1323define <4 x float> @min_ordered_v4f32(<4 x float> %x) { ; minimum of a 5.0 splat and %x using an ordered compare + select
1324  %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x ; ole: true only when neither operand is NaN and 5.0 <= %x
1325  %a = select <4 x i1> %cmps,
1326    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x ; 5.0 where the compare holds, otherwise %x
1327  ret <4 x float> %a
1328}
1329
1330; CHECK-LABEL: max_ordered_v4f32:
1331; NO-SIMD128-NOT: f32x4
1332; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1333; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1334; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1335; SIMD128-NEXT: return $pop[[R]]{{$}}
1336define <4 x float> @max_ordered_v4f32(<4 x float> %x) { ; maximum of a 5.0 splat and %x using an ordered compare + select
1337  %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x ; oge: true only when neither operand is NaN and 5.0 >= %x
1338  %a = select <4 x i1> %cmps,
1339    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x ; 5.0 where the compare holds, otherwise %x
1340  ret <4 x float> %a
1341}
1342
1343; CHECK-LABEL: min_intrinsic_v4f32:
1344; NO-SIMD128-NOT: f32x4
1345; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1346; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1347; SIMD128-NEXT: return $pop[[R]]{{$}}
1348declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1349define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane minimum through the llvm.minimum intrinsic
1350  %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
1351  ret <4 x float> %a
1352}
1353
1354; CHECK-LABEL: minnum_intrinsic_v4f32:
1355; NO-SIMD128-NOT: f32x4
1356; SIMD128-NEXT: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1357; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1358; SIMD128-NEXT: return $pop[[R]]{{$}}
1359declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
1360define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane minimum through the llvm.minnum intrinsic
1361  %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ; call carries the nnan fast-math flag
1362  ret <4 x float> %a
1363}
1364
1365; CHECK-LABEL: max_intrinsic_v4f32:
1366; NO-SIMD128-NOT: f32x4
1367; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1368; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1369; SIMD128-NEXT: return $pop[[R]]{{$}}
1370declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1371define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane maximum through the llvm.maximum intrinsic
1372  %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
1373  ret <4 x float> %a
1374}
1375
1376; CHECK-LABEL: maxnum_intrinsic_v4f32:
1377; NO-SIMD128-NOT: f32x4
1378; SIMD128-NEXT: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1379; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1380; SIMD128-NEXT: return $pop[[R]]{{$}}
1381declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
1382define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane maximum through the llvm.maxnum intrinsic
1383  %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ; call carries the nnan fast-math flag
1384  ret <4 x float> %a
1385}
1386
1387; CHECK-LABEL: min_const_intrinsic_v4f32:
1388; NO-SIMD128-NOT: f32x4
1389; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1390; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
1391; SIMD128-NEXT: return $pop[[R]]{{$}}
1392define <4 x float> @min_const_intrinsic_v4f32() { ; llvm.minimum applied to two constant splats (42.0 and 5.0)
1393  %a = call <4 x float> @llvm.minimum.v4f32(
1394    <4 x float> <float 42., float 42., float 42., float 42.>,
1395    <4 x float> <float 5., float 5., float 5., float 5.>
1396  ) ; the directives above expect a single folded constant, 0x1.4p2 (5.0), and no min instruction
1397  ret <4 x float> %a
1398}
1399
1400; CHECK-LABEL: max_const_intrinsic_v4f32:
1401; NO-SIMD128-NOT: f32x4
1402; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1403; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}}
1404; SIMD128-NEXT: return $pop[[R]]{{$}}
1405define <4 x float> @max_const_intrinsic_v4f32() { ; llvm.maximum applied to two constant splats (42.0 and 5.0)
1406  %a = call <4 x float> @llvm.maximum.v4f32(
1407    <4 x float> <float 42., float 42., float 42., float 42.>,
1408    <4 x float> <float 5., float 5., float 5., float 5.>
1409  ) ; the directives above expect a single folded constant, 0x1.5p5 (42.0), and no max instruction
1410  ret <4 x float> %a
1411}
1412
1413; CHECK-LABEL: add_v4f32:
1414; NO-SIMD128-NOT: f32x4
1415; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1416; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1417; SIMD128-NEXT: return $pop[[R]]{{$}}
1418define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane float addition
1419  %a = fadd <4 x float> %x, %y
1420  ret <4 x float> %a
1421}
1422
1423; CHECK-LABEL: sub_v4f32:
1424; NO-SIMD128-NOT: f32x4
1425; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1426; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1427; SIMD128-NEXT: return $pop[[R]]{{$}}
1428define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane float subtraction %x - %y
1429  %a = fsub <4 x float> %x, %y
1430  ret <4 x float> %a
1431}
1432
1433; CHECK-LABEL: div_v4f32:
1434; NO-SIMD128-NOT: f32x4
1435; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1436; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1437; SIMD128-NEXT: return $pop[[R]]{{$}}
1438define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane float division %x / %y
1439  %a = fdiv <4 x float> %x, %y
1440  ret <4 x float> %a
1441}
1442
1443; CHECK-LABEL: mul_v4f32:
1444; NO-SIMD128-NOT: f32x4
1445; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1446; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1447; SIMD128-NEXT: return $pop[[R]]{{$}}
1448define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; per-lane float multiplication
1449  %a = fmul <4 x float> %x, %y
1450  ret <4 x float> %a
1451}
1452
1453; CHECK-LABEL: sqrt_v4f32:
1454; NO-SIMD128-NOT: f32x4
1455; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1456; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1457; SIMD128-NEXT: return $pop[[R]]{{$}}
1458declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1459define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; per-lane square root through the llvm.sqrt intrinsic
1460  %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1461  ret <4 x float> %a
1462}
1463
1464; ==============================================================================
1465; 2 x double
1466; ==============================================================================
1467; CHECK-LABEL: neg_v2f64:
1468; NO-SIMD128-NOT: f64x2
1469; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1470; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1471; SIMD128-NEXT: return $pop[[R]]{{$}}
1472define <2 x double> @neg_v2f64(<2 x double> %x) { ; per-lane double negation written as 0.0 - %x
1473  ; nsz makes this semantically equivalent to flipping sign bit
1474  %a = fsub nsz <2 x double> <double 0., double 0.>, %x
1475  ret <2 x double> %a
1476}
1477
1478; CHECK-LABEL: abs_v2f64:
1479; NO-SIMD128-NOT: f64x2
1480; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1481; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1482; SIMD128-NEXT: return $pop[[R]]{{$}}
1483declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
1484define <2 x double> @abs_v2f64(<2 x double> %x) { ; per-lane double absolute value through the llvm.fabs intrinsic
1485  %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
1486  ret <2 x double> %a
1487}
1488
1489; CHECK-LABEL: min_unordered_v2f64:
1490; NO-SIMD128-NOT: f64x2
1491; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1492; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1493; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1494; SIMD128-NEXT: return $pop[[R]]{{$}}
1495define <2 x double> @min_unordered_v2f64(<2 x double> %x) { ; minimum of %x and a 5.0 splat using an unordered compare + select
1496  %cmps = fcmp ule <2 x double> %x, <double 5., double 5.> ; ule: true when %x <= 5.0 or either operand is NaN
1497  %a = select <2 x i1> %cmps, <2 x double> %x,
1498    <2 x double> <double 5., double 5.> ; %x where the compare holds, otherwise 5.0
1499  ret <2 x double> %a
1500}
1501
1502; CHECK-LABEL: max_unordered_v2f64:
1503; NO-SIMD128-NOT: f64x2
1504; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1505; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1506; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1507; SIMD128-NEXT: return $pop[[R]]{{$}}
1508define <2 x double> @max_unordered_v2f64(<2 x double> %x) { ; maximum of %x and a 5.0 splat using an unordered compare + select
1509  %cmps = fcmp uge <2 x double> %x, <double 5., double 5.> ; uge: true when %x >= 5.0 or either operand is NaN
1510  %a = select <2 x i1> %cmps, <2 x double> %x,
1511    <2 x double> <double 5., double 5.> ; %x where the compare holds, otherwise 5.0
1512  ret <2 x double> %a
1513}
1514
; Minimum against a constant splat of 5.0, expressed as compare + select with
; an ordered predicate and the constant on the left-hand side of the compare.
; With simd128 enabled the expected output still uses the argument ($0) as the
; first f64x2.min operand and the v128.const of 5.0 (0x1.4p2) as the second;
; without simd128 no f64x2 instruction may appear.
1515; CHECK-LABEL: min_ordered_v2f64:
1516; NO-SIMD128-NOT: f64x2
1517; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1518; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1519; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1520; SIMD128-NEXT: return $pop[[R]]{{$}}
1521define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
  ; ole: true per lane only when neither operand is NaN and 5.0 <= %x.
1522  %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x
  ; Take the constant 5.0 where the compare is true, otherwise keep %x.
1523  %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1524    <2 x double> %x
1525  ret <2 x double> %a
1526}
1527
; Maximum against a constant splat of 5.0, expressed as compare + select with
; an ordered predicate and the constant on the left-hand side of the compare.
; With simd128 enabled the expected output still uses the argument ($0) as the
; first f64x2.max operand and the v128.const of 5.0 (0x1.4p2) as the second;
; without simd128 no f64x2 instruction may appear.
1528; CHECK-LABEL: max_ordered_v2f64:
1529; NO-SIMD128-NOT: f64x2
1530; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
1531; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1532; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1533; SIMD128-NEXT: return $pop[[R]]{{$}}
1534define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
  ; oge: true per lane only when neither operand is NaN and 5.0 >= %x.
1535  %cmps = fcmp oge <2 x double> <double 5., double 5.>, %x
  ; Take the constant 5.0 where the compare is true, otherwise keep %x.
1536  %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1537    <2 x double> %x
1538  ret <2 x double> %a
1539}
1540
; Minimum of two <2 x double> vectors via the llvm.minimum intrinsic.
; With simd128 enabled the call is expected to become a single f64x2.min on
; the two arguments in order; without simd128 no f64x2 instruction may appear.
1541; CHECK-LABEL: min_intrinsic_v2f64:
1542; NO-SIMD128-NOT: f64x2
1543; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1544; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1545; SIMD128-NEXT: return $pop[[R]]{{$}}
; Declaration of the vector minimum intrinsic; it is also called by
; min_const_intrinsic_v2f64 later in this file.
1546declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1547define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1548  %a = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
1549  ret <2 x double> %a
1550}
1551
; Maximum of two <2 x double> vectors via the llvm.maximum intrinsic.
; With simd128 enabled the call is expected to become a single f64x2.max on
; the two arguments in order; without simd128 no f64x2 instruction may appear.
1552; CHECK-LABEL: max_intrinsic_v2f64:
1553; NO-SIMD128-NOT: f64x2
1554; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1555; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1556; SIMD128-NEXT: return $pop[[R]]{{$}}
; Declaration of the vector maximum intrinsic; it is also called by
; max_const_intrinsic_v2f64 later in this file.
1557declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1558define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1559  %a = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y)
1560  ret <2 x double> %a
1561}
1562
; llvm.minimum applied to two constant splats (42.0 and 5.0).
; With simd128 enabled the expected output is only a v128.const of 5.0
; (0x1.4p2 in hex-float notation) followed by the return, i.e. the call is
; folded away and no f64x2.min is emitted. Without simd128 no f64x2
; instruction may appear.
1563; CHECK-LABEL: min_const_intrinsic_v2f64:
1564; NO-SIMD128-NOT: f64x2
1565; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
1566; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
1567; SIMD128-NEXT: return $pop[[R]]{{$}}
1568define <2 x double> @min_const_intrinsic_v2f64() {
  ; Relies on the llvm.minimum.v2f64 declaration that appears earlier in the file.
1569  %a = call <2 x double> @llvm.minimum.v2f64(
1570    <2 x double> <double 42., double 42.>,
1571    <2 x double> <double 5., double 5.>
1572  )
1573  ret <2 x double> %a
1574}
1575
; llvm.maximum applied to two constant splats (42.0 and 5.0).
; With simd128 enabled the expected output is only a v128.const of 42.0
; (0x1.5p5 in hex-float notation) followed by the return, i.e. the call is
; folded away and no f64x2.max is emitted. Without simd128 no f64x2
; instruction may appear.
1576; CHECK-LABEL: max_const_intrinsic_v2f64:
1577; NO-SIMD128-NOT: f64x2
1578; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
1579; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5{{$}}
1580; SIMD128-NEXT: return $pop[[R]]{{$}}
1581define <2 x double> @max_const_intrinsic_v2f64() {
  ; Relies on the llvm.maximum.v2f64 declaration that appears earlier in the file.
1582  %a = call <2 x double> @llvm.maximum.v2f64(
1583    <2 x double> <double 42., double 42.>,
1584    <2 x double> <double 5., double 5.>
1585  )
1586  ret <2 x double> %a
1587}
1588
; Lane-wise floating-point addition of two <2 x double> vectors.
; With simd128 enabled the fadd is expected to become a single f64x2.add on
; the two arguments in order; without simd128 no f64x2 instruction may appear.
1589; CHECK-LABEL: add_v2f64:
1590; NO-SIMD128-NOT: f64x2
1591; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
1592; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1593; SIMD128-NEXT: return $pop[[R]]{{$}}
1594define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
1595  %a = fadd <2 x double> %x, %y
1596  ret <2 x double> %a
1597}
1598
; Lane-wise floating-point subtraction (%x - %y) of two <2 x double> vectors.
; With simd128 enabled the fsub is expected to become a single f64x2.sub on
; the two arguments in order; without simd128 no f64x2 instruction may appear.
1599; CHECK-LABEL: sub_v2f64:
1600; NO-SIMD128-NOT: f64x2
1601; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
1602; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1603; SIMD128-NEXT: return $pop[[R]]{{$}}
1604define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
1605  %a = fsub <2 x double> %x, %y
1606  ret <2 x double> %a
1607}
1608
; Lane-wise floating-point division (%x / %y) of two <2 x double> vectors.
; With simd128 enabled the fdiv is expected to become a single f64x2.div on
; the two arguments in order; without simd128 no f64x2 instruction may appear.
1609; CHECK-LABEL: div_v2f64:
1610; NO-SIMD128-NOT: f64x2
1611; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
1612; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1613; SIMD128-NEXT: return $pop[[R]]{{$}}
1614define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
1615  %a = fdiv <2 x double> %x, %y
1616  ret <2 x double> %a
1617}
1618
; Lane-wise floating-point multiplication of two <2 x double> vectors.
; With simd128 enabled the fmul is expected to become a single f64x2.mul on
; the two arguments in order; without simd128 no f64x2 instruction may appear.
1619; CHECK-LABEL: mul_v2f64:
1620; NO-SIMD128-NOT: f64x2
1621; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
1622; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1623; SIMD128-NEXT: return $pop[[R]]{{$}}
1624define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
1625  %a = fmul <2 x double> %x, %y
1626  ret <2 x double> %a
1627}
1628
; Lane-wise square root of a <2 x double> vector via the llvm.sqrt intrinsic.
; With simd128 enabled the call is expected to become a single f64x2.sqrt on
; the argument; without simd128 no f64x2 instruction may appear.
1629; CHECK-LABEL: sqrt_v2f64:
1630; NO-SIMD128-NOT: f64x2
1631; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
1632; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1633; SIMD128-NEXT: return $pop[[R]]{{$}}
; Declaration of the vector sqrt intrinsic called by the test function below.
1634declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1635define <2 x double> @sqrt_v2f64(<2 x double> %x) {
1636  %a = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1637  ret <2 x double> %a
1638}
1639