1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
3; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
4; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
5; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128
6; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
7
8; Check that a non-test run (including the explicit locals pass) at least finishes.
9; RUN: llc < %s -O0 -mattr=+unimplemented-simd128
10; RUN: llc < %s -O2 -mattr=+unimplemented-simd128
11
12; Test that basic SIMD128 arithmetic operations assemble as expected.
13
14target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
15target triple = "wasm32-unknown-unknown"
16
17; ==============================================================================
18; 16 x i8
19; ==============================================================================
; <16 x i8> add: the SIMD128 checks expect exactly one i8x16.add of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i8x16 instruction.
20; CHECK-LABEL: add_v16i8:
21; NO-SIMD128-NOT: i8x16
22; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
23; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
24; SIMD128-NEXT: return $pop[[R]]{{$}}
25define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
26  %a = add <16 x i8> %x, %y
27  ret <16 x i8> %a
28}
29
; <16 x i8> sub: the SIMD128 checks expect exactly one i8x16.sub of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i8x16 instruction.
30; CHECK-LABEL: sub_v16i8:
31; NO-SIMD128-NOT: i8x16
32; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
33; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
34; SIMD128-NEXT: return $pop[[R]]{{$}}
35define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
36  %a = sub <16 x i8> %x, %y
37  ret <16 x i8> %a
38}
39
; <16 x i8> mul: the SIMD128 checks expect exactly one i8x16.mul of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i8x16 instruction.
40; CHECK-LABEL: mul_v16i8:
41; NO-SIMD128-NOT: i8x16
42; SIMD128-NEXT: .functype mul_v16i8 (v128, v128) -> (v128){{$}}
43; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
44; SIMD128-NEXT: return $pop[[R]]{{$}}
45define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
46  %a = mul <16 x i8> %x, %y
47  ret <16 x i8> %a
48}
49
; Signed minimum written as `icmp slt` + `select` on <16 x i8>: the SIMD128
; checks expect the pair to be selected as a single i8x16.min_s; the NO-SIMD128
; checks forbid any i8x16 instruction.
50; CHECK-LABEL: min_s_v16i8:
51; NO-SIMD128-NOT: i8x16
52; SIMD128-NEXT: .functype min_s_v16i8 (v128, v128) -> (v128){{$}}
53; SIMD128-NEXT: i8x16.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
54; SIMD128-NEXT: return $pop[[R]]{{$}}
55define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
56  %c = icmp slt <16 x i8> %x, %y
57  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
58  ret <16 x i8> %a
59}
60
; Unsigned minimum written as `icmp ult` + `select` on <16 x i8>: the SIMD128
; checks expect the pair to be selected as a single i8x16.min_u; the NO-SIMD128
; checks forbid any i8x16 instruction.
61; CHECK-LABEL: min_u_v16i8:
62; NO-SIMD128-NOT: i8x16
63; SIMD128-NEXT: .functype min_u_v16i8 (v128, v128) -> (v128){{$}}
64; SIMD128-NEXT: i8x16.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
65; SIMD128-NEXT: return $pop[[R]]{{$}}
66define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
67  %c = icmp ult <16 x i8> %x, %y
68  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
69  ret <16 x i8> %a
70}
71
; Signed maximum written as `icmp sgt` + `select` on <16 x i8>: the SIMD128
; checks expect the pair to be selected as a single i8x16.max_s; the NO-SIMD128
; checks forbid any i8x16 instruction.
72; CHECK-LABEL: max_s_v16i8:
73; NO-SIMD128-NOT: i8x16
74; SIMD128-NEXT: .functype max_s_v16i8 (v128, v128) -> (v128){{$}}
75; SIMD128-NEXT: i8x16.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
76; SIMD128-NEXT: return $pop[[R]]{{$}}
77define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
78  %c = icmp sgt <16 x i8> %x, %y
79  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
80  ret <16 x i8> %a
81}
82
; Unsigned maximum written as `icmp ugt` + `select` on <16 x i8>: the SIMD128
; checks expect the pair to be selected as a single i8x16.max_u; the NO-SIMD128
; checks forbid any i8x16 instruction.
83; CHECK-LABEL: max_u_v16i8:
84; NO-SIMD128-NOT: i8x16
85; SIMD128-NEXT: .functype max_u_v16i8 (v128, v128) -> (v128){{$}}
86; SIMD128-NEXT: i8x16.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
87; SIMD128-NEXT: return $pop[[R]]{{$}}
88define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
89  %c = icmp ugt <16 x i8> %x, %y
90  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
91  ret <16 x i8> %a
92}
93
; Rounding unsigned average (x + y + 1) / 2 on <16 x i8>, with `nuw` on both
; adds: the SIMD128 checks expect the whole expression to be selected as a
; single i8x16.avgr_u; the NO-SIMD128 checks forbid any i8x16 instruction.
94; CHECK-LABEL: avgr_u_v16i8:
95; NO-SIMD128-NOT: i8x16
96; SIMD128-NEXT: .functype avgr_u_v16i8 (v128, v128) -> (v128){{$}}
97; SIMD128-NEXT: i8x16.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
98; SIMD128-NEXT: return $pop[[R]]{{$}}
99define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
100  %a = add nuw <16 x i8> %x, %y
101  %b = add nuw <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
102                              i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
103  %c = udiv <16 x i8> %b, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
104                           i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
105  ret <16 x i8> %c
106}
107
; Negative test: the same (x + y + 1) / 2 arithmetic on <16 x i8>, but neither
; add carries the `nuw` flag. The SIMD128 checks require that i8x16.avgr_u is
; NOT emitted for this function.
108; CHECK-LABEL: avgr_u_v16i8_wrap:
109; NO-SIMD128-NOT: i8x16
110; SIMD128-NEXT: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128){{$}}
111; SIMD128-NOT: i8x16.avgr_u
112define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) {
113  %a = add <16 x i8> %x, %y
114  %b = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
115                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
116  %c = udiv <16 x i8> %b, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
117                           i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
118  ret <16 x i8> %c
119}
120
; Absolute value written as select(x < 0, 0 - x, x) on <16 x i8>: the SIMD128
; checks expect the idiom to be selected as a single i8x16.abs; the NO-SIMD128
; checks forbid any i8x16 instruction.
121; CHECK-LABEL: abs_v16i8:
122; NO-SIMD128-NOT: i8x16
123; SIMD128-NEXT: .functype abs_v16i8 (v128) -> (v128){{$}}
124; SIMD128-NEXT: i8x16.abs $push[[R:[0-9]+]]=, $0{{$}}
125; SIMD128-NEXT: return $pop[[R]]{{$}}
126define <16 x i8> @abs_v16i8(<16 x i8> %x) {
127  %a = sub <16 x i8> zeroinitializer, %x
128  %b = icmp slt <16 x i8> %x, zeroinitializer
129  %c = select <16 x i1> %b, <16 x i8> %a, <16 x i8> %x
130  ret <16 x i8> %c
131}
132
; Negation written as (explicit all-zero constant vector) - x on <16 x i8>:
; the SIMD128 checks expect a single i8x16.neg; the NO-SIMD128 checks forbid
; any i8x16 instruction.
133; CHECK-LABEL: neg_v16i8:
134; NO-SIMD128-NOT: i8x16
135; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
136; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
137; SIMD128-NEXT: return $pop[[R]]{{$}}
138define <16 x i8> @neg_v16i8(<16 x i8> %x) {
139  %a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
140                      i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
141                     %x
142  ret <16 x i8> %a
143}
144
; <16 x i8> shl by a scalar i8 that is splatted with insertelement +
; shufflevector (all-zero mask): the SIMD128 checks expect a single i8x16.shl
; taking the scalar argument $1 directly (it is an i32 in the .functype).
145; CHECK-LABEL: shl_v16i8:
146; NO-SIMD128-NOT: i8x16
147; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
148; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
149; SIMD128-NEXT: return $pop[[R]]{{$}}
150define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
151  %t = insertelement <16 x i8> undef, i8 %x, i32 0
152  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
153    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
154                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
155  %a = shl <16 x i8> %v, %s
156  ret <16 x i8> %a
157}
158
; <16 x i8> shl by a constant splat of 5: the SIMD128 checks expect the shift
; amount to be materialized with i32.const 5 and fed to a single i8x16.shl.
; Every expected line is anchored with {{$}} so the constant cannot match as a
; prefix of a longer number (e.g. 50).
159; CHECK-LABEL: shl_const_v16i8:
160; NO-SIMD128-NOT: i8x16
161; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
162; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
163; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
164; SIMD128-NEXT: return $pop[[R]]{{$}}
165define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
166  %a = shl <16 x i8> %v,
167    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
168     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
169  ret <16 x i8> %a
170}
171
; <16 x i8> shl by a general (non-splat) vector: the SIMD128 checks expect the
; shift to be performed per lane in scalar code - extract the lane from $0 and
; from $1, mask the amount with 7, i32.shl - with lane 0 re-entered through
; i8x16.splat and lane 15 through i8x16.replace_lane. Lanes 1-14 are skipped.
; Every expected line is anchored with {{$}} so that a captured register number
; cannot match as a prefix of a longer one (e.g. $pop1 against $pop10).
172; CHECK-LABEL: shl_vec_v16i8:
173; NO-SIMD128-NOT: i8x16
174; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
175; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
176; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
177; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
178; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
179; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
180; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
181; Skip 14 lanes
182; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
183; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
184; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
185; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
186; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
187; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
188; SIMD128-NEXT: return $pop[[R]]{{$}}
189define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
190  %a = shl <16 x i8> %v, %x
191  ret <16 x i8> %a
192}
193
; <16 x i8> ashr by a scalar i8 that is splatted with insertelement +
; shufflevector (all-zero mask): the SIMD128 checks expect a single i8x16.shr_s
; taking the scalar argument $1 directly (it is an i32 in the .functype).
194; CHECK-LABEL: shr_s_v16i8:
195; NO-SIMD128-NOT: i8x16
196; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
197; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
198; SIMD128-NEXT: return $pop[[R]]{{$}}
199define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
200  %t = insertelement <16 x i8> undef, i8 %x, i32 0
201  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
202    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
203                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
204  %a = ashr <16 x i8> %v, %s
205  ret <16 x i8> %a
206}
207
; <16 x i8> ashr by a general (non-splat) vector: the SIMD128 checks expect the
; shift to be performed per lane in scalar code - sign-extending extract of the
; value lane from $0, zero-extending extract of the amount lane from $1, mask
; the amount with 7, i32.shr_s - with lane 0 re-entered through i8x16.splat and
; lane 15 through i8x16.replace_lane. Lanes 1-14 are skipped.
; Every expected line is anchored with {{$}} so that a captured register number
; cannot match as a prefix of a longer one (e.g. $pop1 against $pop10).
208; CHECK-LABEL: shr_s_vec_v16i8:
209; NO-SIMD128-NOT: i8x16
210; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
211; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
212; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
213; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
214; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
215; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
216; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
217; Skip 14 lanes
218; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
219; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
220; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
221; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
222; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
223; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
224; SIMD128-NEXT: return $pop[[R]]{{$}}
225define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
226  %a = ashr <16 x i8> %v, %x
227  ret <16 x i8> %a
228}
229
; <16 x i8> lshr by a scalar i8 that is splatted with insertelement +
; shufflevector (all-zero mask): the SIMD128 checks expect a single i8x16.shr_u
; taking the scalar argument $1 directly (it is an i32 in the .functype).
230; CHECK-LABEL: shr_u_v16i8:
231; NO-SIMD128-NOT: i8x16
232; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
233; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
234; SIMD128-NEXT: return $pop[[R]]{{$}}
235define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
236  %t = insertelement <16 x i8> undef, i8 %x, i32 0
237  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
238    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
239                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
240  %a = lshr <16 x i8> %v, %s
241  ret <16 x i8> %a
242}
243
; <16 x i8> lshr by a general (non-splat) vector: the SIMD128 checks expect the
; shift to be performed per lane in scalar code - zero-extending extract of the
; lane from $0 and from $1, mask the amount with 7, i32.shr_u - with lane 0
; re-entered through i8x16.splat and lane 15 through i8x16.replace_lane.
; Lanes 1-14 are skipped.
; Every expected line is anchored with {{$}} so that a captured register number
; cannot match as a prefix of a longer one (e.g. $pop1 against $pop10).
244; CHECK-LABEL: shr_u_vec_v16i8:
245; NO-SIMD128-NOT: i8x16
246; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
247; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
248; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
249; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
250; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
251; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
252; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
253; Skip 14 lanes
254; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
255; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
256; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
257; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
258; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
259; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}}
260; SIMD128-NEXT: return $pop[[R]]{{$}}
261define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
262  %a = lshr <16 x i8> %v, %x
263  ret <16 x i8> %a
264}
265
; <16 x i8> bitwise and: the SIMD128 checks expect exactly one v128.and of $0
; and $1 followed by the return; the NO-SIMD128 checks forbid any occurrence of
; "v128" in this function's output.
266; CHECK-LABEL: and_v16i8:
267; NO-SIMD128-NOT: v128
268; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
269; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
270; SIMD128-NEXT: return $pop[[R]]{{$}}
271define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
272  %a = and <16 x i8> %x, %y
273  ret <16 x i8> %a
274}
275
; <16 x i8> bitwise or: the SIMD128 checks expect exactly one v128.or of $0
; and $1 followed by the return; the NO-SIMD128 checks forbid any occurrence of
; "v128" in this function's output.
276; CHECK-LABEL: or_v16i8:
277; NO-SIMD128-NOT: v128
278; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
279; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
280; SIMD128-NEXT: return $pop[[R]]{{$}}
281define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
282  %a = or <16 x i8> %x, %y
283  ret <16 x i8> %a
284}
285
; <16 x i8> bitwise xor: the SIMD128 checks expect exactly one v128.xor of $0
; and $1 followed by the return; the NO-SIMD128 checks forbid any occurrence of
; "v128" in this function's output.
286; CHECK-LABEL: xor_v16i8:
287; NO-SIMD128-NOT: v128
288; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
289; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
290; SIMD128-NEXT: return $pop[[R]]{{$}}
291define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
292  %a = xor <16 x i8> %x, %y
293  ret <16 x i8> %a
294}
295
; Bitwise not written as xor with an all-ones <16 x i8> constant: the SIMD128
; checks expect a single v128.not; the NO-SIMD128 checks forbid any occurrence
; of "v128" in this function's output.
296; CHECK-LABEL: not_v16i8:
297; NO-SIMD128-NOT: v128
298; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
299; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
300; SIMD128-NEXT: return $pop[[R]]{{$}}
301define <16 x i8> @not_v16i8(<16 x i8> %x) {
302  %a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1,
303                          i8 -1, i8 -1, i8 -1, i8 -1,
304                          i8 -1, i8 -1, i8 -1, i8 -1,
305                          i8 -1, i8 -1, i8 -1, i8 -1>
306  ret <16 x i8> %a
307}
308
; x & ~y on <16 x i8> (the inversion is an xor with all-ones). Expectations
; differ per run configuration:
;  - SIMD128-SLOW (+unimplemented-simd128, no -fast-isel): one v128.andnot.
;  - SIMD128-FAST (+unimplemented-simd128, -fast-isel): v128.not then v128.and.
;  - SIMD128-VM (+simd128 only): v128.andnot must not be emitted.
;  - NO-SIMD128: no "v128" at all.
309; CHECK-LABEL: andnot_v16i8:
310; NO-SIMD128-NOT: v128
311; SIMD128-VM-NOT: v128.andnot
312; SIMD128-NEXT: .functype andnot_v16i8 (v128, v128) -> (v128){{$}}
313; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
314; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
315; SIMD128-FAST-NEXT: v128.not
316; SIMD128-FAST-NEXT: v128.and
317; SIMD128-FAST-NEXT: return
318define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) {
319 %inv_y = xor <16 x i8> %y,
320   <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
321    i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
322 %a = and <16 x i8> %x, %inv_y
323 ret <16 x i8> %a
324}
325
; Bit select written as (c & v1) | (~c & v2) on <16 x i8>. The SIMD128-SLOW
; checks (no -fast-isel) expect a single v128.bitselect with operands
; $1, $2, $0, i.e. the mask %c comes last; the SIMD128-FAST checks (-fast-isel)
; expect the unfused and / not / and / or sequence. NO-SIMD128 forbids "v128".
326; CHECK-LABEL: bitselect_v16i8:
327; NO-SIMD128-NOT: v128
328; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
329; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
330; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
331; SIMD128-FAST-NEXT: v128.and
332; SIMD128-FAST-NEXT: v128.not
333; SIMD128-FAST-NEXT: v128.and
334; SIMD128-FAST-NEXT: v128.or
335; SIMD128-FAST-NEXT: return
336define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
337  %masked_v1 = and <16 x i8> %c, %v1
338  %inv_mask = xor <16 x i8> %c,
339    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
340     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
341  %masked_v2 = and <16 x i8> %inv_mask, %v2
342  %a = or <16 x i8> %masked_v1, %masked_v2
343  ret <16 x i8> %a
344}
345
346; ==============================================================================
347; 8 x i16
348; ==============================================================================
; <8 x i16> add: the SIMD128 checks expect exactly one i16x8.add of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i16x8 instruction.
349; CHECK-LABEL: add_v8i16:
350; NO-SIMD128-NOT: i16x8
351; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
352; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
353; SIMD128-NEXT: return $pop[[R]]{{$}}
354define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
355  %a = add <8 x i16> %x, %y
356  ret <8 x i16> %a
357}
358
; <8 x i16> sub: the SIMD128 checks expect exactly one i16x8.sub of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i16x8 instruction.
359; CHECK-LABEL: sub_v8i16:
360; NO-SIMD128-NOT: i16x8
361; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
362; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
363; SIMD128-NEXT: return $pop[[R]]{{$}}
364define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
365  %a = sub <8 x i16> %x, %y
366  ret <8 x i16> %a
367}
368
; <8 x i16> mul: the SIMD128 checks expect exactly one i16x8.mul of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i16x8 instruction.
369; CHECK-LABEL: mul_v8i16:
370; NO-SIMD128-NOT: i16x8
371; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
372; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
373; SIMD128-NEXT: return $pop[[R]]{{$}}
374define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
375  %a = mul <8 x i16> %x, %y
376  ret <8 x i16> %a
377}
378
; Signed minimum written as `icmp slt` + `select` on <8 x i16>: the SIMD128
; checks expect the pair to be selected as a single i16x8.min_s; the NO-SIMD128
; checks forbid any i16x8 instruction.
379; CHECK-LABEL: min_s_v8i16:
380; NO-SIMD128-NOT: i16x8
381; SIMD128-NEXT: .functype min_s_v8i16 (v128, v128) -> (v128){{$}}
382; SIMD128-NEXT: i16x8.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
383; SIMD128-NEXT: return $pop[[R]]{{$}}
384define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
385  %c = icmp slt <8 x i16> %x, %y
386  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
387  ret <8 x i16> %a
388}
389
; Unsigned minimum written as `icmp ult` + `select` on <8 x i16>: the SIMD128
; checks expect the pair to be selected as a single i16x8.min_u; the NO-SIMD128
; checks forbid any i16x8 instruction.
390; CHECK-LABEL: min_u_v8i16:
391; NO-SIMD128-NOT: i16x8
392; SIMD128-NEXT: .functype min_u_v8i16 (v128, v128) -> (v128){{$}}
393; SIMD128-NEXT: i16x8.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
394; SIMD128-NEXT: return $pop[[R]]{{$}}
395define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
396  %c = icmp ult <8 x i16> %x, %y
397  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
398  ret <8 x i16> %a
399}
400
; Signed maximum written as `icmp sgt` + `select` on <8 x i16>: the SIMD128
; checks expect the pair to be selected as a single i16x8.max_s; the NO-SIMD128
; checks forbid any i16x8 instruction.
401; CHECK-LABEL: max_s_v8i16:
402; NO-SIMD128-NOT: i16x8
403; SIMD128-NEXT: .functype max_s_v8i16 (v128, v128) -> (v128){{$}}
404; SIMD128-NEXT: i16x8.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
405; SIMD128-NEXT: return $pop[[R]]{{$}}
406define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
407  %c = icmp sgt <8 x i16> %x, %y
408  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
409  ret <8 x i16> %a
410}
411
; Unsigned maximum written as `icmp ugt` + `select` on <8 x i16>: the SIMD128
; checks expect the pair to be selected as a single i16x8.max_u; the NO-SIMD128
; checks forbid any i16x8 instruction.
412; CHECK-LABEL: max_u_v8i16:
413; NO-SIMD128-NOT: i16x8
414; SIMD128-NEXT: .functype max_u_v8i16 (v128, v128) -> (v128){{$}}
415; SIMD128-NEXT: i16x8.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
416; SIMD128-NEXT: return $pop[[R]]{{$}}
417define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
418  %c = icmp ugt <8 x i16> %x, %y
419  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
420  ret <8 x i16> %a
421}
422
; Rounding unsigned average (x + y + 1) / 2 on <8 x i16>, with `nuw` on both
; adds: the SIMD128 checks expect the whole expression to be selected as a
; single i16x8.avgr_u; the NO-SIMD128 checks forbid any i16x8 instruction.
423; CHECK-LABEL: avgr_u_v8i16:
424; NO-SIMD128-NOT: i16x8
425; SIMD128-NEXT: .functype avgr_u_v8i16 (v128, v128) -> (v128){{$}}
426; SIMD128-NEXT: i16x8.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
427; SIMD128-NEXT: return $pop[[R]]{{$}}
428define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
429  %a = add nuw <8 x i16> %x, %y
430  %b = add nuw <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
431  %c = udiv <8 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
432  ret <8 x i16> %c
433}
434
; Negative test: the same (x + y + 1) / 2 arithmetic on <8 x i16>, but neither
; add carries the `nuw` flag. The SIMD128 checks require that i16x8.avgr_u is
; NOT emitted for this function.
435; CHECK-LABEL: avgr_u_v8i16_wrap:
436; NO-SIMD128-NOT: i16x8
437; SIMD128-NEXT: .functype avgr_u_v8i16_wrap (v128, v128) -> (v128){{$}}
438; SIMD128-NOT: i16x8.avgr_u
439define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) {
440  %a = add <8 x i16> %x, %y
441  %b = add <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
442  %c = udiv <8 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
443  ret <8 x i16> %c
444}
445
; Absolute value written as select(x < 0, 0 - x, x) on <8 x i16>: the SIMD128
; checks expect the idiom to be selected as a single i16x8.abs; the NO-SIMD128
; checks forbid any i16x8 instruction.
446; CHECK-LABEL: abs_v8i16:
447; NO-SIMD128-NOT: i16x8
448; SIMD128-NEXT: .functype abs_v8i16 (v128) -> (v128){{$}}
449; SIMD128-NEXT: i16x8.abs $push[[R:[0-9]+]]=, $0{{$}}
450; SIMD128-NEXT: return $pop[[R]]{{$}}
451define <8 x i16> @abs_v8i16(<8 x i16> %x) {
452  %a = sub <8 x i16> zeroinitializer, %x
453  %b = icmp slt <8 x i16> %x, zeroinitializer
454  %c = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %x
455  ret <8 x i16> %c
456}
457
; Negation written as (explicit all-zero constant vector) - x on <8 x i16>:
; the SIMD128 checks expect a single i16x8.neg; the NO-SIMD128 checks forbid
; any i16x8 instruction.
458; CHECK-LABEL: neg_v8i16:
459; NO-SIMD128-NOT: i16x8
460; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
461; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
462; SIMD128-NEXT: return $pop[[R]]{{$}}
463define <8 x i16> @neg_v8i16(<8 x i16> %x) {
464  %a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
465                     %x
466  ret <8 x i16> %a
467}
468
; <8 x i16> shl by a scalar i16 that is splatted with insertelement +
; shufflevector (all-zero mask): the SIMD128 checks expect a single i16x8.shl
; taking the scalar argument $1 directly (it is an i32 in the .functype).
469; CHECK-LABEL: shl_v8i16:
470; NO-SIMD128-NOT: i16x8
471; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
472; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
473; SIMD128-NEXT: return $pop[[R]]{{$}}
474define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
475  %t = insertelement <8 x i16> undef, i16 %x, i32 0
476  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
477    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
478  %a = shl <8 x i16> %v, %s
479  ret <8 x i16> %a
480}
481
; <8 x i16> shl by a constant splat of 5: the SIMD128 checks expect the shift
; amount to be materialized with i32.const 5 and fed to a single i16x8.shl.
; Every expected line is anchored with {{$}} so the constant cannot match as a
; prefix of a longer number (e.g. 50).
482; CHECK-LABEL: shl_const_v8i16:
483; NO-SIMD128-NOT: i16x8
484; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
485; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
486; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
487; SIMD128-NEXT: return $pop[[R]]{{$}}
488define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
489  %a = shl <8 x i16> %v,
490    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
491  ret <8 x i16> %a
492}
493
; <8 x i16> shl by a general (non-splat) vector: the SIMD128 checks expect the
; shift to be performed per lane in scalar code - extract the lane from $0 and
; from $1, mask the amount with 15, i32.shl - with lane 0 re-entered through
; i16x8.splat and lane 7 through i16x8.replace_lane. Lanes 1-6 are skipped.
494; CHECK-LABEL: shl_vec_v8i16:
495; NO-SIMD128-NOT: i16x8
496; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
497; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
498; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
499; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
500; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
501; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
502; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
503; Skip 6 lanes
504; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
505; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
506; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
507; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
508; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
509; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
510; SIMD128-NEXT: return $pop[[R]]{{$}}
511define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
512  %a = shl <8 x i16> %v, %x
513  ret <8 x i16> %a
514}
515
; <8 x i16> ashr by a scalar i16 that is splatted with insertelement +
; shufflevector (all-zero mask): the SIMD128 checks expect a single i16x8.shr_s
; taking the scalar argument $1 directly (it is an i32 in the .functype).
516; CHECK-LABEL: shr_s_v8i16:
517; NO-SIMD128-NOT: i16x8
518; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
519; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
520; SIMD128-NEXT: return $pop[[R]]{{$}}
521define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
522  %t = insertelement <8 x i16> undef, i16 %x, i32 0
523  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
524    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
525  %a = ashr <8 x i16> %v, %s
526  ret <8 x i16> %a
527}
528
; <8 x i16> ashr by a general (non-splat) vector: the SIMD128 checks expect the
; shift to be performed per lane in scalar code - sign-extending extract of the
; value lane from $0, zero-extending extract of the amount lane from $1, mask
; the amount with 15, i32.shr_s - with lane 0 re-entered through i16x8.splat
; and lane 7 through i16x8.replace_lane. Lanes 1-6 are skipped.
529; CHECK-LABEL: shr_s_vec_v8i16:
530; NO-SIMD128-NOT: i16x8
531; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
532; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
533; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
534; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
535; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
536; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
537; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
538; Skip 6 lanes
539; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
540; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
541; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
542; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
543; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
544; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
545; SIMD128-NEXT: return $pop[[R]]{{$}}
546define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
547  %a = ashr <8 x i16> %v, %x
548  ret <8 x i16> %a
549}
550
; <8 x i16> lshr by a scalar i16 that is splatted with insertelement +
; shufflevector (all-zero mask): the SIMD128 checks expect a single i16x8.shr_u
; taking the scalar argument $1 directly (it is an i32 in the .functype).
551; CHECK-LABEL: shr_u_v8i16:
552; NO-SIMD128-NOT: i16x8
553; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
554; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
555; SIMD128-NEXT: return $pop[[R]]{{$}}
556define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
557  %t = insertelement <8 x i16> undef, i16 %x, i32 0
558  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
559    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
560  %a = lshr <8 x i16> %v, %s
561  ret <8 x i16> %a
562}
563
; <8 x i16> lshr by a general (non-splat) vector: the SIMD128 checks expect the
; shift to be performed per lane in scalar code - zero-extending extract of the
; lane from $0 and from $1, mask the amount with 15, i32.shr_u - with lane 0
; re-entered through i16x8.splat and lane 7 through i16x8.replace_lane.
; Lanes 1-6 are skipped.
564; CHECK-LABEL: shr_u_vec_v8i16:
565; NO-SIMD128-NOT: i16x8
566; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
567; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
568; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
569; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
570; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
571; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
572; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
573; Skip 6 lanes
574; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
575; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
576; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
577; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
578; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
579; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}}
580; SIMD128-NEXT: return $pop[[R]]{{$}}
581define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
582  %a = lshr <8 x i16> %v, %x
583  ret <8 x i16> %a
584}
585
; <8 x i16> bitwise and: the SIMD128 checks expect exactly one v128.and of $0
; and $1 followed by the return; the NO-SIMD128 checks forbid any occurrence of
; "v128" in this function's output.
586; CHECK-LABEL: and_v8i16:
587; NO-SIMD128-NOT: v128
588; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
589; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
590; SIMD128-NEXT: return $pop[[R]]{{$}}
591define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
592  %a = and <8 x i16> %x, %y
593  ret <8 x i16> %a
594}
595
; <8 x i16> bitwise or: the SIMD128 checks expect exactly one v128.or of $0
; and $1 followed by the return; the NO-SIMD128 checks forbid any occurrence of
; "v128" in this function's output.
596; CHECK-LABEL: or_v8i16:
597; NO-SIMD128-NOT: v128
598; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
599; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
600; SIMD128-NEXT: return $pop[[R]]{{$}}
601define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
602  %a = or <8 x i16> %x, %y
603  ret <8 x i16> %a
604}
605
; <8 x i16> bitwise xor: the SIMD128 checks expect exactly one v128.xor of $0
; and $1 followed by the return; the NO-SIMD128 checks forbid any occurrence of
; "v128" in this function's output.
606; CHECK-LABEL: xor_v8i16:
607; NO-SIMD128-NOT: v128
608; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
609; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
610; SIMD128-NEXT: return $pop[[R]]{{$}}
611define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
612  %a = xor <8 x i16> %x, %y
613  ret <8 x i16> %a
614}
615
; Bitwise not written as xor with an all-ones <8 x i16> constant: the SIMD128
; checks expect a single v128.not; the NO-SIMD128 checks forbid any occurrence
; of "v128" in this function's output.
616; CHECK-LABEL: not_v8i16:
617; NO-SIMD128-NOT: v128
618; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
619; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
620; SIMD128-NEXT: return $pop[[R]]{{$}}
621define <8 x i16> @not_v8i16(<8 x i16> %x) {
622  %a = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1,
623                          i16 -1, i16 -1, i16 -1, i16 -1>
624  ret <8 x i16> %a
625}
626
; x & ~y on <8 x i16> (the inversion is an xor with all-ones). Expectations
; differ per run configuration:
;  - SIMD128-SLOW (+unimplemented-simd128, no -fast-isel): one v128.andnot.
;  - SIMD128-FAST (+unimplemented-simd128, -fast-isel): v128.not then v128.and.
;  - SIMD128-VM (+simd128 only): v128.andnot must not be emitted.
;  - NO-SIMD128: no "v128" at all.
627; CHECK-LABEL: andnot_v8i16:
628; SIMD128-VM-NOT: v128.andnot
629; NO-SIMD128-NOT: v128
630; SIMD128-NEXT: .functype andnot_v8i16 (v128, v128) -> (v128){{$}}
631; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
632; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
633; SIMD128-FAST-NEXT: v128.not
634; SIMD128-FAST-NEXT: v128.and
635; SIMD128-FAST-NEXT: return
636define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) {
637 %inv_y = xor <8 x i16> %y,
638   <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
639 %a = and <8 x i16> %x, %inv_y
640 ret <8 x i16> %a
641}
642
; Bit select written as (v1 & c) | (v2 & ~c) on <8 x i16>; here the mask %c is
; the second operand of each `and` and of the inverting `xor`. The SIMD128-SLOW
; checks (no -fast-isel) expect a single v128.bitselect with operands
; $1, $2, $0; the SIMD128-FAST checks (-fast-isel) expect the unfused
; and / not / and / or sequence. NO-SIMD128 forbids "v128".
643; CHECK-LABEL: bitselect_v8i16:
644; NO-SIMD128-NOT: v128
645; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
646; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
647; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
648; SIMD128-FAST-NEXT: v128.and
649; SIMD128-FAST-NEXT: v128.not
650; SIMD128-FAST-NEXT: v128.and
651; SIMD128-FAST-NEXT: v128.or
652; SIMD128-FAST-NEXT: return
653define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
654  %masked_v1 = and <8 x i16> %v1, %c
655  %inv_mask = xor <8 x i16>
656    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>,
657    %c
658  %masked_v2 = and <8 x i16> %v2, %inv_mask
659  %a = or <8 x i16> %masked_v1, %masked_v2
660  ret <8 x i16> %a
661}
662
663; ==============================================================================
664; 4 x i32
665; ==============================================================================
; <4 x i32> add: the SIMD128 checks expect exactly one i32x4.add of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i32x4 instruction.
666; CHECK-LABEL: add_v4i32:
667; NO-SIMD128-NOT: i32x4
668; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
669; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
670; SIMD128-NEXT: return $pop[[R]]{{$}}
671define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
672  %a = add <4 x i32> %x, %y
673  ret <4 x i32> %a
674}
675
; <4 x i32> sub: the SIMD128 checks expect exactly one i32x4.sub of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i32x4 instruction.
676; CHECK-LABEL: sub_v4i32:
677; NO-SIMD128-NOT: i32x4
678; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
679; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
680; SIMD128-NEXT: return $pop[[R]]{{$}}
681define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
682  %a = sub <4 x i32> %x, %y
683  ret <4 x i32> %a
684}
685
; <4 x i32> mul: the SIMD128 checks expect exactly one i32x4.mul of $0 and $1
; followed by the return; the NO-SIMD128 checks forbid any i32x4 instruction.
686; CHECK-LABEL: mul_v4i32:
687; NO-SIMD128-NOT: i32x4
688; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
689; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
690; SIMD128-NEXT: return $pop[[R]]{{$}}
691define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
692  %a = mul <4 x i32> %x, %y
693  ret <4 x i32> %a
694}
695
; Signed minimum written as `icmp slt` + `select` on <4 x i32>: the SIMD128
; checks expect the pair to be selected as a single i32x4.min_s; the NO-SIMD128
; checks forbid any i32x4 instruction.
696; CHECK-LABEL: min_s_v4i32:
697; NO-SIMD128-NOT: i32x4
698; SIMD128-NEXT: .functype min_s_v4i32 (v128, v128) -> (v128){{$}}
699; SIMD128-NEXT: i32x4.min_s $push[[R:[0-9]+]]=, $0, $1{{$}}
700; SIMD128-NEXT: return $pop[[R]]{{$}}
701define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
702  %c = icmp slt <4 x i32> %x, %y
703  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
704  ret <4 x i32> %a
705}
706
; Unsigned minimum written as `icmp ult` + `select` on <4 x i32>: the SIMD128
; checks expect the pair to be selected as a single i32x4.min_u; the NO-SIMD128
; checks forbid any i32x4 instruction.
707; CHECK-LABEL: min_u_v4i32:
708; NO-SIMD128-NOT: i32x4
709; SIMD128-NEXT: .functype min_u_v4i32 (v128, v128) -> (v128){{$}}
710; SIMD128-NEXT: i32x4.min_u $push[[R:[0-9]+]]=, $0, $1{{$}}
711; SIMD128-NEXT: return $pop[[R]]{{$}}
712define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
713  %c = icmp ult <4 x i32> %x, %y
714  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
715  ret <4 x i32> %a
716}
717
; Signed maximum written as `icmp sgt` + `select` on <4 x i32>: the SIMD128
; checks expect the pair to be selected as a single i32x4.max_s; the NO-SIMD128
; checks forbid any i32x4 instruction.
718; CHECK-LABEL: max_s_v4i32:
719; NO-SIMD128-NOT: i32x4
720; SIMD128-NEXT: .functype max_s_v4i32 (v128, v128) -> (v128){{$}}
721; SIMD128-NEXT: i32x4.max_s $push[[R:[0-9]+]]=, $0, $1{{$}}
722; SIMD128-NEXT: return $pop[[R]]{{$}}
723define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
724  %c = icmp sgt <4 x i32> %x, %y
725  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
726  ret <4 x i32> %a
727}
728
; Unsigned maximum written as `icmp ugt` + `select` on <4 x i32>: the SIMD128
; checks expect the pair to be selected as a single i32x4.max_u; the NO-SIMD128
; checks forbid any i32x4 instruction.
729; CHECK-LABEL: max_u_v4i32:
730; NO-SIMD128-NOT: i32x4
731; SIMD128-NEXT: .functype max_u_v4i32 (v128, v128) -> (v128){{$}}
732; SIMD128-NEXT: i32x4.max_u $push[[R:[0-9]+]]=, $0, $1{{$}}
733; SIMD128-NEXT: return $pop[[R]]{{$}}
734define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
735  %c = icmp ugt <4 x i32> %x, %y
736  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
737  ret <4 x i32> %a
738}
739
; Absolute value written as select(x < 0, 0 - x, x) on <4 x i32>: the SIMD128
; checks expect the idiom to be selected as a single i32x4.abs; the NO-SIMD128
; checks forbid any i32x4 instruction.
740; CHECK-LABEL: abs_v4i32:
741; NO-SIMD128-NOT: i32x4
742; SIMD128-NEXT: .functype abs_v4i32 (v128) -> (v128){{$}}
743; SIMD128-NEXT: i32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
744; SIMD128-NEXT: return $pop[[R]]{{$}}
745define <4 x i32> @abs_v4i32(<4 x i32> %x) {
746  %a = sub <4 x i32> zeroinitializer, %x
747  %b = icmp slt <4 x i32> %x, zeroinitializer
748  %c = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %x
749  ret <4 x i32> %c
750}
751
; Integer negation written as 0 - x. The SIMD-enabled checks expect a single
; i32x4.neg.
752; CHECK-LABEL: neg_v4i32:
753; NO-SIMD128-NOT: i32x4
754; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
755; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
756; SIMD128-NEXT: return $pop[[R]]{{$}}
757define <4 x i32> @neg_v4i32(<4 x i32> %x) {
758  %a = sub <4 x i32> <i32 0, i32 0, i32 0, i32 0>, %x
759  ret <4 x i32> %a
760}
761
; Shift left by a scalar amount that is splatted to all lanes with
; insertelement + shufflevector. The SIMD-enabled checks expect i32x4.shl to
; take the scalar parameter $1 directly.
762; CHECK-LABEL: shl_v4i32:
763; NO-SIMD128-NOT: i32x4
764; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
765; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
766; SIMD128-NEXT: return $pop[[R]]{{$}}
767define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
768  %t = insertelement <4 x i32> undef, i32 %x, i32 0
769  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
770    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
771  %a = shl <4 x i32> %v, %s
772  ret <4 x i32> %a
773}
774
; Shift left by a constant splat of 5. The SIMD-enabled checks expect the
; amount to be materialized once as a scalar i32.const and fed to i32x4.shl.
; The i32.const line is anchored at end of line like its neighbours so that a
; longer constant (for example 50) cannot satisfy it.
775; CHECK-LABEL: shl_const_v4i32:
776; NO-SIMD128-NOT: i32x4
777; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
778; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
779; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
780; SIMD128-NEXT: return $pop[[R]]{{$}}
781define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
782  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
783  ret <4 x i32> %a
784}
785
; Shift left by a per-lane vector amount (not a splat). The SIMD-enabled checks
; expect scalarized code: extract a lane from each operand, i32.shl, then
; rebuild the vector with splat / replace_lane. Only the first and last lanes
; are matched; the two middle lanes are skipped by the non-NEXT directive.
786; CHECK-LABEL: shl_vec_v4i32:
787; NO-SIMD128-NOT: i32x4
788; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
789; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
790; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
791; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
792; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
793; Skip 2 lanes
794; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
795; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
796; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
797; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
798; SIMD128-NEXT: return $pop[[R]]{{$}}
799define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
800  %a = shl <4 x i32> %v, %x
801  ret <4 x i32> %a
802}
803
; Arithmetic shift right by a scalar amount splatted to all lanes. The
; SIMD-enabled checks expect i32x4.shr_s to take the scalar parameter $1
; directly.
804; CHECK-LABEL: shr_s_v4i32:
805; NO-SIMD128-NOT: i32x4
806; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
807; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
808; SIMD128-NEXT: return $pop[[R]]{{$}}
809define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
810  %t = insertelement <4 x i32> undef, i32 %x, i32 0
811  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
812    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
813  %a = ashr <4 x i32> %v, %s
814  ret <4 x i32> %a
815}
816
; Arithmetic shift right by a per-lane vector amount. The SIMD-enabled checks
; expect scalarized code (extract_lane, i32.shr_s, splat / replace_lane). Only
; the first and last lanes are matched; the two middle lanes are skipped.
817; CHECK-LABEL: shr_s_vec_v4i32:
818; NO-SIMD128-NOT: i32x4
819; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
820; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
821; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
822; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
823; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
824; Skip 2 lanes
825; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
826; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
827; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
828; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
829; SIMD128-NEXT: return $pop[[R]]{{$}}
830define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
831  %a = ashr <4 x i32> %v, %x
832  ret <4 x i32> %a
833}
834
; Logical shift right by a scalar amount splatted to all lanes. The
; SIMD-enabled checks expect i32x4.shr_u to take the scalar parameter $1
; directly.
835; CHECK-LABEL: shr_u_v4i32:
836; NO-SIMD128-NOT: i32x4
837; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
838; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
839; SIMD128-NEXT: return $pop[[R]]{{$}}
840define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
841  %t = insertelement <4 x i32> undef, i32 %x, i32 0
842  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
843    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
844  %a = lshr <4 x i32> %v, %s
845  ret <4 x i32> %a
846}
847
; Logical shift right by a per-lane vector amount. The SIMD-enabled checks
; expect scalarized code (extract_lane, i32.shr_u, splat / replace_lane). Only
; the first and last lanes are matched; the two middle lanes are skipped.
848; CHECK-LABEL: shr_u_vec_v4i32:
849; NO-SIMD128-NOT: i32x4
850; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
851; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
852; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
853; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
854; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
855; Skip 2 lanes
856; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
857; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
858; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
859; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
860; SIMD128-NEXT: return $pop[[R]]{{$}}
861define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
862  %a = lshr <4 x i32> %v, %x
863  ret <4 x i32> %a
864}
865
; Bitwise AND of two <4 x i32> vectors. The SIMD-enabled checks expect the
; lane-agnostic v128.and.
866; CHECK-LABEL: and_v4i32:
867; NO-SIMD128-NOT: v128
868; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
869; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
870; SIMD128-NEXT: return $pop[[R]]{{$}}
871define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
872  %a = and <4 x i32> %x, %y
873  ret <4 x i32> %a
874}
875
; Bitwise OR of two <4 x i32> vectors. The SIMD-enabled checks expect the
; lane-agnostic v128.or.
876; CHECK-LABEL: or_v4i32:
877; NO-SIMD128-NOT: v128
878; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
879; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
880; SIMD128-NEXT: return $pop[[R]]{{$}}
881define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
882  %a = or <4 x i32> %x, %y
883  ret <4 x i32> %a
884}
885
; Bitwise XOR of two <4 x i32> vectors. The SIMD-enabled checks expect the
; lane-agnostic v128.xor.
886; CHECK-LABEL: xor_v4i32:
887; NO-SIMD128-NOT: v128
888; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
889; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
890; SIMD128-NEXT: return $pop[[R]]{{$}}
891define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
892  %a = xor <4 x i32> %x, %y
893  ret <4 x i32> %a
894}
895
; Bitwise NOT written as xor with an all-ones vector. The SIMD-enabled checks
; expect a single v128.not.
896; CHECK-LABEL: not_v4i32:
897; NO-SIMD128-NOT: v128
898; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
899; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
900; SIMD128-NEXT: return $pop[[R]]{{$}}
901define <4 x i32> @not_v4i32(<4 x i32> %x) {
902  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
903  ret <4 x i32> %a
904}
905
; x & ~y, with the inversion written as xor with all-ones. Expectations differ
; per run: default isel with +unimplemented-simd128 expects one v128.andnot,
; the -fast-isel run expects separate v128.not and v128.and, and the plain
; +simd128 runs must not emit v128.andnot at all.
906; CHECK-LABEL: andnot_v4i32:
907; SIMD128-VM-NOT: v128.andnot
908; NO-SIMD128-NOT: v128
909; SIMD128-NEXT: .functype andnot_v4i32 (v128, v128) -> (v128){{$}}
910; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
911; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
912; SIMD128-FAST-NEXT: v128.not
913; SIMD128-FAST-NEXT: v128.and
914; SIMD128-FAST-NEXT: return
915define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) {
916 %inv_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
917 %a = and <4 x i32> %x, %inv_y
918 ret <4 x i32> %a
919}
920
; Bit-level select written as (c & v1) | (~c & v2). Default isel with
; +unimplemented-simd128 expects one v128.bitselect with operands $1, $2, $0
; (v1, v2, mask); the -fast-isel run expects the unfused not/and/and/or
; sequence.
921; CHECK-LABEL: bitselect_v4i32:
922; NO-SIMD128-NOT: v128
923; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
924; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
925; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
926; SIMD128-FAST-NEXT: v128.not
927; SIMD128-FAST-NEXT: v128.and
928; SIMD128-FAST-NEXT: v128.and
929; SIMD128-FAST-NEXT: v128.or
930; SIMD128-FAST-NEXT: return
931define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
932  %masked_v1 = and <4 x i32> %c, %v1
933  %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
934  %masked_v2 = and <4 x i32> %inv_mask, %v2
935  %a = or <4 x i32> %masked_v2, %masked_v1
936  ret <4 x i32> %a
937}
938
939; ==============================================================================
940; 2 x i64
941; ==============================================================================
; Lane-wise add of two <2 x i64> vectors. The SIMD-enabled checks expect a
; single i64x2.add.
942; CHECK-LABEL: add_v2i64:
943; NO-SIMD128-NOT: i64x2
944; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
945; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
946; SIMD128-NEXT: return $pop[[R]]{{$}}
947define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
948  %a = add <2 x i64> %x, %y
949  ret <2 x i64> %a
950}
951
; Lane-wise subtract of two <2 x i64> vectors. The SIMD-enabled checks expect a
; single i64x2.sub.
952; CHECK-LABEL: sub_v2i64:
953; NO-SIMD128-NOT: i64x2
954; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
955; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
956; SIMD128-NEXT: return $pop[[R]]{{$}}
957define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
958  %a = sub <2 x i64> %x, %y
959  ret <2 x i64> %a
960}
961
962; There is no i64x2.mul instruction in the spec, so a v2i64 multiply must be
; scalarized: the checks require that i64x2.mul never appears and that lanes
; are extracted and multiplied with the scalar i64.mul instead.
963; CHECK-LABEL: mul_v2i64:
964; NO-SIMD128-NOT: i64x2
965; SIMD128-NOT: i64x2.mul
966; SIMD128: i64x2.extract_lane
967; SIMD128: i64.mul
968define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) {
969  %a = mul <2 x i64> %x, %y
970  ret <2 x i64> %a
971}
972
; Integer negation written as 0 - x. The SIMD-enabled checks expect a single
; i64x2.neg.
973; CHECK-LABEL: neg_v2i64:
974; NO-SIMD128-NOT: i64x2
975; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
976; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
977; SIMD128-NEXT: return $pop[[R]]{{$}}
978define <2 x i64> @neg_v2i64(<2 x i64> %x) {
979  %a = sub <2 x i64> <i64 0, i64 0>, %x
980  ret <2 x i64> %a
981}
982
; Shift left by an i32 amount that is zero-extended to i64 and splatted. The
; SIMD-enabled checks expect i64x2.shl to take the i32 parameter $1 directly,
; with no extension instruction in between.
983; CHECK-LABEL: shl_v2i64:
984; NO-SIMD128-NOT: i64x2
985; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
986; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
987; SIMD128-NEXT: return $pop[[R]]{{$}}
988define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
989  %x2 = zext i32 %x to i64
990  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
991  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
992  %a = shl <2 x i64> %v, %s
993  ret <2 x i64> %a
994}
995
; Same as the zext variant but the i32 amount is sign-extended before the
; splat. The SIMD-enabled checks still expect i64x2.shl to take $1 directly.
996; CHECK-LABEL: shl_sext_v2i64:
997; NO-SIMD128-NOT: i64x2
998; SIMD128-NEXT: .functype shl_sext_v2i64 (v128, i32) -> (v128){{$}}
999; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
1000; SIMD128-NEXT: return $pop[[R]]{{$}}
1001define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) {
1002  %x2 = sext i32 %x to i64
1003  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1004  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1005  %a = shl <2 x i64> %v, %s
1006  ret <2 x i64> %a
1007}
1008
; Shift left by a splatted i64 amount (no extension in the IR). The
; SIMD-enabled checks expect the amount to be narrowed with i32.wrap_i64 before
; it is passed to i64x2.shl.
1009; CHECK-LABEL: shl_noext_v2i64:
1010; NO-SIMD128-NOT: i64x2
1011; SIMD128-NEXT: .functype shl_noext_v2i64 (v128, i64) -> (v128){{$}}
1012; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1013; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1014; SIMD128-NEXT: return $pop[[R]]{{$}}
1015define <2 x i64> @shl_noext_v2i64(<2 x i64> %v, i64 %x) {
1016  %t = insertelement <2 x i64> undef, i64 %x, i32 0
1017  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1018  %a = shl <2 x i64> %v, %s
1019  ret <2 x i64> %a
1020}
1021
; Shift left by a constant splat of 5. The SIMD-enabled checks expect the
; amount to be materialized as a scalar i32.const and fed to i64x2.shl.
1022; CHECK-LABEL: shl_const_v2i64:
1023; NO-SIMD128-NOT: i64x2
1024; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
1025; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1026; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1027; SIMD128-NEXT: return $pop[[R]]{{$}}
1028define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
1029  %a = shl <2 x i64> %v, <i64 5, i64 5>
1030  ret <2 x i64> %a
1031}
1032
; Shift left by a per-lane vector amount. The SIMD-enabled checks expect fully
; scalarized code for both lanes: extract_lane from each operand, i64.shl, then
; splat for lane 0 and replace_lane for lane 1.
1033; CHECK-LABEL: shl_vec_v2i64:
1034; NO-SIMD128-NOT: i64x2
1035; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
1036; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1037; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1038; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1039; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1040; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1041; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1042; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1043; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1044; SIMD128-NEXT: return $pop[[R]]{{$}}
1045define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
1046  %a = shl <2 x i64> %v, %x
1047  ret <2 x i64> %a
1048}
1049
; Arithmetic shift right by an i32 amount that is zero-extended to i64 and
; splatted. The SIMD-enabled checks expect i64x2.shr_s to take $1 directly.
1050; CHECK-LABEL: shr_s_v2i64:
1051; NO-SIMD128-NOT: i64x2
1052; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
1053; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
1054; SIMD128-NEXT: return $pop[[R]]{{$}}
1055define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
1056  %x2 = zext i32 %x to i64
1057  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1058  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1059  %a = ashr <2 x i64> %v, %s
1060  ret <2 x i64> %a
1061}
1062
; Arithmetic shift right by an i32 amount that is sign-extended to i64 and
; splatted. The SIMD-enabled checks expect i64x2.shr_s to take $1 directly.
1063; CHECK-LABEL: shr_s_sext_v2i64:
1064; NO-SIMD128-NOT: i64x2
1065; SIMD128-NEXT: .functype shr_s_sext_v2i64 (v128, i32) -> (v128){{$}}
1066; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
1067; SIMD128-NEXT: return $pop[[R]]{{$}}
1068define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) {
1069  %x2 = sext i32 %x to i64
1070  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1071  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1072  %a = ashr <2 x i64> %v, %s
1073  ret <2 x i64> %a
1074}
1075
; Arithmetic shift right by a splatted i64 amount. The SIMD-enabled checks
; expect the amount to be narrowed with i32.wrap_i64 before i64x2.shr_s.
1076; CHECK-LABEL: shr_s_noext_v2i64:
1077; NO-SIMD128-NOT: i64x2
1078; SIMD128-NEXT: .functype shr_s_noext_v2i64 (v128, i64) -> (v128){{$}}
1079; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1080; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1081; SIMD128-NEXT: return $pop[[R]]{{$}}
1082define <2 x i64> @shr_s_noext_v2i64(<2 x i64> %v, i64 %x) {
1083  %t = insertelement <2 x i64> undef, i64 %x, i32 0
1084  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1085  %a = ashr <2 x i64> %v, %s
1086  ret <2 x i64> %a
1087}
1088
; Arithmetic shift right by a constant splat of 5. The SIMD-enabled checks
; expect a scalar i32.const feeding i64x2.shr_s.
1089; CHECK-LABEL: shr_s_const_v2i64:
1090; NO-SIMD128-NOT: i64x2
1091; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
1092; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1093; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1094; SIMD128-NEXT: return $pop[[R]]{{$}}
1095define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) {
1096  %a = ashr <2 x i64> %v, <i64 5, i64 5>
1097  ret <2 x i64> %a
1098}
1099
; Arithmetic shift right by a per-lane vector amount. The SIMD-enabled checks
; expect fully scalarized code for both lanes (extract_lane, i64.shr_s, then
; splat / replace_lane).
1100; CHECK-LABEL: shr_s_vec_v2i64:
1101; NO-SIMD128-NOT: i64x2
1102; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
1103; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1104; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1105; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1106; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1107; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1108; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1109; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1110; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1111; SIMD128-NEXT: return $pop[[R]]{{$}}
1112define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
1113  %a = ashr <2 x i64> %v, %x
1114  ret <2 x i64> %a
1115}
1116
; Logical shift right by an i32 amount that is zero-extended to i64 and
; splatted. The SIMD-enabled checks expect i64x2.shr_u to take $1 directly.
1117; CHECK-LABEL: shr_u_v2i64:
1118; NO-SIMD128-NOT: i64x2
1119; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
1120; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
1121; SIMD128-NEXT: return $pop[[R]]{{$}}
1122define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
1123  %x2 = zext i32 %x to i64
1124  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1125  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1126  %a = lshr <2 x i64> %v, %s
1127  ret <2 x i64> %a
1128}
1129
; Logical shift right by an i32 amount that is sign-extended to i64 and
; splatted. The SIMD-enabled checks expect i64x2.shr_u to take $1 directly.
1130; CHECK-LABEL: shr_u_sext_v2i64:
1131; NO-SIMD128-NOT: i64x2
1132; SIMD128-NEXT: .functype shr_u_sext_v2i64 (v128, i32) -> (v128){{$}}
1133; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
1134; SIMD128-NEXT: return $pop[[R]]{{$}}
1135define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) {
1136  %x2 = sext i32 %x to i64
1137  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
1138  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1139  %a = lshr <2 x i64> %v, %s
1140  ret <2 x i64> %a
1141}
1142
; Logical shift right by a splatted i64 amount. The SIMD-enabled checks expect
; the amount to be narrowed with i32.wrap_i64 before i64x2.shr_u.
1143; CHECK-LABEL: shr_u_noext_v2i64:
1144; NO-SIMD128-NOT: i64x2
1145; SIMD128-NEXT: .functype shr_u_noext_v2i64 (v128, i64) -> (v128){{$}}
1146; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
1147; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1148; SIMD128-NEXT: return $pop[[R]]{{$}}
1149define <2 x i64> @shr_u_noext_v2i64(<2 x i64> %v, i64 %x) {
1150  %t = insertelement <2 x i64> undef, i64 %x, i32 0
1151  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1152  %a = lshr <2 x i64> %v, %s
1153  ret <2 x i64> %a
1154}
1155
; Logical shift right by a constant splat of 5. The SIMD-enabled checks expect
; a scalar i32.const feeding i64x2.shr_u.
1156; CHECK-LABEL: shr_u_const_v2i64:
1157; NO-SIMD128-NOT: i64x2
1158; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
1159; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
1160; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1161; SIMD128-NEXT: return $pop[[R]]{{$}}
1162define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) {
1163  %a = lshr <2 x i64> %v, <i64 5, i64 5>
1164  ret <2 x i64> %a
1165}
1166
; Logical shift right by a per-lane vector amount. The SIMD-enabled checks
; expect fully scalarized code for both lanes (extract_lane, i64.shr_u, then
; splat / replace_lane).
1167; CHECK-LABEL: shr_u_vec_v2i64:
1168; NO-SIMD128-NOT: i64x2
1169; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
1170; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
1171; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
1172; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
1173; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
1174; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
1175; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
1176; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
1177; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
1178; SIMD128-NEXT: return $pop[[R]]{{$}}
1179define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
1180  %a = lshr <2 x i64> %v, %x
1181  ret <2 x i64> %a
1182}
1183
; Bitwise AND of two <2 x i64> vectors. The SIMD-enabled checks expect the
; lane-agnostic v128.and.
1184; CHECK-LABEL: and_v2i64:
1185; NO-SIMD128-NOT: v128
1186; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
1187; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
1188; SIMD128-NEXT: return $pop[[R]]{{$}}
1189define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) {
1190  %a = and <2 x i64> %x, %y
1191  ret <2 x i64> %a
1192}
1193
; Bitwise OR of two <2 x i64> vectors. The SIMD-enabled checks expect the
; lane-agnostic v128.or.
1194; CHECK-LABEL: or_v2i64:
1195; NO-SIMD128-NOT: v128
1196; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
1197; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
1198; SIMD128-NEXT: return $pop[[R]]{{$}}
1199define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) {
1200  %a = or <2 x i64> %x, %y
1201  ret <2 x i64> %a
1202}
1203
; Bitwise XOR of two <2 x i64> vectors. The SIMD-enabled checks expect the
; lane-agnostic v128.xor.
1204; CHECK-LABEL: xor_v2i64:
1205; NO-SIMD128-NOT: v128
1206; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
1207; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
1208; SIMD128-NEXT: return $pop[[R]]{{$}}
1209define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) {
1210  %a = xor <2 x i64> %x, %y
1211  ret <2 x i64> %a
1212}
1213
; Bitwise NOT written as xor with an all-ones vector. The SIMD-enabled checks
; expect a single v128.not.
1214; CHECK-LABEL: not_v2i64:
1215; NO-SIMD128-NOT: v128
1216; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
1217; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
1218; SIMD128-NEXT: return $pop[[R]]{{$}}
1219define <2 x i64> @not_v2i64(<2 x i64> %x) {
1220  %a = xor <2 x i64> %x, <i64 -1, i64 -1>
1221  ret <2 x i64> %a
1222}
1223
; x & ~y, with the inversion written as xor with all-ones. Expectations differ
; per run: default isel with +unimplemented-simd128 expects one v128.andnot,
; the -fast-isel run expects separate v128.not and v128.and, and the plain
; +simd128 runs must not emit v128.andnot at all.
1224; CHECK-LABEL: andnot_v2i64:
1225; SIMD128-VM-NOT: v128.andnot
1226; NO-SIMD128-NOT: v128
1227; SIMD128-NEXT: .functype andnot_v2i64 (v128, v128) -> (v128){{$}}
1228; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}}
1229; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1230; SIMD128-FAST-NEXT: v128.not
1231; SIMD128-FAST-NEXT: v128.and
1232; SIMD128-FAST-NEXT: return
1233define <2 x i64> @andnot_v2i64(<2 x i64> %x, <2 x i64> %y) {
1234 %inv_y = xor <2 x i64> %y, <i64 -1, i64 -1>
1235 %a = and <2 x i64> %x, %inv_y
1236 ret <2 x i64> %a
1237}
1238
; Bit-level select written as (v1 & c) | (v2 & ~c); note the and operands are
; commuted relative to the v4i32 variant. Default isel with
; +unimplemented-simd128 expects one v128.bitselect with operands $1, $2, $0;
; the -fast-isel run expects the unfused not/and/and/or sequence.
1239; CHECK-LABEL: bitselect_v2i64:
1240; NO-SIMD128-NOT: v128
1241; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
1242; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
1243; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1244; SIMD128-FAST-NEXT: v128.not
1245; SIMD128-FAST-NEXT: v128.and
1246; SIMD128-FAST-NEXT: v128.and
1247; SIMD128-FAST-NEXT: v128.or
1248; SIMD128-FAST-NEXT: return
1249define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
1250  %masked_v1 = and <2 x i64> %v1, %c
1251  %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c
1252  %masked_v2 = and <2 x i64> %v2, %inv_mask
1253  %a = or <2 x i64> %masked_v2, %masked_v1
1254  ret <2 x i64> %a
1255}
1256
1257; ==============================================================================
1258; 4 x float
1259; ==============================================================================
; Floating-point negation written as fsub nsz 0.0, x. The SIMD-enabled checks
; expect a single f32x4.neg.
1260; CHECK-LABEL: neg_v4f32:
1261; NO-SIMD128-NOT: f32x4
1262; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
1263; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
1264; SIMD128-NEXT: return $pop[[R]]{{$}}
1265define <4 x float> @neg_v4f32(<4 x float> %x) {
1266  ; nsz makes this semantically equivalent to flipping sign bit
1267  %a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
1268  ret <4 x float> %a
1269}
1270
; Floating-point absolute value via the llvm.fabs intrinsic. The SIMD-enabled
; checks expect a single f32x4.abs.
1271; CHECK-LABEL: abs_v4f32:
1272; NO-SIMD128-NOT: f32x4
1273; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
1274; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
1275; SIMD128-NEXT: return $pop[[R]]{{$}}
1276declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
1277define <4 x float> @abs_v4f32(<4 x float> %x) {
1278  %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
1279  ret <4 x float> %a
1280}
1281
; Minimum against a constant splat of 5.0 written with an unordered compare:
; select(x ule 5.0, x, 5.0). The SIMD-enabled checks expect the constant to be
; splatted and combined with $0 by f32x4.min. The const and splat lines are
; anchored at end of line so a longer literal or extra operand cannot match.
1282; CHECK-LABEL: min_unordered_v4f32:
1283; NO-SIMD128-NOT: f32x4
1284; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
1285; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1286; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1287; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1288; SIMD128-NEXT: return $pop[[R]]{{$}}
1289define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
1290  %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
1291  %a = select <4 x i1> %cmps, <4 x float> %x,
1292    <4 x float> <float 5., float 5., float 5., float 5.>
1293  ret <4 x float> %a
1294}
1295
; Maximum against a constant splat of 5.0 written with an unordered compare:
; select(x uge 5.0, x, 5.0). The SIMD-enabled checks expect the constant to be
; splatted and combined with $0 by f32x4.max. The const and splat lines are
; anchored at end of line so a longer literal or extra operand cannot match.
1296; CHECK-LABEL: max_unordered_v4f32:
1297; NO-SIMD128-NOT: f32x4
1298; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
1299; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1300; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1301; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1302; SIMD128-NEXT: return $pop[[R]]{{$}}
1303define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
1304  %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
1305  %a = select <4 x i1> %cmps, <4 x float> %x,
1306    <4 x float> <float 5., float 5., float 5., float 5.>
1307  ret <4 x float> %a
1308}
1309
; Minimum against a constant splat of 5.0 written with an ordered compare and
; the constant on the left: select(5.0 ole x, 5.0, x). The SIMD-enabled checks
; expect f32x4.min of $0 and the splatted constant. The const and splat lines
; are anchored at end of line so a longer literal or extra operand cannot match.
1310; CHECK-LABEL: min_ordered_v4f32:
1311; NO-SIMD128-NOT: f32x4
1312; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
1313; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1314; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1315; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1316; SIMD128-NEXT: return $pop[[R]]{{$}}
1317define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
1318  %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
1319  %a = select <4 x i1> %cmps,
1320    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1321  ret <4 x float> %a
1322}
1323
; Maximum against a constant splat of 5.0 written with an ordered compare and
; the constant on the left: select(5.0 oge x, 5.0, x). The SIMD-enabled checks
; expect f32x4.max of $0 and the splatted constant. The const and splat lines
; are anchored at end of line so a longer literal or extra operand cannot match.
1324; CHECK-LABEL: max_ordered_v4f32:
1325; NO-SIMD128-NOT: f32x4
1326; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1327; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1328; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1329; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1330; SIMD128-NEXT: return $pop[[R]]{{$}}
1331define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
1332  %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
1333  %a = select <4 x i1> %cmps,
1334    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1335  ret <4 x float> %a
1336}
1337
; Call to the llvm.minimum intrinsic on two vector arguments. The SIMD-enabled
; checks expect a single f32x4.min.
1338; CHECK-LABEL: min_intrinsic_v4f32:
1339; NO-SIMD128-NOT: f32x4
1340; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1341; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1342; SIMD128-NEXT: return $pop[[R]]{{$}}
1343declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1344define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1345  %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
1346  ret <4 x float> %a
1347}
1348
; Call to the llvm.minnum intrinsic carrying the nnan fast-math flag. The
; SIMD-enabled checks expect it to select f32x4.min as well.
1349; CHECK-LABEL: minnum_intrinsic_v4f32:
1350; NO-SIMD128-NOT: f32x4
1351; SIMD128-NEXT: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1352; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1353; SIMD128-NEXT: return $pop[[R]]{{$}}
1354declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
1355define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1356  %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
1357  ret <4 x float> %a
1358}
1359
; Call to the llvm.maximum intrinsic on two vector arguments. The SIMD-enabled
; checks expect a single f32x4.max.
1360; CHECK-LABEL: max_intrinsic_v4f32:
1361; NO-SIMD128-NOT: f32x4
1362; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1363; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1364; SIMD128-NEXT: return $pop[[R]]{{$}}
1365declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1366define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1367  %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
1368  ret <4 x float> %a
1369}
1370
; Call to the llvm.maxnum intrinsic carrying the nnan fast-math flag. The
; SIMD-enabled checks expect it to select f32x4.max as well.
1371; CHECK-LABEL: maxnum_intrinsic_v4f32:
1372; NO-SIMD128-NOT: f32x4
1373; SIMD128-NEXT: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1374; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1375; SIMD128-NEXT: return $pop[[R]]{{$}}
1376declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
1377define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1378  %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
1379  ret <4 x float> %a
1380}
1381
; llvm.minimum applied to two constant splats (42.0 and 5.0). The SIMD-enabled
; checks expect the call to be folded away: the matched output is just a splat
; of 0x1.4p2 (5.0) followed by the return, with no f32x4.min.
1382; CHECK-LABEL: min_const_intrinsic_v4f32:
1383; NO-SIMD128-NOT: f32x4
1384; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1385; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1386; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1387; SIMD128-NEXT: return $pop[[R]]{{$}}
1388define <4 x float> @min_const_intrinsic_v4f32() {
1389  %a = call <4 x float> @llvm.minimum.v4f32(
1390    <4 x float> <float 42., float 42., float 42., float 42.>,
1391    <4 x float> <float 5., float 5., float 5., float 5.>
1392  )
1393  ret <4 x float> %a
1394}
1395
; llvm.maximum applied to two constant splats (42.0 and 5.0). The SIMD-enabled
; checks expect the call to be folded away: the matched output is just a splat
; of 0x1.5p5 (42.0) followed by the return, with no f32x4.max.
1396; CHECK-LABEL: max_const_intrinsic_v4f32:
1397; NO-SIMD128-NOT: f32x4
1398; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1399; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1400; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1401; SIMD128-NEXT: return $pop[[R]]{{$}}
1402define <4 x float> @max_const_intrinsic_v4f32() {
1403  %a = call <4 x float> @llvm.maximum.v4f32(
1404    <4 x float> <float 42., float 42., float 42., float 42.>,
1405    <4 x float> <float 5., float 5., float 5., float 5.>
1406  )
1407  ret <4 x float> %a
1408}
1409
; Lane-wise fadd of two <4 x float> vectors. The SIMD-enabled checks expect a
; single f32x4.add.
1410; CHECK-LABEL: add_v4f32:
1411; NO-SIMD128-NOT: f32x4
1412; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1413; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1414; SIMD128-NEXT: return $pop[[R]]{{$}}
1415define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
1416  %a = fadd <4 x float> %x, %y
1417  ret <4 x float> %a
1418}
1419
; Lane-wise fsub of two <4 x float> vectors. The SIMD-enabled checks expect a
; single f32x4.sub.
1420; CHECK-LABEL: sub_v4f32:
1421; NO-SIMD128-NOT: f32x4
1422; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1423; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1424; SIMD128-NEXT: return $pop[[R]]{{$}}
1425define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
1426  %a = fsub <4 x float> %x, %y
1427  ret <4 x float> %a
1428}
1429
; Lane-wise fdiv of two <4 x float> vectors. The SIMD-enabled checks expect a
; single f32x4.div.
1430; CHECK-LABEL: div_v4f32:
1431; NO-SIMD128-NOT: f32x4
1432; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1433; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1434; SIMD128-NEXT: return $pop[[R]]{{$}}
1435define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
1436  %a = fdiv <4 x float> %x, %y
1437  ret <4 x float> %a
1438}
1439
; Lane-wise fmul of two <4 x float> vectors. The SIMD-enabled checks expect a
; single f32x4.mul.
1440; CHECK-LABEL: mul_v4f32:
1441; NO-SIMD128-NOT: f32x4
1442; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1443; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1444; SIMD128-NEXT: return $pop[[R]]{{$}}
1445define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
1446  %a = fmul <4 x float> %x, %y
1447  ret <4 x float> %a
1448}
1449
; Square root via the llvm.sqrt intrinsic. The SIMD-enabled checks expect a
; single f32x4.sqrt.
1450; CHECK-LABEL: sqrt_v4f32:
1451; NO-SIMD128-NOT: f32x4
1452; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1453; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1454; SIMD128-NEXT: return $pop[[R]]{{$}}
1455declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1456define <4 x float> @sqrt_v4f32(<4 x float> %x) {
1457  %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1458  ret <4 x float> %a
1459}
1460
1461; ==============================================================================
1462; 2 x double
1463; ==============================================================================
; Floating-point negation written as fsub nsz 0.0, x. The SIMD-enabled checks
; expect a single f64x2.neg.
1464; CHECK-LABEL: neg_v2f64:
1465; NO-SIMD128-NOT: f64x2
1466; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1467; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1468; SIMD128-NEXT: return $pop[[R]]{{$}}
1469define <2 x double> @neg_v2f64(<2 x double> %x) {
1470  ; nsz makes this semantically equivalent to flipping sign bit
1471  %a = fsub nsz <2 x double> <double 0., double 0.>, %x
1472  ret <2 x double> %a
1473}
1474
; Floating-point absolute value via the llvm.fabs intrinsic. The SIMD-enabled
; checks expect a single f64x2.abs.
1475; CHECK-LABEL: abs_v2f64:
1476; NO-SIMD128-NOT: f64x2
1477; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1478; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1479; SIMD128-NEXT: return $pop[[R]]{{$}}
1480declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
1481define <2 x double> @abs_v2f64(<2 x double> %x) {
1482  %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
1483  ret <2 x double> %a
1484}
1485
; Minimum against a constant splat of 5.0 written with an unordered compare:
; select(x ule 5.0, x, 5.0). The SIMD-enabled checks expect the constant to be
; splatted and combined with $0 by f64x2.min. The const and splat lines are
; anchored at end of line so a longer literal or extra operand cannot match.
1486; CHECK-LABEL: min_unordered_v2f64:
1487; NO-SIMD128-NOT: f64x2
1488; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1489; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2{{$}}
1490; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
1491; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1492; SIMD128-NEXT: return $pop[[R]]{{$}}
1493define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
1494  %cmps = fcmp ule <2 x double> %x, <double 5., double 5.>
1495  %a = select <2 x i1> %cmps, <2 x double> %x,
1496    <2 x double> <double 5., double 5.>
1497  ret <2 x double> %a
1498}
1499
1500; CHECK-LABEL: max_unordered_v2f64:
1501; NO-SIMD128-NOT: f64x2
1502; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1503; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1504; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1505; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1506; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
  ; Compare-and-select form of a maximum against a splat of 5.0.
  ; uge holds when %x >= 5.0 or when the operands are unordered (NaN);
  ; those lanes keep %x and the remaining lanes take the constant.
  %x_uge_5 = fcmp uge <2 x double> %x, <double 5.0, double 5.0>
  %max = select <2 x i1> %x_uge_5,
                <2 x double> %x,
                <2 x double> <double 5.0, double 5.0>
  ret <2 x double> %max
}
1513
1514; CHECK-LABEL: min_ordered_v2f64:
1515; NO-SIMD128-NOT: f64x2
1516; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1517; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1518; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1519; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1520; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
  ; Same minimum pattern with the constant on the left-hand side and an
  ; ordered predicate: ole holds only when neither operand is NaN and
  ; 5.0 <= %x. Those lanes take the constant, all others keep %x.
  %five_ole_x = fcmp ole <2 x double> <double 5.0, double 5.0>, %x
  %min = select <2 x i1> %five_ole_x,
                <2 x double> <double 5.0, double 5.0>,
                <2 x double> %x
  ret <2 x double> %min
}
1527
1528; CHECK-LABEL: max_ordered_v2f64:
1529; NO-SIMD128-NOT: f64x2
1530; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
1531; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1532; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1533; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1534; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
  ; Same maximum pattern with the constant on the left-hand side and an
  ; ordered predicate: oge holds only when neither operand is NaN and
  ; 5.0 >= %x. Those lanes take the constant, all others keep %x.
  %five_oge_x = fcmp oge <2 x double> <double 5.0, double 5.0>, %x
  %max = select <2 x i1> %five_oge_x,
                <2 x double> <double 5.0, double 5.0>,
                <2 x double> %x
  ret <2 x double> %max
}
1541
1542; CHECK-LABEL: min_intrinsic_v2f64:
1543; NO-SIMD128-NOT: f64x2
1544; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1545; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1546; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise minimum of two vector arguments via the llvm.minimum intrinsic.
declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  %min = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x,
                                               <2 x double> %y)
  ret <2 x double> %min
}
1552
1553; CHECK-LABEL: max_intrinsic_v2f64:
1554; NO-SIMD128-NOT: f64x2
1555; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1556; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1557; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise maximum of two vector arguments via the llvm.maximum intrinsic.
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
  %max = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x,
                                               <2 x double> %y)
  ret <2 x double> %max
}
1563
1564; CHECK-LABEL: min_const_intrinsic_v2f64:
1565; NO-SIMD128-NOT: f64x2
1566; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
1567; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1568; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1569; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @min_const_intrinsic_v2f64() {
  ; Both operands are constant splats (42.0 and 5.0), so the call has no
  ; runtime inputs at all.
  %min = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 42.0, double 42.0>,
                                               <2 x double> <double 5.0, double 5.0>)
  ret <2 x double> %min
}
1577
1578; CHECK-LABEL: max_const_intrinsic_v2f64:
1579; NO-SIMD128-NOT: f64x2
1580; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
1581; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1582; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1583; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @max_const_intrinsic_v2f64() {
  ; Both operands are constant splats (42.0 and 5.0), so the call has no
  ; runtime inputs at all.
  %max = call <2 x double> @llvm.maximum.v2f64(<2 x double> <double 42.0, double 42.0>,
                                               <2 x double> <double 5.0, double 5.0>)
  ret <2 x double> %max
}
1591
1592; CHECK-LABEL: add_v2f64:
1593; NO-SIMD128-NOT: f64x2
1594; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
1595; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1596; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Plain lane-wise floating-point addition, no fast-math flags.
  %sum = fadd <2 x double> %x, %y
  ret <2 x double> %sum
}
1601
1602; CHECK-LABEL: sub_v2f64:
1603; NO-SIMD128-NOT: f64x2
1604; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
1605; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1606; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Plain lane-wise floating-point subtraction (%x - %y), no fast-math flags.
  %diff = fsub <2 x double> %x, %y
  ret <2 x double> %diff
}
1611
1612; CHECK-LABEL: div_v2f64:
1613; NO-SIMD128-NOT: f64x2
1614; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
1615; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1616; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Plain lane-wise floating-point division (%x / %y), no fast-math flags.
  %quot = fdiv <2 x double> %x, %y
  ret <2 x double> %quot
}
1621
1622; CHECK-LABEL: mul_v2f64:
1623; NO-SIMD128-NOT: f64x2
1624; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
1625; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1626; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
  ; Plain lane-wise floating-point multiplication, no fast-math flags.
  %prod = fmul <2 x double> %x, %y
  ret <2 x double> %prod
}
1631
1632; CHECK-LABEL: sqrt_v2f64:
1633; NO-SIMD128-NOT: f64x2
1634; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
1635; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1636; SIMD128-NEXT: return $pop[[R]]{{$}}
; Lane-wise square root, expressed through the generic sqrt intrinsic.
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
define <2 x double> @sqrt_v2f64(<2 x double> %x) {
  %root = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
  ret <2 x double> %root
}
1642