// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
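// Check that the __builtin_elementwise_* builtins are lowered to the
// corresponding llvm.* intrinsics for scalar, vector, and _BitInt operands.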

typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

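// Integer abs lowers to llvm.abs with the int-min-is-poison flag set to false;
// floating-point abs lowers to llvm.fabs.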
void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                  long long int i1, long long int i2, short si,
                                  _BitInt(31) bi1, _BitInt(31) bi2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_abs(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.fabs.f32(float [[F1]])
  f2 = __builtin_elementwise_abs(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.fabs.f64(double [[D1]])
  d2 = __builtin_elementwise_abs(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_abs(vf1);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
  i2 = __builtin_elementwise_abs(i1);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
  vi2 = __builtin_elementwise_abs(vi1);

  // CHECK:      [[CVI2:%.+]] = load <8 x i16>, <8 x i16>* %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[CVI2]], i1 false)
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_abs(cvi2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: call i31 @llvm.abs.i31(i31 [[BI1]], i1 false)
  bi2 = __builtin_elementwise_abs(bi1);

  // CHECK:      [[IA1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.abs.i32(i32 [[IA1]], i1 false)
  b = __builtin_elementwise_abs(int_as_one);

  // CHECK:   call i32 @llvm.abs.i32(i32 -10, i1 false)
  b = __builtin_elementwise_abs(-10);

  // CHECK:      [[SI:%.+]] = load i16, i16* %si.addr, align 2
  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
  // CHECK-NEXT: = trunc i32 [[RES]] to i16
  si = __builtin_elementwise_abs(si);
}

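// Saturating add lowers to llvm.sadd.sat for signed operands and llvm.uadd.sat
// for unsigned operands, for scalars, vectors, and _BitInt types.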
void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_add_sat(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_add_sat(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_add_sat(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.uadd.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_add_sat(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_add_sat(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_add_sat(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_add_sat(bu1, bu2);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_add_sat(int_as_one, b);

  // CHECK: call i32 @llvm.sadd.sat.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_add_sat(1, 'a');
}

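// Saturating subtract mirrors add_sat: llvm.ssub.sat for signed operands and
// llvm.usub.sat for unsigned operands.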
void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_sub_sat(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_sub_sat(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_sub_sat(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.usub.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_sub_sat(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_sub_sat(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_sub_sat(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_sub_sat(bu1, bu2);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_sub_sat(int_as_one, b);

  // CHECK: call i32 @llvm.ssub.sat.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_sub_sat(1, 'a');
}

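// Elementwise max lowers to llvm.maxnum for floating-point operands and to
// llvm.smax/llvm.umax for signed/unsigned integer operands.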
void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT:  call float @llvm.maxnum.f32(float %0, float %1)
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

  // CHECK:      [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.smax.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_max(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.umax.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_max(bu1, bu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

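// Elementwise min lowers to llvm.minnum for floating-point operands and to
// llvm.smin/llvm.umin for signed/unsigned integer operands; for the mixed
// unsigned/long long case below, the narrower operand is extended before the
// compare, as the CHECK lines show.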
void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT:  call float @llvm.minnum.f32(float %0, float %1)
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK:      [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11, i2);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
  u1 = __builtin_elementwise_min(u1, i2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.smin.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_min(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.umin.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_min(bu1, bu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}

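// __builtin_elementwise_ceil lowers to llvm.ceil for scalar and vector floats.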
void test_builtin_elementwise_ceil(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_ceil(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.ceil.f32(float [[F1]])
  f2 = __builtin_elementwise_ceil(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.ceil.f64(double [[D1]])
  d2 = __builtin_elementwise_ceil(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_ceil(vf1);
}

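// __builtin_elementwise_floor lowers to llvm.floor for scalar and vector floats.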
void test_builtin_elementwise_floor(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_floor(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.floor.f32(float [[F1]])
  f2 = __builtin_elementwise_floor(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.floor.f64(double [[D1]])
  d2 = __builtin_elementwise_floor(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.floor.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_floor(vf1);
}

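// __builtin_elementwise_roundeven lowers to llvm.roundeven for scalar and vector floats.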
void test_builtin_elementwise_roundeven(float f1, float f2, double d1, double d2,
                                        float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_roundeven(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.roundeven.f32(float [[F1]])
  f2 = __builtin_elementwise_roundeven(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.roundeven.f64(double [[D1]])
  d2 = __builtin_elementwise_roundeven(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_roundeven(vf1);
}

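// __builtin_elementwise_trunc lowers to llvm.trunc for scalar and vector floats.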
void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.trunc.f32(float [[F1]])
  f2 = __builtin_elementwise_trunc(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.trunc.f64(double [[D1]])
  d2 = __builtin_elementwise_trunc(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_trunc(vf1);
}