// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
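// This test checks that the __builtin_elementwise_* builtins lower directly to
// the corresponding LLVM intrinsics for scalar, vector, _BitInt, and
// address-space-qualified operands.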

typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                  long long int i1, long long int i2, short si,
                                  _BitInt(31) bi1, _BitInt(31) bi2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_abs(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.fabs.f32(float [[F1]])
  f2 = __builtin_elementwise_abs(f1);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.fabs.f64(double [[D1]])
  d2 = __builtin_elementwise_abs(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_abs(vf1);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
  i2 = __builtin_elementwise_abs(i1);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
  vi2 = __builtin_elementwise_abs(vi1);

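  // A const-qualified vector operand is accepted and still lowers to the
  // elementwise llvm.abs call.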
  // CHECK: [[CVI2:%.+]] = load <8 x i16>, <8 x i16>* %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[CVI2]], i1 false)
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_abs(cvi2);

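  // _BitInt operands use the arbitrary-width integer intrinsic of the same
  // bit width (here i31).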
  // CHECK: [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: call i31 @llvm.abs.i31(i31 [[BI1]], i1 false)
  bi2 = __builtin_elementwise_abs(bi1);

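  // An operand in a non-default address space is loaded from addrspace(1) and
  // otherwise handled like a plain int.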
  // CHECK: [[IA1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.abs.i32(i32 [[IA1]], i1 false)
  b = __builtin_elementwise_abs(int_as_one);

  // CHECK: call i32 @llvm.abs.i32(i32 -10, i1 false)
  b = __builtin_elementwise_abs(-10);

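  // A short operand is promoted to int: sign-extend to i32, apply the i32
  // intrinsic, then truncate the result back to i16.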
  // CHECK: [[SI:%.+]] = load i16, i16* %si.addr, align 2
  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
  // CHECK-NEXT: = trunc i32 [[RES]] to i16
  si = __builtin_elementwise_abs(si);
}

void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_add_sat(
  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_add_sat(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_add_sat(i1, 10);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_add_sat(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.uadd.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_add_sat(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_add_sat(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_add_sat(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_add_sat(bu1, bu2);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_add_sat(int_as_one, b);

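  // Integer and character constants are converted to int, so 'a' is passed as
  // i32 97.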
  // CHECK: call i32 @llvm.sadd.sat.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_add_sat(1, 'a');
}

void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_sub_sat(
  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_sub_sat(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_sub_sat(i1, 10);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_sub_sat(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.usub.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_sub_sat(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_sub_sat(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_sub_sat(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_sub_sat(bu1, bu2);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_sub_sat(int_as_one, b);

  // CHECK: call i32 @llvm.ssub.sat.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_sub_sat(1, 'a');
}

void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

  // CHECK: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.smax.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_max(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.umax.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_max(bu1, bu2);

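  // const-qualified vector operands are accepted in either argument position.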
  // CHECK: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.minnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11, i2);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

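  // Mixed unsigned int / long long operands follow the usual arithmetic
  // conversions: u1 is zero-extended to i64 and the signed llvm.smin.i64 is
  // used for the common long long type.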
  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
  u1 = __builtin_elementwise_min(u1, i2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.smin.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_min(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.umin.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_min(bu1, bu2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}

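// The remaining tests cover the unary floating-point rounding builtins, each
// of which lowers to the matching llvm.* intrinsic for float, double, and
// <4 x float> operands.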
void test_builtin_elementwise_ceil(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_ceil(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.ceil.f32(float [[F1]])
  f2 = __builtin_elementwise_ceil(f1);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.ceil.f64(double [[D1]])
  d2 = __builtin_elementwise_ceil(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_ceil(vf1);
}

void test_builtin_elementwise_floor(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_floor(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.floor.f32(float [[F1]])
  f2 = __builtin_elementwise_floor(f1);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.floor.f64(double [[D1]])
  d2 = __builtin_elementwise_floor(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.floor.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_floor(vf1);
}

void test_builtin_elementwise_roundeven(float f1, float f2, double d1, double d2,
                                        float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_roundeven(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.roundeven.f32(float [[F1]])
  f2 = __builtin_elementwise_roundeven(f1);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.roundeven.f64(double [[D1]])
  d2 = __builtin_elementwise_roundeven(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_roundeven(vf1);
}

void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.trunc.f32(float [[F1]])
  f2 = __builtin_elementwise_trunc(f1);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.trunc.f64(double [[D1]])
  d2 = __builtin_elementwise_trunc(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_trunc(vf1);
}