// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
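
// Checks codegen for the __builtin_elementwise_* builtins: each call should
// lower to the matching LLVM intrinsic (llvm.fabs/llvm.abs, llvm.maxnum,
// llvm.smax/umax, llvm.minnum, llvm.smin/umin, llvm.ceil, llvm.floor,
// llvm.roundeven, llvm.trunc) for scalar, vector, constant, const-qualified,
// address-space, and mixed-type operands.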

typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                  long long int i1, long long int i2, short si) {
  // CHECK-LABEL: define void @test_builtin_elementwise_abs(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.fabs.f32(float [[F1]])
  f2 = __builtin_elementwise_abs(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.fabs.f64(double [[D1]])
  d2 = __builtin_elementwise_abs(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_abs(vf1);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
  i2 = __builtin_elementwise_abs(i1);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
  vi2 = __builtin_elementwise_abs(vi1);

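  // A const-qualified operand is accepted; the intrinsic is emitted on the loaded value as usual.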
  // CHECK:      [[CVI2:%.+]] = load <8 x i16>, <8 x i16>* %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[CVI2]], i1 false)
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_abs(cvi2);

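  // An operand in a non-default address space is loaded from that address space before the call.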
  // CHECK:      [[IA1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.abs.i32(i32 [[IA1]], i1 false)
  b = __builtin_elementwise_abs(int_as_one);

  // CHECK:   call i32 @llvm.abs.i32(i32 -10, i1 false)
  b = __builtin_elementwise_abs(-10);

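  // An operand narrower than int is promoted: sign-extend to i32, call the
  // intrinsic on i32, then truncate the result back to i16.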
  // CHECK:      [[SI:%.+]] = load i16, i16* %si.addr, align 2
  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
  // CHECK-NEXT: = trunc i32 [[RES]] to i16
  si = __builtin_elementwise_abs(si);
}

void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT:  call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

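  // A constant floating-point argument is folded directly into the intrinsic call as an immediate.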
  // CHECK:      [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

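  // Constant arguments undergo the usual arithmetic conversions to int; 'a' is emitted as the i32 immediate 97.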
  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT:  call float @llvm.minnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK:      [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11, i2);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

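  // With mixed unsigned int / long long operands, the unsigned value is
  // zero-extended to the common 64-bit type and the signed smin intrinsic is used.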
  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
  u1 = __builtin_elementwise_min(u1, i2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}

void test_builtin_elementwise_ceil(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_ceil(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.ceil.f32(float [[F1]])
  f2 = __builtin_elementwise_ceil(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.ceil.f64(double [[D1]])
  d2 = __builtin_elementwise_ceil(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_ceil(vf1);
}

void test_builtin_elementwise_floor(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_floor(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.floor.f32(float [[F1]])
  f2 = __builtin_elementwise_floor(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.floor.f64(double [[D1]])
  d2 = __builtin_elementwise_floor(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.floor.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_floor(vf1);
}

void test_builtin_elementwise_roundeven(float f1, float f2, double d1, double d2,
                                        float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_roundeven(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.roundeven.f32(float [[F1]])
  f2 = __builtin_elementwise_roundeven(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.roundeven.f64(double [[D1]])
  d2 = __builtin_elementwise_roundeven(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_roundeven(vf1);
}

void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT:  call float @llvm.trunc.f32(float [[F1]])
  f2 = __builtin_elementwise_trunc(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.trunc.f64(double [[D1]])
  d2 = __builtin_elementwise_trunc(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_trunc(vf1);
}