// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

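// __builtin_elementwise_abs should lower to llvm.fabs.* for floating-point
// operands and to llvm.abs.* for integer operands. The trailing "i1 false"
// on llvm.abs is the is-int-min-poison flag: with it false, abs of the
// minimum signed value wraps to itself instead of producing poison.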
void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                  long long int i1, long long int i2, short si) {
  // CHECK-LABEL: define void @test_builtin_elementwise_abs(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.fabs.f32(float [[F1]])
  f2 = __builtin_elementwise_abs(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.fabs.f64(double [[D1]])
  d2 = __builtin_elementwise_abs(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_abs(vf1);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
  i2 = __builtin_elementwise_abs(i1);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
  vi2 = __builtin_elementwise_abs(vi1);

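  // A const-qualified operand should be accepted and loaded like any other.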
  // CHECK:      [[CVI2:%.+]] = load <8 x i16>, <8 x i16>* %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[CVI2]], i1 false)
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_abs(cvi2);

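  // An operand in a non-default address space is loaded from that address
  // space; the intrinsic call itself is unaffected.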
  // CHECK:      [[IA1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.abs.i32(i32 [[IA1]], i1 false)
  b = __builtin_elementwise_abs(int_as_one);

  // CHECK:   call i32 @llvm.abs.i32(i32 -10, i1 false)
  b = __builtin_elementwise_abs(-10);

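  // An operand narrower than int is promoted to int before the intrinsic is
  // emitted, and the result is truncated back to the original type.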
  // CHECK:      [[SI:%.+]] = load i16, i16* %si.addr, align 2
  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
  // CHECK-NEXT: = trunc i32 [[RES]] to i16
  si = __builtin_elementwise_abs(si);
}

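// __builtin_elementwise_max should lower to llvm.maxnum.* for floating-point
// operands, llvm.smax.* for signed integer operands, and llvm.umax.* for
// unsigned integer operands.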
void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

  // CHECK:      [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

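  // Integer promotions apply: 'a' is promoted to int (value 97), and constant
  // operands are emitted directly as immediate arguments of the call.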
  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

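// __builtin_elementwise_min mirrors the max cases: llvm.minnum.* for
// floating-point operands, llvm.smin.* for signed integer operands, and
// llvm.umin.* for unsigned integer operands.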
void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.minnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK:      [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11, i2);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

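  // With mixed signed/unsigned operands the usual arithmetic conversions
  // apply: unsigned int and long long yield long long, so u1 is
  // zero-extended to i64 and the signed llvm.smin.i64 is selected.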
  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
  u1 = __builtin_elementwise_min(u1, i2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}