// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                  long long int i1, long long int i2, short si,
                                  _BitInt(31) bi1, _BitInt(31) bi2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_abs(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.fabs.f32(float [[F1]])
  f2 = __builtin_elementwise_abs(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.fabs.f64(double [[D1]])
  d2 = __builtin_elementwise_abs(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_abs(vf1);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
  i2 = __builtin_elementwise_abs(i1);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
  vi2 = __builtin_elementwise_abs(vi1);

  // CHECK:      [[CVI2:%.+]] = load <8 x i16>, <8 x i16>* %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[CVI2]], i1 false)
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_abs(cvi2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: call i31 @llvm.abs.i31(i31 [[BI1]], i1 false)
  bi2 = __builtin_elementwise_abs(bi1);

  // CHECK:      [[IA1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.abs.i32(i32 [[IA1]], i1 false)
  b = __builtin_elementwise_abs(int_as_one);

  // CHECK:      call i32 @llvm.abs.i32(i32 -10, i1 false)
  b = __builtin_elementwise_abs(-10);

  // CHECK:      [[SI:%.+]] = load i16, i16* %si.addr, align 2
  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
  // CHECK-NEXT: = trunc i32 [[RES]] to i16
  si = __builtin_elementwise_abs(si);
}

void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_add_sat(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_add_sat(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_add_sat(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.uadd.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_add_sat(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_add_sat(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_add_sat(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_add_sat(bu1, bu2);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_add_sat(int_as_one, b);

  // CHECK:      call i32 @llvm.sadd.sat.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_add_sat(1, 'a');
}

void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_sub_sat(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_sub_sat(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_sub_sat(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.usub.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_sub_sat(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_sub_sat(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_sub_sat(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_sub_sat(bu1, bu2);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_sub_sat(int_as_one, b);

  // CHECK:      call i32 @llvm.ssub.sat.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_sub_sat(1, 'a');
}

void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.maxnum.f32(float %0, float %1)
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

  // CHECK:      [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.smax.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_max(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.umax.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_max(bu1, bu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

  // CHECK:      call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.minnum.f32(float %0, float %1)
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK:      [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11, i2);

  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
  u1 = __builtin_elementwise_min(u1, i2);

  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
  // CHECK-NEXT: call i31 @llvm.smin.i31(i31 [[BI1]], i31 [[BI2]])
  bi1 = __builtin_elementwise_min(bi1, bi2);

  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
  // CHECK-NEXT: call i55 @llvm.umin.i55(i55 [[BU1]], i55 [[BU2]])
  bu1 = __builtin_elementwise_min(bu1, bu2);

  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}

void test_builtin_elementwise_ceil(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_ceil(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.ceil.f32(float [[F1]])
  f2 = __builtin_elementwise_ceil(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.ceil.f64(double [[D1]])
  d2 = __builtin_elementwise_ceil(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_ceil(vf1);
}

void test_builtin_elementwise_floor(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_floor(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.floor.f32(float [[F1]])
  f2 = __builtin_elementwise_floor(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.floor.f64(double [[D1]])
  d2 = __builtin_elementwise_floor(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.floor.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_floor(vf1);
}

void test_builtin_elementwise_roundeven(float f1, float f2, double d1, double d2,
                                        float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_roundeven(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.roundeven.f32(float [[F1]])
  f2 = __builtin_elementwise_roundeven(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.roundeven.f64(double [[D1]])
  d2 = __builtin_elementwise_roundeven(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_roundeven(vf1);
}

void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.trunc.f32(float [[F1]])
  f2 = __builtin_elementwise_trunc(f1);

  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.trunc.f64(double [[D1]])
  d2 = __builtin_elementwise_trunc(d1);

  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_trunc(vf1);
}