// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                  long long int i1, long long int i2, short si) {
  // CHECK-LABEL: define void @test_builtin_elementwise_abs(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.fabs.f32(float [[F1]])
  f2 = __builtin_elementwise_abs(f1);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.fabs.f64(double [[D1]])
  d2 = __builtin_elementwise_abs(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_abs(vf1);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
  i2 = __builtin_elementwise_abs(i1);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
  vi2 = __builtin_elementwise_abs(vi1);

  // CHECK: [[CVI2:%.+]] = load <8 x i16>, <8 x i16>* %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[CVI2]], i1 false)
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_abs(cvi2);

  // CHECK: [[IA1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.abs.i32(i32 [[IA1]], i1 false)
  b = __builtin_elementwise_abs(int_as_one);

  // CHECK: call i32 @llvm.abs.i32(i32 -10, i1 false)
  b = __builtin_elementwise_abs(-10);

  // CHECK: [[SI:%.+]] = load i16, i16* %si.addr, align 2
  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
  // CHECK-NEXT: = trunc i32 [[RES]] to i16
  si = __builtin_elementwise_abs(si);
}

void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.maxnum.f32(float %0, float %1)
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

  // CHECK: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.minnum.f32(float %0, float %1)
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11, i2);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
  u1 = __builtin_elementwise_min(u1, i2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}
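
// A minimal usage sketch, assuming the same maxnum/minnum lowering checked
// above: clamping a value chains the two builtins, and the loose CHECK lines
// only look for the two intrinsic calls. The function name is illustrative.
void test_builtin_elementwise_clamp_usage(float f1, float lo, float hi) {
  // CHECK-LABEL: define void @test_builtin_elementwise_clamp_usage(
  // CHECK: call float @llvm.maxnum.f32(
  // CHECK: call float @llvm.minnum.f32(
  f1 = __builtin_elementwise_min(__builtin_elementwise_max(f1, lo), hi);
}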