// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++17 | FileCheck %s

// Tests for the matrix type builtins.

// Alias for the Clang matrix extension type with element type EltTy and the
// given number of rows and columns.
template <typename EltTy, unsigned Rows, unsigned Columns>
using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));

// Aggregate wrapping a single matrix_t member, used to exercise the builtins
// through a struct field.
template <typename EltTy, unsigned Rows, unsigned Columns>
struct MyMatrix {
  matrix_t<EltTy, Rows, Columns> value;
};

// Returns a MyMatrix with the row/column counts swapped whose value is
// __builtin_matrix_transpose applied to M.value.
template <typename T, unsigned R, unsigned C>
MyMatrix<T, C, R> transpose(const MyMatrix<T, R, C> &M) {
  MyMatrix<T, C, R> Res;
  Res.value = __builtin_matrix_transpose(M.value);
  return Res;
}

// Instantiates transpose for a 4 x 10 int matrix. The directives below expect
// the call to the instantiation and, inside it, a load of <40 x i32> fed to
// llvm.matrix.transpose with dimensions 4 and 10.
void test_transpose_template1() {
  // CHECK-LABEL: define{{.*}} void @_Z24test_transpose_template1v()
  // CHECK: call void @_Z9transposeIiLj4ELj10EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.0* sret(%struct.MyMatrix.0) align 4 %M1_t, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(160) %M1)

  // CHECK-LABEL: define linkonce_odr void @_Z9transposeIiLj4ELj10EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(
  // CHECK: [[M:%.*]] = load <40 x i32>, <40 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[M_T:%.*]] = call <40 x i32> @llvm.matrix.transpose.v40i32(<40 x i32> [[M]], i32 4, i32 10)

  MyMatrix<int, 4, 10> M1;
  MyMatrix<int, 10, 4> M1_t = transpose(M1);
}

// Chains three transpose calls on a 7 x 6 double matrix. The directives expect
// the three calls in evaluation order (7x6, then 6x7, then 7x6 again) and then
// verify the bodies of both instantiations, including the store of the
// transposed value into the sret result.
void test_transpose_template2(MyMatrix<double, 7, 6> &M) {
  // CHECK-LABEL: define{{.*}} void @_Z24test_transpose_template2R8MyMatrixIdLj7ELj6EE(
  // CHECK: call void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.2* sret(%struct.MyMatrix.2) align 8 %ref.tmp1, %struct.MyMatrix.1* noundef nonnull align 8 dereferenceable(336) %0)
  // CHECK-NEXT: call void @_Z9transposeIdLj6ELj7EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.1* sret(%struct.MyMatrix.1) align 8 %ref.tmp, %struct.MyMatrix.2* noundef nonnull align 8 dereferenceable(336) %ref.tmp1)
  // CHECK-NEXT: call void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.2* sret(%struct.MyMatrix.2) align 8 %M2_t, %struct.MyMatrix.1* noundef nonnull align 8 dereferenceable(336) %ref.tmp)

  // CHECK-LABEL: define linkonce_odr void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(
  // CHECK: [[M:%.*]] = load <42 x double>, <42 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[M_T:%.*]] = call <42 x double> @llvm.matrix.transpose.v42f64(<42 x double> [[M]], i32 7, i32 6)
  // CHECK-NEXT: [[RES_ADDR:%.*]] = getelementptr inbounds %struct.MyMatrix.2, %struct.MyMatrix.2* %agg.result, i32 0, i32 0
  // CHECK-NEXT: [[RES_ADDR_C:%.*]] = bitcast [42 x double]* [[RES_ADDR]] to <42 x double>*
  // CHECK-NEXT: store <42 x double> [[M_T]], <42 x double>* [[RES_ADDR_C]], align 8

  // CHECK-LABEL: define linkonce_odr void @_Z9transposeIdLj6ELj7EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(
  // CHECK: [[M:%.*]] = load <42 x double>, <42 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[M_T:%.*]] = call <42 x double> @llvm.matrix.transpose.v42f64(<42 x double> [[M]], i32 6, i32 7)
  // CHECK-NEXT: [[RES_ADDR:%.*]] = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
  // CHECK-NEXT: [[RES_ADDR_C:%.*]] = bitcast [42 x double]* [[RES_ADDR]] to <42 x double>*
  // CHECK-NEXT: store <42 x double> [[M_T]], <42 x double>* [[RES_ADDR_C]], align 8

  MyMatrix<double, 6, 7> M2_t = transpose(transpose(transpose(M)));
}

// Declared only; provides a 3 x 3 float matrix prvalue for the test below.
matrix_t<float, 3, 3> get_matrix();

// Transposes the result of an expression (get_matrix() + 2.0) rather than a
// named object. The directives expect the call, an fadd with a splat of 2.0,
// the transpose intrinsic, and the store into the local's storage.
void test_transpose_rvalue() {
  // CHECK-LABEL: define{{.*}} void @_Z21test_transpose_rvaluev()
  // CHECK-NEXT: entry:
  // CHECK-NEXT: [[M_T_ADDR:%.*]] = alloca [9 x float], align 4
  // CHECK-NEXT: [[CALL_RES:%.*]] = call noundef <9 x float> @_Z10get_matrixv()
  // CHECK-NEXT: [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  // CHECK-NEXT: [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[ADD]], i32 3, i32 3)
  // CHECK-NEXT: [[M_T_ADDR_CAST:%.*]] = bitcast [9 x float]* [[M_T_ADDR]] to <9 x float>*
  // CHECK-NEXT: store <9 x float> [[M_T]], <9 x float>* [[M_T_ADDR_CAST]], align 4
  matrix_t<float, 3, 3> m_t = __builtin_matrix_transpose(get_matrix() + 2.0);
}

// Transposes a matrix received by const reference. The directives expect a
// load of <9 x float>, the transpose intrinsic, and a store into %m_t.
void test_transpose_const(const matrix_t<float, 3, 3> &m) {
  // CHECK-LABEL: define{{.*}} void @_Z20test_transpose_constRKu11matrix_typeILm3ELm3EfE(
  // CHECK: [[MATRIX:%.*]] = load <9 x float>, <9 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[MATRIX]], i32 3, i32 3)
  // CHECK-NEXT: [[M_T_ADDR:%.*]] = bitcast [9 x float]* %m_t to <9 x float>*
  // CHECK-NEXT: store <9 x float> [[M_T]], <9 x float>* [[M_T_ADDR]], align 4
  matrix_t<float, 3, 3> m_t = __builtin_matrix_transpose(m);
}

// TODO: Enable once initialization support is defined and implemented for
// matrix types.
80 // void test_lvalue_conversion() { 81 // constexpr double4x4 m = {}; 82 // [] { return __builtin_matrix_transpose(m); } 83 //} 84 85 template <typename T, unsigned R, unsigned C, unsigned S> 86 matrix_t<T, R, C> column_major_load_with_stride(T *Ptr) { 87 return __builtin_matrix_column_major_load(Ptr, R, C, S); 88 } 89 90 void test_column_major_load_with_stride_template_double(double *Ptr) { 91 // CHECK-LABEL: define{{.*}} void @_Z50test_column_major_load_with_stride_template_doublePd(double* noundef %Ptr) 92 // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8 93 // CHECK-NEXT: call noundef <40 x double> @_Z29column_major_load_with_strideIdLj10ELj4ELj15EEu11matrix_typeIXT0_EXT1_ET_EPS0_(double* noundef [[PTR]]) 94 95 // CHECK-LABEL: define linkonce_odr noundef <40 x double> @_Z29column_major_load_with_strideIdLj10ELj4ELj15EEu11matrix_typeIXT0_EXT1_ET_EPS0_(double* noundef %Ptr) 96 // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8 97 // CHECK-NEXT: call <40 x double> @llvm.matrix.column.major.load.v40f64.i64(double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4) 98 99 matrix_t<double, 10, 4> M1 = column_major_load_with_stride<double, 10, 4, 15>(Ptr); 100 } 101 102 void test_column_major_load_with_stride_template_int(int *Ptr) { 103 // CHECK-LABEL: define{{.*}} void @_Z47test_column_major_load_with_stride_template_intPi(i32* noundef %Ptr) #5 { 104 // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 105 // CHECK-NEXT: call noundef <6 x i32> @_Z29column_major_load_with_strideIiLj3ELj2ELj12EEu11matrix_typeIXT0_EXT1_ET_EPS0_(i32* noundef [[PTR]]) 106 107 // CHECK-LABEL: define linkonce_odr noundef <6 x i32> @_Z29column_major_load_with_strideIiLj3ELj2ELj12EEu11matrix_typeIXT0_EXT1_ET_EPS0_(i32* noundef %Ptr) 108 // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 109 // CHECK-NEXT: call <6 x i32> @llvm.matrix.column.major.load.v6i32.i64(i32* align 4 [[PTR]], i64 12, i1 false, i32 3, i32 2) 110 111 matrix_t<int, 3, 2> M1 = 
column_major_load_with_stride<int, 3, 2, 12>(Ptr); 112 } 113 114 struct UnsignedWrapper { 115 char x; 116 operator unsigned() { 117 return x; 118 } 119 }; 120 121 void test_column_major_load_stride_wrapper(int *Ptr, UnsignedWrapper &W) { 122 // CHECK-LABEL: define{{.*}} void @_Z37test_column_major_load_stride_wrapperPiR15UnsignedWrapper(i32* noundef %Ptr, %struct.UnsignedWrapper* noundef nonnull align 1 dereferenceable(1) %W) 123 // CHECK: [[W:%.*]] = load %struct.UnsignedWrapper*, %struct.UnsignedWrapper** %W.addr, align 8 124 // CHECK-NEXT: [[STRIDE:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} [[W]]) 125 // CHECK-NEXT: [[STRIDE_EXT:%.*]] = zext i32 [[STRIDE]] to i64 126 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 127 // CHECK-NEXT: call <4 x i32> @llvm.matrix.column.major.load.v4i32.i64(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2) 128 matrix_t<int, 2, 2> M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, W); 129 } 130 131 constexpr int constexpr3() { return 3; } 132 133 void test_column_major_load_constexpr_num_rows(int *Ptr) { 134 // CHECK-LABEL: define{{.*}} void @_Z41test_column_major_load_constexpr_num_rowsPi(i32* noundef %Ptr) 135 // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 136 // CHECK-NEXT: call <6 x i32> @llvm.matrix.column.major.load.v6i32.i64(i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2) 137 138 matrix_t<int, 3, 2> M1 = __builtin_matrix_column_major_load(Ptr, constexpr3(), 2, 3); 139 } 140 141 constexpr int constexpr1() { return 1; } 142 143 void test_column_major_load_constexpr_num_columns(int *Ptr) { 144 // CHECK-LABEL: define{{.*}} void @_Z44test_column_major_load_constexpr_num_columnsPi(i32* noundef %Ptr) 145 // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 146 // CHECK-NEXT: call <2 x i32> @llvm.matrix.column.major.load.v2i32.i64(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 1) 147 matrix_t<int, 2, 1> M1 = 
__builtin_matrix_column_major_load(Ptr, 2, constexpr1(), 3); 148 } 149 150 template <unsigned N> 151 constexpr int constexpr_plus1() { return N + 1; } 152 153 void test_column_major_load_constexpr_num_columns_temp(int *Ptr) { 154 // CHECK-LABEL: define{{.*}} void @_Z49test_column_major_load_constexpr_num_columns_tempPi(i32* noundef %Ptr) 155 // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 156 // CHECK-NEXT: call <10 x i32> @llvm.matrix.column.major.load.v10i32.i64(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 5) 157 matrix_t<int, 2, 5> M1 = __builtin_matrix_column_major_load(Ptr, 2, constexpr_plus1<4>(), 3); 158 } 159 160 void test_column_major_load_constexpr_stride_constexpr(int *Ptr) { 161 // CHECK-LABEL: define{{.*}} void @_Z49test_column_major_load_constexpr_stride_constexprPi(i32* noundef %Ptr) 162 // CHECK: [[STRIDE:%.*]] = call noundef i32 @_Z10constexpr3v() 163 // CHECK-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64 164 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 165 // CHECK-NEXT: call <4 x i32> @llvm.matrix.column.major.load.v4i32.i64(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2) 166 167 matrix_t<int, 2, 2> M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, constexpr3()); 168 } 169 170 template <typename T> 171 struct remove_pointer { 172 typedef T type; 173 }; 174 175 template <typename T> 176 struct remove_pointer<T *> { 177 typedef typename remove_pointer<T>::type type; 178 }; 179 180 // Same as column_major_load_with_stride, but with the PtrT argument itself begin a pointer type. 
181 template <typename PtrT, unsigned R, unsigned C, unsigned S> 182 matrix_t<typename remove_pointer<PtrT>::type, R, C> column_major_load_with_stride2(PtrT Ptr) { 183 return __builtin_matrix_column_major_load(Ptr, R, C, S); 184 } 185 186 void call_column_major_load_with_stride2(float *Ptr) { 187 matrix_t<float, 2, 2> m = column_major_load_with_stride2<float *, 2, 2, 2>(Ptr); 188 } 189 190 template <typename T, unsigned R, unsigned C, unsigned S> 191 void column_major_store_with_stride(matrix_t<T, R, C> &m, T *Ptr) { 192 __builtin_matrix_column_major_store(m, Ptr, S); 193 } 194 195 void test_column_major_store_with_stride_template_double(double *Ptr) { 196 // CHECK-LABEL: define{{.*}} void @_Z51test_column_major_store_with_stride_template_doublePd(double* noundef %Ptr) 197 // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8 198 // CHECK-NEXT: call void @_Z30column_major_store_with_strideIdLj10ELj4ELj15EEvRu11matrix_typeIXT0_EXT1_ET_EPS0_([40 x double]* noundef nonnull align 8 dereferenceable(320) %M1, double* noundef [[PTR]]) 199 200 // CHECK-LABEL: define linkonce_odr void @_Z30column_major_store_with_strideIdLj10ELj4ELj15EEvRu11matrix_typeIXT0_EXT1_ET_EPS0_([40 x double]* noundef nonnull align 8 dereferenceable(320) %m, double* noundef %Ptr) 201 // CHECK: [[M:%.*]] = load <40 x double>, <40 x double>* {{.*}}, align 8 202 // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8 203 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v40f64.i64(<40 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4) 204 205 matrix_t<double, 10, 4> M1; 206 column_major_store_with_stride<double, 10, 4, 15>(M1, Ptr); 207 } 208 209 void test_column_major_store_with_stride_template_int(int *Ptr) { 210 // CHECK-LABEL: define{{.*}} void @_Z48test_column_major_store_with_stride_template_intPi(i32* noundef %Ptr) 211 // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 212 // CHECK-NEXT: call void 
@_Z30column_major_store_with_strideIiLj3ELj2ELj3EEvRu11matrix_typeIXT0_EXT1_ET_EPS0_([6 x i32]* noundef nonnull align 4 dereferenceable(24) %M1, i32* noundef [[PTR]]) 213 214 // CHECK-LABEL: define linkonce_odr void @_Z30column_major_store_with_strideIiLj3ELj2ELj3EEvRu11matrix_typeIXT0_EXT1_ET_EPS0_([6 x i32]* noundef nonnull align 4 dereferenceable(24) %m, i32* noundef %Ptr) 215 // CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4 216 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 217 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v6i32.i64(<6 x i32> [[M]], i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2) 218 219 matrix_t<int, 3, 2> M1; 220 column_major_store_with_stride<int, 3, 2, 3>(M1, Ptr); 221 } 222 223 void test_column_major_store_stride_wrapper(int *Ptr, UnsignedWrapper &W) { 224 // CHECK-LABEL: define{{.*}} void @_Z38test_column_major_store_stride_wrapperPiR15UnsignedWrapper(i32* noundef %Ptr, %struct.UnsignedWrapper* noundef nonnull align 1 dereferenceable(1) %W) 225 // CHECK: [[M:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4 226 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8 227 // CHECK-NEXT: [[W:%.*]] = load %struct.UnsignedWrapper*, %struct.UnsignedWrapper** %W.addr, align 8 228 // CHECK-NEXT: [[IDX:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} [[W]]) 229 // CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[IDX]] to i64 230 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v4i32.i64(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2) 231 232 matrix_t<int, 2, 2> M1; 233 __builtin_matrix_column_major_store(M1, Ptr, W); 234 } 235 236 void test_column_major_store_constexpr_stride_constexpr(int *Ptr) { 237 // CHECK-LABEL: define{{.*}} void @_Z50test_column_major_store_constexpr_stride_constexprPi(i32* noundef %Ptr) 238 // CHECK: [[M:%.*]] = load <4 x i32>, <4 x i32>* %0, align 4 239 // CHECK-NEXT: [[PTR:%.*]] = load i32*, 
i32** %Ptr.addr, align 8 240 // CHECK-NEXT: [[IDX:%.*]] = call noundef i32 @_Z10constexpr3v() 241 // CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[IDX]] to i64 242 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v4i32.i64(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2) 243 244 matrix_t<int, 2, 2> M; 245 __builtin_matrix_column_major_store(M, Ptr, constexpr3()); 246 } 247