1 // RUN: %clang_cc1 -no-opaque-pointers -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
2 // RUN: %clang_cc1 -no-opaque-pointers -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
3
// Matrix typedefs spelled with the matrix_type attribute (the RUN lines pass
// -fenable-matrix). Neither alias is referenced again in the visible part of
// this file; one uses the typedef spelling, the other the alias-declaration
// spelling.
4 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
5 using fx2x3_t = float __attribute__((matrix_type(2, 3)));
6
// Struct template wrapping a single matrix member. The member alias matrix_t
// is a matrix type whose element type and dimensions come from the template
// parameters, so each instantiation exercises a dependent matrix type.
7 template <typename EltTy, unsigned Rows, unsigned Columns>
8 struct MyMatrix {
9 using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
10
11 matrix_t value;
12 };
13
// Returns A.value + B.value using the built-in matrix '+'. Both operands must
// wrap matrices with the same element type and dimensions; the return type is
// the dependent MyMatrix<...>::matrix_t.
14 template <typename EltTy0, unsigned R0, unsigned C0>
add(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)15 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
16 return A.value + B.value;
17 }
18
// Instantiates add() for MyMatrix<float, 2, 5> and assigns the result back to
// Mat1.value. The expected-IR directives below pin (1) the call to the mangled
// instantiation with both locals passed by reference and (2) the body of that
// instantiation: two <10 x float> loads, one fadd, and the return.
// The directives name %Mat1 and %Mat2 directly, so the local variable names
// must not be changed.
test_add_template()19 void test_add_template() {
20 // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
21 // CHECK: %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
22
23 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
24 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
25 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
26 // CHECK-NEXT: [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
27 // CHECK-NEXT: ret <10 x float> [[RES]]
28
29 MyMatrix<float, 2, 5> Mat1;
30 MyMatrix<float, 2, 5> Mat2;
31 Mat1.value = add(Mat1, Mat2);
32 }
33
// Returns A.value - B.value using the built-in matrix '-'. Mirrors add():
// same template parameters, same dependent return type.
34 template <typename EltTy0, unsigned R0, unsigned C0>
subtract(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)35 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
36 return A.value - B.value;
37 }
38
// Instantiates subtract() for MyMatrix<float, 2, 5> and assigns the result
// back to Mat1.value. The expected IR is the call to the mangled
// instantiation, whose body loads both <10 x float> operands, emits one fsub
// (first operand minus second), and returns it.
// The directives name %Mat1 and %Mat2 directly, so the local variable names
// must not be changed.
test_subtract_template()39 void test_subtract_template() {
40 // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
41 // CHECK: %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
42
43 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
44 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
45 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
46 // CHECK-NEXT: [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
47 // CHECK-NEXT: ret <10 x float> [[RES]]
48
49 MyMatrix<float, 2, 5> Mat1;
50 MyMatrix<float, 2, 5> Mat2;
51 Mat1.value = subtract(Mat1, Mat2);
52 }
53
// Holds an int and converts implicitly to double through a non-explicit,
// non-const conversion operator. Used as the scalar operand in the
// test_DoubleWrapper1_Sub* functions, where the implicit conversion is the
// behaviour under test.
54 struct DoubleWrapper1 {
55 int x;
operator doubleDoubleWrapper156 operator double() {
57 return x;
58 }
59 };
60
// Matrix minus wrapper: m.value - w1, with w1 converted to double by its
// conversion operator. Expected order in the IR: the <90 x double> matrix is
// loaded first, then the conversion operator is called, the scalar is
// broadcast with insertelement + shufflevector, and the fsub has the matrix on
// the left. The result is stored back as <90 x double>.
// The call directive names %w1, so the local's name must stay as is.
test_DoubleWrapper1_Sub1(MyMatrix<double,10,9> & m)61 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
62 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
63 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
64 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
65 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
66 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
67 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
68 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
69
70 DoubleWrapper1 w1;
71 w1.x = 10;
72 m.value = m.value - w1;
73 }
74
// Wrapper minus matrix: w1 - m.value. Operand order is reversed relative to
// test_DoubleWrapper1_Sub1, and the expected IR reflects that: the conversion
// operator is called before the matrix is loaded, and the fsub has the
// broadcast scalar on the left and the matrix on the right.
// The call directive names %w1, so the local's name must stay as is.
test_DoubleWrapper1_Sub2(MyMatrix<double,10,9> & m)75 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
76 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
77 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
78 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
79 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
80 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
81 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
82 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
83
84 DoubleWrapper1 w1;
85 w1.x = 10;
86 m.value = w1 - m.value;
87 }
88
// Same shape as DoubleWrapper1 (an int with an implicit conversion to double)
// but a distinct type, so the addition tests get their own mangled conversion
// function name. Used by the test_DoubleWrapper2_Add* functions.
89 struct DoubleWrapper2 {
90 int x;
operator doubleDoubleWrapper291 operator double() {
92 return x;
93 }
94 };
95
// Matrix plus wrapper: m.value + w2. Expected IR: load the <90 x double>
// matrix, call the DoubleWrapper2 conversion operator, broadcast the scalar
// (insertelement + shufflevector), fadd with the matrix on the left, then
// store the result.
// The call directive names %w2, so the local's name must stay as is.
test_DoubleWrapper2_Add1(MyMatrix<double,10,9> & m)96 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
97 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
98 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.+}}, align 8
99 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
100 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
101 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
102 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
103 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
104
105 DoubleWrapper2 w2;
106 w2.x = 20;
107 m.value = m.value + w2;
108 }
109
// Wrapper plus matrix: w2 + m.value. Reversed operand order relative to
// test_DoubleWrapper2_Add1: the conversion call is expected before the matrix
// load, and the fadd takes the broadcast scalar as its first operand.
// The call directive names %w2, so the local's name must stay as is.
test_DoubleWrapper2_Add2(MyMatrix<double,10,9> & m)110 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
111 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
112 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
113 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
114 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
115 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
116 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
117 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
118
119 DoubleWrapper2 w2;
120 w2.x = 20;
121 m.value = w2 + m.value;
122 }
123
// Holds a char and converts implicitly to int. When combined with a double
// matrix, the tests below expect the int result of the conversion operator to
// be converted to double with sitofp before it is broadcast. Also used as a
// row index type in extract_IntWrapper_idx.
124 struct IntWrapper {
125 char x;
operator intIntWrapper126 operator int() {
127 return x;
128 }
129 };
130
// Matrix plus IntWrapper: m.value + w3. Expected IR: load the <90 x double>
// matrix, call IntWrapper's conversion to int, convert that int to double
// with sitofp, broadcast it (insertelement + shufflevector), fadd with the
// matrix on the left, and store the result.
// The sitofp directive consumes the value captured from the conversion call,
// so the captured name is actually verified instead of relying on a
// hard-coded SSA name.
// The call directive names %w3, so the local's name must stay as is.
test_IntWrapper_Add(MyMatrix<double,10,9> & m)131 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
132 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
133 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
134 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
135 // CHECK: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
136 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
137 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
138 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
139 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
140
141 IntWrapper w3;
142 w3.x = 'c';
143 m.value = m.value + w3;
144 }
145
// IntWrapper minus matrix: w3 - m.value. Expected IR: call IntWrapper's
// conversion to int, immediately convert the result to double with sitofp,
// load the <90 x double> matrix, broadcast the scalar, and fsub with the
// broadcast scalar on the left. The result is stored back.
// The sitofp directive consumes the value captured from the conversion call,
// so the captured name is actually verified instead of relying on a
// hard-coded SSA name.
// The call directive names %w3, so the local's name must stay as is.
test_IntWrapper_Sub(MyMatrix<double,10,9> & m)146 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
147 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
148 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
149 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
150 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
151 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
152 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
153 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
154 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
155
156 IntWrapper w3;
157 w3.x = 'c';
158 m.value = w3 - m.value;
159 }
160
// Returns the matrix product A.value * B.value. The template parameters tie
// A's column count to B's row count (both C0), and the dependent return type
// is the R0 x C1 matrix type.
161 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
multiply(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,C0,C1> & B)162 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
163 return A.value * B.value;
164 }
165
// Multiplies a 2x5 by a 5x2 float matrix through multiply() and returns the
// 2x2 result wrapped in MyMatrix. Expected IR for this function: the call to
// the mangled instantiation, a GEP to the 'value' field of the sret result, a
// bitcast from [4 x float]* to <4 x float>*, the store, and ret void.
// Expected IR for the instantiation: two <10 x float> loads feeding
// llvm.matrix.multiply with dimensions (2, 5, 2).
// The directives name %Mat1, %Mat2, %value and %agg.result, so parameter names
// and the member access must stay as is.
test_multiply_template(MyMatrix<float,2,5> Mat1,MyMatrix<float,5,2> Mat2)166 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
167 MyMatrix<float, 5, 2> Mat2) {
168 // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
169 // CHECK-NEXT: entry:
170 // CHECK-NEXT: [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix.2* noundef nonnull align 4 dereferenceable(40) %Mat2)
171 // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
172 // CHECK-NEXT: [[VALUE_ADDR:%.*]] = bitcast [4 x float]* %value to <4 x float>*
173 // CHECK-NEXT: store <4 x float> [[RES]], <4 x float>* [[VALUE_ADDR]], align 4
174 // CHECK-NEXT: ret void
175 //
176 // CHECK-LABEL: define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
177 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
178 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
179 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
180 // CHECK-NEXT: ret <4 x float> [[RES]]
181
182 MyMatrix<float, 2, 2> Res;
183 Res.value = multiply(Mat1, Mat2);
184 return Res;
185 }
186
// Scalar times matrix: w3 * m.value, with w3 passed in by reference. Expected
// IR: call IntWrapper's conversion to int, immediately convert the result to
// double with sitofp, load the <90 x double> matrix, broadcast the scalar
// (insertelement + shufflevector), and emit an element-wise fmul with the
// broadcast scalar as the first operand. The result is stored back and the
// function returns void.
// The sitofp directive consumes the value captured from the conversion call,
// so the captured name is actually verified instead of relying on a
// hard-coded SSA name.
test_IntWrapper_Multiply(MyMatrix<double,10,9> & m,IntWrapper & w3)187 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
188 // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
189 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* noundef {{.*}})
190 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
191 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
192 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
193 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
194 // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
195 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
196 // CHECK: ret void
197 m.value = w3 * m.value;
198 }
199
// Writes e into element [i][j] of the wrapped matrix using the matrix
// subscript operator. No bounds checking is performed in the source; the
// callers' expected IR shows only an llvm.assume on the flat index in the -O1
// run.
200 template <typename EltTy, unsigned Rows, unsigned Columns>
insert(MyMatrix<EltTy,Rows,Columns> & Mat,EltTy e,unsigned i,unsigned j)201 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
202 Mat.value[i][j] = e;
203 }
204
// Calls insert() on a 2x2 unsigned matrix with runtime row/column indices.
// Expected IR for this function: load the matrix reference and the three
// scalar arguments, then call the mangled instantiation.
// Expected IR for the instantiation: both indices are zero-extended to i64,
// the flat index is computed as j * 2 + i, and the element is written via
// load / insertelement / store on <4 x i32>. Only the -O1 run additionally
// expects an icmp ult against 4 fed into llvm.assume before the load.
// The directives name %Mat.addr, %e.addr, %i.addr and %j.addr, so parameter
// names must stay as is.
test_insert_template1(MyMatrix<unsigned,2,2> & Mat,unsigned e,unsigned i,unsigned j)205 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
206 // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
207 // CHECK: [[MAT_ADDR:%.*]] = load %struct.MyMatrix.3*, %struct.MyMatrix.3** %Mat.addr, align 8
208 // CHECK-NEXT: [[E:%.*]] = load i32, i32* %e.addr, align 4
209 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i.addr, align 4
210 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
211 // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.3* noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
212 // CHECK-NEXT: ret void
213 //
214 // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
215 // CHECK: [[E:%.*]] = load i32, i32* %e.addr, align 4
216 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
217 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
218 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
219 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
220 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
221 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
222 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [4 x i32]* {{.*}} to <4 x i32>*
223 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
224 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
225 // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, <4 x i32>* [[MAT_ADDR]], align 4
226 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
227 // CHECK-NEXT: store <4 x i32> [[MATINS]], <4 x i32>* [[MAT_ADDR]], align 4
228 // CHECK-NEXT: ret void
229
230 insert(Mat, e, i, j);
231 }
232
// Calls insert() on a 3x8 float matrix with the literal indices (2, 5), which
// the expected call passes as i32 constants. The instantiation still computes
// the flat index at run time as j * 3 + i and writes the element via
// load / insertelement / store on <24 x float>. Only the -O1 run additionally
// expects an icmp ult against 24 fed into llvm.assume.
// The directives name %Mat.addr, %e.addr, %i.addr and %j.addr, so parameter
// names must stay as is.
test_insert_template2(MyMatrix<float,3,8> & Mat,float e)233 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
234 // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
235 // CHECK: [[MAT_ADDR:%.*]] = load %struct.MyMatrix.4*, %struct.MyMatrix.4** %Mat.addr, align 8
236 // CHECK-NEXT: [[E:%.*]] = load float, float* %e.addr, align 4
237 // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.4* noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
238 // CHECK-NEXT: ret void
239 //
240 // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
241 // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
242 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
243 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
244 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
245 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
246 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
247 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
248 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [24 x float]* {{.*}} to <24 x float>*
249 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
250 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
251 // CHECK-NEXT: [[MAT:%.*]] = load <24 x float>, <24 x float>* [[MAT_ADDR]], align 4
252 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
253 // CHECK-NEXT: store <24 x float> [[MATINS]], <24 x float>* [[MAT_ADDR]], align 4
254 // CHECK-NEXT: ret void
255
256 insert(Mat, e, 2, 5);
257 }
258
// Returns element [1][0] of the wrapped matrix. Both indices are unsigned
// literals, so the caller's expected IR shows a constant extractelement index
// rather than a computed one.
259 template <typename EltTy, unsigned Rows, unsigned Columns>
extract(MyMatrix<EltTy,Rows,Columns> & Mat)260 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
261 return Mat.value[1u][0u];
262 }
263
// Calls extract() on a by-value 2x2 int matrix wrapper and returns the result.
// Expected IR for this function: a direct call to the mangled instantiation
// followed by ret. Expected IR for the instantiation: load <4 x i32>, then
// extractelement at constant flat index 1 (row 1, column 0), then ret.
test_extract_template(MyMatrix<int,2,2> Mat1)264 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
265 // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
266 // CHECK-NEXT: entry:
267 // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix.5* noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
268 // CHECK-NEXT: ret i32 [[CALL]]
269 //
270 // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
271 // CHECK: [[MAT:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
272 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
273 // CHECK-NEXT: ret i32 [[MATEXT]]
274
275 return extract(Mat1);
276 }
277
// 4x4 double matrix alias shared by the subscript tests below.
278 using double4x4 = double __attribute__((matrix_type(4, 4)));
279
// Function template whose trailing return type is decltype(m[r][c]), i.e. a
// matrix subscript expression that depends on the template parameters. The
// caller's expected IR only pins the mangled name and the returned double*.
// NOTE(review): the body is empty although the return type is non-void;
// presumably intentional because this file is only compiled to IR, never run.
280 template <class R, class C>
matrix_subscript(double4x4 m,R r,C c)281 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
282
// Calls matrix_subscript<int, int>(m, 1, 2) and returns the referenced double.
// Expected IR: load the <16 x double> argument, call the instantiation (whose
// mangled name encodes the nested subscript expression and which returns a
// double*), load a double through the returned pointer, and return it.
test_matrix_subscript(double4x4 m)283 double test_matrix_subscript(double4x4 m) {
284 // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
285 // CHECK: [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}}, align 8
286 // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
287 // CHECK-NEXT: [[RES:%.*]] = load double, double* [[CALL]], align 8
288 // CHECK-NEXT: ret double [[RES]]
289
290 return matrix_subscript(m, 1, 2);
291 }
292
// Binds a const double& return value to the matrix element m[0][1] of a const
// by-value matrix. Expected IR: the parameter is spilled to a [16 x double]
// alloca, the matrix is reloaded, the element at flat index 4 (row 0,
// column 1) is extracted and stored into a separate double temporary, and a
// pointer to that temporary is returned.
// NOTE(review): the returned reference therefore refers to a function-local
// temporary; presumably acceptable here because only the emitted IR is
// inspected.
test_matrix_subscript_reference(const double4x4 m)293 const double &test_matrix_subscript_reference(const double4x4 m) {
294 // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
295 // CHECK-NEXT: entry:
296 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8
297 // CHECK-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
298 // CHECK-NEXT: [[NAMELESS0:%.*]] = bitcast [16 x double]* [[M_ADDR]] to <16 x double>*
299 // CHECK-NEXT: store <16 x double> [[M:%.*]], <16 x double>* [[NAMELESS0]], align 8
300 // CHECK: [[NAMELESS1:%.*]] = load <16 x double>, <16 x double>* [[NAMELESS0]], align 8
301 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
302 // CHECK-NEXT: store double [[MATEXT]], double* [[REF_TMP]], align 8
303 // CHECK: ret double* [[REF_TMP]]
304
305 return m[0][1];
306 }
307
// Holds a char and converts implicitly to unsigned. Used as the column index
// type in extract_IntWrapper_idx, where the expected IR zero-extends the
// resulting index (in contrast to the sign-extended IntWrapper row index).
308 struct UnsignedWrapper {
309 char x;
operator unsignedUnsignedWrapper310 operator unsigned() {
311 return x;
312 }
313 };
314
// Subscripts a 4x4 double matrix with index expressions built from wrapper
// types: row = i + 1 (IntWrapper -> int), column = j - 1 (UnsignedWrapper ->
// unsigned). Expected IR: the int row index is computed with add nsw and
// sign-extended; the unsigned column index is computed with a plain sub and
// zero-extended; the flat index is column * 4 + row; the matrix is loaded
// through the reference and the element is read with extractelement.
// Only the -O1 run additionally expects an icmp ult against 16 fed into
// llvm.assume before the matrix load.
// The directives name %i, %j and %m.addr, so parameter names must stay as is.
extract_IntWrapper_idx(double4x4 & m,IntWrapper i,UnsignedWrapper j)315 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
316 // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
317 // CHECK: [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %i)
318 // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1
319 // CHECK-NEXT: [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
320 // CHECK-NEXT: [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} %j)
321 // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1
322 // CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
323 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
324 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
325 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
326 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
327 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [16 x double]*, [16 x double]** %m.addr, align 8
328 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [16 x double]* [[MAT_ADDR]] to <16 x double>*
329 // CHECK-NEXT: [[MAT:%.*]] = load <16 x double>, <16 x double>* [[MAT_ADDR2]], align 8
330 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
331 // CHECK-NEXT: ret double [[MATEXT]]
332 return m[i + 1][j - 1];
333 }
334
// Alias template producing an R x C matrix of T.
335 template <class T, unsigned R, unsigned C>
336 using matrix_type = T __attribute__((matrix_type(R, C)));
// Tag type with a templated conversion operator to any square N x N matrix.
// The conversion sets each diagonal element [i][i] to 1; the remaining
// elements of 'result' are never assigned in this function.
// The expected IR in test_constexpr1/test_constexpr2 names %result and %i, so
// those local names must stay as is.
337 struct identmatrix_t {
338 template <class T, unsigned N>
operator matrix_type<T,N,N>identmatrix_t339 operator matrix_type<T, N, N>() const {
340 matrix_type<T, N, N> result;
341 for (unsigned i = 0; i != N; ++i)
342 result[i][i] = 1;
343 return result;
344 }
345 };
346
// Single constexpr instance used as an operand in the test_constexpr* tests;
// the expected IR refers to it by its internal-linkage mangled name.
347 constexpr identmatrix_t identmatrix;
348
// Adds the constexpr identmatrix object to a 4x4 float matrix: m + identmatrix.
// The right operand is converted through identmatrix_t's templated conversion
// operator, instantiated for <float, 4>.
// Expected IR for this function: load <16 x float>, call the conversion
// operator on @_ZL11identmatrix, fadd with the matrix on the left, reload the
// destination pointer from %m.addr, bitcast, store, and ret void.
// Expected IR for the conversion operator's loop body: the loop counter is
// loaded twice and zero-extended, the flat index is i * 4 + i, and the element
// is set to 1.0 via load / insertelement / store on <16 x float>. Only the -O1
// run additionally expects an icmp ult against 16 fed into llvm.assume.
test_constexpr1(matrix_type<float,4,4> & m)349 void test_constexpr1(matrix_type<float, 4, 4> &m) {
350 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
351 // CHECK: [[MAT:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 4
352 // CHECK-NEXT: [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
353 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
354 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [16 x float]*, [16 x float]** %m.addr, align 8
355 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [16 x float]* [[MAT_ADDR]] to <16 x float>*
356 // CHECK-NEXT: store <16 x float> [[ADD]], <16 x float>* [[MAT_ADDR2]], align 4
357 // CHECK-NEXT: ret void
358
359 // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
360 // CHECK-LABEL: for.body: ; preds = %for.cond
361 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i, align 4
362 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
363 // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i, align 4
364 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
365 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
366 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
367 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [16 x float]* %result to <16 x float>*
368 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
369 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
370 // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, <16 x float>* [[MAT_ADDR]], align 4
371 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
372 // CHECK-NEXT: store <16 x float> [[MATINS]], <16 x float>* [[MAT_ADDR]], align 4
373 // CHECK-NEXT: br label %for.inc
374 m = m + identmatrix;
375 }
376
// Evaluates identmatrix - m + 1 on a 5x5 int matrix. The left operand is
// converted through identmatrix_t's templated conversion operator,
// instantiated for <int, 5>.
// Expected IR for this function: the conversion call comes first, then the
// <25 x i32> matrix load, an integer sub (identity minus matrix), and an add
// with a constant vector of 25 ones for the scalar '+ 1'; the result is stored
// through the pointer reloaded from %m.addr.
// Expected IR for the conversion operator's loop body: flat index i * 5 + i,
// element set to 1 via load / insertelement / store on <25 x i32>. Only the
// -O1 run additionally expects an icmp ult against 25 fed into llvm.assume.
test_constexpr2(matrix_type<int,5,5> & m)377 void test_constexpr2(matrix_type<int, 5, 5> &m) {
378 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
379 // CHECK: [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
380 // CHECK: [[MAT:%.*]] = load <25 x i32>, <25 x i32>* {{.*}}, align 4
381 // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
382 // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
383 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [25 x i32]*, [25 x i32]** %m.addr, align 8
384 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [25 x i32]* [[MAT_ADDR]] to <25 x i32>*
385 // CHECK-NEXT: store <25 x i32> [[SUB2]], <25 x i32>* [[MAT_ADDR2]], align 4
386 // CHECK-NEXT: ret void
387 //
388
389 // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
390 // CHECK-LABEL: for.body: ; preds = %for.cond
391 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i, align 4
392 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
393 // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i, align 4
394 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
395 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
396 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
397 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [25 x i32]* %result to <25 x i32>*
398 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
399 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
400 // CHECK-NEXT: [[MAT:%.*]] = load <25 x i32>, <25 x i32>* [[MAT_ADDR]], align 4
401 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
402 // CHECK-NEXT: store <25 x i32> [[MATINS]], <25 x i32>* [[MAT_ADDR]], align 4
403 // CHECK-NEXT: br label %for.inc
404
405 m = identmatrix - m + 1;
406 }
407