// RUN: %clang_cc1 -no-opaque-pointers -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
3 
// Matrix type aliases declared once with typedef syntax and once with
// alias-declaration syntax.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
using fx2x3_t = float __attribute__((matrix_type(2, 3)));
6 
// Thin wrapper holding a single matrix whose element type and dimensions
// come from the template parameters. The test functions in this file operate
// on the `value` member.
template <typename EltTy, unsigned Rows, unsigned Columns>
struct MyMatrix {
  using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));

  matrix_t value;
};
13 
14 template <typename EltTy0, unsigned R0, unsigned C0>
add(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)15 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
16   return A.value + B.value;
17 }
18 
test_add_template()19 void test_add_template() {
20   // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
21   // CHECK:       %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
22 
23   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
24   // CHECK:       [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
25   // CHECK:       [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
26   // CHECK-NEXT:  [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
27   // CHECK-NEXT:  ret <10 x float> [[RES]]
28 
29   MyMatrix<float, 2, 5> Mat1;
30   MyMatrix<float, 2, 5> Mat2;
31   Mat1.value = add(Mat1, Mat2);
32 }
33 
34 template <typename EltTy0, unsigned R0, unsigned C0>
subtract(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)35 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
36   return A.value - B.value;
37 }
38 
test_subtract_template()39 void test_subtract_template() {
40   // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
41   // CHECK:       %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
42 
43   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
44   // CHECK:       [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
45   // CHECK:       [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
46   // CHECK-NEXT:  [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
47   // CHECK-NEXT:  ret <10 x float> [[RES]]
48 
49   MyMatrix<float, 2, 5> Mat1;
50   MyMatrix<float, 2, 5> Mat2;
51   Mat1.value = subtract(Mat1, Mat2);
52 }
53 
// Class type that is implicitly convertible to double. Used as the scalar
// operand of matrix/scalar subtraction in the tests that follow.
struct DoubleWrapper1 {
  int x;
  // Implicit conversion; the stored int is converted to double on return.
  operator double() {
    return x;
  }
};
60 
test_DoubleWrapper1_Sub1(MyMatrix<double,10,9> & m)61 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
62   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
63   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
64   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
65   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
66   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
67   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
68   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
69 
70   DoubleWrapper1 w1;
71   w1.x = 10;
72   m.value = m.value - w1;
73 }
74 
test_DoubleWrapper1_Sub2(MyMatrix<double,10,9> & m)75 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
76   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
77   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
78   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
79   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
80   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
81   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
82   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
83 
84   DoubleWrapper1 w1;
85   w1.x = 10;
86   m.value = w1 - m.value;
87 }
88 
// Second class type implicitly convertible to double. Used as the scalar
// operand of matrix/scalar addition in the tests that follow.
struct DoubleWrapper2 {
  int x;
  // Implicit conversion; the stored int is converted to double on return.
  operator double() {
    return x;
  }
};
95 
test_DoubleWrapper2_Add1(MyMatrix<double,10,9> & m)96 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
97   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
98   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.+}}, align 8
99   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
100   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
101   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
102   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
103   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
104 
105   DoubleWrapper2 w2;
106   w2.x = 20;
107   m.value = m.value + w2;
108 }
109 
test_DoubleWrapper2_Add2(MyMatrix<double,10,9> & m)110 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
111   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
112   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
113   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
114   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
115   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
116   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
117   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
118 
119   DoubleWrapper2 w2;
120   w2.x = 20;
121   m.value = w2 + m.value;
122 }
123 
// Class type implicitly convertible to int. Used both as a scalar operand
// (which then needs an int-to-double conversion) and as a matrix subscript.
struct IntWrapper {
  char x;
  // Implicit conversion; the stored char is promoted to int on return.
  operator int() {
    return x;
  }
};
130 
test_IntWrapper_Add(MyMatrix<double,10,9> & m)131 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
132   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
133   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
134   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
135   // CHECK:       [[SCALAR_FP:%.*]] = sitofp i32 %call to double
136   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
137   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
138   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
139   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
140 
141   IntWrapper w3;
142   w3.x = 'c';
143   m.value = m.value + w3;
144 }
145 
test_IntWrapper_Sub(MyMatrix<double,10,9> & m)146 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
147   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
148   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
149   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 %call to double
150   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
151   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
152   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
153   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
154   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
155 
156   IntWrapper w3;
157   w3.x = 'c';
158   m.value = w3 - m.value;
159 }
160 
161 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
multiply(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,C0,C1> & B)162 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
163   return A.value * B.value;
164 }
165 
test_multiply_template(MyMatrix<float,2,5> Mat1,MyMatrix<float,5,2> Mat2)166 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
167                                              MyMatrix<float, 5, 2> Mat2) {
168   // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
169   // CHECK-NEXT:  entry:
170   // CHECK-NEXT:    [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix.2* noundef nonnull align 4 dereferenceable(40) %Mat2)
171   // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
172   // CHECK-NEXT:    [[VALUE_ADDR:%.*]] = bitcast [4 x float]* %value to <4 x float>*
173   // CHECK-NEXT:    store <4 x float> [[RES]], <4 x float>* [[VALUE_ADDR]], align 4
174   // CHECK-NEXT:    ret void
175   //
176   // CHECK-LABEL:  define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
177   // CHECK:         [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
178   // CHECK:         [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
179   // CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
180   // CHECK-NEXT:    ret <4 x float> [[RES]]
181 
182   MyMatrix<float, 2, 2> Res;
183   Res.value = multiply(Mat1, Mat2);
184   return Res;
185 }
186 
test_IntWrapper_Multiply(MyMatrix<double,10,9> & m,IntWrapper & w3)187 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
188   // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
189   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* noundef {{.*}})
190   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 %call to double
191   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
192   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
193   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
194   // CHECK-NEXT:  [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
195   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
196   // CHECK:       ret void
197   m.value = w3 * m.value;
198 }
199 
200 template <typename EltTy, unsigned Rows, unsigned Columns>
insert(MyMatrix<EltTy,Rows,Columns> & Mat,EltTy e,unsigned i,unsigned j)201 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
202   Mat.value[i][j] = e;
203 }
204 
test_insert_template1(MyMatrix<unsigned,2,2> & Mat,unsigned e,unsigned i,unsigned j)205 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
206   // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
207   // CHECK:         [[MAT_ADDR:%.*]] = load %struct.MyMatrix.3*, %struct.MyMatrix.3** %Mat.addr, align 8
208   // CHECK-NEXT:    [[E:%.*]] = load i32, i32* %e.addr, align 4
209   // CHECK-NEXT:    [[I:%.*]] = load i32, i32* %i.addr, align 4
210   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
211   // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.3* noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
212   // CHECK-NEXT:    ret void
213   //
214   // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
215   // CHECK:         [[E:%.*]] = load i32, i32* %e.addr, align 4
216   // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
217   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
218   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
219   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
220   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
221   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
222   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [4 x i32]* {{.*}} to <4 x i32>*
223   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
224   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
225   // CHECK-NEXT:    [[MAT:%.*]] = load <4 x i32>, <4 x i32>* [[MAT_ADDR]], align 4
226   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
227   // CHECK-NEXT:    store <4 x i32> [[MATINS]], <4 x i32>* [[MAT_ADDR]], align 4
228   // CHECK-NEXT:    ret void
229 
230   insert(Mat, e, i, j);
231 }
232 
test_insert_template2(MyMatrix<float,3,8> & Mat,float e)233 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
234   // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
235   // CHECK:         [[MAT_ADDR:%.*]] = load %struct.MyMatrix.4*, %struct.MyMatrix.4** %Mat.addr, align 8
236   // CHECK-NEXT:    [[E:%.*]] = load float, float* %e.addr, align 4
237   // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.4* noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
238   // CHECK-NEXT:    ret void
239   //
240   // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
241   // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
242   // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
243   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
244   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
245   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
246   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
247   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
248   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [24 x float]* {{.*}} to <24 x float>*
249   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
250   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
251   // CHECK-NEXT:    [[MAT:%.*]] = load <24 x float>, <24 x float>* [[MAT_ADDR]], align 4
252   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
253   // CHECK-NEXT:    store <24 x float> [[MATINS]], <24 x float>* [[MAT_ADDR]], align 4
254   // CHECK-NEXT:    ret void
255 
256   insert(Mat, e, 2, 5);
257 }
258 
259 template <typename EltTy, unsigned Rows, unsigned Columns>
extract(MyMatrix<EltTy,Rows,Columns> & Mat)260 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
261   return Mat.value[1u][0u];
262 }
263 
test_extract_template(MyMatrix<int,2,2> Mat1)264 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
265   // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
266   // CHECK-NEXT:  entry:
267   // CHECK-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix.5* noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
268   // CHECK-NEXT:    ret i32 [[CALL]]
269   //
270   // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
271   // CHECK:         [[MAT:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
272   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
273   // CHECK-NEXT:    ret i32 [[MATEXT]]
274 
275   return extract(Mat1);
276 }
277 
// 4x4 matrix of double, used by the subscript tests below.
using double4x4 = double __attribute__((matrix_type(4, 4)));
279 
280 template <class R, class C>
matrix_subscript(double4x4 m,R r,C c)281 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
282 
test_matrix_subscript(double4x4 m)283 double test_matrix_subscript(double4x4 m) {
284   // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
285   // CHECK:         [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}}, align 8
286   // CHECK-NEXT:    [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
287   // CHECK-NEXT:    [[RES:%.*]] = load double, double* [[CALL]], align 8
288   // CHECK-NEXT:    ret double [[RES]]
289 
290   return matrix_subscript(m, 1, 2);
291 }
292 
test_matrix_subscript_reference(const double4x4 m)293 const double &test_matrix_subscript_reference(const double4x4 m) {
294   // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
295   // CHECK-NEXT:  entry:
296   // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x double], align 8
297   // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca double, align 8
298   // CHECK-NEXT:    [[NAMELESS0:%.*]] = bitcast [16 x double]* [[M_ADDR]] to <16 x double>*
299   // CHECK-NEXT:    store <16 x double> [[M:%.*]], <16 x double>* [[NAMELESS0]], align 8
300   // CHECK:         [[NAMELESS1:%.*]] = load <16 x double>, <16 x double>* [[NAMELESS0]], align 8
301   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
302   // CHECK-NEXT:    store double [[MATEXT]], double* [[REF_TMP]], align 8
303   // CHECK:         ret double* [[REF_TMP]]
304 
305   return m[0][1];
306 }
307 
// Class type implicitly convertible to unsigned. Used as a matrix subscript
// so that the index goes through zero-extension rather than sign-extension.
struct UnsignedWrapper {
  char x;
  // Implicit conversion; the stored char is converted to unsigned on return.
  operator unsigned() {
    return x;
  }
};
314 
extract_IntWrapper_idx(double4x4 & m,IntWrapper i,UnsignedWrapper j)315 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
316   // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
317   // CHECK:         [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %i)
318   // CHECK-NEXT:    [[I_ADD:%.*]] = add nsw i32 [[I]], 1
319   // CHECK-NEXT:    [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
320   // CHECK-NEXT:    [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} %j)
321   // CHECK-NEXT:    [[J_SUB:%.*]] = sub i32 [[J]], 1
322   // CHECK-NEXT:    [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
323   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
324   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
325   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
326   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
327   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [16 x double]*, [16 x double]** %m.addr, align 8
328   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [16 x double]* [[MAT_ADDR]] to <16 x double>*
329   // CHECK-NEXT:    [[MAT:%.*]] = load <16 x double>, <16 x double>* [[MAT_ADDR2]], align 8
330   // CHECK-NEXT:    [[MATEXT:%.*]]  = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
331   // CHECK-NEXT:    ret double [[MATEXT]]
332   return m[i + 1][j - 1];
333 }
334 
// Alias template giving a matrix of T with R rows and C columns.
template <class T, unsigned R, unsigned C>
using matrix_type = T __attribute__((matrix_type(R, C)));
337 struct identmatrix_t {
338   template <class T, unsigned N>
operator matrix_type<T,N,N>identmatrix_t339   operator matrix_type<T, N, N>() const {
340     matrix_type<T, N, N> result;
341     for (unsigned i = 0; i != N; ++i)
342       result[i][i] = 1;
343     return result;
344   }
345 };
346 
347 constexpr identmatrix_t identmatrix;
348 
test_constexpr1(matrix_type<float,4,4> & m)349 void test_constexpr1(matrix_type<float, 4, 4> &m) {
350   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
351   // CHECK:         [[MAT:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 4
352   // CHECK-NEXT:    [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
353   // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
354   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [16 x float]*, [16 x float]** %m.addr, align 8
355   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [16 x float]* [[MAT_ADDR]] to <16 x float>*
356   // CHECK-NEXT:    store <16 x float> [[ADD]], <16 x float>* [[MAT_ADDR2]], align 4
357   // CHECK-NEXT:    ret voi
358 
359   // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
360   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
361   // CHECK-NEXT:   [[I:%.*]] = load i32, i32* %i, align 4
362   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
363   // CHECK-NEXT:   [[I2:%.*]] = load i32, i32* %i, align 4
364   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
365   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
366   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
367   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [16 x float]* %result to <16 x float>*
368   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
369   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
370   // CHECK-NEXT:   [[MAT:%.*]] = load <16 x float>, <16 x float>* [[MAT_ADDR]], align 4
371   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
372   // CHECK-NEXT:   store <16 x float> [[MATINS]], <16 x float>* [[MAT_ADDR]], align 4
373   // CHECK-NEXT:   br label %for.inc
374   m = m + identmatrix;
375 }
376 
test_constexpr2(matrix_type<int,5,5> & m)377 void test_constexpr2(matrix_type<int, 5, 5> &m) {
378   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
379   // CHECK:         [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
380   // CHECK:         [[MAT:%.*]] = load <25 x i32>, <25 x i32>* {{.*}}, align 4
381   // CHECK-NEXT:    [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
382   // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
383   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [25 x i32]*, [25 x i32]** %m.addr, align 8
384   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [25 x i32]* [[MAT_ADDR]] to <25 x i32>*
385   // CHECK-NEXT:    store <25 x i32> [[SUB2]], <25 x i32>* [[MAT_ADDR2]], align 4
386   // CHECK-NEXT:    ret void
387   //
388 
389   // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
390   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
391   // CHECK-NEXT:   [[I:%.*]] = load i32, i32* %i, align 4
392   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
393   // CHECK-NEXT:   [[I2:%.*]] = load i32, i32* %i, align 4
394   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
395   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
396   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
397   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [25 x i32]* %result to <25 x i32>*
398   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
399   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
400   // CHECK-NEXT:   [[MAT:%.*]] = load <25 x i32>, <25 x i32>* [[MAT_ADDR]], align 4
401   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
402   // CHECK-NEXT:   store <25 x i32> [[MATINS]], <25 x i32>* [[MAT_ADDR]], align 4
403   // CHECK-NEXT:   br label %for.inc
404 
405   m = identmatrix - m + 1;
406 }
407