1 // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++17 | FileCheck %s
2 
// Matrix typedefs shared by the tests below, declared with the matrix_type
// attribute (the RUN line enables it with -fenable-matrix):
//   dx5x5_t - 5 rows x 5 columns of double
//   fx3x4_t - 3 rows x 4 columns of float
3 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
4 typedef float fx3x4_t __attribute__((matrix_type(3, 4)));
5 
6 // CHECK: %struct.Matrix = type { i8, [12 x float], float }
7 
// Copies the 5x5 double matrix pointed to by `b` into the matrix pointed to
// by `a`.
//
// The expectations inside the body pin the emitted IR: the pointers are typed
// as [25 x double]*, and the copy is performed as one <25 x double> load and
// one <25 x double> store through bitcast pointers, both with align 8.
load_store(dx5x5_t * a,dx5x5_t * b)8 void load_store(dx5x5_t *a, dx5x5_t *b) {
9   // CHECK-LABEL:  define{{.*}} void @_Z10load_storePu11matrix_typeILm5ELm5EdES0_(
10   // CHECK-NEXT:  entry:
11   // CHECK-NEXT:    %a.addr = alloca [25 x double]*, align 8
12   // CHECK-NEXT:    %b.addr = alloca [25 x double]*, align 8
13   // CHECK-NEXT:    store [25 x double]* %a, [25 x double]** %a.addr, align 8
14   // CHECK-NEXT:    store [25 x double]* %b, [25 x double]** %b.addr, align 8
15   // CHECK-NEXT:    %0 = load [25 x double]*, [25 x double]** %b.addr, align 8
16   // CHECK-NEXT:    %1 = bitcast [25 x double]* %0 to <25 x double>*
17   // CHECK-NEXT:    %2 = load <25 x double>, <25 x double>* %1, align 8
18   // CHECK-NEXT:    %3 = load [25 x double]*, [25 x double]** %a.addr, align 8
19   // CHECK-NEXT:    %4 = bitcast [25 x double]* %3 to <25 x double>*
20   // CHECK-NEXT:    store <25 x double> %2, <25 x double>* %4, align 8
21   // CHECK-NEXT:   ret void
22
23   *a = *b;
24 }
25 
// 3 rows x 3 columns of float; used by parameter_passing and return_matrix.
26 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
27 
// Takes a 3x3 float matrix by value and stores it through the pointer `b`.
//
// Expected IR (see the directives in the body): the by-value argument arrives
// as <9 x float> and is spilled into a [9 x float] alloca via a bitcast; the
// store to *b likewise goes through a bitcast to <9 x float>*, with align 4.
parameter_passing(fx3x3_t a,fx3x3_t * b)28 void parameter_passing(fx3x3_t a, fx3x3_t *b) {
29   // CHECK-LABEL: define{{.*}} void @_Z17parameter_passingu11matrix_typeILm3ELm3EfEPS_(
30   // CHECK-NEXT:  entry:
31   // CHECK-NEXT:    %a.addr = alloca [9 x float], align 4
32   // CHECK-NEXT:    %b.addr = alloca [9 x float]*, align 8
33   // CHECK-NEXT:    %0 = bitcast [9 x float]* %a.addr to <9 x float>*
34   // CHECK-NEXT:    store <9 x float> %a, <9 x float>* %0, align 4
35   // CHECK-NEXT:    store [9 x float]* %b, [9 x float]** %b.addr, align 8
36   // CHECK-NEXT:    %1 = load <9 x float>, <9 x float>* %0, align 4
37   // CHECK-NEXT:    %2 = load [9 x float]*, [9 x float]** %b.addr, align 8
38   // CHECK-NEXT:    %3 = bitcast [9 x float]* %2 to <9 x float>*
39   // CHECK-NEXT:    store <9 x float> %1, <9 x float>* %3, align 4
40   // CHECK-NEXT:    ret void
41   *b = a;
42 }
43 
// Returns, by value, the 3x3 float matrix that `a` points to.
//
// Expected IR: the function returns <9 x float>, obtained by loading through
// a bitcast of the [9 x float]* argument (align 4).
return_matrix(fx3x3_t * a)44 fx3x3_t return_matrix(fx3x3_t *a) {
45   // CHECK-LABEL: define{{.*}} <9 x float> @_Z13return_matrixPu11matrix_typeILm3ELm3EfE(
46   // CHECK-NEXT:  entry:
47   // CHECK-NEXT:    %a.addr = alloca [9 x float]*, align 8
48   // CHECK-NEXT:    store [9 x float]* %a, [9 x float]** %a.addr, align 8
49   // CHECK-NEXT:    %0 = load [9 x float]*, [9 x float]** %a.addr, align 8
50   // CHECK-NEXT:    %1 = bitcast [9 x float]* %0 to <9 x float>*
51   // CHECK-NEXT:    %2 = load <9 x float>, <9 x float>* %1, align 4
52   // CHECK-NEXT:    ret <9 x float> %2
53   return *a;
54 }
55 
// Aggregate with a 3x4 float matrix member placed between two scalar members.
// The type expectation near the top of the file requires this to be emitted
// as { i8, [12 x float], float }, so member order and types must not change.
56 struct Matrix {
57   char Tmp1;
58   fx3x4_t Data;
59   float Tmp2;
60 };
61 
// Assigns the matrix member a->Data to b->Data through pointers to Matrix.
//
// Expected IR: each member is addressed with a getelementptr to field index 1
// of %struct.Matrix, and the copy is a <12 x float> load followed by a
// <12 x float> store through bitcast pointers, both with align 4.
matrix_struct_pointers(Matrix * a,Matrix * b)62 void matrix_struct_pointers(Matrix *a, Matrix *b) {
63   // CHECK-LABEL: define{{.*}} void @_Z22matrix_struct_pointersP6MatrixS0_(
64   // CHECK-NEXT:  entry:
65   // CHECK-NEXT:    %a.addr = alloca %struct.Matrix*, align 8
66   // CHECK-NEXT:    %b.addr = alloca %struct.Matrix*, align 8
67   // CHECK-NEXT:    store %struct.Matrix* %a, %struct.Matrix** %a.addr, align 8
68   // CHECK-NEXT:    store %struct.Matrix* %b, %struct.Matrix** %b.addr, align 8
69   // CHECK-NEXT:    %0 = load %struct.Matrix*, %struct.Matrix** %a.addr, align 8
70   // CHECK-NEXT:    %Data = getelementptr inbounds %struct.Matrix, %struct.Matrix* %0, i32 0, i32 1
71   // CHECK-NEXT:    %1 = bitcast [12 x float]* %Data to <12 x float>*
72   // CHECK-NEXT:    %2 = load <12 x float>, <12 x float>* %1, align 4
73   // CHECK-NEXT:    %3 = load %struct.Matrix*, %struct.Matrix** %b.addr, align 8
74   // CHECK-NEXT:    %Data1 = getelementptr inbounds %struct.Matrix, %struct.Matrix* %3, i32 0, i32 1
75   // CHECK-NEXT:    %4 = bitcast [12 x float]* %Data1 to <12 x float>*
76   // CHECK-NEXT:    store <12 x float> %2, <12 x float>* %4, align 4
77   // CHECK-NEXT:    ret void
78   b->Data = a->Data;
79 }
80 
// Assigns the matrix member a.Data to b.Data through references to Matrix.
//
// Expected IR: the reference parameters are lowered to %struct.Matrix*, and
// the body is the same getelementptr / bitcast / <12 x float> load-store
// sequence that is expected for matrix_struct_pointers.
matrix_struct_reference(Matrix & a,Matrix & b)81 void matrix_struct_reference(Matrix &a, Matrix &b) {
82   // CHECK-LABEL: define{{.*}} void @_Z23matrix_struct_referenceR6MatrixS0_(
83   // CHECK-NEXT:  entry:
84   // CHECK-NEXT:    %a.addr = alloca %struct.Matrix*, align 8
85   // CHECK-NEXT:    %b.addr = alloca %struct.Matrix*, align 8
86   // CHECK-NEXT:    store %struct.Matrix* %a, %struct.Matrix** %a.addr, align 8
87   // CHECK-NEXT:    store %struct.Matrix* %b, %struct.Matrix** %b.addr, align 8
88   // CHECK-NEXT:    %0 = load %struct.Matrix*, %struct.Matrix** %a.addr, align 8
89   // CHECK-NEXT:    %Data = getelementptr inbounds %struct.Matrix, %struct.Matrix* %0, i32 0, i32 1
90   // CHECK-NEXT:    %1 = bitcast [12 x float]* %Data to <12 x float>*
91   // CHECK-NEXT:    %2 = load <12 x float>, <12 x float>* %1, align 4
92   // CHECK-NEXT:    %3 = load %struct.Matrix*, %struct.Matrix** %b.addr, align 8
93   // CHECK-NEXT:    %Data1 = getelementptr inbounds %struct.Matrix, %struct.Matrix* %3, i32 0, i32 1
94   // CHECK-NEXT:    %4 = bitcast [12 x float]* %Data1 to <12 x float>*
95   // CHECK-NEXT:    store <12 x float> %2, <12 x float>* %4, align 4
96   // CHECK-NEXT:    ret void
97   b.Data = a.Data;
98 }
99 
// Class with a 3x4 float matrix member between an int and a long member, all
// public. Used by matrix_class_reference, whose expectations address Data as
// field index 1 of %class.MatrixClass, so the member order must not change.
100 class MatrixClass {
101 public:
102   int Tmp1;
103   fx3x4_t Data;
104   long Tmp2;
105 };
106 
// Assigns the matrix member a.Data to b.Data through references to
// MatrixClass.
//
// Expected IR: the references are lowered to %class.MatrixClass*, the member
// is addressed as field index 1, and the copy is a <12 x float> load/store
// through bitcast pointers with align 4.
matrix_class_reference(MatrixClass & a,MatrixClass & b)107 void matrix_class_reference(MatrixClass &a, MatrixClass &b) {
108   // CHECK-LABEL: define{{.*}} void @_Z22matrix_class_referenceR11MatrixClassS0_(
109   // CHECK-NEXT:  entry:
110   // CHECK-NEXT:    %a.addr = alloca %class.MatrixClass*, align 8
111   // CHECK-NEXT:    %b.addr = alloca %class.MatrixClass*, align 8
112   // CHECK-NEXT:    store %class.MatrixClass* %a, %class.MatrixClass** %a.addr, align 8
113   // CHECK-NEXT:    store %class.MatrixClass* %b, %class.MatrixClass** %b.addr, align 8
114   // CHECK-NEXT:    %0 = load %class.MatrixClass*, %class.MatrixClass** %a.addr, align 8
115   // CHECK-NEXT:    %Data = getelementptr inbounds %class.MatrixClass, %class.MatrixClass* %0, i32 0, i32 1
116   // CHECK-NEXT:    %1 = bitcast [12 x float]* %Data to <12 x float>*
117   // CHECK-NEXT:    %2 = load <12 x float>, <12 x float>* %1, align 4
118   // CHECK-NEXT:    %3 = load %class.MatrixClass*, %class.MatrixClass** %b.addr, align 8
119   // CHECK-NEXT:    %Data1 = getelementptr inbounds %class.MatrixClass, %class.MatrixClass* %3, i32 0, i32 1
120   // CHECK-NEXT:    %4 = bitcast [12 x float]* %Data1 to <12 x float>*
121   // CHECK-NEXT:    store <12 x float> %2, <12 x float>* %4, align 4
122   // CHECK-NEXT:    ret void
123   b.Data = a.Data;
124 }
125 
// Class template whose matrix member type depends on the template parameters:
// MatrixTy is `Ty` with matrix_type(Rows, Cols). The matrix member sits
// between an int and a long member; the expectations for the
// <float, 10, 15> instantiation address Data as field index 1.
126 template <typename Ty, unsigned Rows, unsigned Cols>
127 class MatrixClassTemplate {
128 public:
129   using MatrixTy = Ty __attribute__((matrix_type(Rows, Cols)));
130   int Tmp1;
131   MatrixTy Data;
132   long Tmp2;
133 };
134 
// Copies a.Data into b.Data for any MatrixClassTemplate instantiation.
// The expected IR for the <float, 10, 15> instantiation (a <150 x float>
// load/store) is listed inside matrix_template_reference_caller.
135 template <typename Ty, unsigned Rows, unsigned Cols>
matrix_template_reference(MatrixClassTemplate<Ty,Rows,Cols> & a,MatrixClassTemplate<Ty,Rows,Cols> & b)136 void matrix_template_reference(MatrixClassTemplate<Ty, Rows, Cols> &a, MatrixClassTemplate<Ty, Rows, Cols> &b) {
137   b.Data = a.Data;
138 }
139 
// Loads a 10x15 float matrix from `Data` into a local `Arg`, calls
// matrix_template_reference(Arg, Result) and returns `Result`.
//
// The first group of expectations covers this function: the result is
// returned through an sret pointer (%agg.result), which is also passed
// directly as the second argument of the call, and the matrix is moved as
// <150 x float>. The second group covers the linkonce_odr instantiation of
// matrix_template_reference for <float, 10, 15>.
matrix_template_reference_caller(float * Data)140 MatrixClassTemplate<float, 10, 15> matrix_template_reference_caller(float *Data) {
141   // CHECK-LABEL: define{{.*}} void @_Z32matrix_template_reference_callerPf(%class.MatrixClassTemplate* noalias sret(%class.MatrixClassTemplate) align 8 %agg.result, float* %Data
142   // CHECK-NEXT:  entry:
143   // CHECK-NEXT:    %Data.addr = alloca float*, align 8
144   // CHECK-NEXT:    %Arg = alloca %class.MatrixClassTemplate, align 8
145   // CHECK-NEXT:    store float* %Data, float** %Data.addr, align 8
146   // CHECK-NEXT:    %0 = load float*, float** %Data.addr, align 8
147   // CHECK-NEXT:    %1 = bitcast float* %0 to [150 x float]*
148   // CHECK-NEXT:    %2 = bitcast [150 x float]* %1 to <150 x float>*
149   // CHECK-NEXT:    %3 = load <150 x float>, <150 x float>* %2, align 4
150   // CHECK-NEXT:    %Data1 = getelementptr inbounds %class.MatrixClassTemplate, %class.MatrixClassTemplate* %Arg, i32 0, i32 1
151   // CHECK-NEXT:    %4 = bitcast [150 x float]* %Data1 to <150 x float>*
152   // CHECK-NEXT:    store <150 x float> %3, <150 x float>* %4, align 4
153   // CHECK-NEXT:    call void @_Z25matrix_template_referenceIfLj10ELj15EEvR19MatrixClassTemplateIT_XT0_EXT1_EES3_(%class.MatrixClassTemplate* nonnull align 8 dereferenceable(616) %Arg, %class.MatrixClassTemplate* nonnull align 8 dereferenceable(616) %agg.result)
154   // CHECK-NEXT:    ret void
155
156   // CHECK-LABEL: define linkonce_odr void @_Z25matrix_template_referenceIfLj10ELj15EEvR19MatrixClassTemplateIT_XT0_EXT1_EES3_(%class.MatrixClassTemplate* nonnull align 8 dereferenceable(616) %a, %class.MatrixClassTemplate* nonnull align 8 dereferenceable(616) %b)
157   // CHECK-NEXT:  entry:
158   // CHECK-NEXT:    %a.addr = alloca %class.MatrixClassTemplate*, align 8
159   // CHECK-NEXT:    %b.addr = alloca %class.MatrixClassTemplate*, align 8
160   // CHECK-NEXT:    store %class.MatrixClassTemplate* %a, %class.MatrixClassTemplate** %a.addr, align 8
161   // CHECK-NEXT:    store %class.MatrixClassTemplate* %b, %class.MatrixClassTemplate** %b.addr, align 8
162   // CHECK-NEXT:    %0 = load %class.MatrixClassTemplate*, %class.MatrixClassTemplate** %a.addr, align 8
163   // CHECK-NEXT:    %Data = getelementptr inbounds %class.MatrixClassTemplate, %class.MatrixClassTemplate* %0, i32 0, i32 1
164   // CHECK-NEXT:    %1 = bitcast [150 x float]* %Data to <150 x float>*
165   // CHECK-NEXT:    %2 = load <150 x float>, <150 x float>* %1, align 4
166   // CHECK-NEXT:    %3 = load %class.MatrixClassTemplate*, %class.MatrixClassTemplate** %b.addr, align 8
167   // CHECK-NEXT:    %Data1 = getelementptr inbounds %class.MatrixClassTemplate, %class.MatrixClassTemplate* %3, i32 0, i32 1
168   // CHECK-NEXT:    %4 = bitcast [150 x float]* %Data1 to <150 x float>*
169   // CHECK-NEXT:    store <150 x float> %2, <150 x float>* %4, align 4
170   // CHECK-NEXT:    ret void
171
172   MatrixClassTemplate<float, 10, 15> Result, Arg;
  // Reinterpret the float pointer as a pointer to the 10x15 matrix type and
  // load the whole matrix into Arg.Data.
173   Arg.Data = *((MatrixClassTemplate<float, 10, 15>::MatrixTy *)Data);
174   matrix_template_reference(Arg, Result);
175   return Result;
176 }
177 
// Alias template: matrix<T, R, C> is `T` with matrix_type(R, C), i.e. R rows
// and C columns. Used as the parameter and return type in the deduction tests
// that follow.
178 template <class T, unsigned long R, unsigned long C>
179 using matrix = T __attribute__((matrix_type(R, C)));
180 
// Empty tag type. The tests use selector<N> as a return type so that the
// declared type of the result variable identifies which overload was chosen.
181 template <int N>
182 struct selector {};
183 
// Overload set for test_template_deduction. Each overload fixes a different
// subset of element type / rows / columns and returns a distinct selector<N>.
//
// The bodies are empty (no return statement). The expectations listed in
// test_template_deduction match `call void @llvm.trap()` followed by
// `unreachable` in every instantiation, so do not add a return statement.

// selector<0>: element type, rows and columns are all template parameters.
184 template <class T, unsigned long R, unsigned long C>
use_matrix(matrix<T,R,C> & m)185 selector<0> use_matrix(matrix<T, R, C> &m) {}
186
// selector<1>: columns fixed to 10.
187 template <class T, unsigned long R>
use_matrix(matrix<T,R,10> & m)188 selector<1> use_matrix(matrix<T, R, 10> &m) {}
189
// selector<2>: rows and columns both fixed to 10.
190 template <class T>
use_matrix(matrix<T,10,10> & m)191 selector<2> use_matrix(matrix<T, 10, 10> &m) {}
192
// selector<3>: rows fixed to 10.
193 template <class T, unsigned long C>
use_matrix(matrix<T,10,C> & m)194 selector<3> use_matrix(matrix<T, 10, C> &m) {}
195
// selector<4>: element type fixed to float.
196 template <unsigned long R, unsigned long C>
use_matrix(matrix<float,R,C> & m)197 selector<4> use_matrix(matrix<float, R, C> &m) {}
198 
// Calls use_matrix with five differently shaped matrices. The declared type
// of each result variable (selector<3>, <2>, <1>, <0>, <4>) only compiles if
// the corresponding overload is the one selected for that argument.
//
// The first group of expectations covers this function (allocas for the
// matrices and selectors, then one call per use_matrix overload, identified
// by mangled name). The remaining groups cover the linkonce_odr
// instantiations, each of which stores its argument and then traps.
test_template_deduction()199 void test_template_deduction() {
200
201   // CHECK-LABEL: define{{.*}} void @_Z23test_template_deductionv()
202   // CHECK-NEXT:  entry:
203   // CHECK-NEXT:    %m0 = alloca [120 x i32], align 4
204   // CHECK-NEXT:    %w = alloca %struct.selector, align 1
205   // CHECK-NEXT:    %undef.agg.tmp = alloca %struct.selector, align 1
206   // CHECK-NEXT:    %m1 = alloca [100 x i32], align 4
207   // CHECK-NEXT:    %x = alloca %struct.selector.0, align 1
208   // CHECK-NEXT:    %undef.agg.tmp1 = alloca %struct.selector.0, align 1
209   // CHECK-NEXT:    %m2 = alloca [120 x i32], align 4
210   // CHECK-NEXT:    %y = alloca %struct.selector.1, align 1
211   // CHECK-NEXT:    %undef.agg.tmp2 = alloca %struct.selector.1, align 1
212   // CHECK-NEXT:    %m3 = alloca [144 x i32], align 4
213   // CHECK-NEXT:    %z = alloca %struct.selector.2, align 1
214   // CHECK-NEXT:    %undef.agg.tmp3 = alloca %struct.selector.2, align 1
215   // CHECK-NEXT:    %m4 = alloca [144 x float], align 4
216   // CHECK-NEXT:    %v = alloca %struct.selector.3, align 1
217   // CHECK-NEXT:    %undef.agg.tmp4 = alloca %struct.selector.3, align 1
218   // CHECK-NEXT:    call void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m0)
219   // CHECK-NEXT:    call void @_Z10use_matrixIiE8selectorILi2EERu11matrix_typeILm10ELm10ET_E([100 x i32]* nonnull align 4 dereferenceable(400) %m1)
220   // CHECK-NEXT:    call void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m2)
221   // CHECK-NEXT:    call void @_Z10use_matrixIiLm12ELm12EE8selectorILi0EERu11matrix_typeIXT0_EXT1_ET_E([144 x i32]* nonnull align 4 dereferenceable(576) %m3)
222   // CHECK-NEXT:    call void @_Z10use_matrixILm12ELm12EE8selectorILi4EERu11matrix_typeIXT_EXT0_EfE([144 x float]* nonnull align 4 dereferenceable(576) %m4)
223   // CHECK-NEXT:    ret void
224
225   // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m)
226   // CHECK-NEXT:  entry:
227   // CHECK-NEXT:    %m.addr = alloca [120 x i32]*, align 8
228   // CHECK-NEXT:    store [120 x i32]* %m, [120 x i32]** %m.addr, align 8
229   // CHECK-NEXT:    call void @llvm.trap()
230   // CHECK-NEXT:    unreachable
231
232   // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiE8selectorILi2EERu11matrix_typeILm10ELm10ET_E([100 x i32]* nonnull align 4 dereferenceable(400) %m)
233   // CHECK-NEXT:  entry:
234   // CHECK-NEXT:    %m.addr = alloca [100 x i32]*, align 8
235   // CHECK-NEXT:    store [100 x i32]* %m, [100 x i32]** %m.addr, align 8
236   // CHECK-NEXT:    call void @llvm.trap()
237   // CHECK-NEXT:    unreachable
238
239   // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m)
240   // CHECK-NEXT:  entry:
241   // CHECK-NEXT:    %m.addr = alloca [120 x i32]*, align 8
242   // CHECK-NEXT:    store [120 x i32]* %m, [120 x i32]** %m.addr, align 8
243   // CHECK-NEXT:    call void @llvm.trap()
244   // CHECK-NEXT:    unreachable
245
246   // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12ELm12EE8selectorILi0EERu11matrix_typeIXT0_EXT1_ET_E([144 x i32]* nonnull align 4 dereferenceable(576) %m)
247   // CHECK-NEXT:  entry:
248   // CHECK-NEXT:    %m.addr = alloca [144 x i32]*, align 8
249   // CHECK-NEXT:    store [144 x i32]* %m, [144 x i32]** %m.addr, align 8
250   // CHECK-NEXT:    call void @llvm.trap()
251   // CHECK-NEXT:    unreachable
252
253   // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixILm12ELm12EE8selectorILi4EERu11matrix_typeIXT_EXT0_EfE([144 x float]* nonnull align 4 dereferenceable(576)
254   // CHECK-NEXT:  entry:
255   // CHECK-NEXT:    %m.addr = alloca [144 x float]*, align 8
256   // CHECK-NEXT:    store [144 x float]* %m, [144 x float]** %m.addr, align 8
257   // CHECK-NEXT:    call void @llvm.trap()
258   // CHECK-NEXT:    unreachable
259
260   matrix<int, 10, 12> m0;
261   selector<3> w = use_matrix(m0);
262   matrix<int, 10, 10> m1;
263   selector<2> x = use_matrix(m1);
264   matrix<int, 12, 10> m2;
265   selector<1> y = use_matrix(m2);
266   matrix<int, 12, 12> m3;
267   selector<0> z = use_matrix(m3);
268   matrix<float, 12, 12> m4;
269   selector<4> v = use_matrix(m4);
270 }
271 
// Function template with an `auto` non-type template parameter R, used as the
// row count of an int matrix with 10 columns. The body is empty; the expected
// IR for the instantiation in test_auto_t stores the argument and returns.
272 template <auto R>
foo(matrix<int,R,10> & m)273 void foo(matrix<int, R, 10> &m) {
274 }
275 
// Calls foo with a 13x10 int matrix so that the `auto` template parameter R
// is deduced from the matrix type.
//
// The expectations check the caller (a [130 x i32] alloca and a call to the
// instantiation whose mangled name carries Lm13E, i.e. R = 13) and the
// linkonce_odr instantiation of foo itself.
test_auto_t()276 void test_auto_t() {
277   // CHECK-LABEL: define{{.*}} void @_Z11test_auto_tv()
278   // CHECK-NEXT:  entry:
279   // CHECK-NEXT:    %m = alloca [130 x i32], align 4
280   // CHECK-NEXT:    call void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m)
281   // CHECK-NEXT:    ret void
282
283   // CHECK-LABEL: define linkonce_odr void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m)
284   // CHECK-NEXT:  entry:
285   // CHECK-NEXT:    %m.addr = alloca [130 x i32]*, align 8
286   // CHECK-NEXT:    store [130 x i32]* %m, [130 x i32]** %m.addr, align 8
287   // CHECK-NEXT:    ret void
288
289   matrix<int, 13, 10> m;
290   foo(m);
291 }
292 
// Overload set for test_use_matrix_2. The matrix dimensions in the return
// and parameter types are expressions of the template parameters (R + 1,
// C + 2, R + 2, C / 2, R + C, C - R, R + R, R - 3).
//
// The bodies are empty (no return statement). The expectations listed in
// test_use_matrix_2 match `call void @llvm.trap()` followed by `unreachable`
// in every instantiation, so do not add a return statement.

// One int matrix in; returns a float matrix with R + 1 rows and C + 2 columns.
293 template <unsigned long R, unsigned long C>
use_matrix_2(matrix<int,R,C> & m)294 matrix<float, R + 1, C + 2> use_matrix_2(matrix<int, R, C> &m) {}
295
// Two matrices in; m1's dimensions are expressions of the R and C that appear
// directly in m2's type.
296 template <unsigned long R, unsigned long C>
use_matrix_2(matrix<int,R+2,C/2> & m1,matrix<float,R,C> & m2)297 selector<0> use_matrix_2(matrix<int, R + 2, C / 2> &m1, matrix<float, R, C> &m2) {}
298
// Two matrices in; C appears directly in m1's columns and R in m2's rows,
// while the remaining dimensions are expressions of both.
299 template <unsigned long R, unsigned long C>
use_matrix_2(matrix<int,R+C,C> & m1,matrix<float,R,C-R> & m2)300 selector<1> use_matrix_2(matrix<int, R + C, C> &m1, matrix<float, R, C - R> &m2) {}
301
// One int matrix with 10 columns in; returns a float matrix with R + R rows
// and R - 3 columns.
302 template <unsigned long R>
use_matrix_2(matrix<int,R,10> & m1)303 matrix<float, R + R, R - 3> use_matrix_2(matrix<int, R, 10> &m1) {}
304 
// Template in which R appears both inside an expression (R - 2 rows) and
// directly (R columns) in the parameter type. The body is empty; the expected
// IR for the instantiation in test_use_matrix_2 is a trap followed by
// unreachable.
305 template <unsigned long R>
use_matrix_3(matrix<int,R-2,R> & m)306 selector<2> use_matrix_3(matrix<int, R - 2, R> &m) {}
307 
// Calls each use_matrix_2 overload and use_matrix_3 with matrices whose
// dimensions satisfy the dimension expressions in the templates. The declared
// result types (matrix<float, 5, 8>, selector<0>, selector<1>,
// matrix<float, 10, 2>, selector<2>) must match the return type of the chosen
// overload.
//
// The first group of expectations covers this function: the allocas, then
// five calls identified by mangled name, with matrix-returning calls stored
// back as <40 x float> and <20 x float>. The remaining groups cover the
// linkonce_odr instantiations, each of which stores its arguments and traps.
test_use_matrix_2()308 void test_use_matrix_2() {
309   // CHECK-LABEL: define{{.*}} void @_Z17test_use_matrix_2v()
310   // CHECK-NEXT:  entry:
311   // CHECK-NEXT:    %m1 = alloca [24 x i32], align 4
312   // CHECK-NEXT:    %r1 = alloca [40 x float], align 4
313   // CHECK-NEXT:    %m2 = alloca [24 x float], align 4
314   // CHECK-NEXT:    %r2 = alloca %struct.selector.2, align 1
315   // CHECK-NEXT:    %undef.agg.tmp = alloca %struct.selector.2, align 1
316   // CHECK-NEXT:    %m3 = alloca [104 x i32], align 4
317   // CHECK-NEXT:    %m4 = alloca [15 x float], align 4
318   // CHECK-NEXT:    %r3 = alloca %struct.selector.1, align 1
319   // CHECK-NEXT:    %undef.agg.tmp1 = alloca %struct.selector.1, align 1
320   // CHECK-NEXT:    %m5 = alloca [50 x i32], align 4
321   // CHECK-NEXT:    %r4 = alloca [20 x float], align 4
322   // CHECK-NEXT:    %r5 = alloca %struct.selector.0, align 1
323   // CHECK-NEXT:    %undef.agg.tmp3 = alloca %struct.selector.0, align 1
324   // CHECK-NEXT:    %call = call <40 x float> @_Z12use_matrix_2ILm4ELm6EEu11matrix_typeIXplT_Li1EEXplT0_Li2EEfERu11matrix_typeIXT_EXT0_EiE([24 x i32]* nonnull align 4 dereferenceable(96) %m1)
325   // CHECK-NEXT:    %0 = bitcast [40 x float]* %r1 to <40 x float>*
326   // CHECK-NEXT:    store <40 x float> %call, <40 x float>* %0, align 4
327   // CHECK-NEXT:    call void @_Z12use_matrix_2ILm2ELm12EE8selectorILi0EERu11matrix_typeIXplT_Li2EEXdvT0_Li2EEiERu11matrix_typeIXT_EXT0_EfE([24 x i32]* nonnull align 4 dereferenceable(96) %m1, [24 x float]* nonnull align 4 dereferenceable(96) %m2)
328   // CHECK-NEXT:    call void @_Z12use_matrix_2ILm5ELm8EE8selectorILi1EERu11matrix_typeIXplT_T0_EXT0_EiERu11matrix_typeIXT_EXmiT0_T_EfE([104 x i32]* nonnull align 4 dereferenceable(416) %m3, [15 x float]* nonnull align 4 dereferenceable(60) %m4)
329   // CHECK-NEXT:    %call2 = call <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m5)
330   // CHECK-NEXT:    %1 = bitcast [20 x float]* %r4 to <20 x float>*
331   // CHECK-NEXT:    store <20 x float> %call2, <20 x float>* %1, align 4
332   // CHECK-NEXT:    call void @_Z12use_matrix_3ILm6EE8selectorILi2EERu11matrix_typeIXmiT_Li2EEXT_EiE([24 x i32]* nonnull align 4 dereferenceable(96) %m1)
333   // CHECK-NEXT:    ret void
334
335   // CHECK-LABEL: define linkonce_odr <40 x float> @_Z12use_matrix_2ILm4ELm6EEu11matrix_typeIXplT_Li1EEXplT0_Li2EEfERu11matrix_typeIXT_EXT0_EiE([24 x i32]* nonnull align 4 dereferenceable(96) %m)
336   // CHECK-NEXT:  entry:
337   // CHECK-NEXT:    %m.addr = alloca [24 x i32]*, align 8
338   // CHECK-NEXT:    store [24 x i32]* %m, [24 x i32]** %m.addr, align 8
339   // CHECK-NEXT:    call void @llvm.trap()
340   // CHECK-NEXT:    unreachable
341
342   // CHECK-LABEL: define linkonce_odr void @_Z12use_matrix_2ILm2ELm12EE8selectorILi0EERu11matrix_typeIXplT_Li2EEXdvT0_Li2EEiERu11matrix_typeIXT_EXT0_EfE([24 x i32]* nonnull align 4 dereferenceable(96) %m1, [24 x float]* nonnull align 4 dereferenceable(96) %m2)
343   // CHECK-NEXT:  entry:
344   // CHECK-NEXT:    %m1.addr = alloca [24 x i32]*, align 8
345   // CHECK-NEXT:    %m2.addr = alloca [24 x float]*, align 8
346   // CHECK-NEXT:    store [24 x i32]* %m1, [24 x i32]** %m1.addr, align 8
347   // CHECK-NEXT:    store [24 x float]* %m2, [24 x float]** %m2.addr, align 8
348   // CHECK-NEXT:    call void @llvm.trap()
349   // CHECK-NEXT:    unreachable
350
351   // CHECK-LABEL: define linkonce_odr void @_Z12use_matrix_2ILm5ELm8EE8selectorILi1EERu11matrix_typeIXplT_T0_EXT0_EiERu11matrix_typeIXT_EXmiT0_T_EfE([104 x i32]* nonnull align 4 dereferenceable(416) %m1, [15 x float]* nonnull align 4 dereferenceable(60) %m2)
352   // CHECK-NEXT:  entry:
353   // CHECK-NEXT:    %m1.addr = alloca [104 x i32]*, align 8
354   // CHECK-NEXT:    %m2.addr = alloca [15 x float]*, align 8
355   // CHECK-NEXT:    store [104 x i32]* %m1, [104 x i32]** %m1.addr, align 8
356   // CHECK-NEXT:    store [15 x float]* %m2, [15 x float]** %m2.addr, align 8
357   // CHECK-NEXT:    call void @llvm.trap()
358   // CHECK-NEXT:    unreachable
359
360   // CHECK-LABEL: define linkonce_odr <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m1)
361   // CHECK-NEXT:  entry:
362   // CHECK-NEXT:    %m1.addr = alloca [50 x i32]*, align 8
363   // CHECK-NEXT:    store [50 x i32]* %m1, [50 x i32]** %m1.addr, align 8
364   // CHECK-NEXT:    call void @llvm.trap()
365   // CHECK-NEXT:    unreachable
366
367   // CHECK-LABEL: define linkonce_odr void @_Z12use_matrix_3ILm6EE8selectorILi2EERu11matrix_typeIXmiT_Li2EEXT_EiE([24 x i32]* nonnull align 4 dereferenceable(96) %m)
368   // CHECK-NEXT:  entry:
369   // CHECK-NEXT:    %m.addr = alloca [24 x i32]*, align 8
370   // CHECK-NEXT:    store [24 x i32]* %m, [24 x i32]** %m.addr, align 8
371   // CHECK-NEXT:    call void @llvm.trap()
372   // CHECK-NEXT:    unreachable
373
374   matrix<int, 4, 6> m1;
375   matrix<float, 5, 8> r1 = use_matrix_2(m1);
376
377   matrix<float, 2, 12> m2;
378   selector<0> r2 = use_matrix_2(m1, m2);
379
380   matrix<int, 13, 8> m3;
381   matrix<float, 5, 3> m4;
382   selector<1> r3 = use_matrix_2(m3, m4);
383
384   matrix<int, 5, 10> m5;
385   matrix<float, 10, 2> r4 = use_matrix_2(m5);
386
387   selector<2> r5 = use_matrix_3(m1);
388 }
389