// RUN: %clang_cc1 -no-opaque-pointers -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK %s
// RUN: %clang_cc1 -no-opaque-pointers -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s

// Matrix types under test: element type x rows x columns.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));

// Floating point matrix/scalar additions.

// Elementwise addition of two double matrices lowers to vector fadd.
void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = b + c;
}

// Compound add-assign between double matrices lowers to vector fadd.
void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += b;
}

// Compound subtract-assign between double matrices lowers to vector fsub.
void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= b;
}

// Elementwise addition of two float matrices lowers to vector fadd.
void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[C:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a = b + c;
}

// Compound add-assign between float matrices lowers to vector fadd.
void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a += b;
}

// Compound subtract-assign between float matrices lowers to vector fsub.
void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a -= b;
}

// Matrix + scalar: the float scalar is extended to double, then splatted.
void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
  // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vf;
}

// Compound add-assign with a float scalar: extend, splat, then fadd.
void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
  // CHECK:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += vf;
}

// Compound subtract-assign with a float scalar: extend, splat, then fsub.
void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
  // CHECK:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= vf;
}

// Matrix + scalar of matching element type: no conversion, just splat and fadd.
void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
  // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vd;
}

// Compound add-assign with a double scalar: splat then fadd.
void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a += vd;
}

// Compound subtract-assign with a double scalar: splat then fsub.
void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a -= vd;
}

// Float matrix + float scalar: splat then fadd, no conversion needed.
void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
  // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vf;
}

// Compound add-assign of a float scalar to a float matrix.
void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vf;
}

// Compound subtract-assign of a float scalar from a float matrix.
void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vf;
}

// Float matrix + double scalar: the scalar is truncated to float, then splatted.
void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
  // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vd;
}

// Compound add-assign with a double scalar: truncate, splat, fadd.
void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vd;
}

// Compound subtract-assign with a double scalar: truncate, splat, fsub.
void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vd;
}

// Integer matrix/scalar additions

// Elementwise addition of two int matrices lowers to integer vector add.
void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a = b + c;
}

// Compound add-assign between int matrices.
void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a += b;
}

// Compound subtract-assign between int matrices.
void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a -= b;
}

// Elementwise addition of two unsigned long long matrices.
void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[C:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a = b + c;
}

// Compound add-assign between unsigned long long matrices.
void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a += b;
}

// Compound subtract-assign between unsigned long long matrices.
void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a -= b;
}

// Int matrix + short scalar: the scalar is sign-extended to i32, then splatted.
void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
  // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:   [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vs;
}

// Compound add-assign with a short scalar: sign-extend, splat, add.
void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vs;
}

// Compound subtract-assign with a short scalar: sign-extend, splat, sub.
void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vs;
}

// Int matrix + long scalar: the scalar is truncated to i32, then splatted.
void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
  // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vli;
}

// Compound add-assign with a long scalar: truncate, splat, add.
void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vli;
}

// Compound subtract-assign with a long scalar: truncate, splat, sub.
void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vli;
}

// Int matrix + unsigned long long scalar: truncate to i32, splat, add.
void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
  // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vulli;
}

// Compound add-assign with an unsigned long long scalar: truncate, splat, add.
void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vulli;
}

// Compound subtract-assign with an unsigned long long scalar: truncate, splat, sub.
void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vulli;
}

// Scalar + matrix (scalar on the left): sign-extend short to i64, splat, add.
void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
  // CHECK:         [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:    [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vs + b;
}

// Compound add-assign with a short scalar: sign-extend to i64, splat, add.
void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vs;
}

// Compound subtract-assign with a short scalar: sign-extend to i64, splat, sub.
void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vs;
}

// Scalar + matrix with matching 64-bit width: no conversion, splat and add.
void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // CHECK:         [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vli + b;
}

// Compound add-assign with a long scalar of matching width: splat and add.
void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vli;
}

// Compound subtract-assign with a long scalar of matching width: splat and sub.
void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vli;
}

// scalar + matrix: with the scalar on the left, the splat is emitted as the
// first operand of the add.
void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  b = vulli + b;
}
476 
// Compound add with an unsigned long long scalar; element type already
// matches i64, so no conversion is emitted before the splat.
void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vulli;
}
488 
// Compound subtract with an unsigned long long scalar; lowered to a plain
// vector sub against the splatted scalar.
void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vulli;
}
500 
501 // Tests for matrix multiplication.
502 
// Matrix-matrix multiply lowers to the llvm.matrix.multiply intrinsic with
// the dimensions (5, 5, 5) passed as immediate arguments.
void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_matrix_matrix_double(
  // CHECK:         [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT:    [[A_ADDR:%.*]] = bitcast [25 x double]* %a to <25 x double>*
  // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* [[A_ADDR]], align 8
  // CHECK:         ret void
  //

  dx5x5_t a;
  a = b * c;
}
516 
// Compound matrix-matrix multiply; note the RHS (c) is loaded before the
// LHS (b), but b stays the first intrinsic operand.
void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
  // CHECK:        [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:   [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:   [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT:   store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:   ret void
  b *= c;
}
526 
527 typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
528 typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
529 // CHECK-LABEL: @multiply_matrix_matrix_int(
530 // CHECK:         [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
531 // CHECK-NEXT:    [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
532 // CHECK-NEXT:    [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
533 // CHECK-NEXT:    [[A_ADDR:%.*]] = bitcast [81 x i32]* %a to <81 x i32>*
534 // CHECK-NEXT:    store <81 x i32> [[RES]], <81 x i32>* [[A_ADDR]], align 4
535 // CHECK:         ret void
536 //
// 9x3 * 3x9 integer multiply producing a 9x9 result (dimensions checked
// above as (9, 3, 9) on the intrinsic call).
void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
  ix9x9_t a;
  a = b * c;
}
541 
542 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
543 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
544 // CHECK-NEXT:    [[S:%.*]] = load float, float* %s.addr, align 4
545 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
546 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
547 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
548 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
549 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
550 // CHECK-NEXT:    ret void
551 //
// Float scalar is fpext'ed to double before being splatted (checked above).
void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a * s;
}
555 
556 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
557 // CHECK:         [[S:%.*]] = load float, float* %s.addr, align 4
558 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
559 // CHECK-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
560 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
561 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
562 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
563 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
564 // CHECK-NEXT:    ret void
565 //
// Compound form of the float-scalar multiply; scalar is loaded and
// extended before the matrix load (checked above).
void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
  a *= s;
}
569 
570 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
571 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
572 // CHECK-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8
573 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
574 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
575 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
576 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
577 // CHECK-NEXT:    ret void
578 //
// Scalar type already matches the element type; no conversion is emitted.
void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a * s;
}
582 
583 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
584 // CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
585 // CHECK-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
586 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
587 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
588 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
589 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
590 // CHECK-NEXT:    ret void
// Compound multiply by a double scalar; same lowering, scalar loaded first.
void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
  a *= s;
}
594 
595 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
596 // CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
597 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
598 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
599 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
600 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
601 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
602 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
603 // CHECK-NEXT:    ret void
604 //
// Double scalar is fptrunc'ed down to float to match the element type, and
// with the scalar on the left the splat is the first fmul operand.
void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = s * b;
}
608 
609 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
610 // CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
611 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
612 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
613 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
614 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
615 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
616 // store <6 x float> %3, <6 x float>* %0, align 4
617 // ret void
multiply_compound_float_matrix_scalar_double(fx2x3_t b,double s)618 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
619   b *= s;
620 }
621 
622 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
623 // CHECK:         [[S:%.*]] = load i16, i16* %s.addr, align 2
624 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
625 // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
626 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
627 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
628 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
629 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
630 // CHECK-NEXT:    ret void
631 //
// Short scalar is sign-extended to i32; scalar-on-the-left puts the splat
// as the first mul operand (checked above).
void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = s * b;
}
635 
636 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
637 // CHECK:        [[S:%.*]] = load i16, i16* %s.addr, align 2
638 // CHECK-NEXT:   [[S_EXT:%.*]] = sext i16 [[S]] to i32
639 // CHECK-NEXT:   [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
640 // CHECK-NEXT:   [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
641 // CHECK-NEXT:   [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
642 // CHECK-NEXT:   [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
643 // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
644 // CHECK-NEXT:   ret void
645 //
// Compound form: matrix becomes the first mul operand (checked above).
void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
  b *= s;
}
649 
650 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
651 // CHECK:         [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
652 // CHECK-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8
653 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
654 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
655 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
656 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
657 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
658 // CHECK-NEXT:    ret void
659 //
multiply_int_matrix_scalar_ull(ix9x3_t b,unsigned long long s)660 void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
661   b = b * s;
662 }
663 
// Compound multiply by an unsigned long long scalar; truncated to i32 and
// splatted, with the scalar operand evaluated before the matrix load.
void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
  // CHECK:         [[S:%.*]] = load i64, i64* %s.addr, align 8
  // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
  // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
  // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b *= s;
}
677 
678 // CHECK-LABEL: @multiply_float_matrix_constant(
679 // CHECK-NEXT:  entry:
680 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
681 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
682 // CHECK-NEXT:    store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
683 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
684 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
685 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
686 // CHECK-NEXT:    ret void
687 //
// Multiplying by a literal folds the splat into a constant vector operand
// (checked above).
void multiply_float_matrix_constant(fx2x3_t a) {
  a = a * 2.5;
}
691 
692 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
693 // CHECK-NEXT:  entry:
694 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
695 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
696 // CHECK-NEXT:    store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
697 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
698 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
699 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
700 // CHECK-NEXT:    ret void
// Compound multiply by a literal; same constant-splat lowering as above.
void multiply_compound_float_matrix_constant(fx2x3_t a) {
  a *= 2.5;
}
704 
705 // CHECK-LABEL: @multiply_int_matrix_constant(
706 // CHECK-NEXT:  entry:
707 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
708 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
709 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
710 // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
711 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
712 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
713 // CHECK-NEXT:    ret void
714 //
// Constant on the left: the constant vector is the first mul operand
// (checked above).
void multiply_int_matrix_constant(ix9x3_t a) {
  a = 5 * a;
}
718 
719 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
720 // CHECK-NEXT:  entry:
721 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
722 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
723 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
724 // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
725 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
726 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
727 // CHECK-NEXT:    ret void
728 //
// Compound form: matrix is the first mul operand, constant vector second
// (checked above).
void multiply_compound_int_matrix_constant(ix9x3_t a) {
  a *= 5;
}
732 
733 // CHECK-LABEL: @divide_double_matrix_scalar_float(
734 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
735 // CHECK-NEXT:    [[S:%.*]] = load float, float* %s.addr, align 4
736 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
737 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
738 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
739 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
740 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
741 // CHECK-NEXT:    ret void
742 //
// Division: float scalar is fpext'ed, splatted, and used as the fdiv
// divisor (checked above).
void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a / s;
}
746 
747 // CHECK-LABEL: @divide_double_matrix_scalar_double(
748 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
749 // CHECK-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8
750 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
751 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
752 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
753 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
754 // CHECK-NEXT:    ret void
755 //
// Division by a matching-type scalar; no conversion before the splat.
void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a / s;
}
759 
760 // CHECK-LABEL: @divide_float_matrix_scalar_double(
761 // CHECK:         [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
762 // CHECK-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8
763 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
764 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
765 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
766 // CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
767 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
768 // CHECK-NEXT:    ret void
769 //
// Division of a float matrix by a double scalar; scalar is truncated to
// float first (checked above).
void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = b / s;
}
773 
774 // CHECK-LABEL: @divide_int_matrix_scalar_short(
775 // CHECK:         [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
776 // CHECK-NEXT:    [[S:%.*]] = load i16, i16* %s.addr, align 2
777 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
778 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
779 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
780 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
781 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
782 // CHECK-NEXT:    ret void
783 //
// Signed integer division lowers to sdiv (checked above).
void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = b / s;
}
787 
788 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
789 // CHECK:         [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
790 // CHECK-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8
791 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
792 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
793 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
794 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
795 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
796 // CHECK-NEXT:    ret void
797 //
// The i32 (signed) element type drives the operation: still sdiv even
// though the scalar is unsigned (checked above).
void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  b = b / s;
}
801 
802 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
803 // CHECK:         [[MAT:%.*]] = load <8 x i64>, <8 x i64>* [[MAT_ADDR:%.*]], align 8
804 // CHECK-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8
805 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i32 0
806 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
807 // CHECK-NEXT:    [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
808 // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* [[MAT_ADDR]], align 8
809 // CHECK-NEXT:    ret void
810 //
// Unsigned element type yields udiv (checked above).
void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
  b = b / s;
}
814 
815 // CHECK-LABEL: @divide_float_matrix_constant(
816 // CHECK-NEXT:  entry:
817 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
818 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
819 // CHECK-NEXT:    store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
820 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
821 // CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
822 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
823 // CHECK-NEXT:    ret void
824 //
// Division by a literal folds the splat into a constant divisor vector
// (checked above).
void divide_float_matrix_constant(fx2x3_t a) {
  a = a / 2.5;
}
828 
829 // Tests for the matrix type operators.
830 
831 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
832 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
833 
834 // Check that we can use matrix index expression on different floating point
835 // matrixes and indices.
// Element insert with constant indices of mixed integer types; the
// column-major flat index (col * rows + row = 1*5+0 = 5) is folded to a
// constant. The extra parameters only exercise the signature.
void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
  // CHECK:         [[D:%.*]] = load double, double* %d.addr, align 8
  // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
  // CHECK-NEXT:    store <25 x double> [[MATINS]], <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    ret void

  a[0ll][1u] = d;
}
846 
// Constant-index insert; column-major flat index 4*5+1 = 21.
void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
  // CHECK:         [[D:%.*]] = load double, double* %d.addr, align 8
  // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, <25 x double>* [[MAT_ADDR:%.*]], align 8
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
  // CHECK-NEXT:    store <25 x double> [[MATINS]], <25 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    ret void

  a[1][4u] = d;
}
857 
// Constant-index insert into a 2x3 float matrix; flat index 1*2+1 = 3.
void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
  // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
  // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[1ull][1] = e;
}
868 
// Runtime indices: both are widened to i64 (sext for signed, zext for
// unsigned), combined as col*rows + row, and at -O1 an llvm.assume bounds
// hint is emitted before the insert.
void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:    [[J_EXT:%.*]] = sext i32 [[J]] to i64
  // CHECK-NEXT:    [[K:%.*]] = load i32, i32* %k.addr, align 4
  // CHECK-NEXT:    [[K_EXT:%.*]] = zext i32 [[K]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[j][k] = e;
}
887 
// Runtime indices with a short row index (sext to i64) and an already-i64
// column index; the parenthesized base exercises the same lowering.
void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
  // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[J:%.*]] = load i16, i16* %j.addr, align 2
  // CHECK-NEXT:    [[J_EXT:%.*]] = sext i16 [[J]] to i64
  // CHECK-NEXT:    [[K:%.*]] = load i64, i64* %k.addr, align 8
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  (b)[j][k] = e;
}
905 
906 // Check that we can can use matrix index expressions on integer matrixes.
907 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Index built from an expression: the constant column part folds to 18
// (= (1+1)*9 rows), and the row part 4+i is computed and sign-extended.
void insert_int_idx_expr(ix9x3_t a, int i) {
  // CHECK-LABEL: @insert_int_idx_expr(
  // CHECK:         [[I1:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[I2:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
  // CHECK-NEXT:    [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
  // CHECK-NEXT:    store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  a[4 + i][1 + 1u] = i;
}
924 
925 // Check that we can can use matrix index expressions on FP and integer
926 // matrixes.
927 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Insert through a pointer to a matrix: the pointer is loaded, bitcast to
// the vector type, and the constant flat index 1*9+4 = 13 is used.
void insert_float_into_int_matrix(ix9x3_t *a, int i) {
  // CHECK-LABEL: @insert_float_into_int_matrix(
  // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[MAT_ADDR1:%.*]] = load [27 x i32]*, [27 x i32]** %a.addr, align 8
  // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [27 x i32]* [[MAT_ADDR1]] to <27 x i32>*
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR2]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
  // CHECK-NEXT:    store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR2]], align 4
  // CHECK-NEXT:    ret void

  (*a)[4][1] = i;
}
940 
941 // Check that we can use overloaded matrix index expressions on matrixes with
942 // matching dimensions, but different element types.
943 typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
944 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
// 3x3 double matrix insert; constant flat index 1*3+2 = 5.
void insert_matching_dimensions1(dx3x3_t a, double i) {
  // CHECK-LABEL: @insert_matching_dimensions1(
  // CHECK:         [[I:%.*]] = load double, double* %i.addr, align 8
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x double>, <9 x double>* [[MAT_ADDR:%.*]], align 8
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
  // CHECK-NEXT:    store <9 x double> [[MATINS]], <9 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    ret void

  a[2u][1u] = i;
}
955 
// Same 3x3 shape but float element type; constant flat index 2*3+1 = 7.
void insert_matching_dimensions(fx3x3_t b, float e) {
  // CHECK-LABEL: @insert_matching_dimensions(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
  // CHECK-NEXT:    store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[1u][2u] = e;
}
966 
// Element read lowers to extractelement; constant flat index 2*5+2 = 12.
double extract_double(dx5x5_t a) {
  // CHECK-LABEL: @extract_double(
  // CHECK:         [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
  // CHECK-NEXT:    ret double [[MATEXT]]

  return a[2][3 - 1u];
}
975 
// Extract from a float matrix returned as double: the element (flat index
// 1*3+2 = 5) is fpext'ed for the return.
double extract_float(fx3x3_t b) {
  // CHECK-LABEL: @extract_float(
  // CHECK:         [[MAT:%.*]] = load <9 x float>, <9 x float>* {{.*}}, align 4
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
  // CHECK-NEXT:    [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
  // CHECK-NEXT:    ret double [[TO_DOUBLE]]

  return b[2][1];
}
985 
// Extract with the same runtime value for both indices: j is loaded twice,
// combined as j*9 + j, with the -O1 llvm.assume bounds hint.
int extract_int(ix9x3_t c, unsigned long j) {
  // CHECK-LABEL: @extract_int(
  // CHECK:         [[J1:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT:    [[J2:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J2]], 9
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    ret i32 [[MATEXT]]

  return c[j][j];
}
1000 
1001 typedef double dx3x2_t __attribute__((matrix_type(3, 2)));
1002 
// Matrix access through a pointer-to-pointer: ptr[1][2] selects the matrix
// (lowered as [6 x double] array GEPs plus a bitcast to the vector type),
// then [j][1] selects element 1 * 3 + j of the 3x2 matrix; OPT asserts the
// dynamic index is below the element count (6).
double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
  // CHECK-LABEL: @test_extract_matrix_pointer1(
  // CHECK:         [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT:    [[IDX:%.*]] = add i64 3, [[J_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
  // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 1
  // CHECK-NEXT:    [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
  // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 2
  // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
  // CHECK-NEXT:    ret double [[MATEXT]]

  return ptr[1][2][j][1];
}
1021 
// Same shape of access as test_extract_matrix_pointer1 but with explicit
// pointer arithmetic and constant indices only: [2][1*3-2] == [2][1], which
// flattens to 1 * 3 + 2 == 5, so no bounds assumption is needed.
double test_extract_matrix_pointer2(dx3x2_t **ptr) {
  // CHECK-LABEL: @test_extract_matrix_pointer2(
  // CHECK-NEXT:  entry:
  // CHECK:         [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
  // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 4
  // CHECK-NEXT:    [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
  // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 6
  // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
  // CHECK-NEXT:    ret double [[MATEXT]]

  return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
}
1036 
// Chained extract + insert with mixed index types: the source index comes
// from a sign-extended short (b[0][k] -> k * 3 + 0) and the destination index
// from an unsigned long (b[2][j] -> j * 3 + 2). OPT emits a separate bounds
// assumption for each dynamic index.
void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
  // CHECK-LABEL: @insert_extract(
  // CHECK:         [[K:%.*]] = load i16, i16* %k.addr, align 2
  // CHECK-NEXT:    [[K_EXT:%.*]] = sext i16 [[K]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], 0
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    [[J:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT:    [[IDX3:%.*]] = mul i64 [[J]], 3
  // CHECK-NEXT:    [[IDX4:%.*]] = add i64 [[IDX3]], 2
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT2:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
  // CHECK-NEXT:    store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[2][j] = b[0][k];
}
1059 
// Compound assignment on a matrix element: a[2][3] -= 1.0 lowers to an
// extract at the constant index 3 * 5 + 2 == 17, an fsub, a re-load of the
// matrix, and an insert back at the same index.
void insert_compound_stmt(dx5x5_t a) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
  // CHECK:        [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8
  // CHECK-NEXT:   [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
  // CHECK-NEXT:   [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
  // CHECK-NEXT:   [[A2:%.*]] = load <25 x double>, <25 x double>* [[A_PTR]], align 8
  // CHECK-NEXT:   [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
  // CHECK-NEXT:   store <25 x double> [[INS]], <25 x double>* [[A_PTR]], align 8
  // CHECK-NEXT:   ret void

  a[2][3] -= 1.0;
}
1072 
1073 struct Foo {
1074   fx2x3_t mat;
1075 };
1076 
insert_compound_stmt_field(struct Foo * a,float f,unsigned i,unsigned j)1077 void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
1078   // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(%struct.Foo* noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
1079   // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
1080   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
1081   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
1082   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
1083   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1084   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1085   // CHECK-NEXT:    [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
1086   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1087   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1088   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
1089   // CHECK-NEXT:    [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1090   // CHECK-NEXT:    [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1091   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1092   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1093   // CHECK-NEXT:    [[MAT2:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
1094   // CHECK-NEXT:    [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1095   // CHECK-NEXT:    store <6 x float> [[INS]], <6 x float>* [[MAT_PTR]], align 4
1096   // CHECK-NEXT:    ret void
1097 
1098   a->mat[i][j] += f;
1099 }
1100 
// Use matrix elements themselves as indices: a[i][j] and a[j][i] + 2 (each a
// dynamic extract from the 9x3 int matrix, with its own OPT bounds
// assumption) are sign-extended and combined into the flattened index for
// the store into the 5x5 double matrix b.
void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
  // CHECK:       [[I1:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:  [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
  // CHECK-NEXT:  [[J1:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:  [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
  // CHECK-NEXT:  [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
  // CHECK-NEXT:  [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:  [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
  // CHECK-NEXT:  [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
  // CHECK-NEXT:  [[J2:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:  [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
  // CHECK-NEXT:  [[I2:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:  [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
  // CHECK-NEXT:  [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
  // CHECK-NEXT:  [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:  [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
  // CHECK-NEXT:  [[MI3:%.*]] = add nsw i32 [[MI2]], 2
  // CHECK-NEXT:  [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
  // CHECK-NEXT:  [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
  // CHECK-NEXT:  [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:  [[B:%.*]] = load <25 x double>, <25 x double>* [[B_PTR:%.*]], align 8
  // CHECK-NEXT:  [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
  // CHECK-NEXT:  store <25 x double> [[INS]], <25 x double>* [[B_PTR]], align 8
  b[a[i][j]][a[j][i] + 2] = 1.5;
}
1135