// RUN: %clang_cc1 -no-opaque-pointers -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK %s
// RUN: %clang_cc1 -no-opaque-pointers -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s

// Matrix typedefs used by the tests below: element type x rows x columns.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));

// Floating point matrix/scalar additions.

void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
  // CHECK: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = b + c;
}

void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
  // CHECK: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += b;
}

void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
  // CHECK: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= b;
}

void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
  // CHECK: [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[C:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a = b + c;
}

void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
  // CHECK: [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a += b;
}

void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
  // CHECK: [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a -= b;
}

void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
  // CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vf;
}

void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
  // CHECK: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += vf;
}

void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
  // CHECK: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= vf;
}

void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
  // CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vd;
}

void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
  // CHECK: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a += vd;
}

void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
  // CHECK: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a -= vd;
}

void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
  // CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vf;
}

void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
  // CHECK: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vf;
}

void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
  // CHECK: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vf;
}

void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
  // CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vd;
}

void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
  // CHECK: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vd;
}

void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
  // CHECK: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vd;
}

// Integer matrix/scalar additions.

void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
  // CHECK: [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a = b + c;
}

void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
  // CHECK: [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK: [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK: [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
  // CHECK: store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a += b;
}

void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
  // CHECK: [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK: [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK: [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
  // CHECK: store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a -= b;
}

void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
  // CHECK: [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[C:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a = b + c;
}

void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
  // CHECK: [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a += b;
}

void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
  // CHECK: [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a -= b;
}

void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
  // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vs;
}

void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
  // CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vs;
}

void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
  // CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vs;
}

void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
  // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vli;
}

void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vli;
}

void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vli;
}

void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
  // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vulli;
}

void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a += vulli;
}

void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a -= vulli;
}

void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
  // CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vs + b;
}

void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
  // CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vs;
}

void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
  // CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vs;
}

void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vli + b;
}

void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vli;
}

void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vli;
}

void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  b = vulli + b;
}

void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vulli;
}

subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b,unsigned long long int vulli)489 void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
490 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
491 // CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
492 // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
493 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
494 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
495 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
496 // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
497
498 b -= vulli;
499 }
500
// Tests for matrix multiplication.

// Matrix * matrix lowers to the llvm.matrix.multiply intrinsic.
void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_matrix_matrix_double(
  // CHECK: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT: [[A_ADDR:%.*]] = bitcast [25 x double]* %a to <25 x double>*
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* [[A_ADDR]], align 8
  // CHECK: ret void
  //

  dx5x5_t a;
  a = b * c;
}

// For the compound form the RHS is evaluated (loaded) first.
void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
  // CHECK: [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: ret void
  b *= c;
}

// 9x3 * 3x9 produces a 9x9 integer matrix (non-square operands).
typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
// CHECK-LABEL: @multiply_matrix_matrix_int(
// CHECK: [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
// CHECK-NEXT: [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
// CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
// CHECK-NEXT: [[A_ADDR:%.*]] = bitcast [81 x i32]* %a to <81 x i32>*
// CHECK-NEXT: store <81 x i32> [[RES]], <81 x i32>* [[A_ADDR]], align 4
// CHECK: ret void
//
void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
  ix9x9_t a;
  a = b * c;
}
541
// CHECK-LABEL: @multiply_double_matrix_scalar_float(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4
// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
// The float scalar is extended to the double element type before the splat.
void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a * s;
}

// CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
// CHECK: [[S:%.*]] = load float, float* %s.addr, align 4
// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
  a *= s;
}

// CHECK-LABEL: @multiply_double_matrix_scalar_double(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
// Matching scalar type: no conversion before the splat.
void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a * s;
}

// CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
// CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
  a *= s;
}

// CHECK-LABEL: @multiply_float_matrix_scalar_double(
// CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
// The double scalar is truncated to the float element type before the splat.
void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = s * b;
}
608
609 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
610 // CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
611 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
612 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
613 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
614 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
615 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
616 // store <6 x float> %3, <6 x float>* %0, align 4
617 // ret void
multiply_compound_float_matrix_scalar_double(fx2x3_t b,double s)618 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
619 b *= s;
620 }
621
// CHECK-LABEL: @multiply_int_matrix_scalar_short(
// CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2
// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
// The short scalar is sign-extended to the i32 element type.
void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = s * b;
}

// CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
// CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2
// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
  b *= s;
}

// CHECK-LABEL: @multiply_int_matrix_scalar_ull(
// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
// The unsigned long long scalar is truncated to the i32 element type.
void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  b = b * s;
}

void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
  // CHECK: [[S:%.*]] = load i64, i64* %s.addr, align 8
  // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
  // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
  // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  b *= s;
}
677
// Multiplication by a literal constant folds into a constant vector splat.
// CHECK-LABEL: @multiply_float_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
// CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void multiply_float_matrix_constant(fx2x3_t a) {
  a = a * 2.5;
}

// CHECK-LABEL: @multiply_compound_float_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
// CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
void multiply_compound_float_matrix_constant(fx2x3_t a) {
  a *= 2.5;
}

// Constant * matrix: the constant splat is the first mul operand.
// CHECK-LABEL: @multiply_int_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
// CHECK-NEXT: store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void multiply_int_matrix_constant(ix9x3_t a) {
  a = 5 * a;
}

// CHECK-LABEL: @multiply_compound_int_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
// CHECK-NEXT: store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void multiply_compound_int_matrix_constant(ix9x3_t a) {
  a *= 5;
}
732
// Matrix / scalar divisions. Division is only defined with the matrix on the
// left; the scalar is converted to the element type and splatted.
// CHECK-LABEL: @divide_double_matrix_scalar_float(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4
// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a / s;
}

// CHECK-LABEL: @divide_double_matrix_scalar_double(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a / s;
}

// CHECK-LABEL: @divide_float_matrix_scalar_double(
// CHECK: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = b / s;
}

// Signed int element type uses sdiv.
// CHECK-LABEL: @divide_int_matrix_scalar_short(
// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load i16, i16* %s.addr, align 2
// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = b / s;
}

// CHECK-LABEL: @divide_int_matrix_scalar_ull(
// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  b = b / s;
}

// Unsigned element type uses udiv.
// CHECK-LABEL: @divide_ull_matrix_scalar_ull(
// CHECK: [[MAT:%.*]] = load <8 x i64>, <8 x i64>* [[MAT_ADDR:%.*]], align 8
// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* [[MAT_ADDR]], align 8
// CHECK-NEXT: ret void
//
void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
  b = b / s;
}

// CHECK-LABEL: @divide_float_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
// CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void divide_float_matrix_constant(fx2x3_t a) {
  a = a / 2.5;
}
828
// Tests for the matrix type operators.

// Redeclarations of the earlier typedefs (legal in C11: identical typedefs
// may be repeated).
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));

// Check that we can use matrix index expression on different floating point
// matrixes and indices.
void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
  // CHECK: [[D:%.*]] = load double, double* %d.addr, align 8
  // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
  // CHECK-NEXT: store <25 x double> [[MATINS]], <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: ret void

  a[0ll][1u] = d;
}

void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
  // CHECK: [[D:%.*]] = load double, double* %d.addr, align 8
  // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, <25 x double>* [[MAT_ADDR:%.*]], align 8
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
  // CHECK-NEXT: store <25 x double> [[MATINS]], <25 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT: ret void

  a[1][4u] = d;
}
857
void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
  // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
  // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
  // CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  b[1ull][1] = e;
}

// Runtime indices: row/column are extended to i64, the flat element index is
// computed as col * rows + row, and at -O1 a range assume is emitted.
void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
  // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64
  // CHECK-NEXT: [[K:%.*]] = load i32, i32* %k.addr, align 4
  // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64
  // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
  // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  b[j][k] = e;
}

void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
  // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
  // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT: [[J:%.*]] = load i16, i16* %j.addr, align 2
  // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64
  // CHECK-NEXT: [[K:%.*]] = load i64, i64* %k.addr, align 8
  // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2
  // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  (b)[j][k] = e;
}
905
// Check that we can use matrix index expressions on integer matrixes.
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Index computed from an expression (4 + i); the constant parts fold into the
// flat-index arithmetic (column 2 contributes the 18 = 2 * 9 offset).
void insert_int_idx_expr(ix9x3_t a, int i) {
  // CHECK-LABEL: @insert_int_idx_expr(
  // CHECK: [[I1:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
  // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
  // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
  // CHECK-NEXT: store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  a[4 + i][1 + 1u] = i;
}
924
// Check that we can use matrix index expressions on FP and integer
// matrixes.
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Insert through a pointer to a matrix; the pointee is bitcast to the vector
// type before the load/insert/store sequence.
void insert_float_into_int_matrix(ix9x3_t *a, int i) {
  // CHECK-LABEL: @insert_float_into_int_matrix(
  // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT: [[MAT_ADDR1:%.*]] = load [27 x i32]*, [27 x i32]** %a.addr, align 8
  // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [27 x i32]* [[MAT_ADDR1]] to <27 x i32>*
  // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR2]], align 4
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
  // CHECK-NEXT: store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR2]], align 4
  // CHECK-NEXT: ret void

  (*a)[4][1] = i;
}
940
// Check that we can use overloaded matrix index expressions on matrixes with
// matching dimensions, but different element types.
typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
void insert_matching_dimensions1(dx3x3_t a, double i) {
  // CHECK-LABEL: @insert_matching_dimensions1(
  // CHECK: [[I:%.*]] = load double, double* %i.addr, align 8
  // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, <9 x double>* [[MAT_ADDR:%.*]], align 8
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
  // CHECK-NEXT: store <9 x double> [[MATINS]], <9 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT: ret void

  a[2u][1u] = i;
}

void insert_matching_dimensions(fx3x3_t b, float e) {
  // CHECK-LABEL: @insert_matching_dimensions(
  // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
  // CHECK-NEXT: store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  b[1u][2u] = e;
}
966
// Element extraction: constant indices fold to a constant extractelement
// index (column-major flat index = col * rows + row, e.g. 2 * 5 + 2 = 12).
double extract_double(dx5x5_t a) {
  // CHECK-LABEL: @extract_double(
  // CHECK: [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
  // CHECK-NEXT: ret double [[MATEXT]]

  return a[2][3 - 1u];
}

// The extracted float is widened to match the double return type.
double extract_float(fx3x3_t b) {
  // CHECK-LABEL: @extract_float(
  // CHECK: [[MAT:%.*]] = load <9 x float>, <9 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
  // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
  // CHECK-NEXT: ret double [[TO_DOUBLE]]

  return b[2][1];
}

// Runtime index used for both row and column; at -O1 an assume bounds the
// flat index.
int extract_int(ix9x3_t c, unsigned long j) {
  // CHECK-LABEL: @extract_int(
  // CHECK: [[J1:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT: [[J2:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
  // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT: ret i32 [[MATEXT]]

  return c[j][j];
}
1000
typedef double dx3x2_t __attribute__((matrix_type(3, 2)));

// Mixed pointer arithmetic and matrix indexing: ptr[1][2] selects the matrix,
// the remaining two subscripts index into it.
double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
  // CHECK-LABEL: @test_extract_matrix_pointer1(
  // CHECK: [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
  // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 1
  // CHECK-NEXT: [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
  // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 2
  // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
  // CHECK-NEXT: [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
  // CHECK-NEXT: ret double [[MATEXT]]

  return ptr[1][2][j][1];
}

// Explicit pointer arithmetic form; constant subscripts fold to index 5.
double test_extract_matrix_pointer2(dx3x2_t **ptr) {
  // CHECK-LABEL: @test_extract_matrix_pointer2(
  // CHECK-NEXT: entry:
  // CHECK: [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
  // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 4
  // CHECK-NEXT: [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
  // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 6
  // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
  // CHECK-NEXT: [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
  // CHECK-NEXT: ret double [[MATEXT]]

  return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
}
1036
// Combined extract + insert on the same matrix: b[2][j] = b[0][k].
// Checks the index conversions: the signed short k is sign-extended to i64,
// the unsigned long j is used directly. Column-major addressing gives
// k*3 + 0 for the source and j*3 + 2 for the destination element. Under -O1
// each computed index gets an icmp ult/llvm.assume bounds hint before use.
void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
  // CHECK-LABEL: @insert_extract(
  // CHECK:         [[K:%.*]] = load i16, i16* %k.addr, align 2
  // CHECK-NEXT:    [[K_EXT:%.*]] = sext i16 [[K]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], 0
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    [[J:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT:    [[IDX3:%.*]] = mul i64 [[J]], 3
  // CHECK-NEXT:    [[IDX4:%.*]] = add i64 [[IDX3]], 2
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT2:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
  // CHECK-NEXT:    store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[2][j] = b[0][k];
}
1059
// Compound assignment (-=) to a matrix element with constant indices.
// a[2][3] lowers to extractelement / fsub / insertelement at the folded
// column-major index 17 (3 * 5 rows + 2); no assume is needed for constants.
void insert_compound_stmt(dx5x5_t a) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
  // CHECK:        [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8
  // CHECK-NEXT:   [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
  // CHECK-NEXT:   [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
  // CHECK-NEXT:   [[A2:%.*]] = load <25 x double>, <25 x double>* [[A_PTR]], align 8
  // CHECK-NEXT:   [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
  // CHECK-NEXT:   store <25 x double> [[INS]], <25 x double>* [[A_PTR]], align 8
  // CHECK-NEXT:   ret void

  a[2][3] -= 1.0;
}
1072
// Aggregate wrapping a 2x3 float matrix; used below to test element access
// through a struct member (lowered as a [6 x float] field bitcast to a vector).
struct Foo {
  fx2x3_t mat;
};
1076
// Compound assignment (+=) to a matrix element stored in a struct field, with
// runtime unsigned indices. Both indices are zero-extended to i64 and combined
// column-major (j * 2 rows + i); the [6 x float] field is bitcast to a vector
// pointer. Under -O1 the bounds assume is emitted twice: once before the read
// and once before the write-back.
void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(%struct.Foo* noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
  // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
  // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
  // CHECK-NEXT:    [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
  // CHECK-NEXT:    [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT2:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
  // CHECK-NEXT:    [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[INS]], <6 x float>* [[MAT_PTR]], align 4
  // CHECK-NEXT:    ret void

  a->mat[i][j] += f;
}
1100
// Matrix elements used as subscripts into another matrix:
// b[a[i][j]][a[j][i] + 2] = 1.5. Verifies that each inner extract from the
// int matrix `a` is sign-extended to i64 before being combined column-major
// into the index for the double matrix `b`, and that under -O1 every computed
// index (both inner extracts and the final insert) gets its own
// icmp ult/llvm.assume bounds hint.
void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
  // CHECK:       [[I1:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:  [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
  // CHECK-NEXT:  [[J1:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:  [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
  // CHECK-NEXT:  [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
  // CHECK-NEXT:  [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:  [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
  // CHECK-NEXT:  [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
  // CHECK-NEXT:  [[J2:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:  [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
  // CHECK-NEXT:  [[I2:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:  [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
  // CHECK-NEXT:  [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
  // CHECK-NEXT:  [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:  [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
  // CHECK-NEXT:  [[MI3:%.*]] = add nsw i32 [[MI2]], 2
  // CHECK-NEXT:  [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
  // CHECK-NEXT:  [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
  // CHECK-NEXT:  [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:  [[B:%.*]] = load <25 x double>, <25 x double>* [[B_PTR:%.*]], align 8
  // CHECK-NEXT:  [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
  // CHECK-NEXT:  store <25 x double> [[INS]], <25 x double>* [[B_PTR]], align 8
  b[a[i][j]][a[j][i] + 2] = 1.5;
}
1135