Lines Matching refs:vector

1 // RUN: mlir-opt %s -test-vector-contraction-lowering | FileCheck %s
2 // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-lower-matrix-intrinsics=1 | FileCheck …
3 // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-outerproduct=1 | FileCheck %s --check-…
4 // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-filter-outerproduct=1 | FileCheck %s -…
5 // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-parallel-arith=1 | FileCheck %s --chec…
18 // CHECK-SAME: %[[A:.*0]]: vector<4xf32>,
19 // CHECK-SAME: %[[B:.*1]]: vector<4xf32>,
21 // CHECK: %[[F:.*]] = arith.mulf %[[A]], %[[B]] : vector<4xf32>
22 // CHECK: %[[R:.*]] = vector.reduction <add>, %[[F]], %[[C]] : vector<4xf32> into f32
25 func.func @extract_contract1(%arg0: vector<4xf32>, %arg1: vector<4xf32>, %arg2: f32) -> f32 {
26 %0 = vector.contract #dotp_trait %arg0, %arg1, %arg2
27 : vector<4xf32>, vector<4xf32> into f32
32 // CHECK-SAME: %[[A:.*0]]: vector<4xi32>,
33 // CHECK-SAME: %[[B:.*1]]: vector<4xi32>,
35 // CHECK: %[[F:.*]] = arith.muli %[[A]], %[[B]] : vector<4xi32>
36 // CHECK: %[[R:.*]] = vector.reduction <add>, %[[F]], %[[C]] : vector<4xi32> into i32
39 func.func @extract_contract1_int(%arg0: vector<4xi32>, %arg1: vector<4xi32>, %arg2: i32) -> i32 {
40 %0 = vector.contract #dotp_trait %arg0, %arg1, %arg2
41 : vector<4xi32>, vector<4xi32> into i32
56 // CHECK-SAME: %[[A:.*0]]: vector<2x3xf32>,
57 // CHECK-SAME: %[[B:.*1]]: vector<3xf32>,
58 // CHECK-SAME: %[[C:.*2]]: vector<2xf32>
59 // CHECK: %[[R:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32>
60 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2x3xf32>
61 // CHECK: %[[T2:.*]] = arith.mulf %[[T0]], %[[B]] : vector<3xf32>
62 // CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T2]] : vector<3xf32> into f32
63 // CHECK: %[[T4:.*]] = vector.insert %[[T3]], %[[R]] [0] : f32 into vector<2xf32>
64 // CHECK: %[[T5:.*]] = vector.extract %[[A]][1] : vector<2x3xf32>
65 // CHECK: %[[T7:.*]] = arith.mulf %[[T5]], %[[B]] : vector<3xf32>
66 // CHECK: %[[T8:.*]] = vector.reduction <add>, %[[T7]] : vector<3xf32> into f32
67 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T4]] [1] : f32 into vector<2xf32>
68 // CHECK: %[[T10:.*]] = arith.addf %[[T9]], %[[C]] : vector<2xf32>
69 // CHECK: return %[[T10]] : vector<2xf32>
71 func.func @extract_contract2(%arg0: vector<2x3xf32>,
72 %arg1: vector<3xf32>,
73 %arg2: vector<2xf32>) -> vector<2xf32> {
74 %0 = vector.contract #matvec_trait %arg0, %arg1, %arg2
75 : vector<2x3xf32>, vector<3xf32> into vector<2xf32>
76 return %0 : vector<2xf32>
80 // CHECK-SAME: %[[A:.*0]]: vector<2x3xi32>,
81 // CHECK-SAME: %[[B:.*1]]: vector<3xi32>,
82 // CHECK-SAME: %[[C:.*2]]: vector<2xi32>
83 // CHECK: %[[R:.*]] = arith.constant dense<0> : vector<2xi32>
84 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2x3xi32>
85 // CHECK: %[[T2:.*]] = arith.muli %[[T0]], %[[B]] : vector<3xi32>
86 // CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T2]] : vector<3xi32> into i32
87 // CHECK: %[[T4:.*]] = vector.insert %[[T3]], %[[R]] [0] : i32 into vector<2xi32>
88 // CHECK: %[[T5:.*]] = vector.extract %[[A]][1] : vector<2x3xi32>
89 // CHECK: %[[T7:.*]] = arith.muli %[[T5]], %[[B]] : vector<3xi32>
90 // CHECK: %[[T8:.*]] = vector.reduction <add>, %[[T7]] : vector<3xi32> into i32
91 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T4]] [1] : i32 into vector<2xi32>
92 // CHECK: %[[T10:.*]] = arith.addi %[[T9]], %[[C]] : vector<2xi32>
93 // CHECK: return %[[T10]] : vector<2xi32>
94 func.func @extract_contract2_int(%arg0: vector<2x3xi32>,
95 %arg1: vector<3xi32>,
96 %arg2: vector<2xi32>) -> vector<2xi32> {
97 %0 = vector.contract #matvec_trait %arg0, %arg1, %arg2
98 : vector<2x3xi32>, vector<3xi32> into vector<2xi32>
99 return %0 : vector<2xi32>
113 // CHECK-SAME: %[[A:.*0]]: vector<3xf32>,
114 // CHECK-SAME: %[[B:.*1]]: vector<2x3xf32>,
115 // CHECK-SAME: %[[C:.*2]]: vector<2xf32>
116 // CHECK: %[[R:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32>
117 // CHECK: %[[T0:.*]] = vector.extract %[[B]][0] : vector<2x3xf32>
118 // CHECK: %[[T2:.*]] = arith.mulf %[[T0]], %[[A]] : vector<3xf32>
119 // CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T2]] : vector<3xf32> into f32
120 // CHECK: %[[T4:.*]] = vector.insert %[[T3]], %[[R]] [0] : f32 into vector<2xf32>
121 // CHECK: %[[T5:.*]] = vector.extract %[[B]][1] : vector<2x3xf32>
122 // CHECK: %[[T7:.*]] = arith.mulf %[[T5]], %[[A]] : vector<3xf32>
123 // CHECK: %[[T8:.*]] = vector.reduction <add>, %[[T7]] : vector<3xf32> into f32
124 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T4]] [1] : f32 into vector<2xf32>
125 // CHECK: %[[T10:.*]] = arith.addf %[[T9]], %[[C]] : vector<2xf32>
126 // CHECK: return %[[T10]] : vector<2xf32>
128 func.func @extract_contract3(%arg0: vector<3xf32>,
129 %arg1: vector<2x3xf32>,
130 %arg2: vector<2xf32>) -> vector<2xf32> {
131 %0 = vector.contract #vecmat_trait %arg0, %arg1, %arg2
132 : vector<3xf32>, vector<2x3xf32> into vector<2xf32>
133 return %0 : vector<2xf32>
147 // CHECK-SAME: %[[A:.*0]]: vector<2x2xf32>,
148 // CHECK-SAME: %[[B:.*1]]: vector<2x2xf32>,
149 // CHECK-SAME: %[[C:.*2]]: vector<2x2xf32>
150 // CHECK: %[[R:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
151 // CHECK: %[[Bt:.*]] = vector.transpose %[[B]], [1, 0] : vector<2x2xf32> to vector<2x2xf32>
152 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2x2xf32>
153 // CHECK: %[[T2:.*]] = vector.extract %[[Bt]][0] : vector<2x2xf32>
154 // CHECK: %[[T9:.*]] = arith.mulf %[[T0]], %[[T2]] : vector<2xf32>
155 // CHECK: %[[T10:.*]] = vector.reduction <add>, %[[T9]] : vector<2xf32> into f32
156 // CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[R]] [0, 0] : f32 into vector<2x2xf32>
158 // CHECK: %[[T12:.*]] = vector.extract %[[Bt]][1] : vector<2x2xf32>
159 // CHECK: %[[T19:.*]] = arith.mulf %[[T0]], %[[T12]] : vector<2xf32>
160 // CHECK: %[[T20:.*]] = vector.reduction <add>, %[[T19]] : vector<2xf32> into f32
161 // CHECK: %[[T21:.*]] = vector.insert %[[T20]], %[[T11]] [0, 1] : f32 into vector<2x2xf32>
163 // CHECK: %[[T23:.*]] = vector.extract %[[A]][1] : vector<2x2xf32>
164 // CHECK: %[[T24:.*]] = vector.extract %[[Bt]][0] : vector<2x2xf32>
165 // CHECK: %[[T32:.*]] = arith.mulf %[[T23]], %[[T24]] : vector<2xf32>
166 // CHECK: %[[T33:.*]] = vector.reduction <add>, %[[T32]] : vector<2xf32> into f32
167 // CHECK: %[[T34:.*]] = vector.insert %[[T33]], %[[T21]] [1, 0] : f32 into vector<2x2xf32>
169 // CHECK: %[[T40:.*]] = vector.extract %[[Bt]][1] : vector<2x2xf32>
170 // CHECK: %[[T41:.*]] = arith.mulf %[[T23]], %[[T40]] : vector<2xf32>
171 // CHECK: %[[T42:.*]] = vector.reduction <add>, %[[T41]] : vector<2xf32> into f32
172 // CHECK: %[[T43:.*]] = vector.insert %[[T42]], %[[T34]] [1, 1] : f32 into vector<2x2xf32>
174 // CHECK: %[[T52:.*]] = arith.addf %[[T43]], %[[C]] : vector<2x2xf32>
175 // CHECK: return %[[T52]] : vector<2x2xf32>
177 func.func @extract_contract4(%arg0: vector<2x2xf32>,
178 %arg1: vector<2x2xf32>,
179 %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
180 %0 = vector.contract #matmat_trait %arg0, %arg1, %arg2
181 : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
182 return %0 : vector<2x2xf32>
196 // CHECK-SAME: %[[A:.*0]]: vector<2x3xf32>,
197 // CHECK-SAME: %[[B:.*1]]: vector<2x3xf32>,
199 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2x3xf32>
200 // CHECK: %[[T1:.*]] = vector.extract %[[B]][0] : vector<2x3xf32>
201 // CHECK: %[[T2:.*]] = arith.mulf %[[T0]], %[[T1]] : vector<3xf32>
202 // CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T2]], %[[C]] : vector<3xf32> into f32
203 // CHECK: %[[T5:.*]] = vector.extract %[[A]][1] : vector<2x3xf32>
204 // CHECK: %[[T6:.*]] = vector.extract %[[B]][1] : vector<2x3xf32>
205 // CHECK: %[[T7:.*]] = arith.mulf %[[T5]], %[[T6]] : vector<3xf32>
206 // CHECK: %[[T8:.*]] = vector.reduction <add>, %[[T7]], %[[T3]] : vector<3xf32> into f32
209 func.func @full_contract1(%arg0: vector<2x3xf32>,
210 %arg1: vector<2x3xf32>,
212 %0 = vector.contract #contraction2d_trait %arg0, %arg1, %arg2
213 : vector<2x3xf32>, vector<2x3xf32> into f32
228 // CHECK-SAME: %[[A:.*0]]: vector<2x3xf32>,
229 // CHECK-SAME: %[[B:.*1]]: vector<3x2xf32>,
231 // CHECK: %[[Z:.*]] = arith.constant dense<0.000000e+00> : vector<3xf32>
232 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2x3xf32>
233 // CHECK: %[[T1:.*]] = vector.extract %[[B]][0, 0] : vector<3x2xf32>
234 // CHECK: %[[T3:.*]] = vector.insert %[[T1]], %[[Z]] [0] : f32 into vector<3xf32>
235 // CHECK: %[[T4:.*]] = vector.extract %[[B]][1, 0] : vector<3x2xf32>
236 // CHECK: %[[T6:.*]] = vector.insert %[[T4]], %[[T3]] [1] : f32 into vector<3xf32>
237 // CHECK: %[[T7:.*]] = vector.extract %[[B]][2, 0] : vector<3x2xf32>
238 // CHECK: %[[T9:.*]] = vector.insert %[[T7]], %[[T6]] [2] : f32 into vector<3xf32>
239 // CHECK: %[[T10:.*]] = arith.mulf %[[T0]], %[[T9]] : vector<3xf32>
240 // CHECK: %[[T11:.*]] = vector.reduction <add>, %[[T10]], %[[C]] : vector<3xf32> into f32
242 // CHECK: %[[T12:.*]] = vector.extract %[[A]][1] : vector<2x3xf32>
243 // CHECK: %[[T13:.*]] = vector.extract %[[B]][0, 1] : vector<3x2xf32>
244 // CHECK: %[[T15:.*]] = vector.insert %[[T13]], %[[Z]] [0] : f32 into vector<3xf32>
245 // CHECK: %[[T16:.*]] = vector.extract %[[B]][1, 1] : vector<3x2xf32>
246 // CHECK: %[[T18:.*]] = vector.insert %[[T16]], %[[T15]] [1] : f32 into vector<3xf32>
247 // CHECK: %[[T19:.*]] = vector.extract %[[B]][2, 1] : vector<3x2xf32>
248 // CHECK: %[[T21:.*]] = vector.insert %[[T19]], %[[T18]] [2] : f32 into vector<3xf32>
249 // CHECK: %[[T22:.*]] = arith.mulf %[[T12]], %[[T21]] : vector<3xf32>
250 // CHECK: %[[T23:.*]] = vector.reduction <add>, %[[T22]], %[[T11]] : vector<3xf32> into f32
253 func.func @full_contract2(%arg0: vector<2x3xf32>,
254 %arg1: vector<3x2xf32>,
256 %0 = vector.contract #contraction2d_trans_trait %arg0, %arg1, %arg2
257 : vector<2x3xf32>, vector<3x2xf32> into f32
262 // CHECK-SAME: %[[A:.*0]]: vector<2xf32>,
263 // CHECK-SAME: %[[B:.*1]]: vector<3xf32>
264 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32>
265 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2xf32>
266 // CHECK: %[[T1:.*]] = vector.splat %[[T0]] : vector<3xf32>
267 // CHECK: %[[T2:.*]] = arith.mulf %[[T1]], %[[B]] : vector<3xf32>
268 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C0]] [0] : vector<3xf32> into vector<2x3xf32>
269 // CHECK: %[[T4:.*]] = vector.extract %[[A]][1] : vector<2xf32>
270 // CHECK: %[[T5:.*]] = vector.splat %[[T4]] : vector<3xf32>
271 // CHECK: %[[T6:.*]] = arith.mulf %[[T5]], %[[B]] : vector<3xf32>
272 // CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T3]] [1] : vector<3xf32> into vector<2x3xf32>
273 // CHECK: return %[[T7]] : vector<2x3xf32>
275 func.func @outerproduct_noacc(%arg0: vector<2xf32>,
276 %arg1: vector<3xf32>) -> vector<2x3xf32> {
277 %0 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32>
278 return %0: vector<2x3xf32>
282 // CHECK-SAME: %[[A:.*0]]: vector<2xf32>,
283 // CHECK-SAME: %[[B:.*1]]: vector<3xf32>,
284 // CHECK-SAME: %[[C:.*2]]: vector<2x3xf32>
285 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32>
286 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2xf32>
287 // CHECK: %[[T1:.*]] = vector.splat %[[T0]] : vector<3xf32>
288 // CHECK: %[[T2:.*]] = vector.extract %[[C]][0] : vector<2x3xf32>
289 // CHECK: %[[T3:.*]] = vector.fma %[[T1]], %[[B]], %[[T2]] : vector<3xf32>
290 // CHECK: %[[T4:.*]] = vector.insert %[[T3]], %[[C0]] [0] : vector<3xf32> into vector<2x3xf32>
291 // CHECK: %[[T5:.*]] = vector.extract %[[A]][1] : vector<2xf32>
292 // CHECK: %[[T6:.*]] = vector.splat %[[T5]] : vector<3xf32>
293 // CHECK: %[[T7:.*]] = vector.extract %[[C]][1] : vector<2x3xf32>
294 // CHECK: %[[T8:.*]] = vector.fma %[[T6]], %[[B]], %[[T7]] : vector<3xf32>
295 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T4]] [1] : vector<3xf32> into vector<2x3xf32>
296 // CHECK: return %[[T9]] : vector<2x3xf32>
298 func.func @outerproduct_acc(%arg0: vector<2xf32>,
299 %arg1: vector<3xf32>,
300 %arg2: vector<2x3xf32>) -> vector<2x3xf32> {
301 %0 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32>
302 return %0: vector<2x3xf32>
306 // CHECK-SAME: %[[A:.*0]]: vector<2xi32>,
307 // CHECK-SAME: %[[B:.*1]]: vector<3xi32>
308 // CHECK: %[[C0:.*]] = arith.constant dense<0> : vector<2x3xi32>
309 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2xi32>
310 // CHECK: %[[T1:.*]] = vector.splat %[[T0]] : vector<3xi32>
311 // CHECK: %[[T2:.*]] = arith.muli %[[T1]], %[[B]] : vector<3xi32>
312 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C0]] [0] : vector<3xi32> into vector<2x3xi32>
313 // CHECK: %[[T4:.*]] = vector.extract %[[A]][1] : vector<2xi32>
314 // CHECK: %[[T5:.*]] = vector.splat %[[T4]] : vector<3xi32>
315 // CHECK: %[[T6:.*]] = arith.muli %[[T5]], %[[B]] : vector<3xi32>
316 // CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T3]] [1] : vector<3xi32> into vector<2x3xi32>
317 // CHECK: return %[[T7]] : vector<2x3xi32>
318 func.func @outerproduct_noacc_int(%arg0: vector<2xi32>,
319 %arg1: vector<3xi32>) -> vector<2x3xi32> {
320 %0 = vector.outerproduct %arg0, %arg1 : vector<2xi32>, vector<3xi32>
321 return %0: vector<2x3xi32>
325 // CHECK-SAME: %[[A:.*0]]: vector<2xi32>,
326 // CHECK-SAME: %[[B:.*1]]: vector<3xi32>,
327 // CHECK-SAME: %[[C:.*2]]: vector<2x3xi32>
328 // CHECK: %[[C0:.*]] = arith.constant dense<0> : vector<2x3xi32>
329 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2xi32>
330 // CHECK: %[[T1:.*]] = vector.splat %[[T0]] : vector<3xi32>
331 // CHECK: %[[T2:.*]] = vector.extract %[[C]][0] : vector<2x3xi32>
332 // CHECK: %[[T3:.*]] = arith.muli %[[T1]], %[[B]] : vector<3xi32>
333 // CHECK: %[[T4:.*]] = arith.addi %[[T3]], %[[T2]] : vector<3xi32>
334 // CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[C0]] [0] : vector<3xi32> into vector<2x3xi32>
335 // CHECK: %[[T6:.*]] = vector.extract %[[A]][1] : vector<2xi32>
336 // CHECK: %[[T7:.*]] = vector.splat %[[T6]] : vector<3xi32>
337 // CHECK: %[[T8:.*]] = vector.extract %[[C]][1] : vector<2x3xi32>
338 // CHECK: %[[T9:.*]] = arith.muli %[[T7]], %[[B]] : vector<3xi32>
339 // CHECK: %[[T10:.*]] = arith.addi %[[T9]], %[[T8]] : vector<3xi32>
340 // CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T5]] [1] : vector<3xi32> into vector<2x3xi3…
341 // CHECK: return %[[T11]] : vector<2x3xi32>
342 func.func @outerproduct_acc_int(%arg0: vector<2xi32>,
343 %arg1: vector<3xi32>,
344 %arg2: vector<2x3xi32>) -> vector<2x3xi32> {
345 %0 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xi32>, vector<3xi32>
346 return %0: vector<2x3xi32>
350 // CHECK-SAME: %[[A:.*0]]: vector<16xf32>,
352 // CHECK: %[[T0:.*]] = vector.splat %[[B]] : vector<16xf32>
353 // CHECK: %[[T1:.*]] = arith.mulf %[[A]], %[[T0]] : vector<16xf32>
354 // CHECK: return %[[T1]] : vector<16xf32>
355 func.func @axpy_fp(%arg0: vector<16xf32>, %arg1: f32) -> vector<16xf32> {
356 %0 = vector.outerproduct %arg0, %arg1: vector<16xf32>, f32
357 return %0: vector<16xf32>
361 // CHECK-SAME: %[[A:.*0]]: vector<16xf32>,
363 // CHECK-SAME: %[[C:.*2]]: vector<16xf32>)
364 // CHECK: %[[T0:.*]] = vector.splat %[[B]] : vector<16xf32>
365 // CHECK: %[[T1:.*]] = vector.fma %[[A]], %[[T0]], %[[C]] : vector<16xf32>
366 // CHECK: return %[[T1]] : vector<16xf32>
367 func.func @axpy_fp_add(%arg0: vector<16xf32>, %arg1: f32, %arg2 : vector<16xf32>) -> vector<16xf32>…
368 %0 = vector.outerproduct %arg0, %arg1, %arg2: vector<16xf32>, f32
369 return %0: vector<16xf32>
373 // CHECK-SAME: %[[A:.*0]]: vector<16xi32>,
375 // CHECK: %[[T0:.*]] = vector.splat %[[B]] : vector<16xi32>
376 // CHECK: %[[T1:.*]] = arith.muli %[[A]], %[[T0]] : vector<16xi32>
377 // CHECK: return %[[T1]] : vector<16xi32>
378 func.func @axpy_int(%arg0: vector<16xi32>, %arg1: i32) -> vector<16xi32> {
379 %0 = vector.outerproduct %arg0, %arg1: vector<16xi32>, i32
380 return %0: vector<16xi32>
384 // CHECK-SAME: %[[A:.*0]]: vector<16xi32>,
386 // CHECK-SAME: %[[C:.*2]]: vector<16xi32>)
387 // CHECK: %[[T0:.*]] = vector.splat %[[B]] : vector<16xi32>
388 // CHECK: %[[T1:.*]] = arith.muli %[[A]], %[[T0]] : vector<16xi32>
389 // CHECK: %[[T2:.*]] = arith.addi %[[T1]], %[[C]] : vector<16xi32>
390 // CHECK: return %[[T2]] : vector<16xi32>
391 func.func @axpy_int_add(%arg0: vector<16xi32>, %arg1: i32, %arg2: vector<16xi32>) -> vector<16xi32>…
392 %0 = vector.outerproduct %arg0, %arg1, %arg2: vector<16xi32>, i32
393 return %0: vector<16xi32>
397 // CHECK-SAME: %[[A:.*]]: vector<16xf32>
398 // CHECK: return %[[A]] : vector<16xf32>
400 func.func @nop_shape_cast(%arg0: vector<16xf32>) -> vector<16xf32> {
401 %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<16xf32>
402 return %0 : vector<16xf32>
407 // HECK-SAME: %[[A:.*]]: vector<16xf32>
408 // HECK: return %[[A]] : vector<16xf32>
410 func.func @cancel_shape_cast(%arg0: vector<16xf32>) -> vector<16xf32> {
411 %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<4x4xf32>
412 %1 = vector.shape_cast %0 : vector<4x4xf32> to vector<16xf32>
413 return %1 : vector<16xf32>
419 func.func @shape_casts(%a: vector<2x2xf32>) -> (vector<4xf32>, vector<2x2xf32>) {
420 // CHECK-DAG: %[[cst22:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
421 // CHECK-DAG: %[[cst:.*]] = arith.constant dense<0.000000e+00> : vector<4xf32>
422 // CHECK: %[[ex0:.*]] = vector.extract %{{.*}}[0] : vector<2x2xf32>
424 // CHECK: %[[in0:.*]] = vector.insert_strided_slice %[[ex0]], %[[cst]]
425 // CHECK-SAME: {offsets = [0], strides = [1]} : vector<2xf32> into vector<4xf32>
427 // CHECK: %[[ex1:.*]] = vector.extract %{{.*}}[1] : vector<2x2xf32>
429 // CHECK: %[[in2:.*]] = vector.insert_strided_slice %[[ex1]], %[[in0]]
430 // CHECK-SAME: {offsets = [2], strides = [1]} : vector<2xf32> into vector<4xf32>
432 %0 = vector.shape_cast %a : vector<2x2xf32> to vector<4xf32>
433 // CHECK: %[[add:.*]] = arith.addf %[[in2]], %[[in2]] : vector<4xf32>
434 %r0 = arith.addf %0, %0: vector<4xf32>
436 // CHECK: %[[ss0:.*]] = vector.extract_strided_slice %[[add]]
438 // CHECK-SAME: vector<4xf32> to vector<2xf32>
440 // CHECK: %[[res0:.*]] = vector.insert %[[ss0]], %[[cst22]] [0] :
441 // CHECK-SAME: vector<2xf32> into vector<2x2xf32>
443 // CHECK: %[[s2:.*]] = vector.extract_strided_slice %[[add]]
445 // CHECK-SAME: vector<4xf32> to vector<2xf32>
447 // CHECK: %[[res1:.*]] = vector.insert %[[s2]], %[[res0]] [1] :
448 // CHECK-SAME: vector<2xf32> into vector<2x2xf32>
450 %1 = vector.shape_cast %r0 : vector<4xf32> to vector<2x2xf32>
451 // CHECK: return %[[add]], %[[res1]] : vector<4xf32>, vector<2x2xf32>
452 return %r0, %1 : vector<4xf32>, vector<2x2xf32>
456 // CHECK-SAME: %[[A:.*]]: vector<3x2xf32>
457 // CHECK: %[[C:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32>
458 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0, 0] : vector<3x2xf32>
459 // CHECK: %[[T1:.*]] = vector.insert %[[T0]], %[[C]] [0, 0] : f32 into vector<2x3xf32>
460 // CHECK: %[[T2:.*]] = vector.extract %[[A]][0, 1] : vector<3x2xf32>
461 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[T1]] [0, 1] : f32 into vector<2x3xf32>
462 // CHECK: %[[T4:.*]] = vector.extract %[[A]][1, 0] : vector<3x2xf32>
463 // CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[T3]] [0, 2] : f32 into vector<2x3xf32>
464 // CHECK: %[[T6:.*]] = vector.extract %[[A]][1, 1] : vector<3x2xf32>
465 // CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T5]] [1, 0] : f32 into vector<2x3xf32>
466 // CHECK: %[[T8:.*]] = vector.extract %[[A]][2, 0] : vector<3x2xf32>
467 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T7]] [1, 1] : f32 into vector<2x3xf32>
468 // CHECK: %[[T10:.*]] = vector.extract %[[A]][2, 1] : vector<3x2xf32>
469 // CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T9]] [1, 2] : f32 into vector<2x3xf32>
470 // CHECK: return %[[T11]] : vector<2x3xf32>
472 func.func @shape_cast_2d2d(%arg0 : vector<3x2xf32>) -> vector<2x3xf32> {
473 %s = vector.shape_cast %arg0: vector<3x2xf32> to vector<2x3xf32>
474 return %s : vector<2x3xf32>
478 // CHECK-SAME: %[[A:.*]]: vector<1x3x2xf32>
479 // CHECK: %[[C:.*]] = arith.constant dense<0.000000e+00> : vector<6xf32>
480 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0, 0, 0] : vector<1x3x2xf32>
481 // CHECK: %[[T1:.*]] = vector.insert %[[T0]], %[[C]] [0] : f32 into vector<6xf32>
482 // CHECK: %[[T2:.*]] = vector.extract %[[A]][0, 0, 1] : vector<1x3x2xf32>
483 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[T1]] [1] : f32 into vector<6xf32>
484 // CHECK: %[[T4:.*]] = vector.extract %[[A]][0, 1, 0] : vector<1x3x2xf32>
485 // CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[T3]] [2] : f32 into vector<6xf32>
486 // CHECK: %[[T6:.*]] = vector.extract %[[A]][0, 1, 1] : vector<1x3x2xf32>
487 // CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T5]] [3] : f32 into vector<6xf32>
488 // CHECK: %[[T8:.*]] = vector.extract %[[A]][0, 2, 0] : vector<1x3x2xf32>
489 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T7]] [4] : f32 into vector<6xf32>
490 // CHECK: %[[T10:.*]] = vector.extract %[[A]][0, 2, 1] : vector<1x3x2xf32>
491 // CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T9]] [5] : f32 into vector<6xf32>
492 // CHECK: return %[[T11]] : vector<6xf32>
494 func.func @shape_cast_3d1d(%arg0 : vector<1x3x2xf32>) -> vector<6xf32> {
495 %s = vector.shape_cast %arg0 : vector<1x3x2xf32> to vector<6xf32>
496 return %s : vector<6xf32>
500 // CHECK-SAME: %[[A:.*]]: vector<6xf32>
501 // CHECK: %[[C:.*]] = arith.constant dense<0.000000e+00> : vector<2x1x3xf32>
502 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<6xf32>
503 // CHECK: %[[T1:.*]] = vector.insert %[[T0]], %[[C]] [0, 0, 0] : f32 into vector<2x1x3xf32>
504 // CHECK: %[[T2:.*]] = vector.extract %[[A]][1] : vector<6xf32>
505 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[T1]] [0, 0, 1] : f32 into vector<2x1x3xf32>
506 // CHECK: %[[T4:.*]] = vector.extract %[[A]][2] : vector<6xf32>
507 // CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[T3]] [0, 0, 2] : f32 into vector<2x1x3xf32>
508 // CHECK: %[[T6:.*]] = vector.extract %[[A]][3] : vector<6xf32>
509 // CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T5]] [1, 0, 0] : f32 into vector<2x1x3xf32>
510 // CHECK: %[[T8:.*]] = vector.extract %[[A]][4] : vector<6xf32>
511 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T7]] [1, 0, 1] : f32 into vector<2x1x3xf32>
512 // CHECK: %[[T10:.*]] = vector.extract %[[A]][5] : vector<6xf32>
513 // CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T9]] [1, 0, 2] : f32 into vector<2x1x3xf32>
514 // CHECK: return %[[T11]] : vector<2x1x3xf32>
516 func.func @shape_cast_1d3d(%arg0 : vector<6xf32>) -> vector<2x1x3xf32> {
517 %s = vector.shape_cast %arg0 : vector<6xf32> to vector<2x1x3xf32>
518 return %s : vector<2x1x3xf32>
522 // MATRIX-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x4xf32>,
523 // MATRIX-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x3xf32>,
524 // MATRIX-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
525 // MATRIX: %[[vcst:.*]] = arith.constant dense<0.000000e+00> : vector<8xf32>
526 // MATRIX: %[[vcst_0:.*]] = arith.constant dense<0.000000e+00> : vector<12xf32>
527 // MATRIX: %[[vcst_1:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32>
528 // MATRIX: %[[a0:.*]] = vector.extract %[[A]][0] : vector<2x4xf32>
529 … MATRIX: %[[a1:.*]] = vector.insert_strided_slice %[[a0]], %[[vcst]] {offsets = [0], strides = […
530 // MATRIX: %[[a2:.*]] = vector.extract %[[A]][1] : vector<2x4xf32>
531 … MATRIX: %[[a3:.*]] = vector.insert_strided_slice %[[a2]], %[[a1]] {offsets = [4], strides = [1…
532 // MATRIX: %[[b0:.*]] = vector.extract %[[B]][0] : vector<4x3xf32>
533 … MATRIX: %[[b1:.*]] = vector.insert_strided_slice %[[b0]], %[[vcst_0]] {offsets = [0], strides = …
534 // MATRIX: %[[b2:.*]] = vector.extract %[[B]][1] : vector<4x3xf32>
535 … MATRIX: %[[b3:.*]] = vector.insert_strided_slice %[[b2]], %[[b1]] {offsets = [3], strides = [1…
536 // MATRIX: %[[b4:.*]] = vector.extract %[[B]][2] : vector<4x3xf32>
537 … MATRIX: %[[b5:.*]] = vector.insert_strided_slice %[[b4]], %[[b3]] {offsets = [6], strides = [1…
538 // MATRIX: %[[b6:.*]] = vector.extract %[[B]][3] : vector<4x3xf32>
539 … MATRIX: %[[b7:.*]] = vector.insert_strided_slice %[[b6]], %[[b5]] {offsets = [9], strides = [1…
540 // MATRIX: %[[mm1:.*]] = vector.matrix_multiply %[[a3]], %[[b7]] {lhs_columns = 4 : i32, lhs_rows = 2 : i32, rhs_columns =…
541 …MATRIX: %[[mm2:.*]] = vector.extract_strided_slice %[[mm1]] {offsets = [0], sizes = [3], strides …
542 // MATRIX: %[[mm3:.*]] = vector.insert %[[mm2]], %[[vcst_1]] [0] : vector<3xf32> into vector<…
543 …MATRIX: %[[mm4:.*]] = vector.extract_strided_slice %[[mm1]] {offsets = [3], sizes = [3], strides …
544 // MATRIX: %[[mm5:.*]] = vector.insert %[[mm4]], %[[mm3]] [1] : vector<3xf32> into vector<2x3…
545 // MATRIX: %[[mm6:.*]] = arith.addf %[[C]], %[[mm5]] : vector<2x3xf32>
548 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x4xf32>,
549 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x3xf32>,
550 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
551 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
552 // OUTERPRODUCT-SAME: : vector<2x4xf32> to vector<4x2xf32>
554 // OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<4x2xf32>
555 // OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<4x3xf32>
556 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]]
557 // OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32>
559 // OUTERPRODUCT: %[[a1:.*]] = vector.extract %[[At]][1] : vector<4x2xf32>
560 // OUTERPRODUCT: %[[b1:.*]] = vector.extract %[[B]][1] : vector<4x3xf32>
561 // OUTERPRODUCT: %[[c1:.*]] = vector.outerproduct %[[a1]], %[[b1]], %[[c0]]
562 // OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32>
564 // OUTERPRODUCT: %[[a2:.*]] = vector.extract %[[At]][2] : vector<4x2xf32>
565 // OUTERPRODUCT: %[[b2:.*]] = vector.extract %[[B]][2] : vector<4x3xf32>
566 // OUTERPRODUCT: %[[c2:.*]] = vector.outerproduct %[[a2]], %[[b2]], %[[c1]]
567 // OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32>
569 // OUTERPRODUCT: %[[a3:.*]] = vector.extract %[[At]][3] : vector<4x2xf32>
570 // OUTERPRODUCT: %[[b3:.*]] = vector.extract %[[B]][3] : vector<4x3xf32>
571 // OUTERPRODUCT: %[[c3:.*]] = vector.outerproduct %[[a3]], %[[b3]], %[[c2]]
572 // OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32>
574 // OUTERPRODUCT: return %[[c3]] : vector<2x3xf32>
577 // REDUCE-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x4xf32>,
578 // REDUCE-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x3xf32>,
579 // REDUCE-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
581 // REDUCE: %[[RES:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32>
582 // REDUCE: %[[Bt:.*]] = vector.transpose %[[B]], [1, 0]
583 // REDUCE-SAME: : vector<4x3xf32> to vector<3x4xf32>
585 // REDUCE: %[[a0:.*]] = vector.extract %[[A]][0] : vector<2x4xf32>
586 // REDUCE-NEXT: %[[b0:.*]] = vector.extract %[[Bt]][0] : vector<3x4xf32>
587 // REDUCE-NEXT: %[[ab00:.*]] = arith.mulf %[[a0]], %[[b0]] : vector<4xf32>
588 // REDUCE-NEXT: %[[s00:.*]] = vector.reduction <add>, %[[ab00]] : vector<4xf32> into f32
589 // REDUCE-NEXT: %[[r00:.*]] = vector.insert %[[s00]], %[[RES]] [0, 0] : f32 into vector<2x3xf32>
593 // REDUCE: %[[a1:.*]] = vector.extract %[[A]][1] : vector<2x4xf32>
594 // REDUCE-NEXT: %[[b2:.*]] = vector.extract %[[Bt]][2] : vector<3x4xf32>
595 // REDUCE-NEXT: %[[ab12:.*]] = arith.mulf %[[a1]], %[[b2]] : vector<4xf32>
596 // REDUCE-NEXT: %[[s12:.*]] = vector.reduction <add>, %[[ab12]] : vector<4xf32> into f32
597 // REDUCE-NEXT: %[[r12:.*]] = vector.insert %[[s12]], %{{.*}} [1, 2] : f32 into vector<2x3xf32>
599 // REDUCE: return %[[c3]] : vector<2x3xf32>
600 func.func @matmul(%arg0: vector<2x4xf32>,
601 %arg1: vector<4x3xf32>,
602 %arg2: vector<2x3xf32>) -> vector<2x3xf32> {
603 %0 = vector.contract #matmat_trait %arg0, %arg1, %arg2
604 : vector<2x4xf32>, vector<4x3xf32> into vector<2x3xf32>
605 return %0 : vector<2x3xf32>
610 // CHECK: %[[T0:.*]] = vector.splat %[[A]] : vector<2xf32>
611 // CHECK: return %[[T0]] : vector<2xf32>
613 func.func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> {
614 %0 = vector.broadcast %arg0 : f32 to vector<2xf32>
615 return %0 : vector<2xf32>
620 // CHECK: %[[T0:.*]] = vector.splat %[[A]] : vector<2x3xf32>
621 // CHECK: return %[[T0]] : vector<2x3xf32>
623 func.func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> {
624 %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32>
625 return %0 : vector<2x3xf32>
630 // CHECK: %[[T0:.*]] = vector.splat %[[A]] : vector<2x3x4xf32>
631 // CHECK: return %[[T0]] : vector<2x3x4xf32>
633 func.func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> {
634 %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32>
635 return %0 : vector<2x3x4xf32>
639 // CHECK-SAME: %[[A:.*0]]: vector<2xf32>
640 // CHECK: return %[[A]] : vector<2xf32>
642 func.func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> {
643 %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32>
644 return %0 : vector<2xf32>
648 // CHECK-SAME: %[[A:.*0]]: vector<2xf32>
649 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
650 // CHECK: %[[T0:.*]] = vector.insert %[[A]], %[[C0]] [0] : vector<2xf32> into vector<3x2xf32>
651 // CHECK: %[[T1:.*]] = vector.insert %[[A]], %[[T0]] [1] : vector<2xf32> into vector<3x2xf32>
652 // CHECK: %[[T2:.*]] = vector.insert %[[A]], %[[T1]] [2] : vector<2xf32> into vector<3x2xf32>
653 // CHECK: return %[[T2]] : vector<3x2xf32>
655 func.func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> {
656 %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32>
657 return %0 : vector<3x2xf32>
661 // CHECK-SAME: %[[A:.*0]]: vector<2xf32>
662 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
663 // CHECK: %[[C1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32>
664 // CHECK: %[[T0:.*]] = vector.insert %[[A]], %[[C0]] [0] : vector<2xf32> into vector<3x2xf32>
665 // CHECK: %[[T1:.*]] = vector.insert %[[A]], %[[T0]] [1] : vector<2xf32> into vector<3x2xf32>
666 // CHECK: %[[T2:.*]] = vector.insert %[[A]], %[[T1]] [2] : vector<2xf32> into vector<3x2xf32>
667 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C1]] [0] : vector<3x2xf32> into vector<4x3x2x…
668 // CHECK: %[[T4:.*]] = vector.insert %[[T2]], %[[T3]] [1] : vector<3x2xf32> into vector<4x3x2x…
669 // CHECK: %[[T5:.*]] = vector.insert %[[T2]], %[[T4]] [2] : vector<3x2xf32> into vector<4x3x2x…
670 // CHECK: %[[T6:.*]] = vector.insert %[[T2]], %[[T5]] [3] : vector<3x2xf32> into vector<4x3x2x…
671 // CHECK: return %[[T6]] : vector<4x3x2xf32>
673 func.func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> {
674 %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32>
675 return %0 : vector<4x3x2xf32>
679 // CHECK-SAME: %[[A:.*0]]: vector<3x2xf32>
680 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32>
681 // CHECK: %[[T0:.*]] = vector.insert %[[A]], %[[C0]] [0] : vector<3x2xf32> into vector<4x3x2xf…
682 // CHECK: %[[T1:.*]] = vector.insert %[[A]], %[[T0]] [1] : vector<3x2xf32> into vector<4x3x2xf…
683 // CHECK: %[[T2:.*]] = vector.insert %[[A]], %[[T1]] [2] : vector<3x2xf32> into vector<4x3x2xf…
684 // CHECK: %[[T3:.*]] = vector.insert %[[A]], %[[T2]] [3] : vector<3x2xf32> into vector<4x3x2xf…
685 // CHECK: return %[[T3]] : vector<4x3x2xf32>
687 func.func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> {
688 %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32>
689 return %0 : vector<4x3x2xf32>
693 // CHECK-SAME: %[[A:.*0]]: vector<1xf32>
694 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<1xf32>
695 // CHECK: %[[T1:.*]] = vector.splat %[[T0]] : vector<4xf32>
696 // CHECK: return %[[T1]] : vector<4xf32>
698 func.func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> {
699 %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32>
700 return %0 : vector<4xf32>
704 // CHECK-SAME: %[[A:.*0]]: vector<1x4xf32>
705 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<3x4xf32>
706 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<1x4xf32>
707 // CHECK: %[[T1:.*]] = vector.insert %[[T0]], %[[C0]] [0] : vector<4xf32> into vector<3x4xf32>
708 // CHECK: %[[T2:.*]] = vector.insert %[[T0]], %[[T1]] [1] : vector<4xf32> into vector<3x4xf32>
709 // CHECK: %[[T3:.*]] = vector.insert %[[T0]], %[[T2]] [2] : vector<4xf32> into vector<3x4xf32>
710 // CHECK: return %[[T3]] : vector<3x4xf32>
712 func.func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> {
713 %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32>
714 return %0 : vector<3x4xf32>
718 // CHECK-SAME: %[[A:.*0]]: vector<4x1xf32>
719 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<4x3xf32>
720 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0, 0] : vector<4x1xf32>
721 // CHECK: %[[T2:.*]] = vector.splat %[[T0]] : vector<3xf32>
722 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C0]] [0] : vector<3xf32> into vector<4x3xf32>
723 // CHECK: %[[T4:.*]] = vector.extract %[[A]][1, 0] : vector<4x1xf32>
724 // CHECK: %[[T6:.*]] = vector.splat %[[T4]] : vector<3xf32>
725 // CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T3]] [1] : vector<3xf32> into vector<4x3xf32>
726 // CHECK: %[[T8:.*]] = vector.extract %[[A]][2, 0] : vector<4x1xf32>
727 // CHECK: %[[T10:.*]] = vector.splat %[[T8]] : vector<3xf32>
728 // CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T7]] [2] : vector<3xf32> into vector<4x3xf3…
729 // CHECK: %[[T12:.*]] = vector.extract %[[A]][3, 0] : vector<4x1xf32>
730 // CHECK: %[[T14:.*]] = vector.splat %[[T12]] : vector<3xf32>
731 // CHECK: %[[T15:.*]] = vector.insert %[[T14]], %[[T11]] [3] : vector<3xf32> into vector<4x3xf…
732 // CHECK: return %[[T15]] : vector<4x3xf32>
734 func.func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> {
735 %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32>
736 return %0 : vector<4x3xf32>
740 // CHECK-SAME: %[[A:.*0]]: vector<4x1x2xf32>
741 // CHECK: %[[C0:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32>
742 // CHECK: %[[C1:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
743 // CHECK: %[[T0:.*]] = vector.extract %[[A]][0, 0] : vector<4x1x2xf32>
744 // CHECK: %[[T2:.*]] = vector.insert %[[T0]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf32>
745 // CHECK: %[[T3:.*]] = vector.insert %[[T0]], %[[T2]] [1] : vector<2xf32> into vector<3x2xf32>
746 // CHECK: %[[T4:.*]] = vector.insert %[[T0]], %[[T3]] [2] : vector<2xf32> into vector<3x2xf32>
747 // CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[C0]] [0] : vector<3x2xf32> into vector<4x3x2x…
748 // CHECK: %[[T6:.*]] = vector.extract %[[A]][1, 0] : vector<4x1x2xf32>
749 // CHECK: %[[T8:.*]] = vector.insert %[[T6]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf32>
750 // CHECK: %[[T9:.*]] = vector.insert %[[T6]], %[[T8]] [1] : vector<2xf32> into vector<3x2xf32>
751 // CHECK: %[[T10:.*]] = vector.insert %[[T6]], %[[T9]] [2] : vector<2xf32> into vector<3x2xf32>
752 // CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T5]] [1] : vector<3x2xf32> into vector<4x3x…
753 // CHECK: %[[T12:.*]] = vector.extract %[[A]][2, 0] : vector<4x1x2xf32>
754 // CHECK: %[[T14:.*]] = vector.insert %[[T12]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf3…
755 // CHECK: %[[T15:.*]] = vector.insert %[[T12]], %[[T14]] [1] : vector<2xf32> into vector<3x2xf…
756 // CHECK: %[[T16:.*]] = vector.insert %[[T12]], %[[T15]] [2] : vector<2xf32> into vector<3x2xf…
757 // CHECK: %[[T17:.*]] = vector.insert %[[T16]], %[[T11]] [2] : vector<3x2xf32> into vector<4x3…
758 // CHECK: %[[T18:.*]] = vector.extract %[[A]][3, 0] : vector<4x1x2xf32>
759 // CHECK: %[[T20:.*]] = vector.insert %[[T18]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf3…
760 // CHECK: %[[T21:.*]] = vector.insert %[[T18]], %[[T20]] [1] : vector<2xf32> into vector<3x2xf…
761 // CHECK: %[[T22:.*]] = vector.insert %[[T18]], %[[T21]] [2] : vector<2xf32> into vector<3x2xf…
762 // CHECK: %[[T23:.*]] = vector.insert %[[T22]], %[[T17]] [3] : vector<3x2xf32> into vector<4x3…
763 // CHECK: return %[[T23]] : vector<4x3x2xf32>
765 func.func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> {
766 %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32>
767 return %0 : vector<4x3x2xf32>
771 …T0:.*]] = arith.constant dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>
772 // CHECK: return %[[T0]] : vector<8xi1>
774 func.func @genbool_1d() -> vector<8xi1> {
775 %0 = vector.constant_mask [4] : vector<8xi1>
776 return %0 : vector<8xi1>
780 // CHECK: %[[C1:.*]] = arith.constant dense<[true, true, false, false]> : vector<4xi1>
781 // CHECK: %[[C2:.*]] = arith.constant dense<false> : vector<4x4xi1>
782 // CHECK: %[[T0:.*]] = vector.insert %[[C1]], %[[C2]] [0] : vector<4xi1> into vector<4x4xi1>
783 // CHECK: %[[T1:.*]] = vector.insert %[[C1]], %[[T0]] [1] : vector<4xi1> into vector<4x4xi1>
784 // CHECK: return %[[T1]] : vector<4x4xi1>
786 func.func @genbool_2d() -> vector<4x4xi1> {
787 %v = vector.constant_mask [2, 2] : vector<4x4xi1>
788 return %v: vector<4x4xi1>
792 // CHECK: %[[C1:.*]] = arith.constant dense<[true, true, true, false]> : vector<4xi1>
793 // CHECK: %[[C2:.*]] = arith.constant dense<false> : vector<3x4xi1>
794 // CHECK: %[[C3:.*]] = arith.constant dense<false> : vector<2x3x4xi1>
795 // CHECK: %[[T0:.*]] = vector.insert %[[C1]], %[[C2]] [0] : vector<4xi1> into vector<3x4xi1>
796 // CHECK: %[[T1:.*]] = vector.insert %[[T0]], %[[C3]] [0] : vector<3x4xi1> into vector<2x3x4xi1>
797 // CHECK: return %[[T1]] : vector<2x3x4xi1>
799 func.func @genbool_3d() -> vector<2x3x4xi1> {
800 %v = vector.constant_mask [1, 1, 3] : vector<2x3x4xi1>
801 return %v: vector<2x3x4xi1>
806 // CHECK: %[[T0:.*]] = vector.create_mask %[[A]] : vector<3xi1>
807 // CHECK: return %[[T0]] : vector<3xi1>
809 func.func @genbool_var_1d(%arg0: index) -> vector<3xi1> {
810 %0 = vector.create_mask %arg0 : vector<3xi1>
811 return %0 : vector<3xi1>
817 // CHECK: %[[C1:.*]] = arith.constant dense<false> : vector<3xi1>
818 // CHECK: %[[C2:.*]] = arith.constant dense<false> : vector<2x3xi1>
821 // CHECK: %[[T0:.*]] = vector.create_mask %[[B]] : vector<3xi1>
823 // CHECK: %[[T2:.*]] = arith.select %[[T1]], %[[T0]], %[[C1]] : vector<3xi1>
824 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C2]] [0] : vector<3xi1> into vector<2x3xi1>
826 // CHECK: %[[T5:.*]] = arith.select %[[T4]], %[[T0]], %[[C1]] : vector<3xi1>
827 // CHECK: %[[T6:.*]] = vector.insert %[[T5]], %[[T3]] [1] : vector<3xi1> into vector<2x3xi1>
828 // CHECK: return %[[T6]] : vector<2x3xi1>
830 func.func @genbool_var_2d(%arg0: index, %arg1: index) -> vector<2x3xi1> {
831 %0 = vector.create_mask %arg0, %arg1 : vector<2x3xi1>
832 return %0 : vector<2x3xi1>
839 // CHECK-DAG: %[[C1:.*]] = arith.constant dense<false> : vector<7xi1>
840 // CHECK-DAG: %[[C2:.*]] = arith.constant dense<false> : vector<1x7xi1>
841 // CHECK-DAG: %[[C3:.*]] = arith.constant dense<false> : vector<2x1x7xi1>
844 // CHECK: %[[T0:.*]] = vector.create_mask %[[C]] : vector<7xi1>
846 // CHECK: %[[T2:.*]] = arith.select %[[T1]], %[[T0]], %[[C1]] : vector<7xi1>
847 // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C2]] [0] : vector<7xi1> into vector<1x7xi1>
849 // CHECK: %[[T5:.*]] = arith.select %[[T4]], %[[T3]], %[[C2]] : vector<1x7xi1>
850 // CHECK: %[[T6:.*]] = vector.insert %[[T5]], %[[C3]] [0] : vector<1x7xi1> into vector<2x1x7xi…
852 // CHECK: %[[T8:.*]] = arith.select %[[T7]], %[[T3]], %[[C2]] : vector<1x7xi1>
853 // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T6]] [1] : vector<1x7xi1> into vector<2x1x7xi…
854 // CHECK: return %[[T9]] : vector<2x1x7xi1>
856 func.func @genbool_var_3d(%arg0: index, %arg1: index, %arg2: index) -> vector<2x1x7xi1> {
857 %0 = vector.create_mask %arg0, %arg1, %arg2 : vector<2x1x7xi1>
858 return %0 : vector<2x1x7xi1>
862 // CHECK-SAME: (%[[A0:.+]]: vector<1x2xi32>, %[[A1:.+]]: vector<2x2xi32>, %[[A2:.+]]: vector<2xi32>)
863 // CHECK-DAG: %[[C:.+]] = arith.constant dense<0> : vector<2xi32>
864 // CHECK-DAG: %[[E00:.+]] = vector.extract %[[A0]][0] : vector<1x2xi32>
865 // CHECK-DAG: %[[E10:.+]] = vector.extract %[[A1]][0] : vector<2x2xi32>
866 // CHECK: %[[M0:.+]] = arith.muli %[[E10]], %[[E00]] : vector<2xi32>
867 // CHECK: %[[R0:.+]] = vector.reduction <add>, %[[M0]] : vector<2xi32> into i32
868 // CHECK: %[[I0:.+]] = vector.insert %[[R0]], %[[C]] [0] : i32 into vector<2xi32>
869 // CHECK: %[[E11:.+]] = vector.extract %[[A1]][1] : vector<2x2xi32>
870 // CHECK: %[[M1:.+]] = arith.muli %[[E11]], %[[E00]] : vector<2xi32>
871 // CHECK: %[[R1:.+]] = vector.reduction <add>, %[[M1]] : vector<2xi32> into i32
872 // CHECK: %[[I1:.+]] = vector.insert %[[R1]], %[[I0]] [1] : i32 into vector<2xi32>
873 // CHECK: %[[S:.+]] = arith.addi %[[I1]], %[[A2]] : vector<2xi32>
874 // CHECK: return %[[S]] : vector<2xi32>
876 …t_one_sided_unit_reduction_dim(%arg0 : vector<1x2xi32>, %arg1 : vector<2x2xi32>, %arg2 : vector<2x…
877 %res = vector.contract {
884 kind = #vector.kind<add>
885 } %arg0, %arg1, %arg2 : vector<1x2xi32>, vector<2x2xi32>, vector<2xi32> into vector<2xi32>
886 return %res : vector<2xi32>
900 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>,
901 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>,
902 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
903 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
904 // OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32>
905 // OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32>
906 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]]
907 // OUTERPRODUCT: return %[[c0]] : vector<2x3xf32>
908 func.func @matmul_0(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<2x3xf32>)
909 -> vector<2x3xf32>
911 %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2
912 : vector<2x1xf32>, vector<1x3xf32> into vector<2x3xf32>
913 return %0 : vector<2x3xf32>
917 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf16>,
918 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf16>,
919 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
920 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
921 // OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf16>
922 // OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf16>
923 // OUTERPRODUCT: %[[a1:.*]] = arith.extf %[[a0]] : vector<2xf16> to vector<2xf32>
924 // OUTERPRODUCT: %[[b1:.*]] = arith.extf %[[b0]] : vector<3xf16> to vector<3xf32>
925 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a1]], %[[b1]], %[[C]]
926 // OUTERPRODUCT: return %[[c0]] : vector<2x3xf32>
927 func.func @matmul_0_mixed(%arg0: vector<2x1xf16>, %arg1: vector<1x3xf16>, %arg2: vector<2x3xf32>)
928 -> vector<2x3xf32>
930 %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2
931 : vector<2x1xf16>, vector<1x3xf16> into vector<2x3xf32>
932 return %0 : vector<2x3xf32>
946 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>,
947 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<3x1xf32>,
948 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
949 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
950 // OUTERPRODUCT: %[[Bt:.*]] = vector.transpose %[[B]], [1, 0]
951 // OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32>
952 // OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[Bt]][0] : vector<1x3xf32>
953 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]]
954 // OUTERPRODUCT: return %[[c0]] : vector<2x3xf32>
955 func.func @matmul_1(%arg0: vector<2x1xf32>, %arg1: vector<3x1xf32>, %arg2: vector<2x3xf32>)
956 -> vector<2x3xf32>
958 %0 = vector.contract #matmat_trait_1 %arg0, %arg1, %arg2
959 : vector<2x1xf32>, vector<3x1xf32> into vector<2x3xf32>
960 return %0 : vector<2x3xf32>
974 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<1x2xf32>,
975 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>,
976 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
977 // OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[A]][0] : vector<1x2xf32>
978 // OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32>
979 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]]
980 // OUTERPRODUCT: return %[[c0]] : vector<2x3xf32>
981 func.func @matmul_2(%arg0: vector<1x2xf32>, %arg1: vector<1x3xf32>, %arg2: vector<2x3xf32>)
982 -> vector<2x3xf32>
984 %0 = vector.contract #matmat_trait_2 %arg0, %arg1, %arg2
985 : vector<1x2xf32>, vector<1x3xf32> into vector<2x3xf32>
986 return %0 : vector<2x3xf32>
1000 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<1x2xf32>,
1001 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<3x1xf32>,
1002 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32>
1003 // OUTERPRODUCT: %[[Bt:.*]] = vector.transpose %[[B]], [1, 0]
1004 // OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[A]][0] : vector<1x2xf32>
1005 // OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[Bt]][0] : vector<1x3xf32>
1006 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]]
1007 // OUTERPRODUCT: return %[[c0]] : vector<2x3xf32>
1008 func.func @matmul_3(%arg0: vector<1x2xf32>, %arg1: vector<3x1xf32>, %arg2: vector<2x3xf32>)
1009 -> vector<2x3xf32>
1011 %0 = vector.contract #matmat_trait_3 %arg0, %arg1, %arg2
1012 : vector<1x2xf32>, vector<3x1xf32> into vector<2x3xf32>
1013 return %0 : vector<2x3xf32>
1027 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>,
1028 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>,
1029 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32>
1030 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
1031 // OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32>
1032 // OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32>
1033 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]]
1034 // OUTERPRODUCT: return %[[c0]] : vector<3x2xf32>
1035 func.func @matmul_4(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>)
1036 -> vector<3x2xf32>
1038 %0 = vector.contract #matmat_trait_4 %arg0, %arg1, %arg2
1039 : vector<2x1xf32>, vector<1x3xf32> into vector<3x2xf32>
1040 return %0 : vector<3x2xf32>
1054 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>,
1055 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>,
1056 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32>
1057 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
1058 // OUTERPRODUCT-DAG: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32>
1059 // OUTERPRODUCT-DAG: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32>
1060 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]]
1061 // OUTERPRODUCT: return %[[c0]] : vector<3x2xf32>
1062 func.func @matmul_5(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>)
1063 -> vector<3x2xf32>
1065 %0 = vector.contract #matmat_trait_5 %arg0, %arg1, %arg2
1066 : vector<2x1xf32>, vector<1x3xf32> into vector<3x2xf32>
1067 return %0 : vector<3x2xf32>
1081 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>,
1082 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>,
1083 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32>
1084 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
1085 // OUTERPRODUCT-DAG: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32>
1086 // OUTERPRODUCT-DAG: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32>
1087 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]]
1088 // OUTERPRODUCT: return %[[c0]] : vector<3x2xf32>
1089 func.func @matmul_6(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>)
1090 -> vector<3x2xf32>
1092 %0 = vector.contract #matmat_trait_6 %arg0, %arg1, %arg2
1093 : vector<2x1xf32>, vector<1x3xf32> into vector<3x2xf32>
1094 return %0 : vector<3x2xf32>
1108 // OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>,
1109 // OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>,
1110 // OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32>
1111 // OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0]
1112 // OUTERPRODUCT-DAG: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32>
1113 // OUTERPRODUCT-DAG: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32>
1114 // OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]]
1115 // OUTERPRODUCT: return %[[c0]] : vector<3x2xf32>
1116 func.func @matmul_7(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>)
1117 -> vector<3x2xf32>
1119 %0 = vector.contract #matmat_trait_7 %arg0, %arg1, %arg2
1120 : vector<2x1xf32>, vector<1x3xf32> into vector<3x2xf32>
1121 return %0 : vector<3x2xf32>
1125 // FILTEROUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<4x4xf32>,
1126 // FILTEROUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x4xf32>,
1127 // FILTEROUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<4x4xf32>
1128 // FILTEROUTERPRODUCT: %[[c0:.*]] = vector.contract {{{.*}}} %[[A]], %[[B]], %[[C]]
1129 func.func @matmul_4_filtered(%arg0: vector<4x4xf32>, %arg1: vector<4x4xf32>, %arg2: vector<4x4xf32>)
1130 -> vector<4x4xf32>
1132 %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2
1133 : vector<4x4xf32>, vector<4x4xf32> into vector<4x4xf32>
1134 return %0 : vector<4x4xf32>
1138 // FILTEROUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<3x4xf32>,
1139 // FILTEROUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x4xf32>,
1140 // FILTEROUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x4xf32>
1141 // FILTEROUTERPRODUCT: %[[c0:.*]] = vector.contract {{{.*}}} %[[A]], %[[B]], %[[C]]
1142 func.func @matmul_4_not_filtered(%arg0: vector<3x4xf32>, %arg1: vector<4x4xf32>, %arg2: vector<3x4x…
1143 -> vector<3x4xf32>
1145 %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2
1146 : vector<3x4xf32>, vector<4x4xf32> into vector<3x4xf32>
1147 return %0 : vector<3x4xf32>
1151 // PARALLEL: %[[E0:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32>
1152 // PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32>
1153 // PARALLEL: %[[F:.*]] = vector.fma %[[E0]], %[[E1]], %{{.*}} : vector<4xf32>
1154 // PARALLEL: return %[[F]] : vector<4xf32>
1155 …c @parrallel_contract_lowering(%arg0: vector<1x1x4xf32>, %arg1: vector<1x1x4xf32>, %arg2: vector<4…
1156vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2…
1157 return %0 : vector<4xf32>
1161 // PARALLEL: %[[B:.*]] = vector.broadcast %{{.*}} : vector<1x1xf32> to vector<4x1x1xf32>
1162 // PARALLEL: %[[T:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<4x1x1xf32> to vector<1…
1163 // PARALLEL: %[[E0:.*]] = vector.extract %[[T]][0, 0] : vector<1x1x4xf32>
1164 // PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32>
1165 // PARALLEL: %[[F:.*]] = vector.fma %[[E0]], %[[E1]], %{{.*}} : vector<4xf32>
1166 // PARALLEL: return %[[F]] : vector<4xf32>
1167 …lel_contract_lowering_broadcast(%arg0: vector<1x1xf32>, %arg1: vector<1x1x4xf32>, %arg2: vector<4x…
1168vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) ->…
1169 return %0 : vector<4xf32>
1173 // PARALLEL: %[[B:.*]] = vector.broadcast %{{.*}} : vector<1x1xf32> to vector<4x1x1xf32>
1174 // PARALLEL: %[[T0:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<4x1x1xf32> to vector<…
1175 // PARALLEL: %[[T1:.*]] = vector.transpose %{{.*}}, [0, 2, 1] : vector<1x4x1xf32> to vector
1176 // PARALLEL: %[[E0:.*]] = vector.extract %[[T0]][0, 0] : vector<1x1x4xf32>
1177 // PARALLEL: %[[E1:.*]] = vector.extract %[[T1]][0, 0] : vector<1x1x4xf32>
1178 // PARALLEL: %[[F:.*]] = vector.fma %[[E0]], %[[E1]], %arg2 : vector<4xf32>
1179 // PARALLEL: return %[[F]] : vector<4xf32>
1180 …lel_contract_lowering_transpose(%arg0: vector<1x1xf32>, %arg1: vector<1x4x1xf32>, %arg2: vector<4x…
1181vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) ->…
1182 return %0 : vector<4xf32>
1186 // PARALLEL: %[[E0:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1xf32>
1187 // PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1xf32>
1191 func.func @parrallel_contract_lowering_scalar(%arg0: vector<1x1xf32>, %arg1: vector<1x1xf32>, %arg2…
1192 %0 = vector.contract {
1196 iterator_types = ["reduction", "reduction"], kind = #vector.kind<add>}
1197 %arg0, %arg1, %arg2 : vector<1x1xf32>, vector<1x1xf32> into f32