1// RUN: mlir-opt %s | mlir-opt | FileCheck %s
2
// CHECK-LABEL: func @vector_transfer_ops_0d(
// Round-trips 0-d vector transfers: the index list is empty and the
// permutation map has neither dimensions nor results. The same read/write
// pair is exercised on a rank-0 tensor and on a rank-0 memref.
//
// The patterns below pin the op name, the empty index list and the types.
// The write patterns tolerate an optional attribute dictionary before the
// type list so they do not depend on whether the map is printed.
func.func @vector_transfer_ops_0d(%arg0: tensor<f32>, %arg1: memref<f32>)
    -> tensor<f32> {
  %f0 = arith.constant 0.0 : f32
  // Tensor flavour: the write yields a new tensor value.
  // CHECK: vector.transfer_read %{{.*}}[], %{{.*}} : tensor<f32>, vector<f32>
  %0 = vector.transfer_read %arg0[], %f0 {permutation_map = affine_map<()->()>} :
    tensor<f32>, vector<f32>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[]{{.*}} : vector<f32>, tensor<f32>
  %1 = vector.transfer_write %0, %arg0[] {permutation_map = affine_map<()->()>} :
    vector<f32>, tensor<f32>
  // Memref flavour: the write has no result.
  // CHECK: vector.transfer_read %{{.*}}[], %{{.*}} : memref<f32>, vector<f32>
  %2 = vector.transfer_read %arg1[], %f0 {permutation_map = affine_map<()->()>} :
    memref<f32>, vector<f32>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[]{{.*}} : vector<f32>, memref<f32>
  vector.transfer_write %2, %arg1[] {permutation_map = affine_map<()->()>} :
    vector<f32>, memref<f32>
  return %1: tensor<f32>
}
17
// CHECK-LABEL: func @vector_transfer_ops_0d_from_higher_d(
// Round-trips 0-d vector transfers whose source has rank 1 (tensor) or
// rank 2 (memref); the permutation maps project every source dimension away.
//
// The patterns below pin the op name, the index operands and the types.
// The write patterns tolerate an optional attribute dictionary before the
// type list so they do not depend on whether the map is printed.
func.func @vector_transfer_ops_0d_from_higher_d(%arg0: tensor<?xf32>, %arg1: memref<?x?xf32>)
    -> tensor<?xf32> {
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  // Rank-1 tensor source.
  // CHECK: vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : tensor<?xf32>, vector<f32>
  %0 = vector.transfer_read %arg0[%c0], %f0 {permutation_map = affine_map<(d0)->()>} :
    tensor<?xf32>, vector<f32>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]]]{{.*}} : vector<f32>, tensor<?xf32>
  %1 = vector.transfer_write %0, %arg0[%c0] {permutation_map = affine_map<(d0)->()>} :
    vector<f32>, tensor<?xf32>
  // Rank-2 memref source.
  // CHECK: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<?x?xf32>, vector<f32>
  %2 = vector.transfer_read %arg1[%c0, %c0], %f0 {permutation_map = affine_map<(d0, d1)->()>} :
    memref<?x?xf32>, vector<f32>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]], %[[C0]]]{{.*}} : vector<f32>, memref<?x?xf32>
  vector.transfer_write %2, %arg1[%c0, %c0] {permutation_map = affine_map<(d0, d1)->()>} :
    vector<f32>, memref<?x?xf32>
  return %1: tensor<?xf32>
}
33
// CHECK-LABEL: func @vector_transfer_ops(
// Parse/print coverage for transfer reads and writes on memrefs: explicit
// permutation maps, memrefs with vector element types, in_bounds attributes
// and masked transfers.
func.func @vector_transfer_ops(
    %arg0: memref<?x?xf32>,
    %arg1: memref<?x?xvector<4x3xf32>>,
    %arg2: memref<?x?xvector<4x3xi32>>,
    %arg3: memref<?x?xvector<4x3xindex>>,
    %arg4: memref<?x?x?xf32>) {
  // CHECK: %[[C3:.*]] = arith.constant 3 : index
  %idx = arith.constant 3 : index
  %pad_three = arith.constant 3.0 : f32
  %pad_zero = arith.constant 0.0 : f32
  %zero_i32 = arith.constant 0 : i32
  %zero_index = arith.constant 0 : index
  %one_i1 = arith.constant 1 : i1

  // Splat vectors and masks used as operands by the transfers below.
  %splat_f32 = vector.splat %pad_zero : vector<4x3xf32>
  %splat_i32 = vector.splat %zero_i32 : vector<4x3xi32>
  %splat_index = vector.splat %zero_index : vector<4x3xindex>
  %mask_1d = arith.constant dense<[0, 0, 1, 0, 1]> : vector<5xi1>
  %mask_2d = vector.splat %one_i1 : vector<5x4xi1>

  // Reads with explicit, non-identity permutation maps.
  // CHECK: vector.transfer_read
  %rd0 = vector.transfer_read %arg0[%idx, %idx], %pad_zero {permutation_map = affine_map<(d0, d1)->(d0)>} : memref<?x?xf32>, vector<128xf32>
  // CHECK: vector.transfer_read
  %rd1 = vector.transfer_read %arg0[%idx, %idx], %pad_zero {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : memref<?x?xf32>, vector<3x7xf32>
  // CHECK: vector.transfer_read
  %rd2 = vector.transfer_read %arg0[%idx, %idx], %pad_three {permutation_map = affine_map<(d0, d1)->(d0)>} : memref<?x?xf32>, vector<128xf32>
  // CHECK: vector.transfer_read
  %rd3 = vector.transfer_read %arg0[%idx, %idx], %pad_three {permutation_map = affine_map<(d0, d1)->(d1)>} : memref<?x?xf32>, vector<128xf32>
  // The pattern below expects the identity permutation map to be absent from
  // the printed form.
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  %rd4 = vector.transfer_read %arg1[%idx, %idx], %splat_f32 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {in_bounds = [false, true]} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  %rd5 = vector.transfer_read %arg1[%idx, %idx], %splat_f32 {in_bounds = [false, true]} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  // Reads whose result element type (i8) differs from the memref's vector
  // element type.
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref<?x?xvector<4x3xi32>>, vector<5x24xi8>
  %rd6 = vector.transfer_read %arg2[%idx, %idx], %splat_i32 : memref<?x?xvector<4x3xi32>>, vector<5x24xi8>
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref<?x?xvector<4x3xindex>>, vector<5x48xi8>
  %rd7 = vector.transfer_read %arg3[%idx, %idx], %splat_index : memref<?x?xvector<4x3xindex>>, vector<5x48xi8>
  // Masked reads.
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}}, %{{.*}} : memref<?x?xf32>, vector<5xf32>
  %rd8 = vector.transfer_read %arg0[%idx, %idx], %pad_zero, %mask_1d : memref<?x?xf32>, vector<5xf32>
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]], %[[C3]]], %{{.*}}, %{{.*}} : memref<?x?x?xf32>, vector<5x4x8xf32>
  %rd9 = vector.transfer_read %arg4[%idx, %idx, %idx], %pad_zero, %mask_2d {permutation_map = affine_map<(d0, d1, d2)->(d1, d0, 0)>} : memref<?x?x?xf32>, vector<5x4x8xf32>

  // Writes mirroring the reads above.
  // CHECK: vector.transfer_write
  vector.transfer_write %rd0, %arg0[%idx, %idx] {permutation_map = affine_map<(d0, d1)->(d0)>} : vector<128xf32>, memref<?x?xf32>
  // CHECK: vector.transfer_write
  vector.transfer_write %rd1, %arg0[%idx, %idx] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, memref<?x?xf32>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
  vector.transfer_write %rd4, %arg1[%idx, %idx] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
  // The pattern below expects an all-false in_bounds attribute to be absent
  // from the printed form.
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
  vector.transfer_write %rd5, %arg1[%idx, %idx] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x24xi8>, memref<?x?xvector<4x3xi32>>
  vector.transfer_write %rd6, %arg2[%idx, %idx] : vector<5x24xi8>, memref<?x?xvector<4x3xi32>>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x48xi8>, memref<?x?xvector<4x3xindex>>
  vector.transfer_write %rd7, %arg3[%idx, %idx] : vector<5x48xi8>, memref<?x?xvector<4x3xindex>>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : vector<5xf32>, memref<?x?xf32>
  vector.transfer_write %rd8, %arg0[%idx, %idx], %mask_1d : vector<5xf32>, memref<?x?xf32>

  return
}
92
93
// CHECK-LABEL: func @vector_transfer_ops_tensor(
// Parse/print coverage for transfer reads and writes on tensors. Every write
// produces a new tensor value, and all six results are returned.
func.func @vector_transfer_ops_tensor(
    %arg0: tensor<?x?xf32>,
    %arg1: tensor<?x?xvector<4x3xf32>>,
    %arg2: tensor<?x?xvector<4x3xi32>>,
    %arg3: tensor<?x?xvector<4x3xindex>>)
    -> (tensor<?x?xf32>, tensor<?x?xf32>,
        tensor<?x?xvector<4x3xf32>>, tensor<?x?xvector<4x3xf32>>,
        tensor<?x?xvector<4x3xi32>>, tensor<?x?xvector<4x3xindex>>) {
  // CHECK: %[[C3:.*]] = arith.constant 3 : index
  %idx = arith.constant 3 : index
  %pad_three = arith.constant 3.0 : f32
  %pad_zero = arith.constant 0.0 : f32
  %zero_i32 = arith.constant 0 : i32
  %zero_index = arith.constant 0 : index

  // Splat vectors used as operands by the vector-element reads below.
  %splat_f32 = vector.splat %pad_zero : vector<4x3xf32>
  %splat_i32 = vector.splat %zero_i32 : vector<4x3xi32>
  %splat_index = vector.splat %zero_index : vector<4x3xindex>

  // Reads with explicit, non-identity permutation maps.
  // CHECK: vector.transfer_read
  %rd0 = vector.transfer_read %arg0[%idx, %idx], %pad_zero {permutation_map = affine_map<(d0, d1)->(d0)>} : tensor<?x?xf32>, vector<128xf32>
  // CHECK: vector.transfer_read
  %rd1 = vector.transfer_read %arg0[%idx, %idx], %pad_zero {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : tensor<?x?xf32>, vector<3x7xf32>
  // CHECK: vector.transfer_read
  %rd2 = vector.transfer_read %arg0[%idx, %idx], %pad_three {permutation_map = affine_map<(d0, d1)->(d0)>} : tensor<?x?xf32>, vector<128xf32>
  // CHECK: vector.transfer_read
  %rd3 = vector.transfer_read %arg0[%idx, %idx], %pad_three {permutation_map = affine_map<(d0, d1)->(d1)>} : tensor<?x?xf32>, vector<128xf32>
  // The pattern below expects the identity permutation map to be absent from
  // the printed form.
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  %rd4 = vector.transfer_read %arg1[%idx, %idx], %splat_f32 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {in_bounds = [false, true]} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  %rd5 = vector.transfer_read %arg1[%idx, %idx], %splat_f32 {in_bounds = [false, true]} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor<?x?xvector<4x3xi32>>, vector<5x24xi8>
  %rd6 = vector.transfer_read %arg2[%idx, %idx], %splat_i32 : tensor<?x?xvector<4x3xi32>>, vector<5x24xi8>
  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor<?x?xvector<4x3xindex>>, vector<5x48xi8>
  %rd7 = vector.transfer_read %arg3[%idx, %idx], %splat_index : tensor<?x?xvector<4x3xindex>>, vector<5x48xi8>

  // Writes mirroring the reads above.
  // CHECK: vector.transfer_write
  %wr0 = vector.transfer_write %rd0, %arg0[%idx, %idx] {permutation_map = affine_map<(d0, d1)->(d0)>} : vector<128xf32>, tensor<?x?xf32>
  // CHECK: vector.transfer_write
  %wr1 = vector.transfer_write %rd1, %arg0[%idx, %idx] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, tensor<?x?xf32>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
  %wr2 = vector.transfer_write %rd4, %arg1[%idx, %idx] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
  // The pattern below expects an all-false in_bounds attribute to be absent
  // from the printed form.
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
  %wr3 = vector.transfer_write %rd5, %arg1[%idx, %idx] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x24xi8>, tensor<?x?xvector<4x3xi32>>
  %wr4 = vector.transfer_write %rd6, %arg2[%idx, %idx] : vector<5x24xi8>, tensor<?x?xvector<4x3xi32>>
  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x48xi8>, tensor<?x?xvector<4x3xindex>>
  %wr5 = vector.transfer_write %rd7, %arg3[%idx, %idx] : vector<5x48xi8>, tensor<?x?xvector<4x3xindex>>

  return %wr0, %wr1, %wr2, %wr3, %wr4, %wr5
      : tensor<?x?xf32>, tensor<?x?xf32>,
        tensor<?x?xvector<4x3xf32>>, tensor<?x?xvector<4x3xf32>>,
        tensor<?x?xvector<4x3xi32>>, tensor<?x?xvector<4x3xindex>>
}
150
// CHECK-LABEL: @vector_broadcast
// Broadcasts from a scalar, from a 0-d vector, and from lower-rank or
// unit-dimension vectors up to the 8x16 result shape.
func.func @vector_broadcast(%a: f32, %b: vector<f32>, %c: vector<16xf32>,
                            %d: vector<1x16xf32>, %e: vector<8x1xf32>)
    -> vector<8x16xf32> {
  // CHECK: vector.broadcast %{{.*}} : f32 to vector<f32>
  %scalar_to_0d = vector.broadcast %a : f32 to vector<f32>
  // CHECK: vector.broadcast %{{.*}} : vector<f32> to vector<4xf32>
  %from_0d = vector.broadcast %b : vector<f32> to vector<4xf32>
  // CHECK: vector.broadcast %{{.*}} : f32 to vector<16xf32>
  %scalar_to_1d = vector.broadcast %a : f32 to vector<16xf32>
  // CHECK-NEXT: vector.broadcast %{{.*}} : vector<16xf32> to vector<8x16xf32>
  %from_1d = vector.broadcast %c : vector<16xf32> to vector<8x16xf32>
  // CHECK-NEXT: vector.broadcast %{{.*}} : vector<1x16xf32> to vector<8x16xf32>
  %from_unit_row = vector.broadcast %d : vector<1x16xf32> to vector<8x16xf32>
  // CHECK-NEXT: vector.broadcast %{{.*}} : vector<8x1xf32> to vector<8x16xf32>
  %from_unit_col = vector.broadcast %e : vector<8x1xf32> to vector<8x16xf32>
  return %from_unit_row : vector<8x16xf32>
}
167
// CHECK-LABEL: @shuffle1D
// Chains three 1-D shuffles; the length of each result equals the number of
// entries in the shuffle's index list (4, then 3, then 2).
func.func @shuffle1D(%a: vector<2xf32>, %b: vector<4xf32>) -> vector<2xf32> {
  // CHECK: vector.shuffle %{{.*}}, %{{.*}}[0, 1, 2, 3] : vector<2xf32>, vector<2xf32>
  %len4 = vector.shuffle %a, %a[0, 1, 2, 3] : vector<2xf32>, vector<2xf32>
  // CHECK-NEXT: vector.shuffle %{{.*}}, %{{.*}}[0, 1, 2] : vector<4xf32>, vector<4xf32>
  %len3 = vector.shuffle %len4, %b[0, 1, 2] : vector<4xf32>, vector<4xf32>
  // CHECK-NEXT: vector.shuffle %{{.*}}, %{{.*}}[0, 6] : vector<3xf32>, vector<4xf32>
  %len2 = vector.shuffle %len3, %b[0, 6] : vector<3xf32>, vector<4xf32>
  return %len2 : vector<2xf32>
}
178
// CHECK-LABEL: @shuffle2D
// Shuffles a 1x4 and a 2x4 vector into a 3x4 result.
func.func @shuffle2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>)
    -> vector<3x4xf32> {
  // CHECK: vector.shuffle %{{.*}}, %{{.*}}[0, 1, 2] : vector<1x4xf32>, vector<2x4xf32>
  %stacked = vector.shuffle %a, %b[0, 1, 2] : vector<1x4xf32>, vector<2x4xf32>
  return %stacked : vector<3x4xf32>
}
185
// CHECK-LABEL: @extract_element_0d
// Extracts the element of a 0-d vector using an empty position.
func.func @extract_element_0d(%a: vector<f32>) -> f32 {
  // CHECK-NEXT: vector.extractelement %{{.*}}[] : vector<f32>
  %elem = vector.extractelement %a[] : vector<f32>
  return %elem : f32
}
192
// CHECK-LABEL: @extract_element
// Extracts one element of a 1-D vector at a position given by an i32 SSA
// value.
func.func @extract_element(%a: vector<16xf32>) -> f32 {
  // CHECK:      %[[C15:.*]] = arith.constant 15 : i32
  %pos = arith.constant 15 : i32
  // CHECK-NEXT: vector.extractelement %{{.*}}[%[[C15]] : i32] : vector<16xf32>
  %elem = vector.extractelement %a[%pos : i32] : vector<16xf32>
  return %elem : f32
}
201
// CHECK-LABEL: @extract
// Extracts with zero to three static positions from a 4x8x16 vector; each
// additional position drops one leading dimension from the result type.
func.func @extract(%arg0: vector<4x8x16xf32>)
    -> (vector<4x8x16xf32>, vector<8x16xf32>, vector<16xf32>, f32) {
  // CHECK: vector.extract {{.*}}[] : vector<4x8x16xf32>
  %rank3 = vector.extract %arg0[] : vector<4x8x16xf32>
  // CHECK: vector.extract {{.*}}[3] : vector<4x8x16xf32>
  %rank2 = vector.extract %arg0[3] : vector<4x8x16xf32>
  // CHECK-NEXT: vector.extract {{.*}}[3, 3] : vector<4x8x16xf32>
  %rank1 = vector.extract %arg0[3, 3] : vector<4x8x16xf32>
  // CHECK-NEXT: vector.extract {{.*}}[3, 3, 3] : vector<4x8x16xf32>
  %scalar = vector.extract %arg0[3, 3, 3] : vector<4x8x16xf32>
  return %rank3, %rank2, %rank1, %scalar
      : vector<4x8x16xf32>, vector<8x16xf32>, vector<16xf32>, f32
}
214
// CHECK-LABEL: @insert_element_0d
// Inserts a scalar into a 0-d vector using an empty position.
func.func @insert_element_0d(%a: f32, %b: vector<f32>) -> vector<f32> {
  // CHECK-NEXT: vector.insertelement %{{.*}}, %{{.*}}[] : vector<f32>
  %updated = vector.insertelement %a, %b[] : vector<f32>
  return %updated : vector<f32>
}
221
// CHECK-LABEL: @insert_element
// Inserts a scalar into a 1-D vector at a position given by an i32 SSA value.
func.func @insert_element(%a: f32, %b: vector<16xf32>) -> vector<16xf32> {
  // CHECK:      %[[C15:.*]] = arith.constant 15 : i32
  %pos = arith.constant 15 : i32
  // CHECK-NEXT: vector.insertelement %{{.*}}, %{{.*}}[%[[C15]] : i32] : vector<16xf32>
  %updated = vector.insertelement %a, %b[%pos : i32] : vector<16xf32>
  return %updated : vector<16xf32>
}
230
// CHECK-LABEL: @insert
// Inserts a 2-D slice, a 1-D slice and a scalar into a 4x8x16 vector, then
// inserts a full-rank vector using an empty position.
func.func @insert(%a: f32, %b: vector<16xf32>, %c: vector<8x16xf32>,
                  %res: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
  // CHECK: vector.insert %{{.*}}, %{{.*}}[3] : vector<8x16xf32> into vector<4x8x16xf32>
  %with_2d = vector.insert %c, %res[3] : vector<8x16xf32> into vector<4x8x16xf32>
  // CHECK: vector.insert %{{.*}}, %{{.*}}[3, 3] : vector<16xf32> into vector<4x8x16xf32>
  %with_1d = vector.insert %b, %res[3, 3] : vector<16xf32> into vector<4x8x16xf32>
  // CHECK: vector.insert %{{.*}}, %{{.*}}[3, 3, 3] : f32 into vector<4x8x16xf32>
  %with_scalar = vector.insert %a, %res[3, 3, 3] : f32 into vector<4x8x16xf32>
  // The same value is used as both source and destination here.
  // CHECK: vector.insert %{{.*}}, %{{.*}}[] : vector<4x8x16xf32> into vector<4x8x16xf32>
  %full_rank = vector.insert %with_scalar, %with_scalar[] : vector<4x8x16xf32> into vector<4x8x16xf32>
  return %full_rank : vector<4x8x16xf32>
}
243
// CHECK-LABEL: @outerproduct
// Outer product of a 4-element and an 8-element vector, without and with a
// third 4x8 operand.
func.func @outerproduct(%arg0: vector<4xf32>, %arg1: vector<8xf32>,
                        %arg2: vector<4x8xf32>) -> vector<4x8xf32> {
  // CHECK: vector.outerproduct {{.*}} : vector<4xf32>, vector<8xf32>
  %two_operands = vector.outerproduct %arg0, %arg1 : vector<4xf32>, vector<8xf32>
  // CHECK: vector.outerproduct {{.*}}, {{.*}}, {{.*}} : vector<4xf32>, vector<8xf32>
  %three_operands = vector.outerproduct %arg0, %arg1, %arg2 : vector<4xf32>, vector<8xf32>
  return %three_operands : vector<4x8xf32>
}
252
// CHECK-LABEL: @insert_strided_slice
// Inserts a 4x4 slice into a 4x8x16 vector with three offsets and two
// strides; the result is not used.
func.func @insert_strided_slice(%a: vector<4x4xf32>, %b: vector<4x8x16xf32>) {
  // CHECK: vector.insert_strided_slice %{{.*}}, %{{.*}} {offsets = [2, 2, 2], strides = [1, 1]} : vector<4x4xf32> into vector<4x8x16xf32>
  %updated = vector.insert_strided_slice %a, %b
      {offsets = [2, 2, 2], strides = [1, 1]}
      : vector<4x4xf32> into vector<4x8x16xf32>
  return
}
259
// CHECK-LABEL: @extract_strided_slice
// Extracts a 2x2x16 slice from a 4x8x16 vector; offsets, sizes and strides
// are given for the two leading dimensions only.
func.func @extract_strided_slice(%arg0: vector<4x8x16xf32>)
    -> vector<2x2x16xf32> {
  // CHECK: vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8x16xf32>
  %slice = vector.extract_strided_slice %arg0
      {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]}
      : vector<4x8x16xf32> to vector<2x2x16xf32>
  return %slice : vector<2x2x16xf32>
}
266
// Indexing maps for the two vector operands and the scalar accumulator.
#contraction_to_scalar_accesses = [
  affine_map<(i) -> (i)>, affine_map<(i) -> (i)>, affine_map<(i) -> ()>
]
#contraction_to_scalar_trait = {
  indexing_maps = #contraction_to_scalar_accesses,
  iterator_types = ["reduction"]
}
// CHECK-LABEL: @contraction_to_scalar
// Contracts two 10-element vectors into an f32 over one reduction iterator.
// The trait does not set a kind; the pattern below expects `add` to be
// printed.
func.func @contraction_to_scalar(%arg0: vector<10xf32>, %arg1: vector<10xf32>)
    -> f32 {
  // CHECK:      %[[C0:.*]] = arith.constant 0.000000e+00 : f32
  %acc = arith.constant 0.0: f32
  // CHECK:      %[[X:.*]] = vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["reduction"], kind = #vector.kind<add>} %{{.*}}, %{{.*}}, %[[C0]] : vector<10xf32>, vector<10xf32> into f32
  %reduced = vector.contract #contraction_to_scalar_trait %arg0, %arg1, %acc
      : vector<10xf32>, vector<10xf32> into f32
  // CHECK:      return %[[X]] : f32
  return %reduced : f32
}
286
// Indexing maps for the two vector operands and the scalar accumulator.
#contraction_to_scalar_max_accesses = [
  affine_map<(i) -> (i)>, affine_map<(i) -> (i)>, affine_map<(i) -> ()>
]
// Same shape as a plain scalar contraction, but with an explicit maxf kind.
#contraction_to_scalar_max_trait = {
  indexing_maps = #contraction_to_scalar_max_accesses,
  iterator_types = ["reduction"],
  kind = #vector.kind<maxf>
}
// CHECK-LABEL: @contraction_to_scalar_with_max
// Contracts two 10-element vectors into an f32 using the maxf combining kind.
func.func @contraction_to_scalar_with_max(%arg0: vector<10xf32>,
                                          %arg1: vector<10xf32>) -> f32 {
  // CHECK:      %[[C0:.*]] = arith.constant 0.000000e+00 : f32
  %acc = arith.constant 0.0: f32
  // CHECK:      %[[X:.*]] = vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["reduction"], kind = #vector.kind<maxf>} %{{.*}}, %{{.*}}, %[[C0]] : vector<10xf32>, vector<10xf32> into f32
  %reduced = vector.contract #contraction_to_scalar_max_trait %arg0, %arg1, %acc
      : vector<10xf32>, vector<10xf32> into f32
  // CHECK:      return %[[X]] : f32
  return %reduced : f32
}
307
// Trait 0: three parallel iterators (b0, f0, f1) and two reduction iterators
// (c0, c1).
#contraction_accesses0 = [
  affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
  affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
  affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
]
#contraction_trait0 = {
  indexing_maps = #contraction_accesses0,
  iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]
}
// Traits 1 and 2: four parallel iterators and two reduction iterators. The
// trailing comment on each map lists the operand shape it indexes.
#contraction_accesses1 = [
  affine_map<(f0, f1, f2, f3, c0, c1) -> (c0, f0, c1, f2)>,  // 7,  8, 16, 15
  affine_map<(f0, f1, f2, f3, c0, c1) -> (f1, c1, c0, f3)>,  // 8, 16,  7,  5
  affine_map<(f0, f1, f2, f3, c0, c1) -> (f0, f1, f2, f3)>   // 8,  8, 15,  5
]
#iterator_types1 = [
  "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"
]
#contraction_trait1 = {
  indexing_maps = #contraction_accesses1,
  iterator_types = #iterator_types1
}
// Trait 2 reuses the maps and iterators of trait 1 and sets kind to maxf.
#contraction_trait2 = {
  indexing_maps = #contraction_accesses1,
  iterator_types = #iterator_types1,
  kind = #vector.kind<maxf>
}
// CHECK-LABEL: @contraction
// Parse/print coverage for vector.contract: a mix of parallel and reduction
// dims, optional mask operands, mixed element types, and a non-default kind.
func.func @contraction(
    %arg0 : vector<7x8x16x15xf32>, %arg1 : vector<8x16x7x5xf32>,
    %arg2 : vector<8x15x5xf32>, %arg3 : vector<8x8x15x5xf32>,
    %arg4 : vector<7x8x16x15xf16>, %arg5 : vector<8x16x7x5xf16>) {
  // Test contraction with batch and contracting dims.
  // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"], kind = #vector.kind<add>} {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>
  %batched = vector.contract #contraction_trait0 %arg0, %arg1, %arg2
      : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>

  // Test contraction with only contracting dims. In this case the lhs/rhs
  // dimension of size 8 will be considered a parallel dim for lhs/rhs and will
  // appear twice in the output.
  // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"], kind = #vector.kind<add>} {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32>
  %unbatched = vector.contract #contraction_trait1 %arg0, %arg1, %arg3
      : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32>

  // Test contraction with optional vector mask arguments.
  %mask_lhs = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1>
  %mask_rhs = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1>
  // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"], kind = #vector.kind<add>} {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32>
  %masked = vector.contract #contraction_trait1
      %arg0, %arg1, %arg3, %mask_lhs, %mask_rhs
      : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32>

  // Test contraction with mixed type.
  // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"], kind = #vector.kind<add>} {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf16>, vector<8x16x7x5xf16> into vector<8x8x15x5xf32>
  %mixed = vector.contract #contraction_trait1 %arg4, %arg5, %arg3
      : vector<7x8x16x15xf16>, vector<8x16x7x5xf16> into vector<8x8x15x5xf32>

  // Test contraction with "max" instead of "add".
  // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"], kind = #vector.kind<maxf>} {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32>
  %with_max = vector.contract #contraction_trait2 %arg0, %arg1, %arg3
      : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32>
  return
}
366
// CHECK-LABEL: @create_vector_mask
// Builds a 4x3 mask from two index SSA values. The operands are passed in
// the opposite order from their definitions (3 first, then 2).
func.func @create_vector_mask() {
  // CHECK:      %[[C2:.*]] = arith.constant 2 : index
  %two = arith.constant 2 : index
  // CHECK-NEXT: %[[C3:.*]] = arith.constant 3 : index
  %three = arith.constant 3 : index
  // CHECK-NEXT: vector.create_mask %[[C3]], %[[C2]] : vector<4x3xi1>
  %mask = vector.create_mask %three, %two : vector<4x3xi1>
  return
}
378
// CHECK-LABEL: @constant_vector_mask_0d
// Constant masks of 0-d type, written with a single entry of 0 and of 1.
func.func @constant_vector_mask_0d() {
  // CHECK: vector.constant_mask [0] : vector<i1>
  %with_zero = vector.constant_mask [0] : vector<i1>
  // CHECK: vector.constant_mask [1] : vector<i1>
  %with_one = vector.constant_mask [1] : vector<i1>
  return
}
387
// CHECK-LABEL: @constant_vector_mask
// Constant masks for a fixed-size 2-D vector type and for a scalable 1-D
// vector type.
func.func @constant_vector_mask() {
  // CHECK: vector.constant_mask [3, 2] : vector<4x3xi1>
  %fixed = vector.constant_mask [3, 2] : vector<4x3xi1>
  // CHECK: vector.constant_mask [0] : vector<[4]xi1>
  %scalable = vector.constant_mask [0] : vector<[4]xi1>
  return
}
396
// CHECK-LABEL: @vector_print
// Round-trips vector.print applied to an 8x4 vector argument.
func.func @vector_print(
    %arg0: vector<8x4xf32>) {
  // CHECK: vector.print %{{.*}} : vector<8x4xf32>
  vector.print %arg0
      : vector<8x4xf32>
  return
}
403
// CHECK-LABEL: @reshape
// Reshapes a 3x2x4 vector to 2x3x4. The op takes two lists of index SSA
// values followed by a static list `[4]`.
func.func @reshape(%arg0 : vector<3x2x4xf32>) -> (vector<2x3x4xf32>) {
  // CHECK:      %[[C2:.*]] = arith.constant 2 : index
  %two = arith.constant 2 : index
  // CHECK:      %[[C3:.*]] = arith.constant 3 : index
  %three = arith.constant 3 : index
  // CHECK:      %[[C6:.*]] = arith.constant 6 : index
  %six = arith.constant 6 : index
  // CHECK:      %[[C9:.*]] = arith.constant 9 : index
  %nine = arith.constant 9 : index
  // CHECK: vector.reshape %{{.*}}, [%[[C3]], %[[C6]]], [%[[C2]], %[[C9]]], [4] : vector<3x2x4xf32> to vector<2x3x4xf32>
  %reshaped = vector.reshape %arg0, [%three, %six], [%two, %nine], [4]
      : vector<3x2x4xf32> to vector<2x3x4xf32>
  return %reshaped : vector<2x3x4xf32>
}
420
// CHECK-LABEL: @shape_cast
// Shape casts between vector types with the same element count, including
// the removal of unit dimensions.
func.func @shape_cast(
    %arg0 : vector<5x1x3x2xf32>,
    %arg1 : vector<8x1xf32>,
    %arg2 : vector<16x1x1xf32>)
    -> (vector<15x2xf32>, vector<8xf32>, vector<16xf32>, vector<16x1xf32>) {
  // CHECK: vector.shape_cast %{{.*}} : vector<5x1x3x2xf32> to vector<15x2xf32>
  %to_15x2 = vector.shape_cast %arg0 : vector<5x1x3x2xf32> to vector<15x2xf32>
  // CHECK-NEXT: vector.shape_cast %{{.*}} : vector<8x1xf32> to vector<8xf32>
  %to_8 = vector.shape_cast %arg1 : vector<8x1xf32> to vector<8xf32>
  // CHECK-NEXT: vector.shape_cast %{{.*}} : vector<16x1x1xf32> to vector<16xf32>
  %to_16 = vector.shape_cast %arg2 : vector<16x1x1xf32> to vector<16xf32>
  // CHECK-NEXT: vector.shape_cast %{{.*}} : vector<16x1x1xf32> to vector<16x1xf32>
  %to_16x1 = vector.shape_cast %arg2 : vector<16x1x1xf32> to vector<16x1xf32>
  return %to_15x2, %to_8, %to_16, %to_16x1
      : vector<15x2xf32>, vector<8xf32>, vector<16xf32>, vector<16x1xf32>
}
441
// CHECK-LABEL: @bitcast
// Bitcasts between element types; only the innermost dimension changes
// between source and result types. The last case is a 0-d vector.
func.func @bitcast(
    %arg0 : vector<5x1x3x2xf32>,
    %arg1 : vector<8x1xi32>,
    %arg2 : vector<16x1x8xi8>,
    %arg3 : vector<8x2x1xindex>,
    %arg4 : vector<f32>)
    -> (vector<5x1x3x4xf16>, vector<5x1x3x8xi8>, vector<8x4xi8>,
        vector<8x1xf32>, vector<16x1x2xi32>, vector<16x1x4xi16>,
        vector<16x1x1xindex>, vector<8x2x2xf32>, vector<i32>) {
  // Casts from f32.
  // CHECK: vector.bitcast %{{.*}} : vector<5x1x3x2xf32> to vector<5x1x3x4xf16>
  %f32_to_f16 = vector.bitcast %arg0 : vector<5x1x3x2xf32> to vector<5x1x3x4xf16>
  // CHECK-NEXT: vector.bitcast %{{.*}} : vector<5x1x3x2xf32> to vector<5x1x3x8xi8>
  %f32_to_i8 = vector.bitcast %arg0 : vector<5x1x3x2xf32> to vector<5x1x3x8xi8>

  // Casts from i32.
  // CHECK-NEXT: vector.bitcast %{{.*}} : vector<8x1xi32> to vector<8x4xi8>
  %i32_to_i8 = vector.bitcast %arg1 : vector<8x1xi32> to vector<8x4xi8>
  // CHECK-NEXT: vector.bitcast %{{.*}} : vector<8x1xi32> to vector<8x1xf32>
  %i32_to_f32 = vector.bitcast %arg1 : vector<8x1xi32> to vector<8x1xf32>

  // Casts from i8.
  // CHECK-NEXT: vector.bitcast %{{.*}} : vector<16x1x8xi8> to vector<16x1x2xi32>
  %i8_to_i32 = vector.bitcast %arg2 : vector<16x1x8xi8> to vector<16x1x2xi32>
  // CHECK-NEXT: vector.bitcast %{{.*}} : vector<16x1x8xi8> to vector<16x1x4xi16>
  %i8_to_i16 = vector.bitcast %arg2 : vector<16x1x8xi8> to vector<16x1x4xi16>
  // CHECK-NEXT: vector.bitcast %{{.*}} : vector<16x1x8xi8> to vector<16x1x1xindex>
  %i8_to_index = vector.bitcast %arg2 : vector<16x1x8xi8> to vector<16x1x1xindex>

  // Cast from index.
  // CHECK-NEXT: vector.bitcast %{{.*}} : vector<8x2x1xindex> to vector<8x2x2xf32>
  %index_to_f32 = vector.bitcast %arg3 : vector<8x2x1xindex> to vector<8x2x2xf32>

  // 0-d cast.
  // CHECK: vector.bitcast %{{.*}} : vector<f32> to vector<i32>
  %rank0 = vector.bitcast %arg4 : vector<f32> to vector<i32>

  return %f32_to_f16, %f32_to_i8, %i32_to_i8, %i32_to_f32, %i8_to_i32,
         %i8_to_i16, %i8_to_index, %index_to_f32, %rank0
      : vector<5x1x3x4xf16>, vector<5x1x3x8xi8>, vector<8x4xi8>,
        vector<8x1xf32>, vector<16x1x2xi32>, vector<16x1x4xi16>,
        vector<16x1x1xindex>, vector<8x2x2xf32>, vector<i32>
}
479
// CHECK-LABEL: @vector_fma
// vector.fma on a 1-D and on a 2-D vector, using the same value for all three
// operands; the results are not used.
func.func @vector_fma(%a: vector<8xf32>, %b: vector<8x4xf32>) {
  // CHECK: vector.fma %{{.*}} : vector<8xf32>
  %fma_1d = vector.fma %a, %a, %a : vector<8xf32>
  // CHECK: vector.fma %{{.*}} : vector<8x4xf32>
  %fma_2d = vector.fma %b, %b, %b : vector<8x4xf32>
  return
}
488
// CHECK-LABEL: @reduce_fp
// Floating-point reductions: add and mul are exercised without and with the
// extra scalar operand %arg1; minf and maxf without it. Only the maxf result
// is returned.
func.func @reduce_fp(%arg0: vector<16xf32>, %arg1: f32) -> f32 {
  // CHECK:    vector.reduction <add>, %{{.*}} : vector<16xf32> into f32
  %sum = vector.reduction <add>, %arg0 : vector<16xf32> into f32
  // CHECK:    vector.reduction <add>, %{{.*}}, %{{.*}} : vector<16xf32> into f32
  %sum_acc = vector.reduction <add>, %arg0, %arg1 : vector<16xf32> into f32
  // CHECK:    vector.reduction <mul>, %{{.*}} : vector<16xf32> into f32
  %prod = vector.reduction <mul>, %arg0 : vector<16xf32> into f32
  // CHECK:    vector.reduction <mul>, %{{.*}}, %{{.*}} : vector<16xf32> into f32
  %prod_acc = vector.reduction <mul>, %arg0, %arg1 : vector<16xf32> into f32
  // CHECK:    vector.reduction <minf>, %{{.*}} : vector<16xf32> into f32
  %min = vector.reduction <minf>, %arg0 : vector<16xf32> into f32
  // CHECK:    %[[X:.*]] = vector.reduction <maxf>, %{{.*}} : vector<16xf32> into f32
  %max = vector.reduction <maxf>, %arg0 : vector<16xf32> into f32
  // CHECK:    return %[[X]] : f32
  return %max : f32
}
506
// CHECK-LABEL: @reduce_int
// Integer reductions over every kind listed below; only the xor result is
// returned.
func.func @reduce_int(%arg0: vector<16xi32>) -> i32 {
  // Arithmetic kinds.
  // CHECK:    vector.reduction <add>, %{{.*}} : vector<16xi32> into i32
  %sum = vector.reduction <add>, %arg0 : vector<16xi32> into i32
  // CHECK:    vector.reduction <mul>, %{{.*}} : vector<16xi32> into i32
  %prod = vector.reduction <mul>, %arg0 : vector<16xi32> into i32
  // Min/max kinds, in their ui and si spellings.
  // CHECK:    vector.reduction <minui>, %{{.*}} : vector<16xi32> into i32
  %min_ui = vector.reduction <minui>, %arg0 : vector<16xi32> into i32
  // CHECK:    vector.reduction <minsi>, %{{.*}} : vector<16xi32> into i32
  %min_si = vector.reduction <minsi>, %arg0 : vector<16xi32> into i32
  // CHECK:    vector.reduction <maxui>, %{{.*}} : vector<16xi32> into i32
  %max_ui = vector.reduction <maxui>, %arg0 : vector<16xi32> into i32
  // CHECK:    vector.reduction <maxsi>, %{{.*}} : vector<16xi32> into i32
  %max_si = vector.reduction <maxsi>, %arg0 : vector<16xi32> into i32
  // Bitwise kinds.
  // CHECK:    vector.reduction <and>, %{{.*}} : vector<16xi32> into i32
  %all_and = vector.reduction <and>, %arg0 : vector<16xi32> into i32
  // CHECK:    vector.reduction <or>, %{{.*}} : vector<16xi32> into i32
  %all_or = vector.reduction <or>, %arg0 : vector<16xi32> into i32
  // CHECK:    %[[X:.*]] = vector.reduction <xor>, %{{.*}} : vector<16xi32> into i32
  %all_xor = vector.reduction <xor>, %arg0 : vector<16xi32> into i32
  // CHECK:    return %[[X]] : i32
  return %all_xor : i32
}
530
// CHECK-LABEL: @transpose_fp
// Transposes a 3x7 f32 vector to 7x3 with permutation [1, 0].
func.func @transpose_fp(%arg0: vector<3x7xf32>) -> vector<7x3xf32> {
  // CHECK: %[[X:.*]] = vector.transpose %{{.*}}, [1, 0] : vector<3x7xf32> to vector<7x3xf32>
  %transposed = vector.transpose %arg0, [1, 0]
      : vector<3x7xf32> to vector<7x3xf32>
  // CHECK: return %[[X]] : vector<7x3xf32>
  return %transposed : vector<7x3xf32>
}
538
// CHECK-LABEL: @transpose_int
// Transposes an 11x7x3x2 i32 vector to 2x11x7x3 with permutation
// [3, 0, 1, 2].
func.func @transpose_int(%arg0: vector<11x7x3x2xi32>)
    -> vector<2x11x7x3xi32> {
  // CHECK: %[[X:.*]] = vector.transpose %{{.*}}, [3, 0, 1, 2] : vector<11x7x3x2xi32> to vector<2x11x7x3xi32>
  %transposed = vector.transpose %arg0, [3, 0, 1, 2]
      : vector<11x7x3x2xi32> to vector<2x11x7x3xi32>
  // CHECK: return %[[X]] : vector<2x11x7x3xi32>
  return %transposed : vector<2x11x7x3xi32>
}
546
// CHECK-LABEL: @flat_transpose_fp
// Flat transpose of a 16-element f32 vector with rows = 4 and columns = 4.
// The pattern below expects `columns` to be printed before `rows`.
func.func @flat_transpose_fp(%arg0: vector<16xf32>) -> vector<16xf32> {
  // CHECK: %[[X:.*]] = vector.flat_transpose %{{.*}} {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
  %transposed = vector.flat_transpose %arg0 {rows = 4 : i32, columns = 4 : i32}
      : vector<16xf32> -> vector<16xf32>
  // CHECK: return %[[X]] : vector<16xf32>
  return %transposed : vector<16xf32>
}
554
// CHECK-LABEL: @flat_transpose_int
// Flat transpose of a 16-element i32 vector with rows = 2 and columns = 8.
// The pattern below expects `columns` to be printed before `rows`.
func.func @flat_transpose_int(%arg0: vector<16xi32>) -> vector<16xi32> {
  // CHECK: %[[X:.*]] = vector.flat_transpose %{{.*}} {columns = 8 : i32, rows = 2 : i32} : vector<16xi32> -> vector<16xi32>
  %transposed = vector.flat_transpose %arg0 {rows = 2 : i32, columns = 8 : i32}
      : vector<16xi32> -> vector<16xi32>
  // CHECK: return %[[X]] : vector<16xi32>
  return %transposed : vector<16xi32>
}
562
// Round-trip test for vector.load / vector.store of a 1-D vector<8xf32> on a
// memref<200x100xf32> (scalar element type) addressed by two index arguments.
// The loaded value is the one stored back. Note that the single `[%{{.*}}]` in
// the expected patterns spans the whole two-index list.
563// CHECK-LABEL: @vector_load_and_store_1d_scalar_memref
564func.func @vector_load_and_store_1d_scalar_memref(%memref : memref<200x100xf32>,
565                                             %i : index, %j : index) {
566  // CHECK: %[[ld:.*]] = vector.load %{{.*}}[%{{.*}}] : memref<200x100xf32>, vector<8xf32>
567  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<8xf32>
568  // CHECK: vector.store %[[ld]], %{{.*}}[%{{.*}}] : memref<200x100xf32>, vector<8xf32>
569  vector.store %0, %memref[%i, %j] : memref<200x100xf32>, vector<8xf32>
570  return
571}
572
// Round-trip test for vector.load / vector.store where the memref element type
// is itself vector<8xf32> and matches the loaded/stored vector type exactly.
// The single `[%{{.*}}]` in the expected patterns spans both indices.
573// CHECK-LABEL: @vector_load_and_store_1d_vector_memref
574func.func @vector_load_and_store_1d_vector_memref(%memref : memref<200x100xvector<8xf32>>,
575                                             %i : index, %j : index) {
576  // CHECK: %[[ld:.*]] = vector.load %{{.*}}[%{{.*}}] : memref<200x100xvector<8xf32>>, vector<8xf32>
577  %0 = vector.load %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
578  // CHECK: vector.store %[[ld]], %{{.*}}[%{{.*}}] : memref<200x100xvector<8xf32>>, vector<8xf32>
579  vector.store %0, %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
580  return
581}
582
// Round-trip test for vector.load / vector.store with a scalable vector type
// (vector<[4]xi32>) on a dynamically sized memref<?xi32> at constant index 0.
// Unlike the sibling tests, the stored value is the function argument %v, not
// the loaded value; the loaded value is returned instead.
583// CHECK-LABEL: @vector_load_and_store_scalable_vector_memref
584func.func @vector_load_and_store_scalable_vector_memref(%v: vector<[4]xi32>, %m: memref<?xi32>) -> vector<[4]xi32> {
585  %c0 = arith.constant 0 : index
586  // CHECK: vector.load {{.*}}: memref<?xi32>, vector<[4]xi32>
587  %0 = vector.load %m[%c0] : memref<?xi32>, vector<[4]xi32>
588  // CHECK: vector.store {{.*}}: memref<?xi32>, vector<[4]xi32>
589  vector.store %v, %m[%c0] : memref<?xi32>, vector<[4]xi32>
590  return %0 : vector<[4]xi32>
591}
592
// Round-trip test for vector.load / vector.store on a memref whose element type
// is vector<8xf32>. The label directive below anchors the following patterns to
// this function's output; without it they could match text belonging to a
// neighbouring function that prints identical ops.
// NOTE(review): despite "scalable" in the name, the body uses the fixed-size
// vector<8xf32> and duplicates @vector_load_and_store_1d_vector_memref --
// confirm whether a scalable type was intended here.
// CHECK-LABEL: @vector_load_and_store_1d_scalable_vector_memref
593func.func @vector_load_and_store_1d_scalable_vector_memref(%memref : memref<200x100xvector<8xf32>>,
594                                                      %i : index, %j : index) {
595  // CHECK: %[[ld:.*]] = vector.load %{{.*}}[%{{.*}}] : memref<200x100xvector<8xf32>>, vector<8xf32>
596  %0 = vector.load %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
597  // CHECK: vector.store %[[ld]], %{{.*}}[%{{.*}}] : memref<200x100xvector<8xf32>>, vector<8xf32>
598  vector.store %0, %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
599  return
600}
601
// Round-trip test for vector.load / vector.store where the vector<8xf32> is
// wider than the statically sized memref<7xf32> it accesses at index 0. The
// test only expects both ops to survive the parse/print round trip unchanged.
602// CHECK-LABEL: @vector_load_and_store_out_of_bounds
603func.func @vector_load_and_store_out_of_bounds(%memref : memref<7xf32>) {
604  %c0 = arith.constant 0 : index
605  // CHECK: %[[ld:.*]] = vector.load %{{.*}}[%{{.*}}] : memref<7xf32>, vector<8xf32>
606  %0 = vector.load %memref[%c0] : memref<7xf32>, vector<8xf32>
607  // CHECK: vector.store %[[ld]], %{{.*}}[%{{.*}}] : memref<7xf32>, vector<8xf32>
608  vector.store %0, %memref[%c0] : memref<7xf32>, vector<8xf32>
609  return
610}
611
// Round-trip test for vector.load / vector.store of a 2-D vector<4x8xf32> on a
// memref<200x100xf32> with scalar element type. The single `[%{{.*}}]` in the
// expected patterns spans both indices.
612// CHECK-LABEL: @vector_load_and_store_2d_scalar_memref
613func.func @vector_load_and_store_2d_scalar_memref(%memref : memref<200x100xf32>,
614                                             %i : index, %j : index) {
615  // CHECK: %[[ld:.*]] = vector.load %{{.*}}[%{{.*}}] : memref<200x100xf32>, vector<4x8xf32>
616  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32>
617  // CHECK: vector.store %[[ld]], %{{.*}}[%{{.*}}] : memref<200x100xf32>, vector<4x8xf32>
618  vector.store %0, %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32>
619  return
620}
621
// Round-trip test for vector.load / vector.store where the memref element type
// is the 2-D vector<4x8xf32> and matches the loaded/stored vector type exactly.
// The single `[%{{.*}}]` in the expected patterns spans both indices.
622// CHECK-LABEL: @vector_load_and_store_2d_vector_memref
623func.func @vector_load_and_store_2d_vector_memref(%memref : memref<200x100xvector<4x8xf32>>,
624                                             %i : index, %j : index) {
625  // CHECK: %[[ld:.*]] = vector.load %{{.*}}[%{{.*}}] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
626  %0 = vector.load %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
627  // CHECK: vector.store %[[ld]], %{{.*}}[%{{.*}}] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
628  vector.store %0, %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
629  return
630}
631
// Round-trip test for vector.maskedload / vector.maskedstore on a 1-D
// memref<?xf32>: operands are the base with one index, a vector<16xi1> mask and
// (for the load) a pass-through vector. The loaded value is stored back.
632// CHECK-LABEL: @masked_load_and_store
633func.func @masked_load_and_store(%base: memref<?xf32>, %mask: vector<16xi1>, %passthru: vector<16xf32>) {
634  %c0 = arith.constant 0 : index
635  // CHECK: %[[X:.*]] = vector.maskedload %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
636  %0 = vector.maskedload %base[%c0], %mask, %passthru : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
637  // CHECK: vector.maskedstore %{{.*}}[%{{.*}}], %{{.*}}, %[[X]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
638  vector.maskedstore %base[%c0], %mask, %0 : memref<?xf32>, vector<16xi1>, vector<16xf32>
639  return
640}
641
// Round-trip test for vector.maskedload / vector.maskedstore on a 2-D
// memref<?x?xf32>: same as the 1-D variant but the base takes two indices,
// which the expected patterns match individually.
642// CHECK-LABEL: @masked_load_and_store2d
643func.func @masked_load_and_store2d(%base: memref<?x?xf32>, %mask: vector<16xi1>, %passthru: vector<16xf32>) {
644  %c0 = arith.constant 0 : index
645  // CHECK: %[[X:.*]] = vector.maskedload %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
646  %0 = vector.maskedload %base[%c0, %c0], %mask, %passthru : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
647  // CHECK: vector.maskedstore %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}, %[[X]] : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
648  vector.maskedstore %base[%c0, %c0], %mask, %0 : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
649  return
650}
651
// Round-trip test for vector.gather / vector.scatter on a 1-D memref<?xf32>:
// operands are the base with one index, a vector<16xi32> index vector, a
// vector<16xi1> mask and (for the gather) a pass-through vector. The gathered
// value is scattered back. The input writes the two bracket groups adjacent
// (`[%c0][%v]`); the expected output has a space between them.
652// CHECK-LABEL: @gather_and_scatter
653func.func @gather_and_scatter(%base: memref<?xf32>, %v: vector<16xi32>, %mask: vector<16xi1>, %pass_thru: vector<16xf32>) {
654  %c0 = arith.constant 0 : index
655  // CHECK: %[[X:.*]] = vector.gather %{{.*}}[%{{.*}}] [%{{.*}}], %{{.*}}, %{{.*}} : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
656  %0 = vector.gather %base[%c0][%v], %mask, %pass_thru : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
657  // CHECK: vector.scatter %{{.*}}[%{{.*}}] [%{{.*}}], %{{.*}}, %[[X]] : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
658  vector.scatter %base[%c0][%v], %mask, %0 : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
659  return
660}
661
// Round-trip test for vector.gather / vector.scatter on a 2-D memref<?x?xf32>:
// the base takes two indices, followed by a vector<16xi32> index vector, a
// vector<16xi1> mask and (for the gather) a pass-through vector. The gathered
// value is scattered back. Both expected patterns spell out the two base
// indices so that a printer dropping one of them would be caught.
662// CHECK-LABEL: @gather_and_scatter2d
663func.func @gather_and_scatter2d(%base: memref<?x?xf32>, %v: vector<16xi32>, %mask: vector<16xi1>, %pass_thru: vector<16xf32>) {
664  %c0 = arith.constant 0 : index
665  // CHECK: %[[X:.*]] = vector.gather %{{.*}}[%{{.*}}, %{{.*}}] [%{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
666  %0 = vector.gather %base[%c0, %c0][%v], %mask, %pass_thru : memref<?x?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
667  // CHECK: vector.scatter %{{.*}}[%{{.*}}, %{{.*}}] [%{{.*}}], %{{.*}}, %[[X]] : memref<?x?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
668  vector.scatter %base[%c0, %c0][%v], %mask, %0 : memref<?x?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
669  return
670}
671
// Round-trip test for vector.expandload / vector.compressstore on a 1-D
// memref<?xf32>: operands are the base with one index, a vector<16xi1> mask and
// (for the load) a pass-through vector. The loaded value is stored back.
672// CHECK-LABEL: @expand_and_compress
673func.func @expand_and_compress(%base: memref<?xf32>, %mask: vector<16xi1>, %pass_thru: vector<16xf32>) {
674  %c0 = arith.constant 0 : index
675  // CHECK: %[[X:.*]] = vector.expandload %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
676  %0 = vector.expandload %base[%c0], %mask, %pass_thru : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
677  // CHECK: vector.compressstore %{{.*}}[%{{.*}}], %{{.*}}, %[[X]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
678  vector.compressstore %base[%c0], %mask, %0 : memref<?xf32>, vector<16xi1>, vector<16xf32>
679  return
680}
681
// Round-trip test for vector.expandload / vector.compressstore on a 2-D
// memref<?x?xf32>: same as the 1-D variant but the base takes two indices,
// which the expected patterns match individually.
682// CHECK-LABEL: @expand_and_compress2d
683func.func @expand_and_compress2d(%base: memref<?x?xf32>, %mask: vector<16xi1>, %pass_thru: vector<16xf32>) {
684  %c0 = arith.constant 0 : index
685  // CHECK: %[[X:.*]] = vector.expandload %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
686  %0 = vector.expandload %base[%c0, %c0], %mask, %pass_thru : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
687  // CHECK: vector.compressstore %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}, %[[X]] : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
688  vector.compressstore %base[%c0, %c0], %mask, %0 : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
689  return
690}
691
// Round-trip test for vector.extract_map / vector.insert_map in 1-D and 2-D:
//   - 1-D: vector<32xf32> <-> vector<2xf32>, selected by one index.
//   - 2-D: vector<16x32xf32> <-> vector<4x2xf32>, selected by two indices.
// Each extracted piece is inserted back into the vector it came from, and both
// re-assembled vectors are returned.
692// CHECK-LABEL: @extract_insert_map
693func.func @extract_insert_map(%v: vector<32xf32>, %v2: vector<16x32xf32>,
694  %id0 : index, %id1 : index) -> (vector<32xf32>, vector<16x32xf32>) {
695  // CHECK: %[[V:.*]] = vector.extract_map %{{.*}}[%{{.*}}] : vector<32xf32> to vector<2xf32>
696  %vd = vector.extract_map %v[%id0] : vector<32xf32> to vector<2xf32>
697  // CHECK: %[[V1:.*]] = vector.extract_map %{{.*}}[%{{.*}}, %{{.*}}] : vector<16x32xf32> to vector<4x2xf32>
698  %vd2 = vector.extract_map %v2[%id0, %id1] : vector<16x32xf32> to vector<4x2xf32>
699  // CHECK: %[[R:.*]] = vector.insert_map %[[V]], %{{.*}}[%{{.*}}] : vector<2xf32> into vector<32xf32>
700  %r = vector.insert_map %vd, %v[%id0] : vector<2xf32> into vector<32xf32>
701  // CHECK: %[[R1:.*]] = vector.insert_map %[[V1]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x2xf32> into vector<16x32xf32>
702  %r2 = vector.insert_map %vd2, %v2[%id0, %id1] : vector<4x2xf32> into vector<16x32xf32>
703  // CHECK: return %[[R]], %[[R1]] : vector<32xf32>, vector<16x32xf32>
704  return %r, %r2 : vector<32xf32>, vector<16x32xf32>
705}
706
// Round-trip test for vector.multi_reduction <add> with an accumulator operand,
// in two chained steps:
//   1. reduce dims [1, 3] of vector<4x8x16x32xf32> into vector<4x16xf32>;
//   2. reduce dims [0, 1] of that result all the way down to a scalar f32.
// The input splits each op across two lines; the expected output has it on one.
707// CHECK-LABEL: @multi_reduction
708func.func @multi_reduction(%0: vector<4x8x16x32xf32>, %acc0: vector<4x16xf32>,
709                           %acc1: f32) -> f32 {
710  // CHECK: vector.multi_reduction <add>, %{{.*}}, %{{.*}} [1, 3] : vector<4x8x16x32xf32> to vector<4x16xf32>
711  %1 = vector.multi_reduction <add>, %0, %acc0 [1, 3] :
712    vector<4x8x16x32xf32> to vector<4x16xf32>
713  // CHECK: vector.multi_reduction <add>, %{{.*}}, %{{.*}} [0, 1] : vector<4x16xf32> to f32
714  %2 = vector.multi_reduction <add>, %1, %acc1 [0, 1] :
715    vector<4x16xf32> to f32
716  return %2 : f32
717}
718
// Round-trip test for vector.vscale, an operand-less op whose result is
// returned as an `index`.
719// CHECK-LABEL: @get_vector_scale
720func.func @get_vector_scale() -> index {
721  // CHECK: vector.vscale
722  %0 = vector.vscale
723  return %0 : index
724}
725
// Round-trip test for vector.scan <add> along dimension 1 of a
// vector<4x8x16x32xf32>, with an initial value of type vector<4x16x32xf32>
// (the source shape without the scanned dimension). The op has two results;
// only the first one is returned.
// The expected form lists `inclusive` before `reduction_dim`, mirroring the
// sorted attribute order the flat_transpose tests in this file rely on.
726// CHECK-LABEL: @vector_scan
727func.func @vector_scan(%0: vector<4x8x16x32xf32>) -> vector<4x8x16x32xf32> {
728  %1 = arith.constant dense<0.0> : vector<4x16x32xf32>
  // CHECK: %[[R:.*]]:2 = vector.scan <add>, %{{.*}}, %{{.*}} {inclusive = true, reduction_dim = 1 : i64} : vector<4x8x16x32xf32>, vector<4x16x32xf32>
729  %2:2 = vector.scan <add>, %0, %1 {reduction_dim = 1 : i64, inclusive = true} :
730    vector<4x8x16x32xf32>, vector<4x16x32xf32>
  // CHECK: return %[[R]]#0 : vector<4x8x16x32xf32>
731  return %2#0 : vector<4x8x16x32xf32>
732}
733
// Round-trip test for vector.splat of an f32 function argument, written once in
// the custom assembly form and once in the generic quoted form
// ("vector.splat"(...)). Both are expected to print in the custom form.
734// CHECK-LABEL: func @test_splat_op
735// CHECK-SAME: [[S:%arg[0-9]+]]: f32
736func.func @test_splat_op(%s : f32) {
737  // CHECK: vector.splat [[S]] : vector<8xf32>
738  %v = vector.splat %s : vector<8xf32>
739
740  // CHECK: vector.splat [[S]] : vector<4xf32>
741  %u = "vector.splat"(%s) : (f32) -> vector<4xf32>
742  return
743}
744
// Round-trip test for vector.splat producing a 0-D vector (vector<f32>) from an
// f32 scalar.
745// CHECK-LABEL: func @vector_splat_0d(
746func.func @vector_splat_0d(%a: f32) -> vector<f32> {
747  // CHECK: vector.splat %{{.*}} : vector<f32>
748  %0 = vector.splat %a : vector<f32>
749  return %0 : vector<f32>
750}
751
// Round-trip test for the minimal form of vector.warp_execute_on_lane_0: a lane
// id operand, the bracketed constant [32], an empty region, and no args or
// results. The -NEXT directives require the op, its closing brace and the
// return to appear on consecutive output lines, i.e. nothing is printed inside
// the empty region.
752// CHECK-LABEL:   func @warp_execute_on_lane_0(
753func.func @warp_execute_on_lane_0(%laneid: index) {
754//  CHECK-NEXT:     vector.warp_execute_on_lane_0(%{{.*}})[32] {
755  vector.warp_execute_on_lane_0(%laneid)[32] {
756//  CHECK-NEXT:     }
757  }
758//  CHECK-NEXT:     return
759  return
760}
761
// Round-trip test for vector.warp_execute_on_lane_0 with an `args(...)` operand
// and a result. Outside the region the operand and result are vector<4xi32>;
// inside, the block argument and the yielded value are vector<128xi32>
// (128 = 4 * 32; the bracketed 32 is presumably the warp size -- confirm
// against the op definition). The input splits the op header over two lines and
// spells out the ^bb0 block header.
762// CHECK-LABEL:   func @warp_operand_result(
763func.func @warp_operand_result(%laneid: index, %v0 : vector<4xi32>) -> (vector<4xi32>) {
764//  CHECK-NEXT:     %{{.*}} = vector.warp_execute_on_lane_0(%{{.*}})[32] args(%{{.*}} : vector<4xi32>) -> (vector<4xi32>) {
765  %2 = vector.warp_execute_on_lane_0(%laneid)[32]
766  args(%v0 : vector<4xi32>) -> (vector<4xi32>) {
767   ^bb0(%arg0 : vector<128xi32>) :
768    %0 = arith.constant dense<2>: vector<128xi32>
769    %1 = arith.addi %arg0, %0 : vector<128xi32>
770//       CHECK:       vector.yield %{{.*}} : vector<128xi32>
771    vector.yield %1 : vector<128xi32>
772//  CHECK-NEXT:     }
773  }
774  return %2 : vector<4xi32>
775}
776
777
778