// RUN: mlir-opt %s -test-vector-transfer-full-partial-split -split-input-file | FileCheck %s
// RUN: mlir-opt %s -test-vector-transfer-full-partial-split=use-memref-copy -split-input-file | FileCheck %s --check-prefix=LINALG

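// These tests exercise the full/partial split of vector transfer ops: as the
// CHECK/LINALG patterns below encode, each transfer is rewritten into an
// scf.if that selects either the original memref (fast, in-bounds path) or a
// stack-allocated full-tile buffer (slow path), followed by an unconditional
// in_bounds transfer on the selected buffer. The LINALG prefix covers the
// use-memref-copy variant, which materializes the slow path with linalg.fill
// and memref.copy instead of a vector transfer plus a vector.type_cast store.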
// CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>

// LINALG-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)>
// LINALG-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)>
// LINALG-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// LINALG-DAG: #[[$map_2d_stride_8x1:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
// LINALG-DAG: #[[$bounds_map_4:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
// LINALG-DAG: #[[$bounds_map_8:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>

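// Splits a transfer_read from a memref with one dynamic dimension. Per the
// patterns below, the rewrite compares %i + 4 against dim(%A, 0) and %j + 8
// against the static size 8, and the final transfer_read is emitted with
// in_bounds = [true, true] on the scf.if results.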
// CHECK-LABEL: split_vector_transfer_read_2d(
//  CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref
//  CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index
//  CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index

// LINALG-LABEL: split_vector_transfer_read_2d(
//  LINALG-SAME: %[[A:[a-zA-Z0-9]*]]: memref
//  LINALG-SAME: %[[i:[a-zA-Z0-9]*]]: index
//  LINALG-SAME: %[[j:[a-zA-Z0-9]*]]: index
func.func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: index) -> vector<4x8xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  //  CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
  //  CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // alloca for boundary full tile
  //      CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  //      CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  //      CHECK: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
  //      CHECK: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[d0]] : index
  // %j + 8 <= dim(%A, 1)
  //      CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  //      CHECK: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conditions true?
  //      CHECK: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  //      CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32>, index, index) {
  //               inBounds, just yield %A
  //      CHECK:   scf.yield %[[A]], %[[i]], %[[j]] : memref<?x8xf32>, index, index
  //      CHECK: } else {
  //               slow path, fill tmp alloc and yield a memref_casted version of it
  //      CHECK:   %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
  // CHECK-SAME:     memref<?x8xf32>, vector<4x8xf32>
  //      CHECK:   %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
  // CHECK-SAME:     memref<4x8xf32> to memref<vector<4x8xf32>>
  //      CHECK:   store %[[slow]], %[[cast_alloc]][] : memref<vector<4x8xf32>>
  //      CHECK:   %[[yielded:.*]] = memref.cast %[[alloc]] :
  // CHECK-SAME:     memref<4x8xf32> to memref<?x8xf32>
  //      CHECK:   scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // CHECK-SAME:     memref<?x8xf32>, index, index
  //      CHECK: }
  //      CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
  // CHECK-SAME:   {in_bounds = [true, true]} : memref<?x8xf32>, vector<4x8xf32>

  //  LINALG-DAG: %[[c0:.*]] = arith.constant 0 : index
  //  LINALG-DAG: %[[c4:.*]] = arith.constant 4 : index
  //  LINALG-DAG: %[[c8:.*]] = arith.constant 8 : index
  // alloca for boundary full tile
  //      LINALG: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  //      LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  //      LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
  //      LINALG: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[d0]] : index
  // %j + 8 <= dim(%A, 1)
  //      LINALG: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  //      LINALG: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conditions true?
  //      LINALG: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  //      LINALG: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32>, index, index) {
  //               inBounds, just yield %A
  //      LINALG:   scf.yield %[[A]], %[[i]], %[[j]] : memref<?x8xf32>, index, index
  //      LINALG: } else {
  //               slow path, fill tmp alloc and yield a memref_casted version of it
  //      LINALG:   linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
  //      LINALG:   %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
  //      LINALG:   %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]])
  //      LINALG:   %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
  //      LINALG:   %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
  // LINALG-SAME:     memref<?x8xf32> to memref<?x?xf32, #[[$map_2d_stride_8x1]]>
  //      LINALG:   %[[alloc_view:.*]] = memref.subview %[[alloc]][0, 0] [%[[sv0]], %[[sv1]]] [1, 1]
  //      LINALG:   memref.copy %[[sv]], %[[alloc_view]] : memref<?x?xf32, #[[$map_2d_stride_8x1]]> to memref<?x?xf32, #{{.*}}>
  //      LINALG:   %[[yielded:.*]] = memref.cast %[[alloc]] :
  // LINALG-SAME:     memref<4x8xf32> to memref<?x8xf32>
  //      LINALG:   scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // LINALG-SAME:     memref<?x8xf32>, index, index
  //      LINALG: }
  //      LINALG: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
  // LINALG-SAME:   {in_bounds = [true, true]} : memref<?x8xf32>, vector<4x8xf32>
  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<?x8xf32>, vector<4x8xf32>

  // LINALG: return %[[res]] : vector<4x8xf32>
  return %1: vector<4x8xf32>
}

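// Same split on a statically-shaped strided memref: both bound checks compare
// against constants, and the fast path must memref.cast %A because the source
// type is not cast-compatible with the ?x8 type yielded by the scf.if.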
// CHECK-LABEL: split_vector_transfer_read_strided_2d(
//  CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref
//  CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index
//  CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index

// LINALG-LABEL: split_vector_transfer_read_strided_2d(
//  LINALG-SAME: %[[A:[a-zA-Z0-9]*]]: memref
//  LINALG-SAME: %[[i:[a-zA-Z0-9]*]]: index
//  LINALG-SAME: %[[j:[a-zA-Z0-9]*]]: index
func.func @split_vector_transfer_read_strided_2d(
    %A: memref<7x8xf32, offset:?, strides:[?, 1]>,
    %i: index, %j: index) -> vector<4x8xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  //  CHECK-DAG: %[[c7:.*]] = arith.constant 7 : index
  //  CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
  //  CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // alloca for boundary full tile
  //      CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  //      CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  //      CHECK: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[c7]] : index
  // %j + 8 <= dim(%A, 1)
  //      CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  //      CHECK: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conditions true?
  //      CHECK: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  //      CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index) {
  //               inBounds but not cast-compatible: yield a memref_casted form of %A
  //      CHECK:   %[[casted:.*]] = memref.cast %arg0 :
  // CHECK-SAME:     memref<7x8xf32, #[[$map_2d_stride_1]]> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  //      CHECK:   scf.yield %[[casted]], %[[i]], %[[j]] :
  // CHECK-SAME:     memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  //      CHECK: } else {
  //               slow path, fill tmp alloc and yield a memref_casted version of it
  //      CHECK:   %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
  // CHECK-SAME:     memref<7x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>
  //      CHECK:   %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
  // CHECK-SAME:     memref<4x8xf32> to memref<vector<4x8xf32>>
  //      CHECK:   store %[[slow]], %[[cast_alloc]][] :
  // CHECK-SAME:     memref<vector<4x8xf32>>
  //      CHECK:   %[[yielded:.*]] = memref.cast %[[alloc]] :
  // CHECK-SAME:     memref<4x8xf32> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  //      CHECK:   scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // CHECK-SAME:     memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  //      CHECK: }
  //      CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true]} :
  // CHECK-SAME:   memref<?x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>

  //  LINALG-DAG: %[[c0:.*]] = arith.constant 0 : index
  //  LINALG-DAG: %[[c4:.*]] = arith.constant 4 : index
  //  LINALG-DAG: %[[c7:.*]] = arith.constant 7 : index
  //  LINALG-DAG: %[[c8:.*]] = arith.constant 8 : index
  // alloca for boundary full tile
  //      LINALG: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  //      LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  //      LINALG: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[c7]] : index
  // %j + 8 <= dim(%A, 1)
  //      LINALG: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  //      LINALG: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conditions true?
  //      LINALG: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  //      LINALG: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index) {
  //               inBounds but not cast-compatible: yield a memref_casted form of %A
  //      LINALG:   %[[casted:.*]] = memref.cast %arg0 :
  // LINALG-SAME:     memref<7x8xf32, #[[$map_2d_stride_1]]> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  //      LINALG:   scf.yield %[[casted]], %[[i]], %[[j]] :
  // LINALG-SAME:     memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  //      LINALG: } else {
  //               slow path, fill tmp alloc and yield a memref_casted version of it
  //      LINALG:   linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
  //      LINALG:   %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]])
  //      LINALG:   %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
  //      LINALG:   %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
  // LINALG-SAME:     memref<7x8xf32, #[[$map_2d_stride_1]]> to memref<?x?xf32, #[[$map_2d_stride_1]]>
  //      LINALG:   %[[alloc_view:.*]] = memref.subview %[[alloc]][0, 0] [%[[sv0]], %[[sv1]]] [1, 1]
  //      LINALG:   memref.copy %[[sv]], %[[alloc_view]] : memref<?x?xf32, #[[$map_2d_stride_1]]> to memref<?x?xf32, #{{.*}}>
  //      LINALG:   %[[yielded:.*]] = memref.cast %[[alloc]] :
  // LINALG-SAME:     memref<4x8xf32> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  //      LINALG:   scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // LINALG-SAME:     memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  //      LINALG: }
  //      LINALG: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true]} :
  // LINALG-SAME:   memref<?x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>
  %1 = vector.transfer_read %A[%i, %j], %f0 :
    memref<7x8xf32, offset:?, strides:[?, 1]>, vector<4x8xf32>

  // CHECK: return %[[res]] : vector<4x8xf32>
  return %1 : vector<4x8xf32>
}

// -----

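// Write variant of the split: the in_bounds transfer_write targets the buffer
// selected by the scf.if, and a second scf.if on the negated condition copies
// the temporary back when the slow path was taken (via a transfer_write of the
// reloaded vector, or a memref.copy of a subview under LINALG).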
func.func @split_vector_transfer_write_2d(%V: vector<4x8xf32>, %A: memref<?x8xf32>, %i: index, %j: index) {
  vector.transfer_write %V, %A[%i, %j] :
    vector<4x8xf32>, memref<?x8xf32>
  return
}

// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK:     func @split_vector_transfer_write_2d(
// CHECK-SAME:                                         %[[VEC:.*]]: vector<4x8xf32>,
// CHECK-SAME:                                         %[[DEST:.*]]: memref<?x8xf32>,
// CHECK-SAME:                                         %[[I:.*]]: index,
// CHECK-SAME:                                         %[[J:.*]]: index) {
// CHECK-DAG:       %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG:       %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[CT:.*]] = arith.constant true
// CHECK:           %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// CHECK:           %[[VAL_8:.*]] = affine.apply #[[MAP0]]()[%[[I]]]
// CHECK:           %[[DIM0:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
// CHECK:           %[[DIM0_IN:.*]] = arith.cmpi sle, %[[VAL_8]], %[[DIM0]] : index
// CHECK:           %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[J]]]
// CHECK:           %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// CHECK:           %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// CHECK:           %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]] ->
// CHECK-SAME:          (memref<?x8xf32>, index, index) {
// CHECK:             scf.yield %[[DEST]], %[[I]], %[[J]] : memref<?x8xf32>, index, index
// CHECK:           } else {
// CHECK:             %[[VAL_15:.*]] = memref.cast %[[TEMP]]
// CHECK-SAME:            : memref<4x8xf32> to memref<?x8xf32>
// CHECK:             scf.yield %[[VAL_15]], %[[C0]], %[[C0]]
// CHECK-SAME:            : memref<?x8xf32>, index, index
// CHECK:           }
// CHECK:           vector.transfer_write %[[VEC]],
// CHECK-SAME:           %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// CHECK-SAME:           {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
// CHECK:           %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK:           scf.if %[[OUT_BOUNDS]] {
// CHECK:             %[[CASTED:.*]] = vector.type_cast %[[TEMP]]
// CHECK-SAME:            : memref<4x8xf32> to memref<vector<4x8xf32>>
// CHECK:             %[[RESULT_COPY:.*]] = memref.load %[[CASTED]][]
// CHECK-SAME:            : memref<vector<4x8xf32>>
// CHECK:             vector.transfer_write %[[RESULT_COPY]],
// CHECK-SAME:            %[[DEST]][%[[I]], %[[J]]]
// CHECK-SAME:            : vector<4x8xf32>, memref<?x8xf32>
// CHECK:           }
// CHECK:           return
// CHECK:         }

// LINALG-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 4)>
// LINALG-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
// LINALG-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
// LINALG-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>
// LINALG-DAG: #[[MAP4:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
// LINALG:     func @split_vector_transfer_write_2d(
// LINALG-SAME:                                         %[[VEC:.*]]: vector<4x8xf32>,
// LINALG-SAME:                                         %[[DEST:.*]]: memref<?x8xf32>,
// LINALG-SAME:                                         %[[I:.*]]: index,
// LINALG-SAME:                                         %[[J:.*]]: index) {
// LINALG-DAG:       %[[CT:.*]] = arith.constant true
// LINALG-DAG:       %[[C0:.*]] = arith.constant 0 : index
// LINALG-DAG:       %[[C4:.*]] = arith.constant 4 : index
// LINALG-DAG:       %[[C8:.*]] = arith.constant 8 : index
// LINALG:           %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// LINALG:           %[[IDX0:.*]] = affine.apply #[[MAP0]]()[%[[I]]]
// LINALG:           %[[DIM0:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
// LINALG:           %[[DIM0_IN:.*]] = arith.cmpi sle, %[[IDX0]], %[[DIM0]] : index
// LINALG:           %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[J]]]
// LINALG:           %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// LINALG:           %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// LINALG:           %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
// LINALG-SAME:          -> (memref<?x8xf32>, index, index) {
// LINALG:             scf.yield %[[DEST]], %[[I]], %[[J]] : memref<?x8xf32>, index, index
// LINALG:           } else {
// LINALG:             %[[VAL_16:.*]] = memref.cast %[[TEMP]] : memref<4x8xf32> to memref<?x8xf32>
// LINALG:             scf.yield %[[VAL_16]], %[[C0]], %[[C0]] : memref<?x8xf32>, index, index
// LINALG:           }
// LINALG:           vector.transfer_write %[[VEC]],
// LINALG-SAME:          %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// LINALG-SAME:          {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
// LINALG:           %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// LINALG:           scf.if %[[OUT_BOUNDS]] {
// LINALG:             %[[VAL_19:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
// LINALG-DAG:         %[[VAL_20:.*]] = affine.min #[[MAP2]](%[[VAL_19]], %[[I]], %[[C4]])
// LINALG-DAG:         %[[VAL_21:.*]] = affine.min #[[MAP3]](%[[C8]], %[[J]], %[[C8]])
// LINALG:             %[[VAL_22:.*]] = memref.subview %[[TEMP]]
// LINALG-SAME:            [%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]]
// LINALG-SAME:            [1, 1] : memref<4x8xf32> to memref<?x?xf32, #[[MAP4]]>
// LINALG:             %[[DEST_VIEW:.*]] = memref.subview %[[DEST]][0, 0] [%[[VAL_20]], %[[VAL_21]]] [1, 1]
// LINALG:             memref.copy %[[VAL_22]], %[[DEST_VIEW]]
// LINALG-SAME:            : memref<?x?xf32, #[[MAP4]]> to memref<?x?xf32, #{{.*}}>
// LINALG:           }
// LINALG:           return
// LINALG:         }

// -----

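// Same write split on a statically-shaped strided memref; the copy-back in the
// out-of-bounds branch targets the strided 7x8 destination.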
func.func @split_vector_transfer_write_strided_2d(
    %V: vector<4x8xf32>, %A: memref<7x8xf32, offset:?, strides:[?, 1]>,
    %i: index, %j: index) {
  vector.transfer_write %V, %A[%i, %j] :
    vector<4x8xf32>, memref<7x8xf32, offset:?, strides:[?, 1]>
  return
}

// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK:   func @split_vector_transfer_write_strided_2d(
// CHECK-SAME:                                                 %[[VEC:.*]]: vector<4x8xf32>,
// CHECK-SAME:                                                 %[[DEST:.*]]: memref<7x8xf32, #[[MAP0]]>,
// CHECK-SAME:                                                 %[[I:.*]]: index,
// CHECK-SAME:                                                 %[[J:.*]]: index) {
// CHECK-DAG:       %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG:       %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG:       %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[CT:.*]] = arith.constant true
// CHECK:           %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// CHECK:           %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[I]]]
// CHECK:           %[[DIM0_IN:.*]] = arith.cmpi sle, %[[DIM0]], %[[C7]] : index
// CHECK:           %[[DIM1:.*]] = affine.apply #[[MAP2]]()[%[[J]]]
// CHECK:           %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// CHECK:           %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// CHECK:           %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
// CHECK-SAME:          -> (memref<?x8xf32, #[[MAP0]]>, index, index) {
// CHECK:             %[[VAL_15:.*]] = memref.cast %[[DEST]]
// CHECK-SAME:            : memref<7x8xf32, #[[MAP0]]> to memref<?x8xf32, #[[MAP0]]>
// CHECK:             scf.yield %[[VAL_15]], %[[I]], %[[J]]
// CHECK-SAME:            : memref<?x8xf32, #[[MAP0]]>, index, index
// CHECK:           } else {
// CHECK:             %[[VAL_16:.*]] = memref.cast %[[TEMP]]
// CHECK-SAME:            : memref<4x8xf32> to memref<?x8xf32, #[[MAP0]]>
// CHECK:             scf.yield %[[VAL_16]], %[[C0]], %[[C0]]
// CHECK-SAME:            : memref<?x8xf32, #[[MAP0]]>, index, index
// CHECK:           }
// CHECK:           vector.transfer_write %[[VEC]],
// CHECK-SAME:          %[[IN_BOUND_DEST:.*]]#0
// CHECK-SAME:          [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// CHECK-SAME:          {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32, #[[MAP0]]>
// CHECK:           %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK:           scf.if %[[OUT_BOUNDS]] {
// CHECK:             %[[VAL_19:.*]] = vector.type_cast %[[TEMP]]
// CHECK-SAME:            : memref<4x8xf32> to memref<vector<4x8xf32>>
// CHECK:             %[[VAL_20:.*]] = memref.load %[[VAL_19]][]
// CHECK-SAME:            : memref<vector<4x8xf32>>
// CHECK:             vector.transfer_write %[[VAL_20]], %[[DEST]][%[[I]], %[[J]]]
// CHECK-SAME:            : vector<4x8xf32>, memref<7x8xf32, #[[MAP0]]>
// CHECK:           }
// CHECK:           return
// CHECK:         }

// LINALG-DAG: #[[MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// LINALG-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 4)>
// LINALG-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (s0 + 8)>
// LINALG-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
// LINALG-DAG: #[[MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>
// LINALG-DAG: #[[MAP5:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
// LINALG:   func @split_vector_transfer_write_strided_2d(
// LINALG-SAME:                                                 %[[VEC:.*]]: vector<4x8xf32>,
// LINALG-SAME:                                                 %[[DEST:.*]]: memref<7x8xf32, #[[MAP0]]>,
// LINALG-SAME:                                                 %[[I:.*]]: index,
// LINALG-SAME:                                                 %[[J:.*]]: index) {
// LINALG-DAG:       %[[C0:.*]] = arith.constant 0 : index
// LINALG-DAG:       %[[CT:.*]] = arith.constant true
// LINALG-DAG:       %[[C7:.*]] = arith.constant 7 : index
// LINALG-DAG:       %[[C4:.*]] = arith.constant 4 : index
// LINALG-DAG:       %[[C8:.*]] = arith.constant 8 : index
// LINALG:           %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// LINALG:           %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[I]]]
// LINALG:           %[[DIM0_IN:.*]] = arith.cmpi sle, %[[DIM0]], %[[C7]] : index
// LINALG:           %[[DIM1:.*]] = affine.apply #[[MAP2]]()[%[[J]]]
// LINALG:           %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// LINALG:           %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// LINALG:           %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
// LINALG-SAME:          -> (memref<?x8xf32, #[[MAP0]]>, index, index) {
// LINALG:             %[[VAL_16:.*]] = memref.cast %[[DEST]]
// LINALG-SAME:            : memref<7x8xf32, #[[MAP0]]> to memref<?x8xf32, #[[MAP0]]>
// LINALG:             scf.yield %[[VAL_16]], %[[I]], %[[J]]
// LINALG-SAME:            : memref<?x8xf32, #[[MAP0]]>, index, index
// LINALG:           } else {
// LINALG:             %[[VAL_17:.*]] = memref.cast %[[TEMP]]
// LINALG-SAME:            : memref<4x8xf32> to memref<?x8xf32, #[[MAP0]]>
// LINALG:             scf.yield %[[VAL_17]], %[[C0]], %[[C0]]
// LINALG-SAME:            : memref<?x8xf32, #[[MAP0]]>, index, index
// LINALG:           }
// LINALG:           vector.transfer_write %[[VEC]],
// LINALG-SAME:          %[[IN_BOUND_DEST:.*]]#0
// LINALG-SAME:          [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// LINALG-SAME:          {in_bounds = [true, true]}
// LINALG-SAME:          : vector<4x8xf32>, memref<?x8xf32, #[[MAP0]]>
// LINALG:           %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// LINALG:           scf.if %[[OUT_BOUNDS]] {
// LINALG-DAG:         %[[VAL_20:.*]] = affine.min #[[MAP3]](%[[C7]], %[[I]], %[[C4]])
// LINALG-DAG:         %[[VAL_21:.*]] = affine.min #[[MAP4]](%[[C8]], %[[J]], %[[C8]])
// LINALG:             %[[VAL_22:.*]] = memref.subview %[[TEMP]]
// LINALG-SAME:            [%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]]
// LINALG-SAME:            [1, 1] : memref<4x8xf32> to memref<?x?xf32, #[[MAP5]]>
// LINALG:             %[[DEST_VIEW:.*]] = memref.subview %[[DEST]][0, 0] [%[[VAL_20]], %[[VAL_21]]] [1, 1]
// LINALG:             memref.copy %[[VAL_22]], %[[DEST_VIEW]]
// LINALG-SAME:            : memref<?x?xf32, #[[MAP5]]> to memref<?x?xf32, #[[MAP0]]>
// LINALG:           }
// LINALG:           return
// LINALG:         }

// -----

func.func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()

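// Ensure the temporary alloca is created inside the async.execute region
// rather than hoisted above it.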
// CHECK-LABEL: transfer_read_within_async_execute
func.func @transfer_read_within_async_execute(%A : memref<?x?xf32>) -> !async.token {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  // CHECK-NOT: alloca
  //     CHECK: async.execute
  //     CHECK:   alloca
  %token = async.execute {
    %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
    func.call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
    async.yield
  }
  return %token : !async.token
}

// -----

func.func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()

// Ensure that `alloca`s are inserted outside of loops even though loops are
// considered allocation scopes.
// CHECK-LABEL: transfer_read_within_scf_for
func.func @transfer_read_within_scf_for(%A : memref<?x?xf32>, %lb : index, %ub : index, %step : index) {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  // CHECK: alloca
  // CHECK: scf.for
  // CHECK-NOT: alloca
  scf.for %i = %lb to %ub step %step {
    %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
    func.call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
  }
  return
}