1 //===- VectorToSCF.cpp - Convert vector to SCF dialect ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements lowering of vector transfer operations to SCF.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <type_traits>
14 
15 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
16 
17 #include "../PassDetail.h"
18 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
19 #include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
20 #include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
21 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
22 #include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
23 #include "mlir/Dialect/Vector/VectorOps.h"
24 #include "mlir/Dialect/Vector/VectorUtils.h"
25 #include "mlir/IR/Builders.h"
26 #include "mlir/Pass/Pass.h"
27 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
28 #include "mlir/Transforms/Passes.h"
29 
30 using namespace mlir;
31 using namespace mlir::edsc;
32 using namespace mlir::edsc::intrinsics;
33 using vector::TransferReadOp;
34 using vector::TransferWriteOp;
35 
36 namespace {
37 
38 /// Attribute name used for labeling transfer ops during progressive lowering.
39 static const char kPassLabel[] = "__vector_to_scf_lowering__";
40 
41 /// Patterns that inherit from this struct have access to
42 /// VectorTransferToSCFOptions.
43 template <typename OpTy>
44 struct VectorToSCFPattern : public OpRewritePattern<OpTy> {
45   explicit VectorToSCFPattern(MLIRContext *context,
46                               VectorTransferToSCFOptions opt)
47       : OpRewritePattern<OpTy>(context), options(opt) {}
48 
49   VectorTransferToSCFOptions options;
50 };
51 
52 /// Given a vector transfer op, calculate which dimension of the `source`
53 /// memref should be unpacked in the next application of TransferOpConversion.
54 /// A return value of None indicates a broadcast.
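/// For example (illustrative permutation maps, not taken from a particular
/// test case):
/// ```
/// (d0, d1, d2) -> (d2, d1)   ==>  unpacked dim: 2
/// (d0, d1, d2) -> (0, d1)    ==>  None (dim 0 of the vector is a broadcast)
/// ```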
55 template <typename OpTy>
56 static Optional<int64_t> unpackedDim(OpTy xferOp) {
57   auto map = xferOp.permutation_map();
58   if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
59     return expr.getPosition();
60   }
61   assert(xferOp.isBroadcastDim(0) &&
62          "Expected AffineDimExpr or AffineConstantExpr");
63   return None;
64 }
65 
66 /// Compute the permutation map for the new (N-1)-D vector transfer op. This
67 /// map is identical to the current permutation map, but the first result is
68 /// omitted.
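/// For instance (illustrative map), dropping the first result:
/// ```
/// (d0, d1, d2) -> (d2, d1)   becomes   (d0, d1, d2) -> (d1)
/// ```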
69 template <typename OpTy>
70 static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
71   auto map = xferOp.permutation_map();
72   return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
73                         builder.getContext());
74 }
75 
76 /// Calculate the indices for the new vector transfer op.
77 ///
78 /// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
79 ///       --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
80 ///                                 ^^^^^^
81 ///              `iv` is the iteration variable of the (new) surrounding loop.
82 template <typename OpTy>
83 static void getXferIndices(OpTy xferOp, Value iv,
84                            SmallVector<Value, 8> &indices) {
85   typename OpTy::Adaptor adaptor(xferOp);
86   // Corresponding memref dim of the vector dim that is unpacked.
87   auto dim = unpackedDim(xferOp);
88   auto prevIndices = adaptor.indices();
89   indices.append(prevIndices.begin(), prevIndices.end());
90 
91   bool isBroadcast = !dim.hasValue();
92   if (!isBroadcast) {
93     using edsc::op::operator+;
94     indices[dim.getValue()] = adaptor.indices()[dim.getValue()] + iv;
95   }
96 }
97 
98 static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
99                             Value value) {
100   if (hasRetVal) {
101     builder.create<scf::YieldOp>(loc, value);
102   } else {
103     builder.create<scf::YieldOp>(loc);
104   }
105 }
106 
107 /// Generates a boolean Value that is true if the iv-th element of xferOp's
108 /// mask is set to true. No such check is generated in the following cases:
109 /// * xferOp does not have a mask.
110 /// * xferOp's mask is not 1D. (In case of (N>1)-D, a subvector of the mask is
111 ///   computed and attached to the new transfer op in the pattern.)
112 /// * The to-be-unpacked dim of xferOp is a broadcast.
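/// When a check is generated, it amounts to roughly the following IR (the
/// mask type vector<5xi1> is illustrative):
/// ```
/// %i = index_cast %iv : index to i32
/// %cond = vector.extractelement %mask[%i : i32] : vector<5xi1>
/// ```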
113 template <typename OpTy>
114 static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
115   if (!xferOp.mask())
116     return Value();
117   if (xferOp.getMaskType().getRank() != 1)
118     return Value();
119   if (xferOp.isBroadcastDim(0))
120     return Value();
121 
122   auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
123   return vector_extract_element(xferOp.mask(), ivI32).value;
124 }
125 
126 /// Helper function for TransferOpConversion and TransferOp1dConversion.
127 /// Generate an in-bounds check if the transfer op may go out-of-bounds on the
128 /// specified dimension `dim` with the loop iteration variable `iv`.
129 /// E.g., when unpacking dimension 0 from:
130 /// ```
131 /// %vec = vector.transfer_read %A[%a, %b], %cst
132 ///     : memref<?x?xf32>, vector<5x4xf32>
133 /// ```
134 /// An if check similar to this will be generated inside the loop:
135 /// ```
136 /// %d = memref.dim %A, %c0 : memref<?x?xf32>
137 /// if (%a + iv < %d) {
138 ///   (in-bounds case)
139 /// } else {
140 ///   (out-of-bounds case)
141 /// }
142 /// ```
143 ///
144 /// If the transfer is 1D and has a mask, this function generates a more
145 /// complex check that also accounts for potentially masked-out elements.
146 ///
147 /// This function variant returns the value returned by `inBoundsCase` or
148 /// `outOfBoundsCase`. The MLIR type of the return value must be specified in
149 /// `resultTypes`.
150 template <typename OpTy>
151 static Value generateInBoundsCheck(
152     OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
153     TypeRange resultTypes,
154     function_ref<Value(OpBuilder &, Location)> inBoundsCase,
155     function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
156   bool hasRetVal = !resultTypes.empty();
157   Value cond; // Condition to be built...
158 
159   // Condition check 1: Access in-bounds?
160   bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
161   if (!xferOp.isDimInBounds(0) && !isBroadcast) {
162     auto memrefDim =
163         memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
164     using edsc::op::operator+;
165     auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
166     cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
167   }
168 
169   // Condition check 2: Masked in?
170   if (auto maskCond = generateMaskCheck(builder, xferOp, iv)) {
171     if (cond) {
172       cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
173     } else {
174       cond = maskCond;
175     }
176   }
177 
178   // If the condition is non-empty, generate an SCF::IfOp.
179   if (cond) {
180     auto check = builder.create<scf::IfOp>(
181         xferOp.getLoc(), resultTypes, cond,
182         /*thenBuilder=*/
183         [&](OpBuilder &builder, Location loc) {
184           maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
185         },
186         /*elseBuilder=*/
187         [&](OpBuilder &builder, Location loc) {
188           if (outOfBoundsCase) {
189             maybeYieldValue(hasRetVal, builder, loc,
190                             outOfBoundsCase(builder, loc));
191           } else {
192             builder.create<scf::YieldOp>(loc);
193           }
194         });
195 
196     return hasRetVal ? check.getResult(0) : Value();
197   }
198 
199   // Condition is empty, no need for an SCF::IfOp.
200   return inBoundsCase(builder, xferOp.getLoc());
201 }
202 
203 /// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
204 /// a return value. Consequently, this function does not have a return value.
205 template <typename OpTy>
206 static void generateInBoundsCheck(
207     OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
208     function_ref<void(OpBuilder &, Location)> inBoundsCase,
209     function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
210   generateInBoundsCheck(
211       xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
212       /*inBoundsCase=*/
213       [&](OpBuilder &builder, Location loc) {
214         inBoundsCase(builder, loc);
215         return Value();
216       },
217       /*outOfBoundsCase=*/
218       [&](OpBuilder &builder, Location loc) {
219         if (outOfBoundsCase)
220           outOfBoundsCase(builder, loc);
221         return Value();
222       });
223 }
224 
225 /// Given an ArrayAttr, return a copy where the first element is dropped.
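/// E.g. (illustrative): an `in_bounds` attribute [true, false, true] becomes
/// [false, true].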
226 static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
227   if (!attr)
228     return attr;
229   return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
230 }
231 
232 /// Add the pass label to a vector transfer op if its rank is greater than the
233 /// target rank.
234 template <typename OpTy>
235 static void maybeApplyPassLabel(OpBuilder &builder, OpTy newXferOp,
236                                 unsigned targetRank) {
237   if (newXferOp.getVectorType().getRank() > targetRank)
238     newXferOp->setAttr(kPassLabel, builder.getUnitAttr());
239 }
240 
241 namespace lowering_n_d {
242 
243 /// Helper data structure for data and mask buffers.
244 struct BufferAllocs {
245   Value dataBuffer;
246   Value maskBuffer;
247 };
248 
249 /// Allocate temporary buffers for data (vector) and mask (if present).
250 /// TODO: Parallelism and threadlocal considerations.
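/// For a masked transfer of vector<5x4xf32>, roughly the following IR is
/// created at the top of the enclosing allocation scope (shapes illustrative):
/// ```
/// %data = memref.alloca() : memref<vector<5x4xf32>>
/// %maskBuf = memref.alloca() : memref<vector<5x4xi1>>
/// memref.store %mask, %maskBuf[] : memref<vector<5x4xi1>>
/// %maskVal = memref.load %maskBuf[] : memref<vector<5x4xi1>>
/// ```
/// `dataBuffer` is the data alloca; `maskBuffer` is the value re-loaded from
/// the mask alloca, so that the buffer can later be recovered from the
/// defining LoadOp (see getMaskBuffer).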
251 template <typename OpTy>
252 static BufferAllocs allocBuffers(OpTy xferOp) {
253   auto &b = ScopedContext::getBuilderRef();
254   OpBuilder::InsertionGuard guard(b);
255   Operation *scope =
256       xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
257   assert(scope && "Expected op to be inside automatic allocation scope");
258   b.setInsertionPointToStart(&scope->getRegion(0).front());
259 
260   BufferAllocs result;
261   auto bufferType = MemRefType::get({}, xferOp.getVectorType());
262   result.dataBuffer = memref_alloca(bufferType).value;
263 
264   if (xferOp.mask()) {
265     auto maskType = MemRefType::get({}, xferOp.mask().getType());
266     auto maskBuffer = memref_alloca(maskType).value;
267     memref_store(xferOp.mask(), maskBuffer);
268     result.maskBuffer = memref_load(maskBuffer);
269   }
270 
271   return result;
272 }
273 
274 /// Given a MemRefType with VectorType element type, unpack one dimension from
275 /// the VectorType into the MemRefType.
276 ///
277 /// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
278 static MemRefType unpackOneDim(MemRefType type) {
279   auto vectorType = type.getElementType().dyn_cast<VectorType>();
280   auto memrefShape = type.getShape();
281   SmallVector<int64_t, 8> newMemrefShape;
282   newMemrefShape.append(memrefShape.begin(), memrefShape.end());
283   newMemrefShape.push_back(vectorType.getDimSize(0));
284   return MemRefType::get(newMemrefShape,
285                          VectorType::get(vectorType.getShape().drop_front(),
286                                          vectorType.getElementType()));
287 }
288 
289 /// Given a transfer op, find the memref from which the mask is loaded. This
290 /// is similar to Strategy<TransferWriteOp>::getBuffer.
291 template <typename OpTy>
292 static Value getMaskBuffer(OpTy xferOp) {
293   assert(xferOp.mask() && "Expected that transfer op has mask");
294   auto loadOp = xferOp.mask().template getDefiningOp<memref::LoadOp>();
295   assert(loadOp && "Expected transfer op mask produced by LoadOp");
296   return loadOp.getMemRef();
297 }
298 
299 /// Codegen strategy, depending on the operation.
300 template <typename OpTy>
301 struct Strategy;
302 
303 /// Codegen strategy for vector TransferReadOp.
304 template <>
305 struct Strategy<TransferReadOp> {
306   /// Find the StoreOp that is used for writing the current TransferReadOp's
307   /// result to the temporary buffer allocation.
308   static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
309     assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
310     auto storeOp = dyn_cast<memref::StoreOp>((*xferOp->use_begin()).getOwner());
311     assert(storeOp && "Expected TransferReadOp result used by StoreOp");
312     return storeOp;
313   }
314 
315   /// Find the temporary buffer allocation. All labeled TransferReadOps are
316   /// used like this, where %buf is either the buffer allocation or a type cast
317   /// of the buffer allocation:
318   /// ```
319   /// %vec = vector.transfer_read ... { __vector_to_scf_lowering__ } ...
320   /// memref.store %vec, %buf[...] ...
321   /// ```
322   static Value getBuffer(TransferReadOp xferOp) {
323     return getStoreOp(xferOp).getMemRef();
324   }
325 
326   /// Retrieve the indices of the current StoreOp that stores into the buffer.
327   static void getBufferIndices(TransferReadOp xferOp,
328                                SmallVector<Value, 8> &indices) {
329     auto storeOp = getStoreOp(xferOp);
330     auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
331     indices.append(prevIndices.begin(), prevIndices.end());
332   }
333 
334   /// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
335   /// accesses on the to-be-unpacked dimension.
336   ///
337   /// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
338   ///    variable `iv`.
339   /// 2. Store the result into the (already `vector.type_cast`ed) buffer.
340   ///
341   /// E.g.:
342   /// ```
343   /// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
344   ///     : memref<?x?x?xf32>, vector<4x3xf32>
345   /// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
346   /// ```
347   /// Is rewritten to:
348   /// ```
349   /// %casted = vector.type_cast %buf
350   ///     : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
351   /// for %j = 0 to 4 {
352   ///   %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
353   ///       : memref<?x?x?xf32>, vector<3xf32>
354   ///   memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
355   /// }
356   /// ```
357   ///
358   /// Note: The loop and type cast are generated in TransferOpConversion.
359   ///       The original TransferReadOp and store op are deleted in `cleanup`.
360   /// Note: The `mask` operand is set in TransferOpConversion.
361   static TransferReadOp rewriteOp(OpBuilder &builder,
362                                   VectorTransferToSCFOptions options,
363                                   TransferReadOp xferOp, Value buffer,
364                                   Value iv) {
365     SmallVector<Value, 8> storeIndices;
366     getBufferIndices(xferOp, storeIndices);
367     storeIndices.push_back(iv);
368 
369     SmallVector<Value, 8> xferIndices;
370     getXferIndices(xferOp, iv, xferIndices);
371 
372     auto bufferType = buffer.getType().dyn_cast<ShapedType>();
373     auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
374     auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
375     auto newXfer =
376         vector_transfer_read(
377             vecType, xferOp.source(), xferIndices,
378             AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
379             xferOp.padding(), Value(), inBoundsAttr)
380             .value;
381 
382     maybeApplyPassLabel(builder,
383                         dyn_cast<TransferReadOp>(newXfer.getDefiningOp()),
384                         options.targetRank);
385 
386     memref_store(newXfer, buffer, storeIndices);
387     return newXfer.getDefiningOp<TransferReadOp>();
388   }
389 
390   /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
391   /// padding value to the temporary buffer.
392   static void handleOutOfBoundsDim(OpBuilder & /*builder*/,
393                                    TransferReadOp xferOp, Value buffer,
394                                    Value iv) {
395     SmallVector<Value, 8> storeIndices;
396     getBufferIndices(xferOp, storeIndices);
397     storeIndices.push_back(iv);
398 
399     auto bufferType = buffer.getType().dyn_cast<ShapedType>();
400     auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
401     auto vec = std_splat(vecType, xferOp.padding());
402     memref_store(vec, buffer, storeIndices);
403   }
404 
405   /// Cleanup after rewriting the op.
406   static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp) {
407     rewriter.eraseOp(getStoreOp(xferOp));
408     rewriter.eraseOp(xferOp);
409   }
410 };
411 
412 /// Codegen strategy for vector TransferWriteOp.
413 template <>
414 struct Strategy<TransferWriteOp> {
415   /// Find the temporary buffer allocation. All labeled TransferWriteOps are
416   /// used like this, where %buf is either the buffer allocation or a type cast
417   /// of the buffer allocation:
418   /// ```
419   /// %vec = memref.load %buf[...] ...
420   /// vector.transfer_write %vec ... { __vector_to_scf_lowering__ } ...
421   /// ```
422   static Value getBuffer(TransferWriteOp xferOp) {
423     auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
424     assert(loadOp && "Expected transfer op vector produced by LoadOp");
425     return loadOp.getMemRef();
426   }
427 
428   /// Retrieve the indices of the current LoadOp that loads from the buffer.
429   static void getBufferIndices(TransferWriteOp xferOp,
430                                SmallVector<Value, 8> &indices) {
431     auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
432     auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
433     indices.append(prevIndices.begin(), prevIndices.end());
434   }
435 
436   /// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
437   /// accesses on the to-be-unpacked dimension.
438   ///
439   /// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
440   ///    using the loop iteration variable `iv`.
441   /// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
442   ///    to memory.
443   ///
444   /// Note: For more details, see comments on Strategy<TransferReadOp>.
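  /// E.g., mirroring the TransferReadOp example above (simplified, in-bounds
  /// case only; shapes and names illustrative):
  /// ```
  /// %vec = memref.load %casted[%i, %j] : memref<5x4xvector<3xf32>>
  /// vector.transfer_write %vec, %A[%a+%i, %b+%j, %c]
  ///     : vector<3xf32>, memref<?x?x?xf32>
  /// ```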
445   static TransferWriteOp rewriteOp(OpBuilder &builder,
446                                    VectorTransferToSCFOptions options,
447                                    TransferWriteOp xferOp, Value buffer,
448                                    Value iv) {
449     SmallVector<Value, 8> loadIndices;
450     getBufferIndices(xferOp, loadIndices);
451     loadIndices.push_back(iv);
452 
453     SmallVector<Value, 8> xferIndices;
454     getXferIndices(xferOp, iv, xferIndices);
455 
456     auto vec = memref_load(buffer, loadIndices);
457     auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
458     auto newXfer = vector_transfer_write(
459         Type(), vec, xferOp.source(), xferIndices,
460         AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)), Value(),
461         inBoundsAttr);
462 
463     maybeApplyPassLabel(builder, newXfer.op, options.targetRank);
464 
465     return newXfer;
466   }
467 
468   /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
469   static void handleOutOfBoundsDim(OpBuilder &builder, TransferWriteOp xferOp,
470                                    Value buffer, Value iv) {}
471 
472   /// Cleanup after rewriting the op.
473   static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) {
474     rewriter.eraseOp(xferOp);
475   }
476 };
477 
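/// Return success if `xferOp` should be prepared for progressive lowering:
/// it must not carry the pass label yet, and its vector rank must exceed the
/// target rank (otherwise there is nothing left to unpack).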
478 template <typename OpTy>
479 LogicalResult checkPrepareXferOp(OpTy xferOp, unsigned targetRank) {
480   if (xferOp->hasAttr(kPassLabel))
481     return failure();
482   if (xferOp.getVectorType().getRank() <= targetRank)
483     return failure();
484   return success();
485 }
486 
487 /// Prepare a TransferReadOp for progressive lowering.
488 ///
489 /// 1. Allocate a temporary buffer.
490 /// 2. Label the TransferReadOp, marking it eligible for progressive lowering.
491 /// 3. Store the result of the TransferReadOp into the temporary buffer.
492 /// 4. Load the result from the temporary buffer and replace all uses of the
493 ///    original TransferReadOp with this load.
494 ///
495 /// E.g.:
496 /// ```
497 /// %vec = vector.transfer_read %A[%a, %b, %c], %cst
498 ///     : memref<?x?x?xf32>, vector<5x4xf32>
499 /// ```
500 /// is rewritten to:
501 /// ```
502 /// %0 = memref.alloca() : memref<vector<5x4xf32>>
503 /// %1 = vector.transfer_read %A[%a, %b, %c], %cst
504 ///     { __vector_to_scf_lowering__ } : memref<?x?x?xf32>, vector<5x4xf32>
505 /// memref.store %1, %0[] : memref<vector<5x4xf32>>
506 /// %vec = memref.load %0[] : memref<vector<5x4xf32>>
507 /// ```
508 ///
509 /// Note: A second temporary buffer may be allocated for the `mask` operand.
510 struct PrepareTransferReadConversion
511     : public VectorToSCFPattern<TransferReadOp> {
512   using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;
513 
514   LogicalResult matchAndRewrite(TransferReadOp xferOp,
515                                 PatternRewriter &rewriter) const override {
516     if (checkPrepareXferOp(xferOp, options.targetRank).failed())
517       return failure();
518 
519     ScopedContext scope(rewriter, xferOp.getLoc());
520     auto buffers = allocBuffers(xferOp);
521     auto *newXfer = rewriter.clone(*xferOp.getOperation());
522     newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
523     if (xferOp.mask()) {
524       dyn_cast<TransferReadOp>(newXfer).maskMutable().assign(
525           buffers.maskBuffer);
526     }
527 
528     memref_store(newXfer->getResult(0), buffers.dataBuffer);
529     rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
530 
531     return success();
532   }
533 };
534 
535 /// Prepare a TransferWriteOp for progressive lowering.
536 ///
537 /// 1. Allocate a temporary buffer.
538 /// 2. Store the vector into the buffer.
539 /// 3. Load the vector from the buffer again.
540 /// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
541 ///    marking it eligible for progressive lowering via TransferOpConversion.
542 ///
543 /// E.g.:
544 /// ```
545 /// vector.transfer_write %vec, %A[%a, %b, %c]
546 ///     : vector<5x4xf32>, memref<?x?x?xf32>
547 /// ```
548 /// is rewritten to:
549 /// ```
550 /// %0 = memref.alloca() : memref<vector<5x4xf32>>
551 /// memref.store %vec, %0[] : memref<vector<5x4xf32>>
552 /// %1 = memref.load %0[] : memref<vector<5x4xf32>>
553 /// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
554 ///     : vector<5x4xf32>, memref<?x?x?xf32>
555 /// ```
556 ///
557 /// Note: A second temporary buffer may be allocated for the `mask` operand.
558 struct PrepareTransferWriteConversion
559     : public VectorToSCFPattern<TransferWriteOp> {
560   using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;
561 
562   LogicalResult matchAndRewrite(TransferWriteOp xferOp,
563                                 PatternRewriter &rewriter) const override {
564     if (checkPrepareXferOp(xferOp, options.targetRank).failed())
565       return failure();
566 
567     ScopedContext scope(rewriter, xferOp.getLoc());
568     auto buffers = allocBuffers(xferOp);
569     memref_store(xferOp.vector(), buffers.dataBuffer);
570     auto loadedVec = memref_load(buffers.dataBuffer);
571     rewriter.updateRootInPlace(xferOp, [&]() {
572       xferOp.vectorMutable().assign(loadedVec);
573       xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
574     });
575 
576     if (xferOp.mask()) {
577       rewriter.updateRootInPlace(
578           xferOp, [&]() { xferOp.maskMutable().assign(buffers.maskBuffer); });
579     }
580 
581     return success();
582   }
583 };
584 
585 /// Progressive lowering of vector transfer ops: Unpack one dimension.
586 ///
587 /// 1. Unpack one dimension from the current buffer type and cast the buffer
588 ///    to that new type. E.g.:
589 ///    ```
590 ///    %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
591 ///    vector.transfer_write %vec ...
592 ///    ```
593 ///    The following cast is generated:
594 ///    ```
595 ///    %casted = vector.type_cast %0
596 ///        : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
597 ///    ```
598 /// 2. Generate a for loop and rewrite the transfer op according to the
599 ///    corresponding Strategy<OpTy>. If the to-be-unpacked dimension can be
600 ///    out-of-bounds, generate an if-check and handle both cases separately.
601 /// 3. Clean up according to the corresponding Strategy<OpTy>.
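/// Continuing the transfer_write example above, the generated loop is roughly
/// the following (simplified, without the in-bounds/mask logic):
/// ```
/// scf.for %j = %c0 to %c4 step %c1 {
///   %vec = memref.load %casted[%1, %j] : memref<5x4xvector<3xf32>>
///   vector.transfer_write %vec, ... : vector<3xf32>, ...
/// }
/// ```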
602 template <typename OpTy>
603 struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
604   using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
605 
606   LogicalResult matchAndRewrite(OpTy xferOp,
607                                 PatternRewriter &rewriter) const override {
608     if (!xferOp->hasAttr(kPassLabel))
609       return failure();
610 
611     ScopedContext scope(rewriter, xferOp.getLoc());
612 
613     // Find and cast data buffer. How the buffer can be found depends on OpTy.
614     auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
615     auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
616     auto castedDataType = unpackOneDim(dataBufferType);
617     auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);
618 
619     // If the xferOp has a mask: Find and cast mask buffer.
620     Value castedMaskBuffer;
621     if (xferOp.mask()) {
622       auto maskBuffer = getMaskBuffer(xferOp);
623       auto maskBufferType =
624           maskBuffer.getType().template dyn_cast<MemRefType>();
625       if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
626         // Do not unpack a dimension of the mask, if:
627         // * To-be-unpacked transfer op dimension is a broadcast.
628         // * Mask is 1D, i.e., the mask cannot be further unpacked.
629         //   (That means that all remaining dimensions of the transfer op must
630         //   be broadcasted.)
631         castedMaskBuffer = maskBuffer;
632       } else {
633         auto castedMaskType = unpackOneDim(maskBufferType);
634         castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
635       }
636     }
637 
638     // Loop bounds and step.
639     auto lb = std_constant_index(0).value;
640     auto ub = std_constant_index(
641                   castedDataType.getDimSize(castedDataType.getRank() - 1))
642                   .value;
643     auto step = std_constant_index(1).value;
644 
645     // Generate for loop.
646     rewriter.create<scf::ForOp>(
647         xferOp.getLoc(), lb, ub, step, ValueRange(),
648         [&](OpBuilder &b, Location loc, Value iv, ValueRange /*loopState*/) {
649           ScopedContext scope(b, loc);
650           generateInBoundsCheck(
651               xferOp, iv, b, unpackedDim(xferOp),
652               /*inBoundsCase=*/
653               [&](OpBuilder &b, Location /*loc*/) {
654                 // Create new transfer op.
655                 OpTy newXfer = Strategy<OpTy>::rewriteOp(
656                     b, this->options, xferOp, castedDataBuffer, iv);
657 
658                 // If old transfer op has a mask: Set mask on new transfer op.
659                 // Special case: If the mask of the old transfer op is 1D
660                 // and the unpacked dim is not a broadcast, no mask is
661                 // needed on the new transfer op (the mask has already been
662                 // evaluated by `generateInBoundsCheck`).
663                 if (xferOp.mask() && (xferOp.isBroadcastDim(0) ||
664                                       xferOp.getMaskType().getRank() > 1)) {
665                   OpBuilder::InsertionGuard guard(b);
666                   b.setInsertionPoint(newXfer); // Insert load before newXfer.
667 
668                   SmallVector<Value, 8> loadIndices;
669                   Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
670                   // In case of broadcast: Use same indices to load from memref
671                   // as before.
672                   if (!xferOp.isBroadcastDim(0))
673                     loadIndices.push_back(iv);
674 
675                   auto mask = memref_load(castedMaskBuffer, loadIndices);
676                   rewriter.updateRootInPlace(
677                       newXfer, [&]() { newXfer.maskMutable().assign(mask); });
678                 }
679               },
680               /*outOfBoundsCase=*/
681               [&](OpBuilder &b, Location /*loc*/) {
682                 Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp,
683                                                      castedDataBuffer, iv);
684               });
685           b.create<scf::YieldOp>(loc);
686         });
687 
688     Strategy<OpTy>::cleanup(rewriter, xferOp);
689     return success();
690   }
691 };
692 
693 } // namespace lowering_n_d
694 
695 namespace lowering_n_d_unrolled {
696 
697 /// If the original transfer op has a mask, compute the mask of the new transfer
698 /// op (for the current iteration `i`) and assign it.
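/// E.g., for a mask of type vector<5x4xi1> and iteration i = 2, roughly the
/// following is generated (illustrative):
/// ```
/// %newMask = vector.extract %mask[2] : vector<5x4xi1>
/// ```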
699 template <typename OpTy>
700 static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
701                             int64_t i) {
702   if (!xferOp.mask())
703     return;
704 
705   if (xferOp.isBroadcastDim(0)) {
706     // To-be-unpacked dimension is a broadcast, which does not have a
707     // corresponding mask dimension. Mask attribute remains unchanged.
708     newXferOp.maskMutable().assign(xferOp.mask());
709     return;
710   }
711 
712   if (xferOp.getMaskType().getRank() > 1) {
713     // Unpack one dimension of the mask.
714     OpBuilder::InsertionGuard guard(builder);
715     builder.setInsertionPoint(newXferOp); // Insert extract before newXferOp.
716 
717     llvm::SmallVector<int64_t, 1> indices({i});
718     auto newMask = vector_extract(xferOp.mask(), indices).value;
719     newXferOp.maskMutable().assign(newMask);
720   }
721 
722   // If we end up here: The mask of the old transfer op is 1D and the unpacked
723   // dim is not a broadcast, so no mask is needed on the new transfer op.
724   // `generateInBoundsCheck` will have evaluated the mask already.
725 }
726 
727 /// Progressive lowering of vector TransferReadOp with unrolling: Unpack one
728 /// dimension. This is similar to TransferOpConversion<TransferReadOp>, but no
729 /// memref buffer is allocated and the SCF loop is fully unrolled.
730 ///
731 /// E.g.:
732 /// ```
734 /// %vec = vector.transfer_read %A[%a, %b, %c], %padding
735 ///     : memref<?x?x?xf32>, vector<5x4xf32>
736 /// ```
737 /// is rewritten to IR such as (simplified):
738 /// ```
739 /// %v_init = splat %padding : vector<5x4xf32>
740 /// %tmp0 = vector.transfer_read %A[%a, %b, %c], %padding
741 ///     : memref<?x?x?xf32>, vector<4xf32>
742 /// %v0 = vector.insert %tmp0, %v_init[0] : vector<4xf32> into vector<5x4xf32>
743 /// %tmp1 = vector.transfer_read %A[%a, %b + 1, %c], %padding
744 ///     : memref<?x?x?xf32>, vector<4xf32>
745 /// %v1 = vector.insert %tmp1, %v0[1] : vector<4xf32> into vector<5x4xf32>
746 /// ...
747 /// %tmp4 = vector.transfer_read %A[%a, %b + 4, %c], %padding
748 ///     : memref<?x?x?xf32>, vector<4xf32>
749 /// %vec = vector.insert %tmp4, %v3[4] : vector<4xf32> into vector<5x4xf32>
750 /// ```
751 ///
752 /// Note: As an optimization, if the result of the original TransferReadOp
753 /// was directly inserted into another vector, no new %v_init vector is created.
754 /// Instead, the new TransferReadOp results are inserted into that vector.
755 struct UnrollTransferReadConversion
756     : public VectorToSCFPattern<TransferReadOp> {
757   using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;
758 
759   /// Return the vector into which the newly created TransferReadOp results
760   /// are inserted.
761   Value getResultVector(TransferReadOp xferOp,
762                         PatternRewriter &rewriter) const {
763     if (auto insertOp = getInsertOp(xferOp))
764       return insertOp.dest();
765     return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
766   }
767 
768   /// If the result of the TransferReadOp has exactly one user, which is a
769   /// vector::InsertOp, return that operation.
770   vector::InsertOp getInsertOp(TransferReadOp xferOp) const {
771     if (xferOp->hasOneUse()) {
772       Operation *xferOpUser = *xferOp->getUsers().begin();
773       if (auto insertOp = dyn_cast<vector::InsertOp>(xferOpUser))
774         return insertOp;
775     }
776 
777     return vector::InsertOp();
778   }
779 
780   /// If the result of the TransferReadOp has exactly one user, which is a
781   /// vector::InsertOp, return that operation's indices.
782   void getInsertionIndices(TransferReadOp xferOp,
783                            SmallVector<int64_t, 8> &indices) const {
784     if (auto insertOp = getInsertOp(xferOp)) {
785       llvm::for_each(insertOp.position(), [&](Attribute attr) {
786         indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
787       });
788     }
789   }
790 
791   /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
792   /// accesses, and broadcasts and transposes in permutation maps.
793   LogicalResult matchAndRewrite(TransferReadOp xferOp,
794                                 PatternRewriter &rewriter) const override {
795     if (xferOp.getVectorType().getRank() <= options.targetRank)
796       return failure();
797 
798     ScopedContext scope(rewriter, xferOp.getLoc());
799     auto insertOp = getInsertOp(xferOp);
800     auto vec = getResultVector(xferOp, rewriter);
801     auto vecType = vec.getType().dyn_cast<VectorType>();
802     auto xferVecType = xferOp.getVectorType();
803     auto newXferVecType = VectorType::get(xferVecType.getShape().drop_front(),
804                                           xferVecType.getElementType());
805     int64_t dimSize = xferVecType.getShape()[0];
806 
807     // Generate fully unrolled loop of transfer ops.
808     for (int64_t i = 0; i < dimSize; ++i) {
809       Value iv = std_constant_index(i);
810 
811       vec = generateInBoundsCheck(
812           xferOp, iv, rewriter, unpackedDim(xferOp), TypeRange(vecType),
813           /*inBoundsCase=*/
814           [&](OpBuilder &b, Location loc) {
815             ScopedContext scope(b, loc);
816 
817             // Indices for the new transfer op.
818             SmallVector<Value, 8> xferIndices;
819             getXferIndices(xferOp, iv, xferIndices);
820 
821             // Indices for the new vector.insert op.
822             SmallVector<int64_t, 8> insertionIndices;
823             getInsertionIndices(xferOp, insertionIndices);
824             insertionIndices.push_back(i);
825 
826             auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
827             auto newXferOpVal =
828                 vector_transfer_read(
829                     newXferVecType, xferOp.source(), xferIndices,
830                     AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
831                     xferOp.padding(), Value(), inBoundsAttr)
832                     .value;
833             auto newXferOp =
834                 dyn_cast<TransferReadOp>(newXferOpVal.getDefiningOp());
835 
836             maybeAssignMask(b, xferOp, newXferOp, i);
837 
838             return vector_insert(newXferOp, vec, insertionIndices).value;
839           },
840           /*outOfBoundsCase=*/
841           [&](OpBuilder &b, Location loc) {
842             // Out-of-bounds case: forward the original (unmodified) vector.
843             return vec;
844           });
845     }
846 
847     if (insertOp) {
848       // Rewrite single user of the old TransferReadOp, which was an InsertOp.
849       rewriter.replaceOp(insertOp, vec);
850       rewriter.eraseOp(xferOp);
851     } else {
852       rewriter.replaceOp(xferOp, vec);
853     }
854 
855     return success();
856   }
857 };
858 
859 /// Progressive lowering of vector TransferWriteOp with unrolling: Unpack one
860 /// dimension. This is similar to TransferOpConversion<TransferWriteOp>, but no
861 /// memref buffer is allocated and the SCF loop is fully unrolled.
862 ///
863 /// E.g.:
864 /// ```
866 /// vector.transfer_write %vec, %A[%a, %b, %c]
867 ///     : vector<5x4xf32>, memref<?x?x?xf32>
868 /// ```
869 /// is rewritten to IR such as (simplified):
870 /// ```
871 /// %v0 = vector.extract %vec[0] : vector<5x4xf32>
872 /// vector.transfer_write %v0, %A[%a, %b, %c] : vector<4xf32>, memref<...>
873 /// %v1 = vector.extract %vec[1] : vector<5x4xf32>
874 /// vector.transfer_write %v1, %A[%a, %b + 1, %c] : vector<4xf32>, memref<...>
875 /// ...
876 /// %v4 = vector.extract %vec[4] : vector<5x4xf32>
877 /// vector.transfer_write %v4, %A[%a, %b + 4, %c] : vector<4xf32>, memref<...>
878 /// ```
879 ///
880 /// Note: As an optimization, if the vector of the original TransferWriteOp
881 /// was directly extracted from another vector via an ExtractOp `a`, extract
882 /// the vectors for the newly generated TransferWriteOps from `a`'s input. By
883 /// doing so, `a` may become dead, and the number of ExtractOps generated during
884 /// recursive application of this pattern will be minimal.
885 struct UnrollTransferWriteConversion
886     : public VectorToSCFPattern<TransferWriteOp> {
887   using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;
888 
889   /// Return the vector from which newly generated ExtractOps will extract.
890   Value getDataVector(TransferWriteOp xferOp) const {
891     if (auto extractOp = getExtractOp(xferOp))
892       return extractOp.vector();
893     return xferOp.vector();
894   }
895 
896   /// If the input of the given TransferWriteOp is an ExtractOp, return it.
897   vector::ExtractOp getExtractOp(TransferWriteOp xferOp) const {
898     if (auto *op = xferOp.vector().getDefiningOp())
899       return dyn_cast<vector::ExtractOp>(op);
900     return vector::ExtractOp();
901   }
902 
903   /// If the input of the given TransferWriteOp is an ExtractOp, return its
904   /// indices.
905   void getExtractionIndices(TransferWriteOp xferOp,
906                             SmallVector<int64_t, 8> &indices) const {
907     if (auto extractOp = getExtractOp(xferOp)) {
908       llvm::for_each(extractOp.position(), [&](Attribute attr) {
909         indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
910       });
911     }
912   }
913 
914   /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
915   /// accesses, and broadcasts and transposes in permutation maps.
916   LogicalResult matchAndRewrite(TransferWriteOp xferOp,
917                                 PatternRewriter &rewriter) const override {
918     if (xferOp.getVectorType().getRank() <= options.targetRank)
919       return failure();
920 
921     ScopedContext scope(rewriter, xferOp.getLoc());
922     auto vec = getDataVector(xferOp);
923     auto xferVecType = xferOp.getVectorType();
924     int64_t dimSize = xferVecType.getShape()[0];
925 
926     // Generate fully unrolled loop of transfer ops.
927     for (int64_t i = 0; i < dimSize; ++i) {
928       Value iv = std_constant_index(i);
929 
930       generateInBoundsCheck(
931           xferOp, iv, rewriter, unpackedDim(xferOp),
932           /*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
933             ScopedContext scope(b, loc);
934 
935             // Indices for the new transfer op.
936             SmallVector<Value, 8> xferIndices;
937             getXferIndices(xferOp, iv, xferIndices);
938 
939             // Indices for the new vector.extract op.
940             SmallVector<int64_t, 8> extractionIndices;
941             getExtractionIndices(xferOp, extractionIndices);
942             extractionIndices.push_back(i);
943 
944             auto extracted = vector_extract(vec, extractionIndices).value;
945             auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
946 
947             auto newXferOp =
948                 vector_transfer_write(
949                     Type(), extracted, xferOp.source(), xferIndices,
950                     AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
951                     Value(), inBoundsAttr)
952                     .op;
953 
954             maybeAssignMask(b, xferOp, newXferOp, i);
955           });
956     }
957 
958     rewriter.eraseOp(xferOp);
959     return success();
960   }
961 };
962 
963 } // namespace lowering_n_d_unrolled
964 
965 namespace lowering_1_d {
966 
967 /// Compute the indices into the memref for the LoadOp/StoreOp generated as
968 /// part of TransferOp1dConversion. Return the memref dimension on which
969 /// the transfer is operating. A return value of None indicates a broadcast.
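/// E.g. (illustrative): for indices [%a, %b] and permutation map
/// (d0, d1) -> (d0), the computed memref indices are [%a + iv, %b] and the
/// returned dimension is 0.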
970 template <typename OpTy>
971 static Optional<int64_t>
972 get1dMemrefIndices(OpTy xferOp, Value iv,
973                    SmallVector<Value, 8> &memrefIndices) {
974   auto indices = xferOp.indices();
975   auto map = xferOp.permutation_map();
976 
977   memrefIndices.append(indices.begin(), indices.end());
978   assert(map.getNumResults() == 1 &&
979          "Expected 1 permutation map result for 1D transfer");
980   if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
981     auto dim = expr.getPosition();
982     using edsc::op::operator+;
983     memrefIndices[dim] = memrefIndices[dim] + iv;
984     return dim;
985   }
986 
987   assert(xferOp.isBroadcastDim(0) &&
988          "Expected AffineDimExpr or AffineConstantExpr");
989   return None;
990 }
991 
992 /// Codegen strategy for TransferOp1dConversion, depending on the
993 /// operation.
994 template <typename OpTy>
995 struct Strategy1d;
996 
997 /// Codegen strategy for TransferReadOp.
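/// The loop body generated below is roughly the following (simplified,
/// in-bounds case only, for a 1-D read of vector<9xf32>; names illustrative):
/// ```
/// %t = memref.load %A[%a + iv, %b] : memref<?x?xf32>
/// %vec_next = vector.insertelement %t, %vec[%i : i32] : vector<9xf32>
/// scf.yield %vec_next : vector<9xf32>
/// ```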
998 template <>
999 struct Strategy1d<TransferReadOp> {
1000   static void generateForLoopBody(OpBuilder &builder, Location loc,
1001                                   TransferReadOp xferOp, Value iv,
1002                                   ValueRange loopState) {
1003     SmallVector<Value, 8> indices;
1004     auto dim = get1dMemrefIndices(xferOp, iv, indices);
1005     auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
1006     auto vec = loopState[0];
1007 
1008     // In case of out-of-bounds access, leave `vec` as is (was initialized with
1009     // padding value).
1010     auto nextVec = generateInBoundsCheck(
1011         xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
1012         /*inBoundsCase=*/
1013         [&](OpBuilder & /*b*/, Location loc) {
1014           auto val = memref_load(xferOp.source(), indices);
1015           return vector_insert_element(val, vec, ivI32.value).value;
1016         },
1017         /*outOfBoundsCase=*/
1018         [&](OpBuilder & /*b*/, Location loc) { return vec; });
1019     builder.create<scf::YieldOp>(loc, nextVec);
1020   }
1021 
1022   static Value initialLoopState(TransferReadOp xferOp) {
1023     // Initialize the vector with the padding value.
1024     return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
1025   }
1026 };
1027 
1028 /// Codegen strategy for TransferWriteOp.
1029 template <>
1030 struct Strategy1d<TransferWriteOp> {
1031   static void generateForLoopBody(OpBuilder &builder, Location loc,
1032                                   TransferWriteOp xferOp, Value iv,
1033                                   ValueRange /*loopState*/) {
1034     SmallVector<Value, 8> indices;
1035     auto dim = get1dMemrefIndices(xferOp, iv, indices);
1036     auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
1037 
1038     // Nothing to do in case of out-of-bounds access.
1039     generateInBoundsCheck(
1040         xferOp, iv, builder, dim,
1041         /*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) {
1042           auto val = vector_extract_element(xferOp.vector(), ivI32.value);
1043           memref_store(val, xferOp.source(), indices);
1044         });
1045     builder.create<scf::YieldOp>(loc);
1046   }
1047 
1048   static Value initialLoopState(TransferWriteOp xferOp) { return Value(); }
1049 };
1050 
1051 /// Return true if the last dimension of the MemRefType has unit stride.
1052 static bool isLastMemrefDimUnitStride(MemRefType type) {
1053   int64_t offset;
1054   SmallVector<int64_t, 4> strides;
1055   auto successStrides = getStridesAndOffset(type, strides, offset);
1056   return succeeded(successStrides) && strides.back() == 1;
1057 }
1058 
1059 /// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
1060 /// necessary in cases where a 1D vector transfer op cannot be lowered into
1061 /// vector loads/stores due to non-unit strides or broadcasts:
1062 ///
1063 /// * Transfer dimension is not the last memref dimension
1064 /// * Transfer dimension is a broadcast (i.e., scalar load + broadcast)
1065 /// * Memref has a layout map with non-unit stride on the last dimension
1066 ///
1067 /// This pattern generates IR as follows:
1068 ///
1069 /// 1. Generate a for loop iterating over each vector element.
1070 /// 2. Inside the loop, generate an InsertElementOp or ExtractElementOp,
1071 ///    depending on OpTy.
1072 ///
1073 /// TODO: In some cases (no masking, etc.), LLVM::MatrixColumnMajorLoadOp
1074 ///       can be generated instead of TransferOp1dConversion. Add such a pattern
1075 ///       to ConvertVectorToLLVM.
1076 ///
1077 /// E.g.:
1078 /// ```
1079 /// vector.transfer_write %vec, %A[%a, %b]
1080 ///    {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
1081 ///    : vector<9xf32>, memref<?x?xf32>
1082 /// ```
1083 /// Is rewritten to approximately the following pseudo-IR:
1084 /// ```
1085 /// for i = 0 to 9 {
1086 ///   %t = vector.extractelement %vec[i] : vector<9xf32>
1087 ///   memref.store %t, %arg0[%a + i, %b] : memref<?x?xf32>
1088 /// }
1089 /// ```
1090 template <typename OpTy>
1091 struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
1092   using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
1093 
1094   LogicalResult matchAndRewrite(OpTy xferOp,
1095                                 PatternRewriter &rewriter) const override {
1096     ScopedContext scope(rewriter, xferOp.getLoc());
1097     auto map = xferOp.permutation_map();
1098     auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();
1099 
1100     if (!memRefType)
1101       return failure();
1102     if (xferOp.getVectorType().getRank() != 1)
1103       return failure();
1104     if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
1105       return failure(); // Handled by ConvertVectorToLLVM
1106 
1107     // Loop bounds, step, state...
1108     auto vecType = xferOp.getVectorType();
1109     auto lb = std_constant_index(0);
1110     auto ub = std_constant_index(vecType.getDimSize(0));
1111     auto step = std_constant_index(1);
1112     auto loopState = Strategy1d<OpTy>::initialLoopState(xferOp);
1113 
1114     // Generate for loop.
1115     rewriter.replaceOpWithNewOp<scf::ForOp>(
1116         xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
1117         [&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
1118           ScopedContext nestedScope(builder, loc);
1119           Strategy1d<OpTy>::generateForLoopBody(builder, loc, xferOp, iv,
1120                                                 loopState);
1121         });
1122 
1123     return success();
1124   }
1125 };
1126 
1127 } // namespace lowering_1_d
1128 } // namespace
1129 
1130 namespace mlir {
1131 
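/// Populate `patterns` with the progressive lowering patterns selected by
/// `options`. A minimal usage sketch (mirroring ConvertVectorToSCFPass below;
/// `ctx` and `funcOp` stand for the caller's MLIRContext* and function):
///
///   VectorTransferToSCFOptions options;
///   options.setTargetRank(1);
///   RewritePatternSet patterns(ctx);
///   populateVectorToSCFConversionPatterns(patterns, options);
///   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));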
1132 void populateVectorToSCFConversionPatterns(
1133     RewritePatternSet &patterns, const VectorTransferToSCFOptions &options) {
1134   if (options.unroll) {
1135     patterns.add<lowering_n_d_unrolled::UnrollTransferReadConversion,
1136                  lowering_n_d_unrolled::UnrollTransferWriteConversion>(
1137         patterns.getContext(), options);
1138   } else {
1139     patterns.add<lowering_n_d::PrepareTransferReadConversion,
1140                  lowering_n_d::PrepareTransferWriteConversion,
1141                  lowering_n_d::TransferOpConversion<TransferReadOp>,
1142                  lowering_n_d::TransferOpConversion<TransferWriteOp>>(
1143         patterns.getContext(), options);
1144   }
1145 
1146   if (options.targetRank == 1) {
1147     patterns.add<lowering_1_d::TransferOp1dConversion<TransferReadOp>,
1148                  lowering_1_d::TransferOp1dConversion<TransferWriteOp>>(
1149         patterns.getContext(), options);
1150   }
1151 }
1152 
1153 } // namespace mlir
1154 
1155 namespace {
1156 
1157 struct ConvertVectorToSCFPass
1158     : public ConvertVectorToSCFBase<ConvertVectorToSCFPass> {
1159   ConvertVectorToSCFPass() = default;
1160   ConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
1161     this->fullUnroll = options.unroll;
1162     this->targetRank = options.targetRank;
1163   }
1164 
1165   void runOnFunction() override {
1166     VectorTransferToSCFOptions options;
1167     options.setUnroll(fullUnroll);
1168     options.setTargetRank(targetRank);
1169 
1170     RewritePatternSet patterns(getFunction().getContext());
1171     populateVectorToSCFConversionPatterns(patterns, options);
1172     (void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
1173   }
1174 };
1175 
1176 } // namespace
1177 
1178 std::unique_ptr<Pass>
1179 mlir::createConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
1180   return std::make_unique<ConvertVectorToSCFPass>(options);
1181 }
1182