//===- VectorToSCF.cpp - Convert vector to SCF dialect ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements lowering of vector transfer operations to SCF.
//
//===----------------------------------------------------------------------===//

#include <type_traits>

#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"

#include "../PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using vector::TransferReadOp;
using vector::TransferWriteOp;

namespace {

/// Attribute name used for labeling transfer ops during progressive lowering.
static const char kPassLabel[] = "__vector_to_scf_lowering__";

/// Patterns that inherit from this struct have access to
/// VectorTransferToSCFOptions.
template <typename OpTy>
struct VectorToSCFPattern : public OpRewritePattern<OpTy> {
  explicit VectorToSCFPattern(MLIRContext *context,
                              VectorTransferToSCFOptions opt)
      : OpRewritePattern<OpTy>(context), options(opt) {}

  VectorTransferToSCFOptions options;
};

/// Given a vector transfer op, calculate which dimension of the `source`
/// memref should be unpacked in the next application of TransferOpConversion.
/// A return value of None indicates a broadcast.
template <typename OpTy>
static Optional<int64_t> unpackedDim(OpTy xferOp) {
  auto map = xferOp.permutation_map();
  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
    return expr.getPosition();
  }
  assert(xferOp.isBroadcastDim(0) &&
         "Expected AffineDimExpr or AffineConstantExpr");
  return None;
}

/// Compute the permutation map for the new (N-1)-D vector transfer op. This
/// map is identical to the current permutation map, but the first result is
/// omitted.
template <typename OpTy>
static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
  auto map = xferOp.permutation_map();
  return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
                        builder.getContext());
}

/// Calculate the indices for the new vector transfer op.
///
/// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
///       --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
///                                 ^^^^^^
///              `iv` is the iteration variable of the (new) surrounding loop.
template <typename OpTy>
static void getXferIndices(OpTy xferOp, Value iv,
                           SmallVector<Value, 8> &indices) {
  typename OpTy::Adaptor adaptor(xferOp);
  // Corresponding memref dim of the vector dim that is unpacked.
  auto dim = unpackedDim(xferOp);
  auto prevIndices = adaptor.indices();
  indices.append(prevIndices.begin(), prevIndices.end());

  bool isBroadcast = !dim.hasValue();
  if (!isBroadcast) {
    using edsc::op::operator+;
    indices[dim.getValue()] = adaptor.indices()[dim.getValue()] + iv;
  }
}

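/// Generate an scf.yield op that yields `value` if `hasRetVal` is true, and
/// an empty scf.yield otherwise.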
static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
                            Value value) {
  if (hasRetVal) {
    builder.create<scf::YieldOp>(loc, value);
  } else {
    builder.create<scf::YieldOp>(loc);
  }
}

/// Generates a boolean Value that is true if the iv-th bit in xferOp's mask
/// is set to true. No such check is generated under the following
/// circumstances:
/// * xferOp does not have a mask.
/// * xferOp's mask is not 1D. (In case of (N>1)-D, a subvector of the mask is
///   computed and attached to the new transfer op in the pattern.)
/// * The to-be-unpacked dim of xferOp is a broadcast.
template <typename OpTy>
static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
  if (!xferOp.mask())
    return Value();
  if (xferOp.getMaskType().getRank() != 1)
    return Value();
  if (xferOp.isBroadcastDim(0))
    return Value();

  auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
  return vector_extract_element(xferOp.mask(), ivI32).value;
}

/// Helper function for TransferOpConversion and TransferOp1dConversion.
/// Generate an in-bounds check if the transfer op may go out-of-bounds on the
/// specified dimension `dim` with the loop iteration variable `iv`.
/// E.g., when unpacking dimension 0 from:
/// ```
/// %vec = vector.transfer_read %A[%a, %b], %cst
///     : memref<?x?xf32>, vector<5x4xf32>
/// ```
/// An if check similar to this will be generated inside the loop:
/// ```
/// %d = memref.dim %A, %c0 : memref<?x?xf32>
/// if (%a + iv < %d) {
///   (in-bounds case)
/// } else {
///   (out-of-bounds case)
/// }
/// ```
///
/// If the transfer is 1D and has a mask, this function generates a more
/// complex check that also accounts for potentially masked-out elements.
///
/// This function variant returns the value returned by `inBoundsCase` or
/// `outOfBoundsCase`. The MLIR type of the return value must be specified in
/// `resultTypes`.
template <typename OpTy>
static Value generateInBoundsCheck(
    OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
    TypeRange resultTypes,
    function_ref<Value(OpBuilder &, Location)> inBoundsCase,
    function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
  bool hasRetVal = !resultTypes.empty();
  Value cond; // Condition to be built...

  // Condition check 1: Access in-bounds?
  bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
  if (!xferOp.isDimInBounds(0) && !isBroadcast) {
    auto memrefDim =
        memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
    using edsc::op::operator+;
    auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
    cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
  }

  // Condition check 2: Masked in?
  if (auto maskCond = generateMaskCheck(builder, xferOp, iv)) {
    if (cond) {
      cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
    } else {
      cond = maskCond;
    }
  }

  // If the condition is non-empty, generate an SCF::IfOp.
  if (cond) {
    auto check = builder.create<scf::IfOp>(
        xferOp.getLoc(), resultTypes, cond,
        /*thenBuilder=*/
        [&](OpBuilder &builder, Location loc) {
          maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
        },
        /*elseBuilder=*/
        [&](OpBuilder &builder, Location loc) {
          if (outOfBoundsCase) {
            maybeYieldValue(hasRetVal, builder, loc,
                            outOfBoundsCase(builder, loc));
          } else {
            builder.create<scf::YieldOp>(loc);
          }
        });

    return hasRetVal ? check.getResult(0) : Value();
  }

  // Condition is empty, no need for an SCF::IfOp.
  return inBoundsCase(builder, xferOp.getLoc());
}

/// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
/// a return value. Consequently, this function does not have a return value.
template <typename OpTy>
static void generateInBoundsCheck(
    OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
    function_ref<void(OpBuilder &, Location)> inBoundsCase,
    function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
  generateInBoundsCheck(
      xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
      /*inBoundsCase=*/
      [&](OpBuilder &builder, Location loc) {
        inBoundsCase(builder, loc);
        return Value();
      },
      /*outOfBoundsCase=*/
      [&](OpBuilder &builder, Location loc) {
        if (outOfBoundsCase)
          outOfBoundsCase(builder, loc);
        return Value();
      });
}

/// Given an ArrayAttr, return a copy where the first element is dropped.
static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
  if (!attr)
    return attr;
  return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
}

/// Add the pass label to a vector transfer op if its rank is not the target
/// rank.
template <typename OpTy>
static void maybeApplyPassLabel(OpBuilder &builder, OpTy newXferOp,
                                unsigned targetRank) {
  if (newXferOp.getVectorType().getRank() > targetRank)
    newXferOp->setAttr(kPassLabel, builder.getUnitAttr());
}

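/// Patterns in this namespace progressively lower (N>1)-D vector transfer ops
/// one dimension at a time, using temporary buffers and scf.for loops.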
namespace lowering_n_d {

/// Helper data structure for data and mask buffers.
struct BufferAllocs {
  Value dataBuffer;
  Value maskBuffer;
};

/// Allocate temporary buffers for data (vector) and mask (if present).
/// TODO: Parallelism and threadlocal considerations.
template <typename OpTy>
static BufferAllocs allocBuffers(OpTy xferOp) {
  auto &b = ScopedContext::getBuilderRef();
  OpBuilder::InsertionGuard guard(b);
  Operation *scope =
      xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
  assert(scope && "Expected op to be inside automatic allocation scope");
  b.setInsertionPointToStart(&scope->getRegion(0).front());

  BufferAllocs result;
  auto bufferType = MemRefType::get({}, xferOp.getVectorType());
  result.dataBuffer = memref_alloca(bufferType).value;

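  // If the op has a mask, allocate a second buffer, store the mask into it,
  // and load it back. The loaded value is what later patterns use to recover
  // the mask buffer (see getMaskBuffer).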
  if (xferOp.mask()) {
    auto maskType = MemRefType::get({}, xferOp.mask().getType());
    auto maskBuffer = memref_alloca(maskType).value;
    b.setInsertionPoint(xferOp);
    memref_store(xferOp.mask(), maskBuffer);
    result.maskBuffer = memref_load(maskBuffer);
  }

  return result;
}

/// Given a MemRefType with VectorType element type, unpack one dimension from
/// the VectorType into the MemRefType.
///
/// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
static MemRefType unpackOneDim(MemRefType type) {
  auto vectorType = type.getElementType().dyn_cast<VectorType>();
  auto memrefShape = type.getShape();
  SmallVector<int64_t, 8> newMemrefShape;
  newMemrefShape.append(memrefShape.begin(), memrefShape.end());
  newMemrefShape.push_back(vectorType.getDimSize(0));
  return MemRefType::get(newMemrefShape,
                         VectorType::get(vectorType.getShape().drop_front(),
                                         vectorType.getElementType()));
}

/// Given a transfer op, find the memref from which the mask is loaded. This
/// is similar to Strategy<TransferWriteOp>::getBuffer.
template <typename OpTy>
static Value getMaskBuffer(OpTy xferOp) {
  assert(xferOp.mask() && "Expected that transfer op has mask");
  auto loadOp = xferOp.mask().template getDefiningOp<memref::LoadOp>();
  assert(loadOp && "Expected transfer op mask produced by LoadOp");
  return loadOp.getMemRef();
}

/// Codegen strategy, depending on the operation.
template <typename OpTy>
struct Strategy;

/// Codegen strategy for vector TransferReadOp.
template <>
struct Strategy<TransferReadOp> {
  /// Find the StoreOp that is used for writing the current TransferReadOp's
  /// result to the temporary buffer allocation.
  static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
    assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
    auto storeOp = dyn_cast<memref::StoreOp>((*xferOp->use_begin()).getOwner());
    assert(storeOp && "Expected TransferReadOp result used by StoreOp");
    return storeOp;
  }

  /// Find the temporary buffer allocation. All labeled TransferReadOps are
  /// used like this, where %buf is either the buffer allocation or a type cast
  /// of the buffer allocation:
  /// ```
  /// %vec = vector.transfer_read ... { __vector_to_scf_lowering__ } ...
  /// memref.store %vec, %buf[...] ...
  /// ```
  static Value getBuffer(TransferReadOp xferOp) {
    return getStoreOp(xferOp).getMemRef();
  }

  /// Retrieve the indices of the current StoreOp that stores into the buffer.
  static void getBufferIndices(TransferReadOp xferOp,
                               SmallVector<Value, 8> &indices) {
    auto storeOp = getStoreOp(xferOp);
    auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
    indices.append(prevIndices.begin(), prevIndices.end());
  }

  /// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
  /// accesses on the to-be-unpacked dimension.
  ///
  /// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
  ///    variable `iv`.
  /// 2. Store the result into the (already `vector.type_cast`ed) buffer.
  ///
  /// E.g.:
  /// ```
  /// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
  ///     : memref<?x?x?xf32>, vector<4x3xf32>
  /// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
  /// ```
  /// Is rewritten to:
  /// ```
  /// %casted = vector.type_cast %buf
  ///     : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
  /// for %j = 0 to 4 {
  ///   %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
  ///       : memref<?x?x?xf32>, vector<3xf32>
  ///   memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
  /// }
  /// ```
  ///
  /// Note: The loop and type cast are generated in TransferOpConversion.
  ///       The original TransferReadOp and store op are deleted in `cleanup`.
  /// Note: The `mask` operand is set in TransferOpConversion.
  static TransferReadOp rewriteOp(OpBuilder &builder,
                                  VectorTransferToSCFOptions options,
                                  TransferReadOp xferOp, Value buffer,
                                  Value iv) {
    SmallVector<Value, 8> storeIndices;
    getBufferIndices(xferOp, storeIndices);
    storeIndices.push_back(iv);

    SmallVector<Value, 8> xferIndices;
    getXferIndices(xferOp, iv, xferIndices);

    auto bufferType = buffer.getType().dyn_cast<ShapedType>();
    auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
    auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
    auto newXfer =
        vector_transfer_read(
            vecType, xferOp.source(), xferIndices,
            AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
            xferOp.padding(), Value(), inBoundsAttr)
            .value;

    maybeApplyPassLabel(builder,
                        dyn_cast<TransferReadOp>(newXfer.getDefiningOp()),
                        options.targetRank);

    memref_store(newXfer, buffer, storeIndices);
    return newXfer.getDefiningOp<TransferReadOp>();
  }

  /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
  /// padding value to the temporary buffer.
  static void handleOutOfBoundsDim(OpBuilder & /*builder*/,
                                   TransferReadOp xferOp, Value buffer,
                                   Value iv) {
    SmallVector<Value, 8> storeIndices;
    getBufferIndices(xferOp, storeIndices);
    storeIndices.push_back(iv);

    auto bufferType = buffer.getType().dyn_cast<ShapedType>();
    auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
    auto vec = std_splat(vecType, xferOp.padding());
    memref_store(vec, buffer, storeIndices);
  }

  /// Cleanup after rewriting the op.
  static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp) {
    rewriter.eraseOp(getStoreOp(xferOp));
    rewriter.eraseOp(xferOp);
  }
};

/// Codegen strategy for vector TransferWriteOp.
template <>
struct Strategy<TransferWriteOp> {
  /// Find the temporary buffer allocation. All labeled TransferWriteOps are
  /// used like this, where %buf is either the buffer allocation or a type cast
  /// of the buffer allocation:
  /// ```
  /// %vec = memref.load %buf[...] ...
  /// vector.transfer_write %vec ... { __vector_to_scf_lowering__ } ...
  /// ```
  static Value getBuffer(TransferWriteOp xferOp) {
    auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
    assert(loadOp && "Expected transfer op vector produced by LoadOp");
    return loadOp.getMemRef();
  }

  /// Retrieve the indices of the current LoadOp that loads from the buffer.
  static void getBufferIndices(TransferWriteOp xferOp,
                               SmallVector<Value, 8> &indices) {
    auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
    auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
    indices.append(prevIndices.begin(), prevIndices.end());
  }

  /// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
  /// accesses on the to-be-unpacked dimension.
  ///
  /// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
  ///    using the loop iteration variable `iv`.
  /// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
  ///    to memory.
  ///
  /// Note: For more details, see comments on Strategy<TransferReadOp>.
  static TransferWriteOp rewriteOp(OpBuilder &builder,
                                   VectorTransferToSCFOptions options,
                                   TransferWriteOp xferOp, Value buffer,
                                   Value iv) {
    SmallVector<Value, 8> loadIndices;
    getBufferIndices(xferOp, loadIndices);
    loadIndices.push_back(iv);

    SmallVector<Value, 8> xferIndices;
    getXferIndices(xferOp, iv, xferIndices);

    auto vec = memref_load(buffer, loadIndices);
    auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
    auto newXfer = vector_transfer_write(
        Type(), vec, xferOp.source(), xferIndices,
        AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)), Value(),
        inBoundsAttr);

    maybeApplyPassLabel(builder, newXfer.op, options.targetRank);

    return newXfer;
  }

  /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
  static void handleOutOfBoundsDim(OpBuilder &builder, TransferWriteOp xferOp,
                                   Value buffer, Value iv) {}

  /// Cleanup after rewriting the op.
  static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) {
    rewriter.eraseOp(xferOp);
  }
};

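/// Return success if the given transfer op should be prepared for
/// progressive lowering: it has not been labeled yet and its vector rank
/// exceeds the target rank.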
template <typename OpTy>
LogicalResult checkPrepareXferOp(OpTy xferOp,
                                 VectorTransferToSCFOptions options) {
  if (xferOp->hasAttr(kPassLabel))
    return failure();
  if (xferOp.getVectorType().getRank() <= options.targetRank)
    return failure();
  return success();
}

/// Prepare a TransferReadOp for progressive lowering.
///
/// 1. Allocate a temporary buffer.
/// 2. Label the TransferReadOp, marking it eligible for progressive lowering.
/// 3. Store the result of the TransferReadOp into the temporary buffer.
/// 4. Load the result from the temporary buffer and replace all uses of the
///    original TransferReadOp with this load.
///
/// E.g.:
/// ```
/// %vec = vector.transfer_read %A[%a, %b, %c], %cst
///     : memref<?x?x?xf32>, vector<5x4xf32>
/// ```
/// is rewritten to:
/// ```
/// %0 = memref.alloca() : memref<vector<5x4xf32>>
/// %1 = vector.transfer_read %A[%a, %b, %c], %cst
///     { __vector_to_scf_lowering__ } : memref<?x?x?xf32>, vector<5x4xf32>
/// memref.store %1, %0[] : memref<vector<5x4xf32>>
/// %vec = memref.load %0[] : memref<vector<5x4xf32>>
/// ```
///
/// Note: A second temporary buffer may be allocated for the `mask` operand.
struct PrepareTransferReadConversion
    : public VectorToSCFPattern<TransferReadOp> {
  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(TransferReadOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (checkPrepareXferOp(xferOp, options).failed())
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto buffers = allocBuffers(xferOp);
    auto *newXfer = rewriter.clone(*xferOp.getOperation());
    newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
    if (xferOp.mask()) {
      dyn_cast<TransferReadOp>(newXfer).maskMutable().assign(
          buffers.maskBuffer);
    }

    memref_store(newXfer->getResult(0), buffers.dataBuffer);
    rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);

    return success();
  }
};

/// Prepare a TransferWriteOp for progressive lowering.
///
/// 1. Allocate a temporary buffer.
/// 2. Store the vector into the buffer.
/// 3. Load the vector from the buffer again.
/// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
///    marking it eligible for progressive lowering via TransferOpConversion.
///
/// E.g.:
/// ```
/// vector.transfer_write %vec, %A[%a, %b, %c]
///     : vector<5x4xf32>, memref<?x?x?xf32>
/// ```
/// is rewritten to:
/// ```
/// %0 = memref.alloca() : memref<vector<5x4xf32>>
/// memref.store %vec, %0[] : memref<vector<5x4xf32>>
/// %1 = memref.load %0[] : memref<vector<5x4xf32>>
/// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
///     : vector<5x4xf32>, memref<?x?x?xf32>
/// ```
///
/// Note: A second temporary buffer may be allocated for the `mask` operand.
struct PrepareTransferWriteConversion
    : public VectorToSCFPattern<TransferWriteOp> {
  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (checkPrepareXferOp(xferOp, options).failed())
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto buffers = allocBuffers(xferOp);
    memref_store(xferOp.vector(), buffers.dataBuffer);
    auto loadedVec = memref_load(buffers.dataBuffer);
    rewriter.updateRootInPlace(xferOp, [&]() {
      xferOp.vectorMutable().assign(loadedVec);
      xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
    });

    if (xferOp.mask()) {
      rewriter.updateRootInPlace(
          xferOp, [&]() { xferOp.maskMutable().assign(buffers.maskBuffer); });
    }

    return success();
  }
};

/// Progressive lowering of vector transfer ops: Unpack one dimension.
///
/// 1. Unpack one dimension from the current buffer type and cast the buffer
///    to that new type. E.g.:
///    ```
///    %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
///    vector.transfer_write %vec ...
///    ```
///    The following cast is generated:
///    ```
///    %casted = vector.type_cast %0
///        : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
///    ```
/// 2. Generate a for loop and rewrite the transfer op according to the
///    corresponding Strategy<OpTy>. If the to-be-unpacked dimension can be
///    out-of-bounds, generate an if-check and handle both cases separately.
/// 3. Clean up according to the corresponding Strategy<OpTy>.
template <typename OpTy>
struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(OpTy xferOp,
                                PatternRewriter &rewriter) const override {
    if (!xferOp->hasAttr(kPassLabel))
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());

    // Find and cast data buffer. How the buffer can be found depends on OpTy.
    auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
    auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
    auto castedDataType = unpackOneDim(dataBufferType);
    auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);

    // If the xferOp has a mask: Find and cast mask buffer.
    Value castedMaskBuffer;
    if (xferOp.mask()) {
      auto maskBuffer = getMaskBuffer(xferOp);
      auto maskBufferType =
          maskBuffer.getType().template dyn_cast<MemRefType>();
      if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
        // Do not unpack a dimension of the mask, if:
        // * To-be-unpacked transfer op dimension is a broadcast.
        // * Mask is 1D, i.e., the mask cannot be further unpacked.
        //   (That means that all remaining dimensions of the transfer op must
        //   be broadcasted.)
        castedMaskBuffer = maskBuffer;
      } else {
        auto castedMaskType = unpackOneDim(maskBufferType);
        castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
      }
    }

    // Loop bounds and step.
    auto lb = std_constant_index(0).value;
    auto ub = std_constant_index(
                  castedDataType.getDimSize(castedDataType.getRank() - 1))
                  .value;
    auto step = std_constant_index(1).value;

    // Generate for loop.
    rewriter.create<scf::ForOp>(
        xferOp.getLoc(), lb, ub, step, ValueRange(),
        [&](OpBuilder &b, Location loc, Value iv, ValueRange /*loopState*/) {
          ScopedContext scope(b, loc);
          generateInBoundsCheck(
              xferOp, iv, b, unpackedDim(xferOp),
              /*inBoundsCase=*/
              [&](OpBuilder &b, Location /*loc*/) {
                // Create new transfer op.
                OpTy newXfer = Strategy<OpTy>::rewriteOp(
                    b, this->options, xferOp, castedDataBuffer, iv);

                // If the old transfer op has a mask: Set the mask on the new
                // transfer op. Special case: If the mask of the old transfer
                // op is 1D and the unpacked dim is not a broadcast, no mask
                // is needed on the new transfer op.
                if (xferOp.mask() && (xferOp.isBroadcastDim(0) ||
                                      xferOp.getMaskType().getRank() > 1)) {
                  OpBuilder::InsertionGuard guard(b);
                  b.setInsertionPoint(newXfer); // Insert load before newXfer.

                  SmallVector<Value, 8> loadIndices;
                  Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
                  // In case of broadcast: Use same indices to load from memref
                  // as before.
                  if (!xferOp.isBroadcastDim(0))
                    loadIndices.push_back(iv);

                  auto mask = memref_load(castedMaskBuffer, loadIndices);
                  rewriter.updateRootInPlace(
                      newXfer, [&]() { newXfer.maskMutable().assign(mask); });
                }
              },
              /*outOfBoundsCase=*/
              [&](OpBuilder &b, Location /*loc*/) {
                Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp,
                                                     castedDataBuffer, iv);
              });
          b.create<scf::YieldOp>(loc);
        });

    Strategy<OpTy>::cleanup(rewriter, xferOp);
    return success();
  }
};

} // namespace lowering_n_d

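/// Patterns in this namespace lower (N>1)-D vector transfer ops by fully
/// unrolling the to-be-unpacked dimension, without allocating temporary
/// buffers.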
namespace lowering_n_d_unrolled {

/// If the original transfer op has a mask, compute the mask of the new transfer
/// op (for the current iteration `i`) and assign it.
template <typename OpTy>
static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
                            int64_t i) {
  if (!xferOp.mask())
    return;

  if (xferOp.isBroadcastDim(0)) {
    // To-be-unpacked dimension is a broadcast, which does not have a
    // corresponding mask dimension. Mask attribute remains unchanged.
    newXferOp.maskMutable().assign(xferOp.mask());
    return;
  }

  if (xferOp.getMaskType().getRank() > 1) {
    // Unpack one dimension of the mask.
    OpBuilder::InsertionGuard guard(builder);
    builder.setInsertionPoint(newXferOp); // Insert extract before newXferOp.

    llvm::SmallVector<int64_t, 1> indices({i});
    auto newMask = vector_extract(xferOp.mask(), indices).value;
    newXferOp.maskMutable().assign(newMask);
  }

  // If we end up here: The mask of the old transfer op is 1D and the unpacked
  // dim is not a broadcast, so no mask is needed on the new transfer op.
  // `generateInBoundsCheck` will have evaluated the mask already.
}

/// Progressive lowering of vector TransferReadOp with unrolling: Unpack one
/// dimension. This is similar to TransferOpConversion<TransferReadOp>, but no
/// memref buffer is allocated and the SCF loop is fully unrolled.
///
/// E.g.:
/// ```
/// %vec = vector.transfer_read %A[%a, %b, %c], %padding
///     : memref<?x?x?xf32>, vector<5x4xf32>
/// ```
/// is rewritten to IR such as (simplified):
/// ```
/// %v_init = splat %padding : vector<5x4xf32>
/// %tmp0 = vector.transfer_read %A[%a, %b, %c], %padding
///     : memref<?x?x?xf32>, vector<4xf32>
/// %v0 = vector.insert %tmp0, %v_init[0] : vector<4xf32> into vector<5x4xf32>
/// %tmp1 = vector.transfer_read %A[%a, %b + 1, %c], %padding
///     : memref<?x?x?xf32>, vector<4xf32>
/// %v1 = vector.insert %tmp1, %v0[1] : vector<4xf32> into vector<5x4xf32>
/// ...
/// %tmp4 = vector.transfer_read %A[%a, %b + 4, %c], %padding
///     : memref<?x?x?xf32>, vector<4xf32>
/// %vec = vector.insert %tmp4, %v3[4] : vector<4xf32> into vector<5x4xf32>
/// ```
///
/// Note: As an optimization, if the result of the original TransferReadOp
/// was directly inserted into another vector, no new %v_init vector is created.
/// Instead, the new TransferReadOp results are inserted into that vector.
struct UnrollTransferReadConversion
    : public VectorToSCFPattern<TransferReadOp> {
  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;

  /// Return the vector into which the newly created TransferReadOp results
  /// are inserted.
  Value getResultVector(TransferReadOp xferOp,
                        PatternRewriter &rewriter) const {
    if (auto insertOp = getInsertOp(xferOp))
      return insertOp.dest();
    return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
  }

  /// If the result of the TransferReadOp has exactly one user, which is a
  /// vector::InsertOp, return that operation.
  vector::InsertOp getInsertOp(TransferReadOp xferOp) const {
    if (xferOp->hasOneUse()) {
      Operation *xferOpUser = *xferOp->getUsers().begin();
      if (auto insertOp = dyn_cast<vector::InsertOp>(xferOpUser))
        return insertOp;
    }

    return vector::InsertOp();
  }

  /// If the result of the TransferReadOp has exactly one user, which is a
  /// vector::InsertOp, return that operation's indices.
  void getInsertionIndices(TransferReadOp xferOp,
                           SmallVector<int64_t, 8> &indices) const {
    if (auto insertOp = getInsertOp(xferOp)) {
      llvm::for_each(insertOp.position(), [&](Attribute attr) {
        indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
      });
    }
  }

  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
  /// accesses, and broadcasts and transposes in permutation maps.
  LogicalResult matchAndRewrite(TransferReadOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (xferOp.getVectorType().getRank() <= options.targetRank)
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto insertOp = getInsertOp(xferOp);
    auto vec = getResultVector(xferOp, rewriter);
    auto vecType = vec.getType().dyn_cast<VectorType>();
    auto xferVecType = xferOp.getVectorType();
    auto newXferVecType = VectorType::get(xferVecType.getShape().drop_front(),
                                          xferVecType.getElementType());
    int64_t dimSize = xferVecType.getShape()[0];

    // Generate fully unrolled loop of transfer ops.
    for (int64_t i = 0; i < dimSize; ++i) {
      Value iv = std_constant_index(i);

      vec = generateInBoundsCheck(
          xferOp, iv, rewriter, unpackedDim(xferOp), TypeRange(vecType),
          /*inBoundsCase=*/
          [&](OpBuilder &b, Location loc) {
            ScopedContext scope(b, loc);

            // Indices for the new transfer op.
            SmallVector<Value, 8> xferIndices;
            getXferIndices(xferOp, iv, xferIndices);

            // Indices for the new vector.insert op.
            SmallVector<int64_t, 8> insertionIndices;
            getInsertionIndices(xferOp, insertionIndices);
            insertionIndices.push_back(i);

            auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
            auto newXferOpVal =
                vector_transfer_read(
                    newXferVecType, xferOp.source(), xferIndices,
                    AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
                    xferOp.padding(), Value(), inBoundsAttr)
                    .value;
            auto newXferOp =
                dyn_cast<TransferReadOp>(newXferOpVal.getDefiningOp());

            maybeAssignMask(b, xferOp, newXferOp, i);

            return vector_insert(newXferOp, vec, insertionIndices).value;
          },
          /*outOfBoundsCase=*/
          [&](OpBuilder &b, Location loc) {
            // Out-of-bounds case: Pass the original (unmodified) vector
            // through unchanged.
            return vec;
          });
    }

    if (insertOp) {
      // Rewrite single user of the old TransferReadOp, which was an InsertOp.
      rewriter.replaceOp(insertOp, vec);
      rewriter.eraseOp(xferOp);
    } else {
      rewriter.replaceOp(xferOp, vec);
    }

    return success();
  }
};


/// Progressive lowering of vector TransferWriteOp with unrolling: Unpack one
/// dimension. This is similar to TransferOpConversion<TransferWriteOp>, but no
/// memref buffer is allocated and the SCF loop is fully unrolled.
///
/// E.g.:
/// ```
/// vector.transfer_write %vec, %A[%a, %b, %c]
///     : vector<5x4xf32>, memref<?x?x?xf32>
/// ```
/// is rewritten to IR such as (simplified):
/// ```
/// %v0 = vector.extract %vec[0] : vector<5x4xf32>
/// vector.transfer_write %v0, %A[%a, %b, %c] : vector<4xf32>, memref<...>
/// %v1 = vector.extract %vec[1] : vector<5x4xf32>
/// vector.transfer_write %v1, %A[%a, %b + 1, %c] : vector<4xf32>, memref<...>
/// ...
/// %v4 = vector.extract %vec[4] : vector<5x4xf32>
/// vector.transfer_write %v4, %A[%a, %b + 4, %c] : vector<4xf32>, memref<...>
/// ```
///
/// Note: As an optimization, if the vector of the original TransferWriteOp
/// was directly extracted from another vector via an ExtractOp `a`, extract
/// the vectors for the newly generated TransferWriteOps from `a`'s input. By
/// doing so, `a` may become dead, and the number of ExtractOps generated during
/// recursive application of this pattern will be minimal.
struct UnrollTransferWriteConversion
    : public VectorToSCFPattern<TransferWriteOp> {
  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;

  /// Return the vector from which newly generated ExtractOps will extract.
  Value getDataVector(TransferWriteOp xferOp) const {
    if (auto extractOp = getExtractOp(xferOp))
      return extractOp.vector();
    return xferOp.vector();
  }

  /// If the input of the given TransferWriteOp is an ExtractOp, return it.
  vector::ExtractOp getExtractOp(TransferWriteOp xferOp) const {
    if (auto *op = xferOp.vector().getDefiningOp())
      return dyn_cast<vector::ExtractOp>(op);
    return vector::ExtractOp();
  }

  /// If the input of the given TransferWriteOp is an ExtractOp, return its
  /// indices.
  void getExtractionIndices(TransferWriteOp xferOp,
                            SmallVector<int64_t, 8> &indices) const {
    if (auto extractOp = getExtractOp(xferOp)) {
      llvm::for_each(extractOp.position(), [&](Attribute attr) {
        indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
      });
    }
  }

  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
  /// accesses, and broadcasts and transposes in permutation maps.
  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (xferOp.getVectorType().getRank() <= options.targetRank)
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto vec = getDataVector(xferOp);
    auto xferVecType = xferOp.getVectorType();
    int64_t dimSize = xferVecType.getShape()[0];

    // Generate fully unrolled loop of transfer ops.
    for (int64_t i = 0; i < dimSize; ++i) {
      Value iv = std_constant_index(i);

      generateInBoundsCheck(
          xferOp, iv, rewriter, unpackedDim(xferOp),
          /*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
            ScopedContext scope(b, loc);

            // Indices for the new transfer op.
            SmallVector<Value, 8> xferIndices;
            getXferIndices(xferOp, iv, xferIndices);

            // Indices for the new vector.extract op.
            SmallVector<int64_t, 8> extractionIndices;
            getExtractionIndices(xferOp, extractionIndices);
            extractionIndices.push_back(i);

            auto extracted = vector_extract(vec, extractionIndices).value;
            auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());

            auto newXferOp =
                vector_transfer_write(
                    Type(), extracted, xferOp.source(), xferIndices,
                    AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
                    Value(), inBoundsAttr)
                    .op;

            maybeAssignMask(b, xferOp, newXferOp, i);
          });
    }

    rewriter.eraseOp(xferOp);
    return success();
  }
};

} // namespace lowering_n_d_unrolled

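/// Patterns in this namespace lower 1-D vector transfer ops to scalar
/// loads/stores inside an scf.for loop.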
namespace lowering_1_d {

/// Compute the indices into the memref for the LoadOp/StoreOp generated as
/// part of TransferOp1dConversion. Return the memref dimension on which
/// the transfer is operating. A return value of None indicates a broadcast.
template <typename OpTy>
static Optional<int64_t>
get1dMemrefIndices(OpTy xferOp, Value iv,
                   SmallVector<Value, 8> &memrefIndices) {
  auto indices = xferOp.indices();
  auto map = xferOp.permutation_map();

  memrefIndices.append(indices.begin(), indices.end());
  assert(map.getNumResults() == 1 &&
         "Expected 1 permutation map result for 1D transfer");
  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
    auto dim = expr.getPosition();
    using edsc::op::operator+;
    memrefIndices[dim] = memrefIndices[dim] + iv;
    return dim;
  }

  assert(xferOp.isBroadcastDim(0) &&
         "Expected AffineDimExpr or AffineConstantExpr");
  return None;
}

/// Codegen strategy for TransferOp1dConversion, depending on the
/// operation.
template <typename OpTy>
struct Strategy1d;

/// Codegen strategy for TransferReadOp.
template <>
struct Strategy1d<TransferReadOp> {
  static void generateForLoopBody(OpBuilder &builder, Location loc,
                                  TransferReadOp xferOp, Value iv,
                                  ValueRange loopState) {
    SmallVector<Value, 8> indices;
    auto dim = get1dMemrefIndices(xferOp, iv, indices);
    auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
    auto vec = loopState[0];

    // In case of out-of-bounds access, leave `vec` as is (was initialized with
    // padding value).
    auto nextVec = generateInBoundsCheck(
        xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
        /*inBoundsCase=*/
        [&](OpBuilder & /*b*/, Location loc) {
          auto val = memref_load(xferOp.source(), indices);
          return vector_insert_element(val, vec, ivI32.value).value;
        },
        /*outOfBoundsCase=*/
        [&](OpBuilder & /*b*/, Location loc) { return vec; });
    builder.create<scf::YieldOp>(loc, nextVec);
  }

  static Value initialLoopState(TransferReadOp xferOp) {
    // Initialize the vector with the padding value.
    return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
  }
};

/// Codegen strategy for TransferWriteOp.
template <>
struct Strategy1d<TransferWriteOp> {
  static void generateForLoopBody(OpBuilder &builder, Location loc,
                                  TransferWriteOp xferOp, Value iv,
                                  ValueRange /*loopState*/) {
    SmallVector<Value, 8> indices;
    auto dim = get1dMemrefIndices(xferOp, iv, indices);
    auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);

    // Nothing to do in case of out-of-bounds access.
    generateInBoundsCheck(
        xferOp, iv, builder, dim,
        /*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) {
          auto val = vector_extract_element(xferOp.vector(), ivI32.value);
          memref_store(val, xferOp.source(), indices);
        });
    builder.create<scf::YieldOp>(loc);
  }

  static Value initialLoopState(TransferWriteOp xferOp) { return Value(); }
};

/// Return true if the last dimension of the MemRefType has unit stride.
static bool isLastMemrefDimUnitStride(MemRefType type) {
  int64_t offset;
  SmallVector<int64_t, 4> strides;
  auto successStrides = getStridesAndOffset(type, strides, offset);
  return succeeded(successStrides) && strides.back() == 1;
}

/// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
/// necessary in cases where a 1D vector transfer op cannot be lowered into
/// vector load/stores due to non-unit strides or broadcasts:
///
/// * Transfer dimension is not the last memref dimension
/// * Transfer dimension is a broadcast (i.e., scalar load + broadcast)
/// * Memref has a layout map with non-unit stride on the last dimension
///
/// This pattern generates IR as follows:
///
/// 1. Generate a for loop iterating over each vector element.
/// 2. Inside the loop, generate an InsertElementOp or ExtractElementOp,
///    depending on OpTy.
///
/// TODO: In some cases (no masking, etc.), LLVM::MatrixColumnMajorLoadOp
///       can be generated instead of TransferOp1dConversion. Add such a pattern
///       to ConvertVectorToLLVM.
///
/// E.g.:
/// ```
/// vector.transfer_write %vec, %A[%a, %b]
///    {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
///    : vector<9xf32>, memref<?x?xf32>
/// ```
/// Is rewritten to approximately the following pseudo-IR:
/// ```
/// for i = 0 to 9 {
///   %t = vector.extractelement %vec[i] : vector<9xf32>
///   memref.store %t, %A[%a + i, %b] : memref<?x?xf32>
/// }
/// ```
template <typename OpTy>
struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(OpTy xferOp,
                                PatternRewriter &rewriter) const override {
    ScopedContext scope(rewriter, xferOp.getLoc());
    auto map = xferOp.permutation_map();
    auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();

    if (!memRefType)
      return failure();
    if (xferOp.getVectorType().getRank() != 1)
      return failure();
    if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
      return failure(); // Handled by ConvertVectorToLLVM

    // Loop bounds, step, state...
    auto vecType = xferOp.getVectorType();
    auto lb = std_constant_index(0);
    auto ub = std_constant_index(vecType.getDimSize(0));
    auto step = std_constant_index(1);
    auto loopState = Strategy1d<OpTy>::initialLoopState(xferOp);

    // Generate for loop.
    rewriter.replaceOpWithNewOp<scf::ForOp>(
        xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
        [&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
          ScopedContext nestedScope(builder, loc);
          Strategy1d<OpTy>::generateForLoopBody(builder, loc, xferOp, iv,
                                                loopState);
        });

    return success();
  }
};

} // namespace lowering_1_d
} // namespace

namespace mlir {

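/// Populate `patterns` with either the buffer-based progressive lowering
/// patterns or their unrolled variants, plus the scalar 1-D lowering patterns
/// when targeting rank 1.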
void populateVectorToSCFConversionPatterns(
    RewritePatternSet &patterns, const VectorTransferToSCFOptions &options) {
  if (options.unroll) {
    patterns.add<lowering_n_d_unrolled::UnrollTransferReadConversion,
                 lowering_n_d_unrolled::UnrollTransferWriteConversion>(
        patterns.getContext(), options);
  } else {
    patterns.add<lowering_n_d::PrepareTransferReadConversion,
                 lowering_n_d::PrepareTransferWriteConversion,
                 lowering_n_d::TransferOpConversion<TransferReadOp>,
                 lowering_n_d::TransferOpConversion<TransferWriteOp>>(
        patterns.getContext(), options);
  }

  if (options.targetRank == 1) {
    patterns.add<lowering_1_d::TransferOp1dConversion<TransferReadOp>,
                 lowering_1_d::TransferOp1dConversion<TransferWriteOp>>(
        patterns.getContext(), options);
  }
}

} // namespace mlir

namespace {

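/// Pass that lowers vector transfer ops to SCF, optionally lowering
/// permutation maps first.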
struct ConvertVectorToSCFPass
    : public ConvertVectorToSCFBase<ConvertVectorToSCFPass> {
  ConvertVectorToSCFPass() = default;
  ConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
    this->fullUnroll = options.unroll;
    this->targetRank = options.targetRank;
    this->lowerPermutationMaps = options.lowerPermutationMaps;
  }

  void runOnFunction() override {
    VectorTransferToSCFOptions options;
    options.unroll = fullUnroll;
    options.targetRank = targetRank;
    options.lowerPermutationMaps = lowerPermutationMaps;

    // Lower permutation maps first.
    if (lowerPermutationMaps) {
      RewritePatternSet lowerTransferPatterns(getFunction().getContext());
      mlir::vector::populateVectorTransferPermutationMapLoweringPatterns(
          lowerTransferPatterns);
      (void)applyPatternsAndFoldGreedily(getFunction(),
                                         std::move(lowerTransferPatterns));
    }

    RewritePatternSet patterns(getFunction().getContext());
    populateVectorToSCFConversionPatterns(patterns, options);
    (void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
  }
};

} // namespace

std::unique_ptr<Pass>
mlir::createConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
  return std::make_unique<ConvertVectorToSCFPass>(options);
}