1 //===- VectorUtils.cpp - MLIR Utilities for VectorOps   ------------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utility methods for working with the Vector dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
14 
15 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
16 #include "mlir/Dialect/Affine/IR/AffineOps.h"
17 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
18 #include "mlir/Dialect/MemRef/IR/MemRef.h"
19 #include "mlir/Dialect/StandardOps/IR/Ops.h"
20 #include "mlir/Dialect/Tensor/IR/Tensor.h"
21 #include "mlir/Dialect/Vector/IR/VectorOps.h"
22 #include "mlir/IR/Builders.h"
23 #include "mlir/IR/IntegerSet.h"
24 #include "mlir/IR/Operation.h"
25 #include "mlir/Support/LLVM.h"
26 #include "mlir/Support/MathExtras.h"
27 #include <numeric>
28 
29 #include "llvm/ADT/DenseSet.h"
30 #include "llvm/ADT/SetVector.h"
31 
32 using namespace mlir;
33 
34 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
35 /// the type of `source`.
36 Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source,
37                                       int64_t dim) {
38   if (source.getType().isa<UnrankedMemRefType, MemRefType>())
39     return b.createOrFold<memref::DimOp>(loc, source, dim);
40   if (source.getType().isa<UnrankedTensorType, RankedTensorType>())
41     return b.createOrFold<tensor::DimOp>(loc, source, dim);
42   llvm_unreachable("Expected MemRefType or TensorType");
43 }
44 
45 /// Return the number of elements of basis, `0` if empty.
46 int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) {
47   if (basis.empty())
48     return 0;
49   return std::accumulate(basis.begin(), basis.end(), 1,
50                          std::multiplies<int64_t>());
51 }
52 
53 SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape,
54                                              ArrayRef<int64_t> sizes) {
55   int64_t rank = shape.size();
56   // Compute the count for each dimension.
57   SmallVector<int64_t, 4> sliceDimCounts(rank);
58   for (int64_t r = 0; r < rank; ++r)
59     sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);
60   // Use that to compute the slice stride for each dimension.
61   SmallVector<int64_t, 4> sliceStrides(rank);
62   sliceStrides[rank - 1] = 1;
63   for (int64_t r = rank - 2; r >= 0; --r)
64     sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1];
65   return sliceStrides;
66 }
67 
68 SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets(
69     ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) {
70   SmallVector<int64_t, 4> result;
71   for (auto it : llvm::zip(vectorOffsets, sizes))
72     result.push_back(std::get<0>(it) * std::get<1>(it));
73   return result;
74 }
75 
76 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape,
77                                                    ArrayRef<int64_t> subShape) {
78   if (superShape.size() < subShape.size()) {
79     return Optional<SmallVector<int64_t, 4>>();
80   }
81 
82   // Starting from the end, compute the integer divisors.
83   std::vector<int64_t> result;
84   result.reserve(superShape.size());
85   int64_t superSize = 0, subSize = 0;
86   for (auto it :
87        llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) {
88     std::tie(superSize, subSize) = it;
89     assert(superSize > 0 && "superSize must be > 0");
90     assert(subSize > 0 && "subSize must be > 0");
91 
92     // If integral division does not occur, return and let the caller decide.
93     if (superSize % subSize != 0)
94       return None;
95     result.push_back(superSize / subSize);
96   }
97 
98   // At this point we computed the ratio (in reverse) for the common
99   // size. Fill with the remaining entries from the super-vector shape (still in
100   // reverse).
101   int commonSize = subShape.size();
102   std::copy(superShape.rbegin() + commonSize, superShape.rend(),
103             std::back_inserter(result));
104 
105   assert(result.size() == superShape.size() &&
106          "super to sub shape ratio is not of the same size as the super rank");
107 
108   // Reverse again to get it back in the proper order and return.
109   return SmallVector<int64_t, 4>{result.rbegin(), result.rend()};
110 }
111 
112 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType,
113                                                    VectorType subVectorType) {
114   assert(superVectorType.getElementType() == subVectorType.getElementType() &&
115          "vector types must be of the same elemental type");
116   return shapeRatio(superVectorType.getShape(), subVectorType.getShape());
117 }
118 
119 /// Constructs a permutation map from memref indices to vector dimension.
120 ///
121 /// The implementation uses the knowledge of the mapping of enclosing loop to
122 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a
123 /// map with:
124 ///   - keys representing "vectorized enclosing loops";
125 ///   - values representing the corresponding vector dimension.
126 /// The algorithm traverses "vectorized enclosing loops" and extracts the
127 /// at-most-one MemRef index that is invariant along said loop. This index is
128 /// guaranteed to be at most one by construction: otherwise the MemRef is not
129 /// vectorizable.
130 /// If this invariant index is found, it is added to the permutation_map at the
131 /// proper vector dimension.
132 /// If no index is found to be invariant, 0 is added to the permutation_map and
133 /// corresponds to a vector broadcast along that dimension.
134 ///
135 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
136 /// signalling that no permutation map can be constructed given
137 /// `enclosingLoopToVectorDim`.
138 ///
139 /// Examples can be found in the documentation of `makePermutationMap`, in the
140 /// header file.
static AffineMap makePermutationMap(
    ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
  // No vectorized enclosing loop: no permutation map can be constructed.
  if (enclosingLoopToVectorDim.empty())
    return AffineMap();
  MLIRContext *context =
      enclosingLoopToVectorDim.begin()->getFirst()->getContext();
  // Initialize every vector dimension to the constant 0 expression, i.e. a
  // broadcast along that dimension unless a varying index is found below.
  SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
                                  getAffineConstantExpr(0, context));

  for (auto kvp : enclosingLoopToVectorDim) {
    assert(kvp.second < perm.size());
    // Indices invariant wrt this loop's induction variable do not map to the
    // loop's vector dimension.
    auto invariants = getInvariantAccesses(
        cast<AffineForOp>(kvp.first).getInductionVar(), indices);
    unsigned numIndices = indices.size();
    unsigned countInvariantIndices = 0;
    for (unsigned dim = 0; dim < numIndices; ++dim) {
      if (!invariants.count(indices[dim])) {
        // This index varies with the loop: map it to the loop's vector dim.
        // There may be at most one such index per loop (checked below).
        assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
               "permutationMap already has an entry along dim");
        perm[kvp.second] = getAffineDimExpr(dim, context);
      } else {
        ++countInvariantIndices;
      }
    }
    // Vectorizability guarantees at most one index varies per vectorized loop.
    assert((countInvariantIndices == numIndices ||
            countInvariantIndices == numIndices - 1) &&
           "Vectorization prerequisite violated: at most 1 index may be "
           "invariant wrt a vectorized loop");
  }
  return AffineMap::get(indices.size(), 0, perm, context);
}
173 
174 /// Implementation detail that walks up the parents and records the ones with
175 /// the specified type.
176 /// TODO: could also be implemented as a collect parents followed by a
177 /// filter and made available outside this file.
178 template <typename T>
179 static SetVector<Operation *> getParentsOfType(Block *block) {
180   SetVector<Operation *> res;
181   auto *current = block->getParentOp();
182   while (current) {
183     if (auto typedParent = dyn_cast<T>(current)) {
184       assert(res.count(current) == 0 && "Already inserted");
185       res.insert(current);
186     }
187     current = current->getParentOp();
188   }
189   return res;
190 }
191 
/// Returns the enclosing AffineForOp, from closest to farthest.
static SetVector<Operation *> getEnclosingforOps(Block *block) {
  // Thin wrapper that fixes the parent operation type to AffineForOp.
  return getParentsOfType<AffineForOp>(block);
}
196 
197 AffineMap mlir::makePermutationMap(
198     Block *insertPoint, ArrayRef<Value> indices,
199     const DenseMap<Operation *, unsigned> &loopToVectorDim) {
200   DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
201   auto enclosingLoops = getEnclosingforOps(insertPoint);
202   for (auto *forInst : enclosingLoops) {
203     auto it = loopToVectorDim.find(forInst);
204     if (it != loopToVectorDim.end()) {
205       enclosingLoopToVectorDim.insert(*it);
206     }
207   }
208   return ::makePermutationMap(indices, enclosingLoopToVectorDim);
209 }
210 
/// Overload that resolves the insertion block from `op` and delegates to the
/// Block-based overload.
AffineMap mlir::makePermutationMap(
    Operation *op, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
}
216 
217 bool matcher::operatesOnSuperVectorsOf(Operation &op,
218                                        VectorType subVectorType) {
219   // First, extract the vector type and distinguish between:
220   //   a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
221   //      vector.transfer_write); and
222   //   b. ops that *may* lower a super-vector (all other ops).
223   // The ops that *may* lower a super-vector only do so if the super-vector to
224   // sub-vector ratio exists. The ops that *must* lower a super-vector are
225   // explicitly checked for this property.
226   /// TODO: there should be a single function for all ops to do this so we
227   /// do not have to special case. Maybe a trait, or just a method, unclear atm.
228   bool mustDivide = false;
229   (void)mustDivide;
230   VectorType superVectorType;
231   if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
232     superVectorType = transfer.getVectorType();
233     mustDivide = true;
234   } else if (op.getNumResults() == 0) {
235     if (!isa<ReturnOp>(op)) {
236       op.emitError("NYI: assuming only return operations can have 0 "
237                    " results at this point");
238     }
239     return false;
240   } else if (op.getNumResults() == 1) {
241     if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
242       superVectorType = v;
243     } else {
244       // Not a vector type.
245       return false;
246     }
247   } else {
248     // Not a vector.transfer and has more than 1 result, fail hard for now to
249     // wake us up when something changes.
250     op.emitError("NYI: operation has more than 1 result");
251     return false;
252   }
253 
254   // Get the ratio.
255   auto ratio = shapeRatio(superVectorType, subVectorType);
256 
257   // Sanity check.
258   assert((ratio.hasValue() || !mustDivide) &&
259          "vector.transfer operation in which super-vector size is not an"
260          " integer multiple of sub-vector size");
261 
262   // This catches cases that are not strictly necessary to have multiplicity but
263   // still aren't divisible by the sub-vector shape.
264   // This could be useful information if we wanted to reshape at the level of
265   // the vector type (but we would have to look at the compute and distinguish
266   // between parallel, reduction and possibly other cases.
267   return ratio.hasValue();
268 }
269