//===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the Vector dialect.
//
//===----------------------------------------------------------------------===//
12
13 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
14
15 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
16 #include "mlir/Dialect/Affine/IR/AffineOps.h"
17 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
18 #include "mlir/Dialect/Func/IR/FuncOps.h"
19 #include "mlir/Dialect/MemRef/IR/MemRef.h"
20 #include "mlir/Dialect/Tensor/IR/Tensor.h"
21 #include "mlir/Dialect/Vector/IR/VectorOps.h"
22 #include "mlir/IR/Builders.h"
23 #include "mlir/IR/IntegerSet.h"
24 #include "mlir/IR/Operation.h"
25 #include "mlir/IR/TypeUtilities.h"
26 #include "mlir/Support/LLVM.h"
27 #include "mlir/Support/MathExtras.h"
28 #include <numeric>
29
30 #include "llvm/ADT/DenseSet.h"
31 #include "llvm/ADT/SetVector.h"
32
33 using namespace mlir;
34
35 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
36 /// the type of `source`.
createOrFoldDimOp(OpBuilder & b,Location loc,Value source,int64_t dim)37 Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source,
38 int64_t dim) {
39 if (source.getType().isa<UnrankedMemRefType, MemRefType>())
40 return b.createOrFold<memref::DimOp>(loc, source, dim);
41 if (source.getType().isa<UnrankedTensorType, RankedTensorType>())
42 return b.createOrFold<tensor::DimOp>(loc, source, dim);
43 llvm_unreachable("Expected MemRefType or TensorType");
44 }
45
46 /// Return the number of elements of basis, `0` if empty.
computeMaxLinearIndex(ArrayRef<int64_t> basis)47 int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) {
48 if (basis.empty())
49 return 0;
50 return std::accumulate(basis.begin(), basis.end(), 1,
51 std::multiplies<int64_t>());
52 }
53
computeStrides(ArrayRef<int64_t> shape,ArrayRef<int64_t> sizes)54 SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape,
55 ArrayRef<int64_t> sizes) {
56 int64_t rank = shape.size();
57 // Compute the count for each dimension.
58 SmallVector<int64_t, 4> sliceDimCounts(rank);
59 for (int64_t r = 0; r < rank; ++r)
60 sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);
61 // Use that to compute the slice stride for each dimension.
62 SmallVector<int64_t, 4> sliceStrides(rank);
63 sliceStrides[rank - 1] = 1;
64 for (int64_t r = rank - 2; r >= 0; --r)
65 sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1];
66 return sliceStrides;
67 }
68
computeElementOffsetsFromVectorSliceOffsets(ArrayRef<int64_t> sizes,ArrayRef<int64_t> vectorOffsets)69 SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets(
70 ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) {
71 SmallVector<int64_t, 4> result;
72 for (auto it : llvm::zip(vectorOffsets, sizes))
73 result.push_back(std::get<0>(it) * std::get<1>(it));
74 return result;
75 }
76
shapeRatio(ArrayRef<int64_t> superShape,ArrayRef<int64_t> subShape)77 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape,
78 ArrayRef<int64_t> subShape) {
79 if (superShape.size() < subShape.size()) {
80 return Optional<SmallVector<int64_t, 4>>();
81 }
82
83 // Starting from the end, compute the integer divisors.
84 std::vector<int64_t> result;
85 result.reserve(superShape.size());
86 int64_t superSize = 0, subSize = 0;
87 for (auto it :
88 llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) {
89 std::tie(superSize, subSize) = it;
90 assert(superSize > 0 && "superSize must be > 0");
91 assert(subSize > 0 && "subSize must be > 0");
92
93 // If integral division does not occur, return and let the caller decide.
94 if (superSize % subSize != 0)
95 return None;
96 result.push_back(superSize / subSize);
97 }
98
99 // At this point we computed the ratio (in reverse) for the common
100 // size. Fill with the remaining entries from the super-vector shape (still in
101 // reverse).
102 int commonSize = subShape.size();
103 std::copy(superShape.rbegin() + commonSize, superShape.rend(),
104 std::back_inserter(result));
105
106 assert(result.size() == superShape.size() &&
107 "super to sub shape ratio is not of the same size as the super rank");
108
109 // Reverse again to get it back in the proper order and return.
110 return SmallVector<int64_t, 4>{result.rbegin(), result.rend()};
111 }
112
shapeRatio(VectorType superVectorType,VectorType subVectorType)113 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType,
114 VectorType subVectorType) {
115 assert(superVectorType.getElementType() == subVectorType.getElementType() &&
116 "vector types must be of the same elemental type");
117 return shapeRatio(superVectorType.getShape(), subVectorType.getShape());
118 }
119
120 /// Constructs a permutation map from memref indices to vector dimension.
121 ///
122 /// The implementation uses the knowledge of the mapping of enclosing loop to
123 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a
124 /// map with:
125 /// - keys representing "vectorized enclosing loops";
126 /// - values representing the corresponding vector dimension.
127 /// The algorithm traverses "vectorized enclosing loops" and extracts the
128 /// at-most-one MemRef index that is invariant along said loop. This index is
129 /// guaranteed to be at most one by construction: otherwise the MemRef is not
130 /// vectorizable.
131 /// If this invariant index is found, it is added to the permutation_map at the
132 /// proper vector dimension.
133 /// If no index is found to be invariant, 0 is added to the permutation_map and
134 /// corresponds to a vector broadcast along that dimension.
135 ///
136 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
137 /// signalling that no permutation map can be constructed given
138 /// `enclosingLoopToVectorDim`.
139 ///
140 /// Examples can be found in the documentation of `makePermutationMap`, in the
141 /// header file.
makePermutationMap(ArrayRef<Value> indices,const DenseMap<Operation *,unsigned> & enclosingLoopToVectorDim)142 static AffineMap makePermutationMap(
143 ArrayRef<Value> indices,
144 const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
145 if (enclosingLoopToVectorDim.empty())
146 return AffineMap();
147 MLIRContext *context =
148 enclosingLoopToVectorDim.begin()->getFirst()->getContext();
149 SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
150 getAffineConstantExpr(0, context));
151
152 for (auto kvp : enclosingLoopToVectorDim) {
153 assert(kvp.second < perm.size());
154 auto invariants = getInvariantAccesses(
155 cast<AffineForOp>(kvp.first).getInductionVar(), indices);
156 unsigned numIndices = indices.size();
157 unsigned countInvariantIndices = 0;
158 for (unsigned dim = 0; dim < numIndices; ++dim) {
159 if (!invariants.count(indices[dim])) {
160 assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
161 "permutationMap already has an entry along dim");
162 perm[kvp.second] = getAffineDimExpr(dim, context);
163 } else {
164 ++countInvariantIndices;
165 }
166 }
167 assert((countInvariantIndices == numIndices ||
168 countInvariantIndices == numIndices - 1) &&
169 "Vectorization prerequisite violated: at most 1 index may be "
170 "invariant wrt a vectorized loop");
171 }
172 return AffineMap::get(indices.size(), 0, perm, context);
173 }
174
175 /// Implementation detail that walks up the parents and records the ones with
176 /// the specified type.
177 /// TODO: could also be implemented as a collect parents followed by a
178 /// filter and made available outside this file.
179 template <typename T>
getParentsOfType(Block * block)180 static SetVector<Operation *> getParentsOfType(Block *block) {
181 SetVector<Operation *> res;
182 auto *current = block->getParentOp();
183 while (current) {
184 if (auto typedParent = dyn_cast<T>(current)) {
185 assert(res.count(current) == 0 && "Already inserted");
186 res.insert(current);
187 }
188 current = current->getParentOp();
189 }
190 return res;
191 }
192
193 /// Returns the enclosing AffineForOp, from closest to farthest.
getEnclosingforOps(Block * block)194 static SetVector<Operation *> getEnclosingforOps(Block *block) {
195 return getParentsOfType<AffineForOp>(block);
196 }
197
makePermutationMap(Block * insertPoint,ArrayRef<Value> indices,const DenseMap<Operation *,unsigned> & loopToVectorDim)198 AffineMap mlir::makePermutationMap(
199 Block *insertPoint, ArrayRef<Value> indices,
200 const DenseMap<Operation *, unsigned> &loopToVectorDim) {
201 DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
202 auto enclosingLoops = getEnclosingforOps(insertPoint);
203 for (auto *forInst : enclosingLoops) {
204 auto it = loopToVectorDim.find(forInst);
205 if (it != loopToVectorDim.end()) {
206 enclosingLoopToVectorDim.insert(*it);
207 }
208 }
209 return ::makePermutationMap(indices, enclosingLoopToVectorDim);
210 }
211
makePermutationMap(Operation * op,ArrayRef<Value> indices,const DenseMap<Operation *,unsigned> & loopToVectorDim)212 AffineMap mlir::makePermutationMap(
213 Operation *op, ArrayRef<Value> indices,
214 const DenseMap<Operation *, unsigned> &loopToVectorDim) {
215 return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
216 }
217
operatesOnSuperVectorsOf(Operation & op,VectorType subVectorType)218 bool matcher::operatesOnSuperVectorsOf(Operation &op,
219 VectorType subVectorType) {
220 // First, extract the vector type and distinguish between:
221 // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
222 // vector.transfer_write); and
223 // b. ops that *may* lower a super-vector (all other ops).
224 // The ops that *may* lower a super-vector only do so if the super-vector to
225 // sub-vector ratio exists. The ops that *must* lower a super-vector are
226 // explicitly checked for this property.
227 /// TODO: there should be a single function for all ops to do this so we
228 /// do not have to special case. Maybe a trait, or just a method, unclear atm.
229 bool mustDivide = false;
230 (void)mustDivide;
231 VectorType superVectorType;
232 if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
233 superVectorType = transfer.getVectorType();
234 mustDivide = true;
235 } else if (op.getNumResults() == 0) {
236 if (!isa<func::ReturnOp>(op)) {
237 op.emitError("NYI: assuming only return operations can have 0 "
238 " results at this point");
239 }
240 return false;
241 } else if (op.getNumResults() == 1) {
242 if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
243 superVectorType = v;
244 } else {
245 // Not a vector type.
246 return false;
247 }
248 } else {
249 // Not a vector.transfer and has more than 1 result, fail hard for now to
250 // wake us up when something changes.
251 op.emitError("NYI: operation has more than 1 result");
252 return false;
253 }
254
255 // Get the ratio.
256 auto ratio = shapeRatio(superVectorType, subVectorType);
257
258 // Sanity check.
259 assert((ratio || !mustDivide) &&
260 "vector.transfer operation in which super-vector size is not an"
261 " integer multiple of sub-vector size");
262
263 // This catches cases that are not strictly necessary to have multiplicity but
264 // still aren't divisible by the sub-vector shape.
265 // This could be useful information if we wanted to reshape at the level of
266 // the vector type (but we would have to look at the compute and distinguish
267 // between parallel, reduction and possibly other cases.
268 return ratio.has_value();
269 }
270