1 //===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===// 2 // 3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements utility methods for working with the Vector dialect. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "mlir/Dialect/Vector/Utils/VectorUtils.h" 14 15 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" 16 #include "mlir/Dialect/Affine/IR/AffineOps.h" 17 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" 18 #include "mlir/Dialect/MemRef/IR/MemRef.h" 19 #include "mlir/Dialect/Tensor/IR/Tensor.h" 20 #include "mlir/Dialect/Vector/IR/VectorOps.h" 21 #include "mlir/IR/Builders.h" 22 #include "mlir/IR/IntegerSet.h" 23 #include "mlir/IR/Operation.h" 24 #include "mlir/Support/LLVM.h" 25 #include "mlir/Support/MathExtras.h" 26 #include <numeric> 27 28 #include "llvm/ADT/DenseSet.h" 29 #include "llvm/ADT/SetVector.h" 30 31 using namespace mlir; 32 33 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on 34 /// the type of `source`. 35 Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source, 36 int64_t dim) { 37 if (source.getType().isa<UnrankedMemRefType, MemRefType>()) 38 return b.createOrFold<memref::DimOp>(loc, source, dim); 39 if (source.getType().isa<UnrankedTensorType, RankedTensorType>()) 40 return b.createOrFold<tensor::DimOp>(loc, source, dim); 41 llvm_unreachable("Expected MemRefType or TensorType"); 42 } 43 44 /// Return the number of elements of basis, `0` if empty. 
45 int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) { 46 if (basis.empty()) 47 return 0; 48 return std::accumulate(basis.begin(), basis.end(), 1, 49 std::multiplies<int64_t>()); 50 } 51 52 SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape, 53 ArrayRef<int64_t> sizes) { 54 int64_t rank = shape.size(); 55 // Compute the count for each dimension. 56 SmallVector<int64_t, 4> sliceDimCounts(rank); 57 for (int64_t r = 0; r < rank; ++r) 58 sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]); 59 // Use that to compute the slice stride for each dimension. 60 SmallVector<int64_t, 4> sliceStrides(rank); 61 sliceStrides[rank - 1] = 1; 62 for (int64_t r = rank - 2; r >= 0; --r) 63 sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1]; 64 return sliceStrides; 65 } 66 67 SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets( 68 ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) { 69 SmallVector<int64_t, 4> result; 70 for (auto it : llvm::zip(vectorOffsets, sizes)) 71 result.push_back(std::get<0>(it) * std::get<1>(it)); 72 return result; 73 } 74 75 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape, 76 ArrayRef<int64_t> subShape) { 77 if (superShape.size() < subShape.size()) { 78 return Optional<SmallVector<int64_t, 4>>(); 79 } 80 81 // Starting from the end, compute the integer divisors. 82 std::vector<int64_t> result; 83 result.reserve(superShape.size()); 84 int64_t superSize = 0, subSize = 0; 85 for (auto it : 86 llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) { 87 std::tie(superSize, subSize) = it; 88 assert(superSize > 0 && "superSize must be > 0"); 89 assert(subSize > 0 && "subSize must be > 0"); 90 91 // If integral division does not occur, return and let the caller decide. 92 if (superSize % subSize != 0) 93 return None; 94 result.push_back(superSize / subSize); 95 } 96 97 // At this point we computed the ratio (in reverse) for the common 98 // size. 
Fill with the remaining entries from the super-vector shape (still in 99 // reverse). 100 int commonSize = subShape.size(); 101 std::copy(superShape.rbegin() + commonSize, superShape.rend(), 102 std::back_inserter(result)); 103 104 assert(result.size() == superShape.size() && 105 "super to sub shape ratio is not of the same size as the super rank"); 106 107 // Reverse again to get it back in the proper order and return. 108 return SmallVector<int64_t, 4>{result.rbegin(), result.rend()}; 109 } 110 111 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType, 112 VectorType subVectorType) { 113 assert(superVectorType.getElementType() == subVectorType.getElementType() && 114 "vector types must be of the same elemental type"); 115 return shapeRatio(superVectorType.getShape(), subVectorType.getShape()); 116 } 117 118 /// Constructs a permutation map from memref indices to vector dimension. 119 /// 120 /// The implementation uses the knowledge of the mapping of enclosing loop to 121 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a 122 /// map with: 123 /// - keys representing "vectorized enclosing loops"; 124 /// - values representing the corresponding vector dimension. 125 /// The algorithm traverses "vectorized enclosing loops" and extracts the 126 /// at-most-one MemRef index that is invariant along said loop. This index is 127 /// guaranteed to be at most one by construction: otherwise the MemRef is not 128 /// vectorizable. 129 /// If this invariant index is found, it is added to the permutation_map at the 130 /// proper vector dimension. 131 /// If no index is found to be invariant, 0 is added to the permutation_map and 132 /// corresponds to a vector broadcast along that dimension. 133 /// 134 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty, 135 /// signalling that no permutation map can be constructed given 136 /// `enclosingLoopToVectorDim`. 
///
/// Examples can be found in the documentation of `makePermutationMap`, in the
/// header file.
static AffineMap makePermutationMap(
    ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
  // No vectorized enclosing loop => no permutation map can be constructed.
  if (enclosingLoopToVectorDim.empty())
    return AffineMap();
  // Borrow the context from any loop operation in the map.
  MLIRContext *context =
      enclosingLoopToVectorDim.begin()->getFirst()->getContext();
  // Start every vector dimension as a broadcast (constant 0); entries are
  // overwritten below when a varying index is found for that dimension.
  SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
                                  getAffineConstantExpr(0, context));

  for (auto kvp : enclosingLoopToVectorDim) {
    assert(kvp.second < perm.size());
    // Indices that do NOT vary with this loop's induction variable.
    auto invariants = getInvariantAccesses(
        cast<AffineForOp>(kvp.first).getInductionVar(), indices);
    unsigned numIndices = indices.size();
    unsigned countInvariantIndices = 0;
    for (unsigned dim = 0; dim < numIndices; ++dim) {
      if (!invariants.count(indices[dim])) {
        // The index varying along this loop maps to the loop's vector
        // dimension; the assert enforces that at most one such index exists.
        assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
               "permutationMap already has an entry along dim");
        perm[kvp.second] = getAffineDimExpr(dim, context);
      } else {
        ++countInvariantIndices;
      }
    }
    // Vectorization prerequisite: all indices invariant (broadcast) or
    // exactly one varying index per vectorized loop.
    assert((countInvariantIndices == numIndices ||
            countInvariantIndices == numIndices - 1) &&
           "Vectorization prerequisite violated: at most 1 index may be "
           "invariant wrt a vectorized loop");
  }
  return AffineMap::get(indices.size(), 0, perm, context);
}

/// Implementation detail that walks up the parents and records the ones with
/// the specified type.
/// TODO: could also be implemented as a collect parents followed by a
/// filter and made available outside this file.
/// Walks up the region tree from `block` and returns every transitive parent
/// operation that is an instance of `T`, ordered from closest to farthest.
template <typename T>
static SetVector<Operation *> getParentsOfType(Block *block) {
  SetVector<Operation *> res;
  auto *current = block->getParentOp();
  while (current) {
    if (auto typedParent = dyn_cast<T>(current)) {
      assert(res.count(current) == 0 && "Already inserted");
      res.insert(current);
    }
    current = current->getParentOp();
  }
  return res;
}

/// Returns the enclosing AffineForOp, from closest to farthest.
static SetVector<Operation *> getEnclosingforOps(Block *block) {
  return getParentsOfType<AffineForOp>(block);
}

/// Builds the permutation map for `indices` at `insertPoint` by restricting
/// `loopToVectorDim` to the AffineForOps that actually enclose `insertPoint`,
/// then delegating to the static helper above.
AffineMap mlir::makePermutationMap(
    Block *insertPoint, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
  auto enclosingLoops = getEnclosingforOps(insertPoint);
  // Keep only the map entries whose loop encloses the insertion point.
  for (auto *forInst : enclosingLoops) {
    auto it = loopToVectorDim.find(forInst);
    if (it != loopToVectorDim.end()) {
      enclosingLoopToVectorDim.insert(*it);
    }
  }
  return ::makePermutationMap(indices, enclosingLoopToVectorDim);
}

/// Convenience overload anchoring the computation at `op`'s enclosing block.
AffineMap mlir::makePermutationMap(
    Operation *op, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
}

/// Returns true when `op` produces (or transfers) a vector whose shape is an
/// integer multiple of `subVectorType`'s shape. Emits a diagnostic and
/// returns false for shapes of ops this matcher does not yet handle.
bool matcher::operatesOnSuperVectorsOf(Operation &op,
                                       VectorType subVectorType) {
  // First, extract the vector type and distinguish between:
  // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
  // vector.transfer_write); and
  // b. ops that *may* lower a super-vector (all other ops).
  // The ops that *may* lower a super-vector only do so if the super-vector to
  // sub-vector ratio exists. The ops that *must* lower a super-vector are
  // explicitly checked for this property.
  /// TODO: there should be a single function for all ops to do this so we
  /// do not have to special case. Maybe a trait, or just a method, unclear atm.
  bool mustDivide = false;
  // Silences the unused-variable warning in NDEBUG builds, where the assert
  // below (the only reader of `mustDivide`) compiles away.
  (void)mustDivide;
  VectorType superVectorType;
  if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
    superVectorType = transfer.getVectorType();
    mustDivide = true;
  } else if (op.getNumResults() == 0) {
    if (!isa<ReturnOp>(op)) {
      op.emitError("NYI: assuming only return operations can have 0 "
                   " results at this point");
    }
    return false;
  } else if (op.getNumResults() == 1) {
    if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
      superVectorType = v;
    } else {
      // Not a vector type.
      return false;
    }
  } else {
    // Not a vector.transfer and has more than 1 result, fail hard for now to
    // wake us up when something changes.
    op.emitError("NYI: operation has more than 1 result");
    return false;
  }

  // Get the ratio.
  auto ratio = shapeRatio(superVectorType, subVectorType);

  // Sanity check.
  assert((ratio.hasValue() || !mustDivide) &&
         "vector.transfer operation in which super-vector size is not an"
         " integer multiple of sub-vector size");

  // This catches cases that are not strictly necessary to have multiplicity but
  // still aren't divisible by the sub-vector shape.
  // This could be useful information if we wanted to reshape at the level of
  // the vector type (but we would have to look at the compute and distinguish
  // between parallel, reduction and possibly other cases.
  return ratio.hasValue();
}