1 //===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===// 2 // 3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements utility methods for working with the Vector dialect. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "mlir/Dialect/Vector/Utils/VectorUtils.h" 14 15 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" 16 #include "mlir/Dialect/Affine/IR/AffineOps.h" 17 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" 18 #include "mlir/Dialect/MemRef/IR/MemRef.h" 19 #include "mlir/Dialect/StandardOps/IR/Ops.h" 20 #include "mlir/Dialect/Tensor/IR/Tensor.h" 21 #include "mlir/Dialect/Vector/IR/VectorOps.h" 22 #include "mlir/IR/Builders.h" 23 #include "mlir/IR/IntegerSet.h" 24 #include "mlir/IR/Operation.h" 25 #include "mlir/Support/LLVM.h" 26 #include "mlir/Support/MathExtras.h" 27 #include <numeric> 28 29 #include "llvm/ADT/DenseSet.h" 30 #include "llvm/ADT/SetVector.h" 31 32 using namespace mlir; 33 34 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on 35 /// the type of `source`. 36 Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source, 37 int64_t dim) { 38 if (source.getType().isa<UnrankedMemRefType, MemRefType>()) 39 return b.createOrFold<memref::DimOp>(loc, source, dim); 40 if (source.getType().isa<UnrankedTensorType, RankedTensorType>()) 41 return b.createOrFold<tensor::DimOp>(loc, source, dim); 42 llvm_unreachable("Expected MemRefType or TensorType"); 43 } 44 45 /// Return the number of elements of basis, `0` if empty. 
46 int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) { 47 if (basis.empty()) 48 return 0; 49 return std::accumulate(basis.begin(), basis.end(), 1, 50 std::multiplies<int64_t>()); 51 } 52 53 SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape, 54 ArrayRef<int64_t> sizes) { 55 int64_t rank = shape.size(); 56 // Compute the count for each dimension. 57 SmallVector<int64_t, 4> sliceDimCounts(rank); 58 for (int64_t r = 0; r < rank; ++r) 59 sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]); 60 // Use that to compute the slice stride for each dimension. 61 SmallVector<int64_t, 4> sliceStrides(rank); 62 sliceStrides[rank - 1] = 1; 63 for (int64_t r = rank - 2; r >= 0; --r) 64 sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1]; 65 return sliceStrides; 66 } 67 68 SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets( 69 ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) { 70 SmallVector<int64_t, 4> result; 71 for (auto it : llvm::zip(vectorOffsets, sizes)) 72 result.push_back(std::get<0>(it) * std::get<1>(it)); 73 return result; 74 } 75 76 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape, 77 ArrayRef<int64_t> subShape) { 78 if (superShape.size() < subShape.size()) { 79 return Optional<SmallVector<int64_t, 4>>(); 80 } 81 82 // Starting from the end, compute the integer divisors. 83 std::vector<int64_t> result; 84 result.reserve(superShape.size()); 85 int64_t superSize = 0, subSize = 0; 86 for (auto it : 87 llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) { 88 std::tie(superSize, subSize) = it; 89 assert(superSize > 0 && "superSize must be > 0"); 90 assert(subSize > 0 && "subSize must be > 0"); 91 92 // If integral division does not occur, return and let the caller decide. 93 if (superSize % subSize != 0) 94 return None; 95 result.push_back(superSize / subSize); 96 } 97 98 // At this point we computed the ratio (in reverse) for the common 99 // size. 
Fill with the remaining entries from the super-vector shape (still in 100 // reverse). 101 int commonSize = subShape.size(); 102 std::copy(superShape.rbegin() + commonSize, superShape.rend(), 103 std::back_inserter(result)); 104 105 assert(result.size() == superShape.size() && 106 "super to sub shape ratio is not of the same size as the super rank"); 107 108 // Reverse again to get it back in the proper order and return. 109 return SmallVector<int64_t, 4>{result.rbegin(), result.rend()}; 110 } 111 112 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType, 113 VectorType subVectorType) { 114 assert(superVectorType.getElementType() == subVectorType.getElementType() && 115 "vector types must be of the same elemental type"); 116 return shapeRatio(superVectorType.getShape(), subVectorType.getShape()); 117 } 118 119 /// Constructs a permutation map from memref indices to vector dimension. 120 /// 121 /// The implementation uses the knowledge of the mapping of enclosing loop to 122 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a 123 /// map with: 124 /// - keys representing "vectorized enclosing loops"; 125 /// - values representing the corresponding vector dimension. 126 /// The algorithm traverses "vectorized enclosing loops" and extracts the 127 /// at-most-one MemRef index that is invariant along said loop. This index is 128 /// guaranteed to be at most one by construction: otherwise the MemRef is not 129 /// vectorizable. 130 /// If this invariant index is found, it is added to the permutation_map at the 131 /// proper vector dimension. 132 /// If no index is found to be invariant, 0 is added to the permutation_map and 133 /// corresponds to a vector broadcast along that dimension. 134 /// 135 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty, 136 /// signalling that no permutation map can be constructed given 137 /// `enclosingLoopToVectorDim`. 
138 /// 139 /// Examples can be found in the documentation of `makePermutationMap`, in the 140 /// header file. 141 static AffineMap makePermutationMap( 142 ArrayRef<Value> indices, 143 const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) { 144 if (enclosingLoopToVectorDim.empty()) 145 return AffineMap(); 146 MLIRContext *context = 147 enclosingLoopToVectorDim.begin()->getFirst()->getContext(); 148 SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(), 149 getAffineConstantExpr(0, context)); 150 151 for (auto kvp : enclosingLoopToVectorDim) { 152 assert(kvp.second < perm.size()); 153 auto invariants = getInvariantAccesses( 154 cast<AffineForOp>(kvp.first).getInductionVar(), indices); 155 unsigned numIndices = indices.size(); 156 unsigned countInvariantIndices = 0; 157 for (unsigned dim = 0; dim < numIndices; ++dim) { 158 if (!invariants.count(indices[dim])) { 159 assert(perm[kvp.second] == getAffineConstantExpr(0, context) && 160 "permutationMap already has an entry along dim"); 161 perm[kvp.second] = getAffineDimExpr(dim, context); 162 } else { 163 ++countInvariantIndices; 164 } 165 } 166 assert((countInvariantIndices == numIndices || 167 countInvariantIndices == numIndices - 1) && 168 "Vectorization prerequisite violated: at most 1 index may be " 169 "invariant wrt a vectorized loop"); 170 } 171 return AffineMap::get(indices.size(), 0, perm, context); 172 } 173 174 /// Implementation detail that walks up the parents and records the ones with 175 /// the specified type. 176 /// TODO: could also be implemented as a collect parents followed by a 177 /// filter and made available outside this file. 
template <typename T>
static SetVector<Operation *> getParentsOfType(Block *block) {
  SetVector<Operation *> res;
  // Walk the parent-op chain from the block's immediate parent to the root,
  // keeping (in closest-to-farthest order) those that are of type T.
  auto *current = block->getParentOp();
  while (current) {
    if (auto typedParent = dyn_cast<T>(current)) {
      assert(res.count(current) == 0 && "Already inserted");
      res.insert(current);
    }
    current = current->getParentOp();
  }
  return res;
}

/// Returns the enclosing AffineForOp, from closest to farthest.
static SetVector<Operation *> getEnclosingforOps(Block *block) {
  return getParentsOfType<AffineForOp>(block);
}

/// Builds the permutation map for `indices` at `insertPoint`: filters
/// `loopToVectorDim` down to the loops that actually enclose `insertPoint`,
/// then delegates to the static helper above.
AffineMap mlir::makePermutationMap(
    Block *insertPoint, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
  auto enclosingLoops = getEnclosingforOps(insertPoint);
  for (auto *forInst : enclosingLoops) {
    // Keep only the (loop, vector dim) entries whose loop encloses the
    // insertion point.
    auto it = loopToVectorDim.find(forInst);
    if (it != loopToVectorDim.end()) {
      enclosingLoopToVectorDim.insert(*it);
    }
  }
  return ::makePermutationMap(indices, enclosingLoopToVectorDim);
}

/// Convenience overload: uses the block containing `op` as the insertion
/// point.
AffineMap mlir::makePermutationMap(
    Operation *op, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
}

/// Returns true if `op` operates on a super-vector whose shape is an exact
/// per-dimension multiple of `subVectorType`'s shape (in the `shapeRatio`
/// sense). Returns false — possibly after emitting a diagnostic — for ops
/// with no vector result and for unsupported multi-result ops.
bool matcher::operatesOnSuperVectorsOf(Operation &op,
                                       VectorType subVectorType) {
  // First, extract the vector type and distinguish between:
  //   a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
  //      vector.transfer_write); and
  //   b. ops that *may* lower a super-vector (all other ops).
  // The ops that *may* lower a super-vector only do so if the super-vector to
  // sub-vector ratio exists. The ops that *must* lower a super-vector are
  // explicitly checked for this property.
  /// TODO: there should be a single function for all ops to do this so we
  /// do not have to special case. Maybe a trait, or just a method, unclear atm.
  // `mustDivide` is only read inside the assert below; the (void) cast
  // silences the unused-variable warning in NDEBUG builds.
  bool mustDivide = false;
  (void)mustDivide;
  VectorType superVectorType;
  if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
    // Case a.: transfer ops carry their vector type and *must* divide.
    superVectorType = transfer.getVectorType();
    mustDivide = true;
  } else if (op.getNumResults() == 0) {
    if (!isa<ReturnOp>(op)) {
      op.emitError("NYI: assuming only return operations can have 0 "
                   " results at this point");
    }
    return false;
  } else if (op.getNumResults() == 1) {
    if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
      superVectorType = v;
    } else {
      // Not a vector type.
      return false;
    }
  } else {
    // Not a vector.transfer and has more than 1 result, fail hard for now to
    // wake us up when something changes.
    op.emitError("NYI: operation has more than 1 result");
    return false;
  }

  // Get the ratio.
  auto ratio = shapeRatio(superVectorType, subVectorType);

  // Sanity check: a transfer op (mustDivide) whose super-vector shape is not
  // an exact multiple of the sub-vector shape is a hard error.
  assert((ratio.hasValue() || !mustDivide) &&
         "vector.transfer operation in which super-vector size is not an"
         " integer multiple of sub-vector size");

  // This catches cases that are not strictly necessary to have multiplicity but
  // still aren't divisible by the sub-vector shape.
  // This could be useful information if we wanted to reshape at the level of
  // the vector type (but we would have to look at the compute and distinguish
  // between parallel, reduction and possibly other cases.
  return ratio.hasValue();
}