1 //===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===// 2 // 3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements utility methods for working with the Vector dialect. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "mlir/Dialect/Vector/Utils/VectorUtils.h" 14 15 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" 16 #include "mlir/Dialect/Affine/IR/AffineOps.h" 17 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" 18 #include "mlir/Dialect/Func/IR/FuncOps.h" 19 #include "mlir/Dialect/MemRef/IR/MemRef.h" 20 #include "mlir/Dialect/Tensor/IR/Tensor.h" 21 #include "mlir/Dialect/Vector/IR/VectorOps.h" 22 #include "mlir/IR/Builders.h" 23 #include "mlir/IR/IntegerSet.h" 24 #include "mlir/IR/Operation.h" 25 #include "mlir/IR/TypeUtilities.h" 26 #include "mlir/Support/LLVM.h" 27 #include "mlir/Support/MathExtras.h" 28 #include <numeric> 29 30 #include "llvm/ADT/DenseSet.h" 31 #include "llvm/ADT/SetVector.h" 32 33 using namespace mlir; 34 35 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on 36 /// the type of `source`. 37 Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source, 38 int64_t dim) { 39 if (source.getType().isa<UnrankedMemRefType, MemRefType>()) 40 return b.createOrFold<memref::DimOp>(loc, source, dim); 41 if (source.getType().isa<UnrankedTensorType, RankedTensorType>()) 42 return b.createOrFold<tensor::DimOp>(loc, source, dim); 43 llvm_unreachable("Expected MemRefType or TensorType"); 44 } 45 46 /// Return the number of elements of basis, `0` if empty. 47 int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) { 48 if (basis.empty()) 49 return 0; 50 return std::accumulate(basis.begin(), basis.end(), 1, 51 std::multiplies<int64_t>()); 52 } 53 54 SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape, 55 ArrayRef<int64_t> sizes) { 56 int64_t rank = shape.size(); 57 // Compute the count for each dimension. 58 SmallVector<int64_t, 4> sliceDimCounts(rank); 59 for (int64_t r = 0; r < rank; ++r) 60 sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]); 61 // Use that to compute the slice stride for each dimension. 62 SmallVector<int64_t, 4> sliceStrides(rank); 63 sliceStrides[rank - 1] = 1; 64 for (int64_t r = rank - 2; r >= 0; --r) 65 sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1]; 66 return sliceStrides; 67 } 68 69 SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets( 70 ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) { 71 SmallVector<int64_t, 4> result; 72 for (auto it : llvm::zip(vectorOffsets, sizes)) 73 result.push_back(std::get<0>(it) * std::get<1>(it)); 74 return result; 75 } 76 77 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape, 78 ArrayRef<int64_t> subShape) { 79 if (superShape.size() < subShape.size()) { 80 return Optional<SmallVector<int64_t, 4>>(); 81 } 82 83 // Starting from the end, compute the integer divisors. 84 std::vector<int64_t> result; 85 result.reserve(superShape.size()); 86 int64_t superSize = 0, subSize = 0; 87 for (auto it : 88 llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) { 89 std::tie(superSize, subSize) = it; 90 assert(superSize > 0 && "superSize must be > 0"); 91 assert(subSize > 0 && "subSize must be > 0"); 92 93 // If integral division does not occur, return and let the caller decide. 94 if (superSize % subSize != 0) 95 return None; 96 result.push_back(superSize / subSize); 97 } 98 99 // At this point we computed the ratio (in reverse) for the common 100 // size. Fill with the remaining entries from the super-vector shape (still in 101 // reverse). 102 int commonSize = subShape.size(); 103 std::copy(superShape.rbegin() + commonSize, superShape.rend(), 104 std::back_inserter(result)); 105 106 assert(result.size() == superShape.size() && 107 "super to sub shape ratio is not of the same size as the super rank"); 108 109 // Reverse again to get it back in the proper order and return. 110 return SmallVector<int64_t, 4>{result.rbegin(), result.rend()}; 111 } 112 113 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType, 114 VectorType subVectorType) { 115 assert(superVectorType.getElementType() == subVectorType.getElementType() && 116 "vector types must be of the same elemental type"); 117 return shapeRatio(superVectorType.getShape(), subVectorType.getShape()); 118 } 119 120 /// Constructs a permutation map from memref indices to vector dimension. 121 /// 122 /// The implementation uses the knowledge of the mapping of enclosing loop to 123 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a 124 /// map with: 125 /// - keys representing "vectorized enclosing loops"; 126 /// - values representing the corresponding vector dimension. 127 /// The algorithm traverses "vectorized enclosing loops" and extracts the 128 /// at-most-one MemRef index that is invariant along said loop. This index is 129 /// guaranteed to be at most one by construction: otherwise the MemRef is not 130 /// vectorizable. 131 /// If this invariant index is found, it is added to the permutation_map at the 132 /// proper vector dimension. 133 /// If no index is found to be invariant, 0 is added to the permutation_map and 134 /// corresponds to a vector broadcast along that dimension. 135 /// 136 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty, 137 /// signalling that no permutation map can be constructed given 138 /// `enclosingLoopToVectorDim`. 139 /// 140 /// Examples can be found in the documentation of `makePermutationMap`, in the 141 /// header file. 142 static AffineMap makePermutationMap( 143 ArrayRef<Value> indices, 144 const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) { 145 if (enclosingLoopToVectorDim.empty()) 146 return AffineMap(); 147 MLIRContext *context = 148 enclosingLoopToVectorDim.begin()->getFirst()->getContext(); 149 SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(), 150 getAffineConstantExpr(0, context)); 151 152 for (auto kvp : enclosingLoopToVectorDim) { 153 assert(kvp.second < perm.size()); 154 auto invariants = getInvariantAccesses( 155 cast<AffineForOp>(kvp.first).getInductionVar(), indices); 156 unsigned numIndices = indices.size(); 157 unsigned countInvariantIndices = 0; 158 for (unsigned dim = 0; dim < numIndices; ++dim) { 159 if (!invariants.count(indices[dim])) { 160 assert(perm[kvp.second] == getAffineConstantExpr(0, context) && 161 "permutationMap already has an entry along dim"); 162 perm[kvp.second] = getAffineDimExpr(dim, context); 163 } else { 164 ++countInvariantIndices; 165 } 166 } 167 assert((countInvariantIndices == numIndices || 168 countInvariantIndices == numIndices - 1) && 169 "Vectorization prerequisite violated: at most 1 index may be " 170 "invariant wrt a vectorized loop"); 171 } 172 return AffineMap::get(indices.size(), 0, perm, context); 173 } 174 175 /// Implementation detail that walks up the parents and records the ones with 176 /// the specified type. 177 /// TODO: could also be implemented as a collect parents followed by a 178 /// filter and made available outside this file. 179 template <typename T> 180 static SetVector<Operation *> getParentsOfType(Block *block) { 181 SetVector<Operation *> res; 182 auto *current = block->getParentOp(); 183 while (current) { 184 if (auto typedParent = dyn_cast<T>(current)) { 185 assert(res.count(current) == 0 && "Already inserted"); 186 res.insert(current); 187 } 188 current = current->getParentOp(); 189 } 190 return res; 191 } 192 193 /// Returns the enclosing AffineForOp, from closest to farthest. 194 static SetVector<Operation *> getEnclosingforOps(Block *block) { 195 return getParentsOfType<AffineForOp>(block); 196 } 197 198 AffineMap mlir::makePermutationMap( 199 Block *insertPoint, ArrayRef<Value> indices, 200 const DenseMap<Operation *, unsigned> &loopToVectorDim) { 201 DenseMap<Operation *, unsigned> enclosingLoopToVectorDim; 202 auto enclosingLoops = getEnclosingforOps(insertPoint); 203 for (auto *forInst : enclosingLoops) { 204 auto it = loopToVectorDim.find(forInst); 205 if (it != loopToVectorDim.end()) { 206 enclosingLoopToVectorDim.insert(*it); 207 } 208 } 209 return ::makePermutationMap(indices, enclosingLoopToVectorDim); 210 } 211 212 AffineMap mlir::makePermutationMap( 213 Operation *op, ArrayRef<Value> indices, 214 const DenseMap<Operation *, unsigned> &loopToVectorDim) { 215 return makePermutationMap(op->getBlock(), indices, loopToVectorDim); 216 } 217 218 bool matcher::operatesOnSuperVectorsOf(Operation &op, 219 VectorType subVectorType) { 220 // First, extract the vector type and distinguish between: 221 // a. ops that *must* lower a super-vector (i.e. vector.transfer_read, 222 // vector.transfer_write); and 223 // b. ops that *may* lower a super-vector (all other ops). 224 // The ops that *may* lower a super-vector only do so if the super-vector to 225 // sub-vector ratio exists. The ops that *must* lower a super-vector are 226 // explicitly checked for this property. 227 /// TODO: there should be a single function for all ops to do this so we 228 /// do not have to special case. Maybe a trait, or just a method, unclear atm. 229 bool mustDivide = false; 230 (void)mustDivide; 231 VectorType superVectorType; 232 if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) { 233 superVectorType = transfer.getVectorType(); 234 mustDivide = true; 235 } else if (op.getNumResults() == 0) { 236 if (!isa<func::ReturnOp>(op)) { 237 op.emitError("NYI: assuming only return operations can have 0 " 238 " results at this point"); 239 } 240 return false; 241 } else if (op.getNumResults() == 1) { 242 if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) { 243 superVectorType = v; 244 } else { 245 // Not a vector type. 246 return false; 247 } 248 } else { 249 // Not a vector.transfer and has more than 1 result, fail hard for now to 250 // wake us up when something changes. 251 op.emitError("NYI: operation has more than 1 result"); 252 return false; 253 } 254 255 // Get the ratio. 256 auto ratio = shapeRatio(superVectorType, subVectorType); 257 258 // Sanity check. 259 assert((ratio || !mustDivide) && 260 "vector.transfer operation in which super-vector size is not an" 261 " integer multiple of sub-vector size"); 262 263 // This catches cases that are not strictly necessary to have multiplicity but 264 // still aren't divisible by the sub-vector shape. 265 // This could be useful information if we wanted to reshape at the level of 266 // the vector type (but we would have to look at the compute and distinguish 267 // between parallel, reduction and possibly other cases. 268 return ratio.has_value(); 269 } 270