//===- Utils.h - Affine dialect utilities -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file declares a set of utilities for the affine dialect ops.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_AFFINE_UTILS_H
#define MLIR_DIALECT_AFFINE_UTILS_H

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"

namespace mlir {

class AffineForOp;
class AffineIfOp;
class AffineParallelOp;
class DominanceInfo;
class Operation;
class PostDominanceInfo;

namespace func {
class FuncOp;
} // namespace func

namespace memref {
class AllocOp;
} // namespace memref

struct LogicalResult;

using ReductionLoopMap = DenseMap<Operation *, SmallVector<LoopReduction, 2>>;

/// Replaces a parallel affine.for op with a 1-d affine.parallel op. `forOp`'s
/// body is taken by the affine.parallel op and the former is erased.
/// (mlir::isLoopParallel can be used to detect a parallel affine.for op.) The
/// reductions specified in `parallelReductions` are also parallelized.
/// Parallelization will fail in the presence of loop iteration arguments that
/// are not listed in `parallelReductions`.
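///
/// For example (an illustrative sketch; the exact output may differ in
/// details), a parallel loop such as
///
///   affine.for %i = 0 to 64 {
///     %v = affine.load %in[%i] : memref<64xf32>
///     affine.store %v, %out[%i] : memref<64xf32>
///   }
///
/// is rewritten into
///
///   affine.parallel (%i) = (0) to (64) {
///     %v = affine.load %in[%i] : memref<64xf32>
///     affine.store %v, %out[%i] : memref<64xf32>
///   }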
LogicalResult
affineParallelize(AffineForOp forOp,
                  ArrayRef<LoopReduction> parallelReductions = {});

/// Hoists out affine.if/else to as high as possible, i.e., past all invariant
/// affine.fors/parallel's. Returns success if any hoisting happened; `folded`
/// is set to true if the op was folded or erased. This hoisting could lead to
/// significant code expansion in some cases.
LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr);

/// Holds parameters to perform n-D vectorization on a single loop nest.
/// For example, for the following loop nest:
///
/// func @vec2d(%in: memref<64x128x512xf32>, %out: memref<64x128x512xf32>) {
///   affine.for %i0 = 0 to 64 {
///     affine.for %i1 = 0 to 128 {
///       affine.for %i2 = 0 to 512 {
///         %ld = affine.load %in[%i0, %i1, %i2] : memref<64x128x512xf32>
///         affine.store %ld, %out[%i0, %i1, %i2] : memref<64x128x512xf32>
///       }
///     }
///   }
///   return
/// }
///
/// and VectorizationStrategy = 'vectorSizes = {8, 4}', 'loopToVectorDim =
/// {{i1->0}, {i2->1}}', SuperVectorizer will generate:
///
/// func @vec2d(%arg0: memref<64x128x512xf32>, %arg1: memref<64x128x512xf32>) {
///   affine.for %arg2 = 0 to 64 {
///     affine.for %arg3 = 0 to 128 step 8 {
///       affine.for %arg4 = 0 to 512 step 4 {
///         %cst = arith.constant 0.000000e+00 : f32
///         %0 = vector.transfer_read %arg0[%arg2, %arg3, %arg4], %cst : ...
///         vector.transfer_write %0, %arg1[%arg2, %arg3, %arg4] : ...
///       }
///     }
///   }
///   return
/// }
// TODO: Hoist to a VectorizationStrategy.cpp when appropriate.
struct VectorizationStrategy {
  // Vectorization factors to apply to each target vector dimension.
  // Each factor will be applied to a different loop.
  SmallVector<int64_t, 8> vectorSizes;
  // Maps each AffineForOp vectorization candidate with its vector dimension.
  // The candidate will be vectorized using the vectorization factor in
  // 'vectorSizes' for that dimension.
  DenseMap<Operation *, unsigned> loopToVectorDim;
  // Maps loops that implement vectorizable reductions to the corresponding
  // reduction descriptors.
  ReductionLoopMap reductionLoops;
};

/// Replaces affine store and load accesses with scalars by forwarding stores
/// to loads and eliminating invariant affine loads; consequently, also
/// eliminates dead allocs.
void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
                         PostDominanceInfo &postDomInfo);

/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
/// 'vectorSizes'. By default, each vectorization factor is applied
/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
/// be optionally used to provide a different loop vectorization order.
/// If `reductionLoops` is not empty, the given reduction loops may be
/// vectorized along the reduction dimension.
/// TODO: Vectorizing reductions is supported only for 1-D vectorization.
void vectorizeAffineLoops(
    Operation *parentOp,
    llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
    ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
    const ReductionLoopMap &reductionLoops = ReductionLoopMap());

/// External utility to vectorize affine loops from a single loop nest using an
/// n-D vectorization strategy (see doc in VectorizationStrategy definition).
/// Loops are provided in a 2D vector container. The first dimension represents
/// the nesting level relative to the loops to be vectorized. The second
/// dimension contains the loops. This means that:
///   a) every loop in 'loops[i]' must have a parent loop in 'loops[i-1]',
///   b) a loop in 'loops[i]' may or may not have a child loop in 'loops[i+1]'.
///
/// For example, for the following loop nest:
///
/// func @vec2d(%in0: memref<64x128x512xf32>, %in1: memref<64x128x128xf32>,
///             %out0: memref<64x128x512xf32>,
///             %out1: memref<64x128x128xf32>) {
///   affine.for %i0 = 0 to 64 {
///     affine.for %i1 = 0 to 128 {
///       affine.for %i2 = 0 to 512 {
///         %ld = affine.load %in0[%i0, %i1, %i2] : memref<64x128x512xf32>
///         affine.store %ld, %out0[%i0, %i1, %i2] : memref<64x128x512xf32>
///       }
///       affine.for %i3 = 0 to 128 {
///         %ld = affine.load %in1[%i0, %i1, %i3] : memref<64x128x128xf32>
///         affine.store %ld, %out1[%i0, %i1, %i3] : memref<64x128x128xf32>
///       }
///     }
///   }
///   return
/// }
///
/// loops = {{%i0}, {%i2, %i3}}, to vectorize the outermost and the two
/// innermost loops;
/// loops = {{%i1}, {%i2, %i3}}, to vectorize the middle and the two innermost
/// loops;
/// loops = {{%i2}}, to vectorize only the first innermost loop;
/// loops = {{%i3}}, to vectorize only the second innermost loop;
/// loops = {{%i1}}, to vectorize only the middle loop.
LogicalResult
vectorizeAffineLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops,
                        const VectorizationStrategy &strategy);

/// Normalize an affine.parallel op so that lower bounds are 0 and steps are 1.
/// As currently implemented, this transformation cannot fail and will return
/// early if the op is already in a normalized form.
void normalizeAffineParallel(AffineParallelOp op);

/// Normalize an affine.for op. If the affine.for op has only a single
/// iteration, it is simply promoted; otherwise it is normalized in the
/// traditional way, by converting the lower bound to zero and the loop step to
/// one. The upper bound is set to the trip count of the loop. Original loops
/// must have a lower bound with only a single result. There is no such
/// restriction on upper bounds. Returns success if the loop has been
/// normalized (or is already in the normal form).
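///
/// For example (an illustrative sketch; the exact rewritten form may differ),
/// a loop such as
///
///   affine.for %i = 2 to 32 step 2 {
///     "use"(%i) : (index) -> ()
///   }
///
/// is normalized to a zero-based, unit-step loop whose upper bound is the
/// trip count (15 here), with the original induction value recovered via an
/// affine.apply:
///
///   affine.for %ii = 0 to 15 {
///     %i = affine.apply affine_map<(d0) -> (d0 * 2 + 2)>(%ii)
///     "use"(%i) : (index) -> ()
///   }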
LogicalResult normalizeAffineFor(AffineForOp op);

/// Traverse `e` and return an AffineExpr where all occurrences of `dim` have
/// been replaced by either:
///   - `min` if `positivePath` is true when we reach an occurrence of `dim`,
///   - `max` if `positivePath` is false when we reach an occurrence of `dim`.
/// `positivePath` is negated each time we hit a multiplicative or divisive
/// binary op with a constant negative coefficient.
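///
/// For example, following the rule above, with `e = d0 + 2` and `dim = d0`
/// the result is `min + 2`, while with `e = 4 - d0` (i.e., `4 + d0 * -1`) the
/// multiplication by -1 negates `positivePath`, so the result is `4 - max`.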
AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
                        AffineExpr max, bool positivePath = true);

/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
/// optionally remapping the old memref's indices using the supplied affine
/// map, `indexRemap`. The new memref could be of a different shape or rank.
/// `extraIndices` provides any additional access indices to be added to the
/// start.
///
/// `indexRemap` remaps indices of the old memref access to a new set of
/// indices that are used to index the memref. Additional input operands to
/// indexRemap can be optionally provided in `extraOperands`, and they occupy
/// the start of its input list. `indexRemap`'s dimensional inputs are expected
/// to correspond to the memref's indices, and its symbolic inputs, if any,
/// should be provided in `symbolOperands`.
///
/// `domOpFilter`, if non-null, restricts the replacement to only those
/// operations that are dominated by it; similarly, `postDomOpFilter` restricts
/// replacement to only those operations that are postdominated by it.
///
/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
/// uses of a memref without any requirement for access index rewrites as long
/// as the user operation has the MemRefsNormalizable trait. The default value
/// of this flag is false.
///
/// 'replaceInDeallocOp', if set, allows DeallocOp, a non-dereferencing user,
/// to also be a candidate for replacement. The default value of this flag is
/// false.
///
/// Returns success if the replacement was performed and failure if it is not
/// possible, i.e., whenever a memref is used as an operand in a
/// non-dereferencing context and 'allowNonDereferencingOps' is false;
/// dealloc's on the memref are left untouched and do not cause failure. See
/// comments at function definition for an example.
//
// Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
// The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
// index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
// d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
// extra operands, note that 'indexRemap' would just be applied to existing
// indices (%i, %j).
// TODO: allow extraIndices to be added at any position.
LogicalResult replaceAllMemRefUsesWith(
    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
    AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
    ArrayRef<Value> symbolOperands = {}, Operation *domOpFilter = nullptr,
    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
    bool replaceInDeallocOp = false);

/// Performs the same replacement as the other version above but only for the
/// dereferencing uses of `oldMemRef` in `op`, except in cases where
/// 'allowNonDereferencingOps' is set to true, in which case the
/// non-dereferencing uses are replaced as well.
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
                                       Operation *op,
                                       ArrayRef<Value> extraIndices = {},
                                       AffineMap indexRemap = AffineMap(),
                                       ArrayRef<Value> extraOperands = {},
                                       ArrayRef<Value> symbolOperands = {},
                                       bool allowNonDereferencingOps = false);

/// Rewrites the memref defined by this alloc op to have an identity layout map
/// and updates all its indexing uses. Returns failure if any of its uses
/// escape (while leaving the IR in a valid state).
LogicalResult normalizeMemRef(memref::AllocOp *op);

/// Normalizes `memrefType` so that the old (non-identity) layout map is
/// rewritten into an identity layout map and the shape is updated accordingly.
/// Returns the old memref type unchanged if no normalization was needed or if
/// a failure occurred while transforming the old layout map into an identity
/// one.
MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
                               unsigned numSymbolicOperands);

/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
/// its results equal to the number of operands, as a composition
/// of all other AffineApplyOps reachable from input parameter 'operands'. If
/// different operands were drawing results from multiple affine apply ops,
/// these will also be collected into a single (multi-result) affine apply op.
/// The final results of the composed AffineApplyOp are returned in output
/// parameter 'results'. Returns the affine apply op created.
Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
                                       ArrayRef<Value> operands,
                                       ArrayRef<Operation *> affineApplyOps,
                                       SmallVectorImpl<Value> *results);

/// Given an operation, inserts one or more single result affine apply
/// operations, results of which are exclusively used by this operation.
/// The operands of these newly created affine apply ops are
/// guaranteed to be loop iterators or terminal symbols of a function.
///
/// Before
///
/// affine.for %i = 0 to #map(%N)
///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
///   send %A[%idx], ...
///   %v = "compute"(%idx, ...)
///
/// After
///
/// affine.for %i = 0 to #map(%N)
///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
///   send %A[%idx], ...
///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
///   %v = "compute"(%idx_, ...)
///
/// This allows the application of different transformations on send and
/// compute (e.g., different shifts/delays).
///
/// Fills `sliceOps` with the list of affine.apply operations.
/// In the following cases, `sliceOps` remains empty:
///   1. If none of opInst's operands were the result of an affine.apply
///      (i.e., there was no affine computation slice to create).
///   2. If all the affine.apply ops supplying operands to this opInst did not
///      have any uses other than those in this opInst.
void createAffineComputationSlice(Operation *opInst,
                                  SmallVectorImpl<AffineApplyOp> *sliceOps);

/// Emit code that computes the given affine expression using standard
/// arithmetic operations applied to the provided dimension and symbol values.
Value expandAffineExpr(OpBuilder &builder, Location loc, AffineExpr expr,
                       ValueRange dimValues, ValueRange symbolValues);

/// Create a sequence of operations that implement the `affineMap` applied to
/// the given `operands` (as if it were an AffineApplyOp).
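///
/// For example (an illustrative usage sketch; `builder`, `loc`, `ctx`, and the
/// index-typed `operands` are assumed to be in scope), expanding the map
/// (d0)[s0] -> (d0 + s0) emits the corresponding arithmetic on index values:
///
///   AffineMap map = AffineMap::get(
///       /*dimCount=*/1, /*symbolCount=*/1,
///       {getAffineDimExpr(0, ctx) + getAffineSymbolExpr(0, ctx)}, ctx);
///   Optional<SmallVector<Value, 8>> expanded =
///       expandAffineMap(builder, loc, map, operands);
///   if (expanded) {
///     // expanded->front() is an index-typed Value computing d0 + s0.
///   }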
Optional<SmallVector<Value, 8>> expandAffineMap(OpBuilder &builder,
                                                Location loc,
                                                AffineMap affineMap,
                                                ValueRange operands);

} // namespace mlir

#endif // MLIR_DIALECT_AFFINE_UTILS_H