//===- Utils.h - Affine dialect utilities -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file declares a set of utilities for the affine dialect ops.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_AFFINE_UTILS_H
#define MLIR_DIALECT_AFFINE_UTILS_H

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"

namespace mlir {

class AffineForOp;
class AffineIfOp;
class AffineParallelOp;
class DominanceInfo;
class Operation;
class PostDominanceInfo;

namespace func {
class FuncOp;
} // namespace func

namespace memref {
class AllocOp;
} // namespace memref

struct LogicalResult;

using ReductionLoopMap = DenseMap<Operation *, SmallVector<LoopReduction, 2>>;

/// Replaces a parallel affine.for op with a 1-d affine.parallel op. `forOp`'s
/// body is taken by the affine.parallel op and the former is erased.
/// (mlir::isLoopParallel can be used to detect a parallel affine.for op.) The
/// reductions specified in `parallelReductions` are also parallelized.
/// Parallelization will fail in the presence of loop iteration arguments that
/// are not listed in `parallelReductions`.
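///
/// For example, a parallel loop such as the following (illustrative sketch;
/// %A and %B are assumed to be memrefs in scope):
///
///   affine.for %i = 0 to 8 {
///     %v = affine.load %A[%i] : memref<8xf32>
///     affine.store %v, %B[%i] : memref<8xf32>
///   }
///
/// would be rewritten into a 1-d affine.parallel op:
///
///   affine.parallel (%i) = (0) to (8) {
///     %v = affine.load %A[%i] : memref<8xf32>
///     affine.store %v, %B[%i] : memref<8xf32>
///   }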
LogicalResult
affineParallelize(AffineForOp forOp,
                  ArrayRef<LoopReduction> parallelReductions = {});

/// Hoists out affine.if/else to as high as possible, i.e., past all invariant
/// affine.for/affine.parallel ops. Returns success if any hoisting happened;
/// `folded` is set to true if the op was folded or erased. This hoisting could
/// lead to significant code expansion in some cases.
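///
/// For example, an affine.if whose condition is invariant in the enclosing
/// loop (illustrative sketch; #set is assumed to involve only %N):
///
///   affine.for %i = 0 to 10 {
///     affine.if #set(%N) {
///       ...
///     }
///   }
///
/// would be hoisted to:
///
///   affine.if #set(%N) {
///     affine.for %i = 0 to 10 {
///       ...
///     }
///   }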
LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr);

/// Holds parameters to perform n-D vectorization on a single loop nest.
/// For example, for the following loop nest:
///
/// func @vec2d(%in: memref<64x128x512xf32>, %out: memref<64x128x512xf32>) {
///   affine.for %i0 = 0 to 64 {
///     affine.for %i1 = 0 to 128 {
///       affine.for %i2 = 0 to 512 {
///         %ld = affine.load %in[%i0, %i1, %i2] : memref<64x128x512xf32>
///         affine.store %ld, %out[%i0, %i1, %i2] : memref<64x128x512xf32>
///       }
///     }
///   }
///   return
/// }
///
/// and VectorizationStrategy = 'vectorSizes = {8, 4}', 'loopToVectorDim =
/// {{i1->0}, {i2->1}}', SuperVectorizer will generate:
///
///  func @vec2d(%arg0: memref<64x128x512xf32>, %arg1: memref<64x128x512xf32>) {
///    affine.for %arg2 = 0 to 64 {
///      affine.for %arg3 = 0 to 128 step 8 {
///        affine.for %arg4 = 0 to 512 step 4 {
///          %cst = arith.constant 0.000000e+00 : f32
///          %0 = vector.transfer_read %arg0[%arg2, %arg3, %arg4], %cst : ...
///          vector.transfer_write %0, %arg1[%arg2, %arg3, %arg4] : ...
///        }
///      }
///    }
///    return
///  }
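///
/// In C++, such a strategy could be populated as follows (illustrative
/// sketch; `loopI1` and `loopI2` are assumed to be the Operation* handles of
/// the %i1 and %i2 loops above):
///
///   VectorizationStrategy strategy;
///   strategy.vectorSizes.assign({8, 4});
///   // Vectorize %i1 along vector dimension 0 (factor 8) and %i2 along
///   // vector dimension 1 (factor 4).
///   strategy.loopToVectorDim[loopI1] = 0;
///   strategy.loopToVectorDim[loopI2] = 1;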
// TODO: Hoist to a VectorizationStrategy.cpp when appropriate.
struct VectorizationStrategy {
  // Vectorization factors to apply to each target vector dimension.
  // Each factor will be applied to a different loop.
  SmallVector<int64_t, 8> vectorSizes;
  // Maps each AffineForOp vectorization candidate to its vector dimension.
  // The candidate will be vectorized using the vectorization factor in
  // 'vectorSizes' for that dimension.
  DenseMap<Operation *, unsigned> loopToVectorDim;
  // Maps loops that implement vectorizable reductions to the corresponding
  // reduction descriptors.
  ReductionLoopMap reductionLoops;
};

/// Replaces affine store and load accesses with scalars by forwarding stores
/// to loads and eliminating invariant affine loads; consequently, also
/// eliminates dead allocs.
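///
/// For example (illustrative sketch), in:
///
///   affine.store %v, %m[%i] : memref<10xf32>
///   %w = affine.load %m[%i] : memref<10xf32>
///   "use"(%w) : (f32) -> ()
///
/// the load is forwarded so that %v is used directly, and if %m is then left
/// without any remaining users, its defining alloc is removed.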
void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
                         PostDominanceInfo &postDomInfo);

/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
/// 'vectorSizes'. By default, each vectorization factor is applied
/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
/// be optionally used to provide a different loop vectorization order.
/// If `reductionLoops` is not empty, the given reduction loops may be
/// vectorized along the reduction dimension.
/// TODO: Vectorizing reductions is supported only for 1-D vectorization.
void vectorizeAffineLoops(
    Operation *parentOp,
    llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
    ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
    const ReductionLoopMap &reductionLoops = ReductionLoopMap());

/// External utility to vectorize affine loops from a single loop nest using an
/// n-D vectorization strategy (see doc in VectorizationStrategy definition).
/// Loops are provided in a 2D vector container. The first dimension represents
/// the nesting level relative to the loops to be vectorized. The second
/// dimension contains the loops. This means that:
///   a) every loop in 'loops[i]' must have a parent loop in 'loops[i-1]',
///   b) a loop in 'loops[i]' may or may not have a child loop in 'loops[i+1]'.
///
/// For example, for the following loop nest:
///
///   func @vec2d(%in0: memref<64x128x512xf32>, %in1: memref<64x128x128xf32>,
///               %out0: memref<64x128x512xf32>,
///               %out1: memref<64x128x128xf32>) {
///     affine.for %i0 = 0 to 64 {
///       affine.for %i1 = 0 to 128 {
///         affine.for %i2 = 0 to 512 {
///           %ld = affine.load %in0[%i0, %i1, %i2] : memref<64x128x512xf32>
///           affine.store %ld, %out0[%i0, %i1, %i2] : memref<64x128x512xf32>
///         }
///         affine.for %i3 = 0 to 128 {
///           %ld = affine.load %in1[%i0, %i1, %i3] : memref<64x128x128xf32>
///           affine.store %ld, %out1[%i0, %i1, %i3] : memref<64x128x128xf32>
///         }
///       }
///     }
///     return
///   }
///
/// loops = {{%i0}, {%i2, %i3}}, to vectorize the outermost and the two
/// innermost loops;
/// loops = {{%i1}, {%i2, %i3}}, to vectorize the middle and the two innermost
/// loops;
/// loops = {{%i2}}, to vectorize only the first innermost loop;
/// loops = {{%i3}}, to vectorize only the second innermost loop;
/// loops = {{%i1}}, to vectorize only the middle loop.
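///
/// A call for the first case above could be set up as follows (illustrative
/// sketch; `i0`, `i2`, and `i3` are assumed to be the AffineForOp handles for
/// %i0, %i2, and %i3, and `strategy` a suitably filled VectorizationStrategy):
///
///   std::vector<SmallVector<AffineForOp, 2>> loops(2);
///   loops[0].push_back(i0);
///   loops[1].push_back(i2);
///   loops[1].push_back(i3);
///   if (failed(vectorizeAffineLoopNest(loops, strategy)))
///     return failure(); // e.g., propagate the failure to the caller.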
LogicalResult
vectorizeAffineLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops,
                        const VectorizationStrategy &strategy);

/// Normalize an affine.parallel op so that lower bounds are 0 and steps are 1.
/// As currently implemented, this transformation cannot fail and will return
/// early if the op is already in a normalized form.
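///
/// For example (illustrative sketch):
///
///   affine.parallel (%i) = (2) to (10) step (2) { ... }
///
/// becomes a 4-iteration loop with a zero lower bound and unit step,
///
///   affine.parallel (%i) = (0) to (4) { ... }
///
/// with uses of the old induction value in the body rewritten in terms of
/// %i * 2 + 2.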
void normalizeAffineParallel(AffineParallelOp op);

/// Normalize an affine.for op. If the affine.for op has only a single
/// iteration, it is simply promoted; otherwise, it is normalized in the
/// traditional way, by converting the lower bound to zero and the loop step to
/// one. The upper bound is set to the trip count of the loop. The original
/// loop must have a lower bound with only a single result; there is no such
/// restriction on the upper bound. Returns success if the loop has been
/// normalized (or was already in normal form).
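///
/// For example (illustrative sketch):
///
///   affine.for %i = 5 to 25 step 5 { ... }
///
/// is normalized to a loop over its trip count of 4,
///
///   affine.for %i = 0 to 4 { ... }
///
/// with uses of the old induction value in the body rewritten in terms of
/// %i * 5 + 5.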
LogicalResult normalizeAffineFor(AffineForOp op);

/// Traverse `e` and return an AffineExpr where all occurrences of `dim` have
/// been replaced by either:
///  - `min` if `positivePath` is true when we reach an occurrence of `dim`
///  - `max` if `positivePath` is false when we reach an occurrence of `dim`
/// `positivePath` is negated each time we hit a multiplicative or divisive
/// binary op with a constant negative coefficient.
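///
/// For example (illustrative sketch), with `e = d0 - 2 * d1`, `dim = d1`,
/// `min = s0`, and `max = s1`: the occurrence of d1 is reached with
/// `positivePath` negated (its coefficient is negative), so it is replaced by
/// `max`, yielding `d0 - 2 * s1`. Substituting for d0 instead would yield
/// `s0 - 2 * d1`.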
AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
                        AffineExpr max, bool positivePath = true);

/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
/// optionally remapping the old memref's indices using the supplied affine map,
/// `indexRemap`. The new memref could be of a different shape or rank.
/// `extraIndices` provides any additional access indices to be added to the
/// start.
///
/// `indexRemap` remaps indices of the old memref access to a new set of indices
/// that are used to index the memref. Additional input operands to indexRemap
/// can be optionally provided in `extraOperands`, and they occupy the start
/// of its input list. `indexRemap`'s dimensional inputs are expected to
/// correspond to the memref's indices, and its symbolic inputs, if any, should
/// be provided in `symbolOperands`.
///
/// `domOpFilter`, if non-null, restricts the replacement to only those
/// operations that are dominated by it; similarly, `postDomOpFilter` restricts
/// replacement to only those operations that are postdominated by it.
///
/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
/// uses of a memref without any requirement for access index rewrites as long
/// as the user operation has the MemRefsNormalizable trait. The default value
/// of this flag is false.
///
/// 'replaceInDeallocOp', if set, allows DeallocOp, a non-dereferencing user,
/// to also be a candidate for replacement. The default value of this flag is
/// false.
///
/// Returns success if the replacement was performed and failure if it is not
/// possible, i.e., whenever a memref is used as an operand in a
/// non-dereferencing context and 'allowNonDereferencingOps' is false, except
/// for dealloc's on the memref, which are left untouched. See comments at the
/// function definition for an example.
//
//  Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
//  The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
//  index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
//  d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
//  extra operands, note that 'indexRemap' would just be applied to existing
//  indices (%i, %j).
//  TODO: allow extraIndices to be added at any position.
LogicalResult replaceAllMemRefUsesWith(
    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
    AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
    ArrayRef<Value> symbolOperands = {}, Operation *domOpFilter = nullptr,
    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
    bool replaceInDeallocOp = false);

/// Performs the same replacement as the other version above but only for the
/// dereferencing uses of `oldMemRef` in `op`, except that, when
/// 'allowNonDereferencingOps' is set to true, non-dereferencing uses are
/// replaced as well.
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
                                       Operation *op,
                                       ArrayRef<Value> extraIndices = {},
                                       AffineMap indexRemap = AffineMap(),
                                       ArrayRef<Value> extraOperands = {},
                                       ArrayRef<Value> symbolOperands = {},
                                       bool allowNonDereferencingOps = false);

/// Rewrites the memref defined by this alloc op to have an identity layout map
/// and updates all its indexing uses. Returns failure if any of its uses
/// escape (while leaving the IR in a valid state).
LogicalResult normalizeMemRef(memref::AllocOp *op);

/// Computes a new memref type from `memrefType`, with the old layout map
/// normalized to an identity layout map and the shape adjusted accordingly.
/// Returns the old memref type unchanged if no normalization was needed or if
/// a failure occurs while transforming the old layout map into an identity
/// layout map.
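///
/// For example (illustrative sketch), with
/// #tile = affine_map<(d0) -> (d0 floordiv 4, d0 mod 4)>, the type
///
///   memref<16xf32, #tile>
///
/// normalizes to
///
///   memref<4x4xf32>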
MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
                               unsigned numSymbolicOperands);

/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
/// its results equal to the number of operands, as a composition
/// of all other AffineApplyOps reachable from input parameter 'operands'. If
/// different operands were drawing results from multiple affine apply ops,
/// these will also be collected into a single (multi-result) affine apply op.
/// The final results of the composed AffineApplyOp are returned in output
/// parameter 'results'. Returns the affine apply op created.
Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
                                       ArrayRef<Value> operands,
                                       ArrayRef<Operation *> affineApplyOps,
                                       SmallVectorImpl<Value> *results);

/// Given an operation, inserts one or more single-result affine apply
/// operations, results of which are exclusively used by this operation.
/// The operands of these newly created affine apply ops are
/// guaranteed to be loop iterators or terminal symbols of a function.
///
/// Before
///
/// affine.for %i = 0 to #map(%N)
///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
///   send %A[%idx], ...
///   %v = "compute"(%idx, ...)
///
/// After
///
/// affine.for %i = 0 to #map(%N)
///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
///   send %A[%idx], ...
///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
///   %v = "compute"(%idx_, ...)
///
/// This allows the application of different transformations on send and
/// compute (e.g., different shifts/delays).
///
/// Fills `sliceOps` with the list of affine.apply operations.
/// In the following cases, `sliceOps` remains empty:
///   1. If none of `opInst`'s operands were the result of an affine.apply
///      (i.e., there was no affine computation slice to create).
///   2. If all the affine.apply ops supplying operands to this `opInst` did
///      not have any uses other than those in this `opInst`.
void createAffineComputationSlice(Operation *opInst,
                                  SmallVectorImpl<AffineApplyOp> *sliceOps);

/// Emit code that computes the given affine expression using standard
/// arithmetic operations applied to the provided dimension and symbol values.
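///
/// For example (illustrative sketch), expanding the expression d0 + s0 * 2
/// with dimValues = {%i} and symbolValues = {%n} emits the equivalent of:
///
///   %c2 = arith.constant 2 : index
///   %0 = arith.muli %n, %c2 : index
///   %1 = arith.addi %i, %0 : index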
Value expandAffineExpr(OpBuilder &builder, Location loc, AffineExpr expr,
                       ValueRange dimValues, ValueRange symbolValues);

/// Create a sequence of operations that implement the `affineMap` applied to
/// the given `operands` (as if it were an AffineApplyOp).
Optional<SmallVector<Value, 8>> expandAffineMap(OpBuilder &builder,
                                                Location loc,
                                                AffineMap affineMap,
                                                ValueRange operands);

} // namespace mlir

#endif // MLIR_DIALECT_AFFINE_UTILS_H