1 //===- Utils.h - General analysis utilities ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file defines prototypes for various transformation utilities for
10 // memref's and non-loop IR structures. These are not passes by themselves but
11 // are used either by passes, optimization sequences, or in turn by other
12 // transformation utilities.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H
17 #define MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H
18 
19 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
20 #include "mlir/IR/AffineMap.h"
21 #include "mlir/IR/Block.h"
22 #include "mlir/IR/Location.h"
23 #include "mlir/Support/LLVM.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include <memory>
26 
27 namespace mlir {
28 
29 class AffineForOp;
30 class Block;
31 class Location;
32 struct MemRefAccess;
33 class Operation;
34 class Value;
35 
36 /// Populates 'loops' with IVs of the loops surrounding 'op' ordered from
37 /// the outermost 'affine.for' operation to the innermost one.
38 //  TODO: handle 'affine.if' ops.
39 void getLoopIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops);
40 
41 /// Populates 'ops' with IVs of the loops surrounding `op`, along with
42 /// `affine.if` operations interleaved between these loops, ordered from the
43 /// outermost `affine.for` or `affine.if` operation to the innermost one.
44 void getEnclosingAffineForAndIfOps(Operation &op,
45                                    SmallVectorImpl<Operation *> *ops);
46 
47 /// Returns the nesting depth of this operation, i.e., the number of loops
48 /// surrounding this operation.
49 unsigned getNestingDepth(Operation *op);
50 
51 /// Returns whether a loop is a parallel loop and contains a reduction loop.
52 bool isLoopParallelAndContainsReduction(AffineForOp forOp);
53 
54 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
55 /// at 'forOp'.
56 void getSequentialLoops(AffineForOp forOp,
57                         llvm::SmallDenseSet<Value, 8> *sequentialLoops);
58 
59 /// Enumerates different result statuses of slice computation by
60 /// `computeSliceUnion`
61 // TODO: Identify and add different kinds of failures during slice computation.
62 struct SliceComputationResult {
63   enum ResultEnum {
64     Success,
65     IncorrectSliceFailure, // Slice is computed, but it is incorrect.
66     GenericFailure,        // Unable to compute src loop computation slice.
67   } value;
SliceComputationResultSliceComputationResult68   SliceComputationResult(ResultEnum v) : value(v) {}
69 };
70 
71 /// ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their
72 /// associated operands for a set of loops within a loop nest (typically the
73 /// set of loops surrounding a store operation). Loop bound AffineMaps which
74 /// are non-null represent slices of that loop's iteration space.
75 struct ComputationSliceState {
76   // List of sliced loop IVs (ordered from outermost to innermost).
77   // EX: 'ivs[i]' has lower bound 'lbs[i]' and upper bound 'ubs[i]'.
78   SmallVector<Value, 4> ivs;
79   // List of lower bound AffineMaps.
80   SmallVector<AffineMap, 4> lbs;
81   // List of upper bound AffineMaps.
82   SmallVector<AffineMap, 4> ubs;
83   // List of lower bound operands (lbOperands[i] are used by 'lbs[i]').
84   std::vector<SmallVector<Value, 4>> lbOperands;
85   // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
86   std::vector<SmallVector<Value, 4>> ubOperands;
87   // Slice loop nest insertion point in target loop nest.
88   Block::iterator insertPoint;
89   // Adds to 'cst' with constraints which represent the slice bounds on 'ivs'
90   // in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim
91   // variables and the values in 'lb/ubOperands' are added as symbols.
92   // Constraints are added for all loop IV bounds (dim or symbol), and
93   // constraints are added for slice bounds in 'lbs'/'ubs'.
94   // Returns failure if we cannot add loop bounds because of unsupported cases.
95   LogicalResult getAsConstraints(FlatAffineValueConstraints *cst);
96 
97   /// Adds to 'cst' constraints which represent the original loop bounds on
98   /// 'ivs' in 'this'. This corresponds to the original domain of the loop nest
99   /// from which the slice is being computed. Returns failure if we cannot add
100   /// loop bounds because of unsupported cases.
101   LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst);
102 
103   // Clears all bounds and operands in slice state.
104   void clearBounds();
105 
106   /// Returns true if the computation slice is empty.
isEmptyComputationSliceState107   bool isEmpty() const { return ivs.empty(); }
108 
109   /// Returns true if the computation slice encloses all the iterations of the
110   /// sliced loop nest. Returns false if it does not. Returns llvm::None if it
111   /// cannot determine if the slice is maximal or not.
112   // TODO: Cache 'isMaximal' so that we don't recompute it when the slice
113   // information hasn't changed.
114   Optional<bool> isMaximal() const;
115 
116   /// Checks the validity of the slice computed. This is done using the
117   /// following steps:
118   /// 1. Get the new domain of the slice that would be created if fusion
119   /// succeeds. This domain gets constructed with source loop IVS and
120   /// destination loop IVS as dimensions.
121   /// 2. Project out the dimensions of the destination loop from the domain
122   /// above calculated in step(1) to express it purely in terms of the source
123   /// loop IVs.
124   /// 3. Calculate a set difference between the iterations of the new domain and
125   /// the original domain of the source loop.
126   /// If this difference is empty, the slice is declared to be valid. Otherwise,
127   /// return false as it implies that the effective fusion results in at least
128   /// one iteration of the slice that was not originally in the source's domain.
129   /// If the validity cannot be determined, returns llvm:None.
130   Optional<bool> isSliceValid();
131 
132   void dump() const;
133 
134 private:
135   /// Fast check to determine if the computation slice is maximal. Returns true
136   /// if each slice dimension maps to an existing dst dimension and both the src
137   /// and the dst loops for those dimensions have the same bounds. Returns false
138   /// if both the src and the dst loops don't have the same bounds. Returns
139   /// llvm::None if none of the above can be proven.
140   Optional<bool> isSliceMaximalFastCheck() const;
141 };
142 
143 /// Computes the computation slice loop bounds for one loop nest as affine maps
144 /// of the other loop nest's IVs and symbols, using 'dependenceConstraints'
145 /// computed between 'depSourceAccess' and 'depSinkAccess'.
146 /// If 'isBackwardSlice' is true, a backwards slice is computed in which the
147 /// slice bounds of loop nest surrounding 'depSourceAccess' are computed in
148 /// terms of loop IVs and symbols of the loop nest surrounding 'depSinkAccess'
149 /// at 'loopDepth'.
150 /// If 'isBackwardSlice' is false, a forward slice is computed in which the
151 /// slice bounds of loop nest surrounding 'depSinkAccess' are computed in terms
152 /// of loop IVs and symbols of the loop nest surrounding 'depSourceAccess' at
153 /// 'loopDepth'.
154 /// The slice loop bounds and associated operands are returned in 'sliceState'.
155 //
156 //  Backward slice example:
157 //
158 //    affine.for %i0 = 0 to 10 {
159 //      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceAccess'
160 //    }
161 //    affine.for %i1 = 0 to 10 {
162 //      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkAccess'
163 //    }
164 //
165 //    // Backward computation slice of loop nest '%i0'.
166 //    affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) {
167 //      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceAccess'
168 //    }
169 //
170 //  Forward slice example:
171 //
172 //    affine.for %i0 = 0 to 10 {
173 //      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceAccess'
174 //    }
175 //    affine.for %i1 = 0 to 10 {
176 //      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkAccess'
177 //    }
178 //
179 //    // Forward computation slice of loop nest '%i1'.
180 //    affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) {
181 //      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkAccess'
182 //    }
183 //
184 void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp,
185                               FlatAffineValueConstraints *dependenceConstraints,
186                               unsigned loopDepth, bool isBackwardSlice,
187                               ComputationSliceState *sliceState);
188 
189 /// Return the number of iterations for the `slicetripCountMap` provided.
190 uint64_t getSliceIterationCount(
191     const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap);
192 
193 /// Builds a map 'tripCountMap' from AffineForOp to constant trip count for
194 /// loop nest surrounding represented by slice loop bounds in 'slice'. Returns
195 /// true on success, false otherwise (if a non-constant trip count was
196 /// encountered).
197 bool buildSliceTripCountMap(
198     const ComputationSliceState &slice,
199     llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap);
200 
201 /// Computes in 'sliceUnion' the union of all slice bounds computed at
202 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
203 /// then verifies if it is valid. The parameter 'numCommonLoops' is the number
204 /// of loops common to the operations in 'opsA' and 'opsB'. If 'isBackwardSlice'
205 /// is true, computes slice bounds for loop nest surrounding ops in 'opsA', as a
206 /// function of IVs and symbols of loop nest surrounding ops in 'opsB' at
207 /// 'loopDepth'. If 'isBackwardSlice' is false, computes slice bounds for loop
208 /// nest surrounding ops in 'opsB', as a function of IVs and symbols of loop
209 /// nest surrounding ops in 'opsA' at 'loopDepth'. Returns
210 /// 'SliceComputationResult::Success' if union was computed correctly, an
211 /// appropriate 'failure' otherwise.
212 // TODO: Change this API to take 'forOpA'/'forOpB'.
213 SliceComputationResult
214 computeSliceUnion(ArrayRef<Operation *> opsA, ArrayRef<Operation *> opsB,
215                   unsigned loopDepth, unsigned numCommonLoops,
216                   bool isBackwardSlice, ComputationSliceState *sliceUnion);
217 
218 /// Creates a clone of the computation contained in the loop nest surrounding
219 /// 'srcOpInst', slices the iteration space of src loop based on slice bounds
220 /// in 'sliceState', and inserts the computation slice at the beginning of the
221 /// operation block of the loop at 'dstLoopDepth' in the loop nest surrounding
222 /// 'dstOpInst'. Returns the top-level loop of the computation slice on
223 /// success, returns nullptr otherwise.
224 // Loop depth is a crucial optimization choice that determines where to
225 // materialize the results of the backward slice - presenting a trade-off b/w
226 // storage and redundant computation in several cases.
227 // TODO: Support computation slices with common surrounding loops.
228 AffineForOp insertBackwardComputationSlice(Operation *srcOpInst,
229                                            Operation *dstOpInst,
230                                            unsigned dstLoopDepth,
231                                            ComputationSliceState *sliceState);
232 
233 /// A region of a memref's data space; this is typically constructed by
234 /// analyzing load/store op's on this memref and the index space of loops
235 /// surrounding such op's.
236 // For example, the memref region for a load operation at loop depth = 1:
237 //
238 //    affine.for %i = 0 to 32 {
239 //      affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
240 //        affine.load %A[%ii]
241 //      }
242 //    }
243 //
244 // Region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
245 // The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
246 //
247 struct MemRefRegion {
MemRefRegionMemRefRegion248   explicit MemRefRegion(Location loc) : loc(loc) {}
249 
250   /// Computes the memory region accessed by this memref with the region
251   /// represented as constraints symbolic/parametric in 'loopDepth' loops
252   /// surrounding opInst. The computed region's 'cst' field has exactly as many
253   /// dimensional variables as the rank of the memref, and *potentially*
254   /// additional symbolic variables which could include any of the loop IVs
255   /// surrounding opInst up until 'loopDepth' and another additional Function
256   /// symbols involved with the access (for eg., those appear in affine.apply's,
257   /// loop bounds, etc.). If 'sliceState' is non-null, operands from
258   /// 'sliceState' are added as symbols, and the following constraints are added
259   /// to the system:
260   /// *) Inequality constraints which represent loop bounds for 'sliceState'
261   ///    operands which are loop IVS (these represent the destination loop IVs
262   ///    of the slice, and are added as symbols to MemRefRegion's constraint
263   ///    system).
264   /// *) Inequality constraints for the slice bounds in 'sliceState', which
265   ///    represent the bounds on the loop IVs in this constraint system w.r.t
266   ///    to slice operands (which correspond to symbols).
267   /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
268   /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
269   ///
270   ///  For example, the memref region for this operation at loopDepth = 1 will
271   ///  be:
272   ///
273   ///    affine.for %i = 0 to 32 {
274   ///      affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
275   ///        load %A[%ii]
276   ///      }
277   ///    }
278   ///
279   ///   {memref = %A, write = false, {%i <= m0 <= %i + 7} }
280   /// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
281   ///
282   LogicalResult compute(Operation *op, unsigned loopDepth,
283                         const ComputationSliceState *sliceState = nullptr,
284                         bool addMemRefDimBounds = true);
285 
getConstraintsMemRefRegion286   FlatAffineValueConstraints *getConstraints() { return &cst; }
getConstraintsMemRefRegion287   const FlatAffineValueConstraints *getConstraints() const { return &cst; }
isWriteMemRefRegion288   bool isWrite() const { return write; }
setWriteMemRefRegion289   void setWrite(bool flag) { write = flag; }
290 
291   /// Returns a constant upper bound on the number of elements in this region if
292   /// bounded by a known constant (always possible for static shapes), None
293   /// otherwise. Note that the symbols of the region are treated specially,
294   /// i.e., the returned bounding constant holds for *any given* value of the
295   /// symbol variables. The 'shape' vector is set to the corresponding
296   /// dimension-wise bounds major to minor. The number of elements and all the
297   /// dimension-wise bounds are guaranteed to be non-negative. We use int64_t
298   /// instead of uint64_t since index types can be at most int64_t. `lbs` are
299   /// set to the lower bounds for each of the rank dimensions, and lbDivisors
300   /// contains the corresponding denominators for floorDivs.
301   Optional<int64_t> getConstantBoundingSizeAndShape(
302       SmallVectorImpl<int64_t> *shape = nullptr,
303       std::vector<SmallVector<int64_t, 4>> *lbs = nullptr,
304       SmallVectorImpl<int64_t> *lbDivisors = nullptr) const;
305 
306   /// Gets the lower and upper bound map for the dimensional variable at
307   /// `pos`.
308   void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
309                              AffineMap &ubMap) const;
310 
311   /// A wrapper around FlatAffineValueConstraints::getConstantBoundOnDimSize().
312   /// 'pos' corresponds to the position of the memref shape's dimension (major
313   /// to minor) which matches 1:1 with the dimensional variable positions in
314   /// 'cst'.
315   Optional<int64_t>
316   getConstantBoundOnDimSize(unsigned pos,
317                             SmallVectorImpl<int64_t> *lb = nullptr,
318                             int64_t *lbFloorDivisor = nullptr) const {
319     assert(pos < getRank() && "invalid position");
320     return cst.getConstantBoundOnDimSize(pos, lb);
321   }
322 
323   /// Returns the size of this MemRefRegion in bytes.
324   Optional<int64_t> getRegionSize();
325 
326   // Wrapper around FlatAffineValueConstraints::unionBoundingBox.
327   LogicalResult unionBoundingBox(const MemRefRegion &other);
328 
329   /// Returns the rank of the memref that this region corresponds to.
330   unsigned getRank() const;
331 
332   /// Memref that this region corresponds to.
333   Value memref;
334 
335   /// Read or write.
336   bool write = false;
337 
338   /// If there is more than one load/store op associated with the region, the
339   /// location information would correspond to one of those op's.
340   Location loc;
341 
342   /// Region (data space) of the memref accessed. This set will thus have at
343   /// least as many dimensional variables as the shape dimensionality of the
344   /// memref, and these are the leading dimensions of the set appearing in that
345   /// order (major to minor / outermost to innermost). There may be additional
346   /// variables since getMemRefRegion() is called with a specific loop depth,
347   /// and thus the region is symbolic in the outer surrounding loops at that
348   /// depth.
349   // TODO: Replace this to exploit HyperRectangularSet.
350   FlatAffineValueConstraints cst;
351 };
352 
353 /// Returns the size of memref data in bytes if it's statically shaped, None
354 /// otherwise.
355 Optional<uint64_t> getMemRefSizeInBytes(MemRefType memRefType);
356 
357 /// Checks a load or store op for an out of bound access; returns failure if the
358 /// access is out of bounds along any of the dimensions, success otherwise.
359 /// Emits a diagnostic error (with location information) if emitError is true.
360 template <typename LoadOrStoreOpPointer>
361 LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
362                                       bool emitError = true);
363 
364 /// Returns the number of surrounding loops common to both A and B.
365 unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b);
366 
367 /// Gets the memory footprint of all data touched in the specified memory space
368 /// in bytes; if the memory space is unspecified, considers all memory spaces.
369 Optional<int64_t> getMemoryFootprintBytes(AffineForOp forOp,
370                                           int memorySpace = -1);
371 
372 /// Simplify the integer set by simplifying the underlying affine expressions by
373 /// flattening and some simple inference. Also, drop any duplicate constraints.
374 /// Returns the simplified integer set. This method runs in time linear in the
375 /// number of constraints.
376 IntegerSet simplifyIntegerSet(IntegerSet set);
377 
378 /// Returns the innermost common loop depth for the set of operations in 'ops'.
379 unsigned getInnermostCommonLoopDepth(
380     ArrayRef<Operation *> ops,
381     SmallVectorImpl<AffineForOp> *surroundingLoops = nullptr);
382 
383 } // namespace mlir
384 
385 #endif // MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H
386