1 //===- Utils.h - General analysis utilities ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header file defines prototypes for various transformation utilities for 10 // memref's and non-loop IR structures. These are not passes by themselves but 11 // are used either by passes, optimization sequences, or in turn by other 12 // transformation utilities. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H 17 #define MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H 18 19 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h" 20 #include "mlir/IR/AffineMap.h" 21 #include "mlir/IR/Block.h" 22 #include "mlir/IR/Location.h" 23 #include "mlir/Support/LLVM.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include <memory> 26 27 namespace mlir { 28 29 class AffineForOp; 30 class Block; 31 class Location; 32 struct MemRefAccess; 33 class Operation; 34 class Value; 35 36 /// Populates 'loops' with IVs of the loops surrounding 'op' ordered from 37 /// the outermost 'affine.for' operation to the innermost one. 38 // TODO: handle 'affine.if' ops. 39 void getLoopIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops); 40 41 /// Populates 'ops' with IVs of the loops surrounding `op`, along with 42 /// `affine.if` operations interleaved between these loops, ordered from the 43 /// outermost `affine.for` or `affine.if` operation to the innermost one. 44 void getEnclosingAffineForAndIfOps(Operation &op, 45 SmallVectorImpl<Operation *> *ops); 46 47 /// Returns the nesting depth of this operation, i.e., the number of loops 48 /// surrounding this operation. 49 unsigned getNestingDepth(Operation *op); 50 51 /// Returns whether a loop is a parallel loop and contains a reduction loop. 52 bool isLoopParallelAndContainsReduction(AffineForOp forOp); 53 54 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted 55 /// at 'forOp'. 56 void getSequentialLoops(AffineForOp forOp, 57 llvm::SmallDenseSet<Value, 8> *sequentialLoops); 58 59 /// Enumerates different result statuses of slice computation by 60 /// `computeSliceUnion` 61 // TODO: Identify and add different kinds of failures during slice computation. 62 struct SliceComputationResult { 63 enum ResultEnum { 64 Success, 65 IncorrectSliceFailure, // Slice is computed, but it is incorrect. 66 GenericFailure, // Unable to compute src loop computation slice. 67 } value; SliceComputationResultSliceComputationResult68 SliceComputationResult(ResultEnum v) : value(v) {} 69 }; 70 71 /// ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their 72 /// associated operands for a set of loops within a loop nest (typically the 73 /// set of loops surrounding a store operation). Loop bound AffineMaps which 74 /// are non-null represent slices of that loop's iteration space. 75 struct ComputationSliceState { 76 // List of sliced loop IVs (ordered from outermost to innermost). 77 // EX: 'ivs[i]' has lower bound 'lbs[i]' and upper bound 'ubs[i]'. 78 SmallVector<Value, 4> ivs; 79 // List of lower bound AffineMaps. 80 SmallVector<AffineMap, 4> lbs; 81 // List of upper bound AffineMaps. 82 SmallVector<AffineMap, 4> ubs; 83 // List of lower bound operands (lbOperands[i] are used by 'lbs[i]'). 84 std::vector<SmallVector<Value, 4>> lbOperands; 85 // List of upper bound operands (ubOperands[i] are used by 'ubs[i]'). 86 std::vector<SmallVector<Value, 4>> ubOperands; 87 // Slice loop nest insertion point in target loop nest. 88 Block::iterator insertPoint; 89 // Adds to 'cst' with constraints which represent the slice bounds on 'ivs' 90 // in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim 91 // variables and the values in 'lb/ubOperands' are added as symbols. 92 // Constraints are added for all loop IV bounds (dim or symbol), and 93 // constraints are added for slice bounds in 'lbs'/'ubs'. 94 // Returns failure if we cannot add loop bounds because of unsupported cases. 95 LogicalResult getAsConstraints(FlatAffineValueConstraints *cst); 96 97 /// Adds to 'cst' constraints which represent the original loop bounds on 98 /// 'ivs' in 'this'. This corresponds to the original domain of the loop nest 99 /// from which the slice is being computed. Returns failure if we cannot add 100 /// loop bounds because of unsupported cases. 101 LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst); 102 103 // Clears all bounds and operands in slice state. 104 void clearBounds(); 105 106 /// Returns true if the computation slice is empty. isEmptyComputationSliceState107 bool isEmpty() const { return ivs.empty(); } 108 109 /// Returns true if the computation slice encloses all the iterations of the 110 /// sliced loop nest. Returns false if it does not. Returns llvm::None if it 111 /// cannot determine if the slice is maximal or not. 112 // TODO: Cache 'isMaximal' so that we don't recompute it when the slice 113 // information hasn't changed. 114 Optional<bool> isMaximal() const; 115 116 /// Checks the validity of the slice computed. This is done using the 117 /// following steps: 118 /// 1. Get the new domain of the slice that would be created if fusion 119 /// succeeds. This domain gets constructed with source loop IVS and 120 /// destination loop IVS as dimensions. 121 /// 2. Project out the dimensions of the destination loop from the domain 122 /// above calculated in step(1) to express it purely in terms of the source 123 /// loop IVs. 124 /// 3. Calculate a set difference between the iterations of the new domain and 125 /// the original domain of the source loop. 126 /// If this difference is empty, the slice is declared to be valid. Otherwise, 127 /// return false as it implies that the effective fusion results in at least 128 /// one iteration of the slice that was not originally in the source's domain. 129 /// If the validity cannot be determined, returns llvm:None. 130 Optional<bool> isSliceValid(); 131 132 void dump() const; 133 134 private: 135 /// Fast check to determine if the computation slice is maximal. Returns true 136 /// if each slice dimension maps to an existing dst dimension and both the src 137 /// and the dst loops for those dimensions have the same bounds. Returns false 138 /// if both the src and the dst loops don't have the same bounds. Returns 139 /// llvm::None if none of the above can be proven. 140 Optional<bool> isSliceMaximalFastCheck() const; 141 }; 142 143 /// Computes the computation slice loop bounds for one loop nest as affine maps 144 /// of the other loop nest's IVs and symbols, using 'dependenceConstraints' 145 /// computed between 'depSourceAccess' and 'depSinkAccess'. 146 /// If 'isBackwardSlice' is true, a backwards slice is computed in which the 147 /// slice bounds of loop nest surrounding 'depSourceAccess' are computed in 148 /// terms of loop IVs and symbols of the loop nest surrounding 'depSinkAccess' 149 /// at 'loopDepth'. 150 /// If 'isBackwardSlice' is false, a forward slice is computed in which the 151 /// slice bounds of loop nest surrounding 'depSinkAccess' are computed in terms 152 /// of loop IVs and symbols of the loop nest surrounding 'depSourceAccess' at 153 /// 'loopDepth'. 154 /// The slice loop bounds and associated operands are returned in 'sliceState'. 155 // 156 // Backward slice example: 157 // 158 // affine.for %i0 = 0 to 10 { 159 // affine.store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess' 160 // } 161 // affine.for %i1 = 0 to 10 { 162 // %v = affine.load %0[%i1] : memref<100xf32> // 'depSinkAccess' 163 // } 164 // 165 // // Backward computation slice of loop nest '%i0'. 166 // affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) { 167 // affine.store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess' 168 // } 169 // 170 // Forward slice example: 171 // 172 // affine.for %i0 = 0 to 10 { 173 // affine.store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess' 174 // } 175 // affine.for %i1 = 0 to 10 { 176 // %v = affine.load %0[%i1] : memref<100xf32> // 'depSinkAccess' 177 // } 178 // 179 // // Forward computation slice of loop nest '%i1'. 180 // affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) { 181 // %v = affine.load %0[%i1] : memref<100xf32> // 'depSinkAccess' 182 // } 183 // 184 void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, 185 FlatAffineValueConstraints *dependenceConstraints, 186 unsigned loopDepth, bool isBackwardSlice, 187 ComputationSliceState *sliceState); 188 189 /// Return the number of iterations for the `slicetripCountMap` provided. 190 uint64_t getSliceIterationCount( 191 const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap); 192 193 /// Builds a map 'tripCountMap' from AffineForOp to constant trip count for 194 /// loop nest surrounding represented by slice loop bounds in 'slice'. Returns 195 /// true on success, false otherwise (if a non-constant trip count was 196 /// encountered). 197 bool buildSliceTripCountMap( 198 const ComputationSliceState &slice, 199 llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap); 200 201 /// Computes in 'sliceUnion' the union of all slice bounds computed at 202 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and 203 /// then verifies if it is valid. The parameter 'numCommonLoops' is the number 204 /// of loops common to the operations in 'opsA' and 'opsB'. If 'isBackwardSlice' 205 /// is true, computes slice bounds for loop nest surrounding ops in 'opsA', as a 206 /// function of IVs and symbols of loop nest surrounding ops in 'opsB' at 207 /// 'loopDepth'. If 'isBackwardSlice' is false, computes slice bounds for loop 208 /// nest surrounding ops in 'opsB', as a function of IVs and symbols of loop 209 /// nest surrounding ops in 'opsA' at 'loopDepth'. Returns 210 /// 'SliceComputationResult::Success' if union was computed correctly, an 211 /// appropriate 'failure' otherwise. 212 // TODO: Change this API to take 'forOpA'/'forOpB'. 213 SliceComputationResult 214 computeSliceUnion(ArrayRef<Operation *> opsA, ArrayRef<Operation *> opsB, 215 unsigned loopDepth, unsigned numCommonLoops, 216 bool isBackwardSlice, ComputationSliceState *sliceUnion); 217 218 /// Creates a clone of the computation contained in the loop nest surrounding 219 /// 'srcOpInst', slices the iteration space of src loop based on slice bounds 220 /// in 'sliceState', and inserts the computation slice at the beginning of the 221 /// operation block of the loop at 'dstLoopDepth' in the loop nest surrounding 222 /// 'dstOpInst'. Returns the top-level loop of the computation slice on 223 /// success, returns nullptr otherwise. 224 // Loop depth is a crucial optimization choice that determines where to 225 // materialize the results of the backward slice - presenting a trade-off b/w 226 // storage and redundant computation in several cases. 227 // TODO: Support computation slices with common surrounding loops. 228 AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, 229 Operation *dstOpInst, 230 unsigned dstLoopDepth, 231 ComputationSliceState *sliceState); 232 233 /// A region of a memref's data space; this is typically constructed by 234 /// analyzing load/store op's on this memref and the index space of loops 235 /// surrounding such op's. 236 // For example, the memref region for a load operation at loop depth = 1: 237 // 238 // affine.for %i = 0 to 32 { 239 // affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { 240 // affine.load %A[%ii] 241 // } 242 // } 243 // 244 // Region: {memref = %A, write = false, {%i <= m0 <= %i + 7} } 245 // The last field is a 2-d FlatAffineValueConstraints symbolic in %i. 246 // 247 struct MemRefRegion { MemRefRegionMemRefRegion248 explicit MemRefRegion(Location loc) : loc(loc) {} 249 250 /// Computes the memory region accessed by this memref with the region 251 /// represented as constraints symbolic/parametric in 'loopDepth' loops 252 /// surrounding opInst. The computed region's 'cst' field has exactly as many 253 /// dimensional variables as the rank of the memref, and *potentially* 254 /// additional symbolic variables which could include any of the loop IVs 255 /// surrounding opInst up until 'loopDepth' and another additional Function 256 /// symbols involved with the access (for eg., those appear in affine.apply's, 257 /// loop bounds, etc.). If 'sliceState' is non-null, operands from 258 /// 'sliceState' are added as symbols, and the following constraints are added 259 /// to the system: 260 /// *) Inequality constraints which represent loop bounds for 'sliceState' 261 /// operands which are loop IVS (these represent the destination loop IVs 262 /// of the slice, and are added as symbols to MemRefRegion's constraint 263 /// system). 264 /// *) Inequality constraints for the slice bounds in 'sliceState', which 265 /// represent the bounds on the loop IVs in this constraint system w.r.t 266 /// to slice operands (which correspond to symbols). 267 /// If 'addMemRefDimBounds' is true, constant upper/lower bounds 268 /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'. 269 /// 270 /// For example, the memref region for this operation at loopDepth = 1 will 271 /// be: 272 /// 273 /// affine.for %i = 0 to 32 { 274 /// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { 275 /// load %A[%ii] 276 /// } 277 /// } 278 /// 279 /// {memref = %A, write = false, {%i <= m0 <= %i + 7} } 280 /// The last field is a 2-d FlatAffineValueConstraints symbolic in %i. 281 /// 282 LogicalResult compute(Operation *op, unsigned loopDepth, 283 const ComputationSliceState *sliceState = nullptr, 284 bool addMemRefDimBounds = true); 285 getConstraintsMemRefRegion286 FlatAffineValueConstraints *getConstraints() { return &cst; } getConstraintsMemRefRegion287 const FlatAffineValueConstraints *getConstraints() const { return &cst; } isWriteMemRefRegion288 bool isWrite() const { return write; } setWriteMemRefRegion289 void setWrite(bool flag) { write = flag; } 290 291 /// Returns a constant upper bound on the number of elements in this region if 292 /// bounded by a known constant (always possible for static shapes), None 293 /// otherwise. Note that the symbols of the region are treated specially, 294 /// i.e., the returned bounding constant holds for *any given* value of the 295 /// symbol variables. The 'shape' vector is set to the corresponding 296 /// dimension-wise bounds major to minor. The number of elements and all the 297 /// dimension-wise bounds are guaranteed to be non-negative. We use int64_t 298 /// instead of uint64_t since index types can be at most int64_t. `lbs` are 299 /// set to the lower bounds for each of the rank dimensions, and lbDivisors 300 /// contains the corresponding denominators for floorDivs. 301 Optional<int64_t> getConstantBoundingSizeAndShape( 302 SmallVectorImpl<int64_t> *shape = nullptr, 303 std::vector<SmallVector<int64_t, 4>> *lbs = nullptr, 304 SmallVectorImpl<int64_t> *lbDivisors = nullptr) const; 305 306 /// Gets the lower and upper bound map for the dimensional variable at 307 /// `pos`. 308 void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, 309 AffineMap &ubMap) const; 310 311 /// A wrapper around FlatAffineValueConstraints::getConstantBoundOnDimSize(). 312 /// 'pos' corresponds to the position of the memref shape's dimension (major 313 /// to minor) which matches 1:1 with the dimensional variable positions in 314 /// 'cst'. 315 Optional<int64_t> 316 getConstantBoundOnDimSize(unsigned pos, 317 SmallVectorImpl<int64_t> *lb = nullptr, 318 int64_t *lbFloorDivisor = nullptr) const { 319 assert(pos < getRank() && "invalid position"); 320 return cst.getConstantBoundOnDimSize(pos, lb); 321 } 322 323 /// Returns the size of this MemRefRegion in bytes. 324 Optional<int64_t> getRegionSize(); 325 326 // Wrapper around FlatAffineValueConstraints::unionBoundingBox. 327 LogicalResult unionBoundingBox(const MemRefRegion &other); 328 329 /// Returns the rank of the memref that this region corresponds to. 330 unsigned getRank() const; 331 332 /// Memref that this region corresponds to. 333 Value memref; 334 335 /// Read or write. 336 bool write = false; 337 338 /// If there is more than one load/store op associated with the region, the 339 /// location information would correspond to one of those op's. 340 Location loc; 341 342 /// Region (data space) of the memref accessed. This set will thus have at 343 /// least as many dimensional variables as the shape dimensionality of the 344 /// memref, and these are the leading dimensions of the set appearing in that 345 /// order (major to minor / outermost to innermost). There may be additional 346 /// variables since getMemRefRegion() is called with a specific loop depth, 347 /// and thus the region is symbolic in the outer surrounding loops at that 348 /// depth. 349 // TODO: Replace this to exploit HyperRectangularSet. 350 FlatAffineValueConstraints cst; 351 }; 352 353 /// Returns the size of memref data in bytes if it's statically shaped, None 354 /// otherwise. 355 Optional<uint64_t> getMemRefSizeInBytes(MemRefType memRefType); 356 357 /// Checks a load or store op for an out of bound access; returns failure if the 358 /// access is out of bounds along any of the dimensions, success otherwise. 359 /// Emits a diagnostic error (with location information) if emitError is true. 360 template <typename LoadOrStoreOpPointer> 361 LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, 362 bool emitError = true); 363 364 /// Returns the number of surrounding loops common to both A and B. 365 unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b); 366 367 /// Gets the memory footprint of all data touched in the specified memory space 368 /// in bytes; if the memory space is unspecified, considers all memory spaces. 369 Optional<int64_t> getMemoryFootprintBytes(AffineForOp forOp, 370 int memorySpace = -1); 371 372 /// Simplify the integer set by simplifying the underlying affine expressions by 373 /// flattening and some simple inference. Also, drop any duplicate constraints. 374 /// Returns the simplified integer set. This method runs in time linear in the 375 /// number of constraints. 376 IntegerSet simplifyIntegerSet(IntegerSet set); 377 378 /// Returns the innermost common loop depth for the set of operations in 'ops'. 379 unsigned getInnermostCommonLoopDepth( 380 ArrayRef<Operation *> ops, 381 SmallVectorImpl<AffineForOp> *surroundingLoops = nullptr); 382 383 } // namespace mlir 384 385 #endif // MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H 386