1 //===- Utils.cpp ---- Misc utilities for analysis -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements miscellaneous analysis routines for non-loop IR 10 // structures. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Dialect/Affine/Analysis/Utils.h" 15 #include "mlir/Analysis/Presburger/PresburgerSet.h" 16 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" 17 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" 18 #include "mlir/Dialect/Affine/IR/AffineOps.h" 19 #include "mlir/Dialect/Affine/IR/AffineValueMap.h" 20 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" 21 #include "mlir/Dialect/StandardOps/IR/Ops.h" 22 #include "mlir/IR/IntegerSet.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 #define DEBUG_TYPE "analysis-utils" 28 29 using namespace mlir; 30 31 using llvm::SmallDenseMap; 32 33 /// Populates 'loops' with IVs of the loops surrounding 'op' ordered from 34 /// the outermost 'affine.for' operation to the innermost one. 35 void mlir::getLoopIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops) { 36 auto *currOp = op.getParentOp(); 37 AffineForOp currAffineForOp; 38 // Traverse up the hierarchy collecting all 'affine.for' operation while 39 // skipping over 'affine.if' operations. 40 while (currOp) { 41 if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp)) 42 loops->push_back(currAffineForOp); 43 currOp = currOp->getParentOp(); 44 } 45 std::reverse(loops->begin(), loops->end()); 46 } 47 48 /// Populates 'ops' with IVs of the loops surrounding `op`, along with 49 /// `affine.if` operations interleaved between these loops, ordered from the 50 /// outermost `affine.for` operation to the innermost one. 51 void mlir::getEnclosingAffineForAndIfOps(Operation &op, 52 SmallVectorImpl<Operation *> *ops) { 53 ops->clear(); 54 Operation *currOp = op.getParentOp(); 55 56 // Traverse up the hierarchy collecting all `affine.for` and `affine.if` 57 // operations. 58 while (currOp) { 59 if (isa<AffineIfOp, AffineForOp>(currOp)) 60 ops->push_back(currOp); 61 currOp = currOp->getParentOp(); 62 } 63 std::reverse(ops->begin(), ops->end()); 64 } 65 66 // Populates 'cst' with FlatAffineValueConstraints which represent original 67 // domain of the loop bounds that define 'ivs'. 68 LogicalResult 69 ComputationSliceState::getSourceAsConstraints(FlatAffineValueConstraints &cst) { 70 assert(!ivs.empty() && "Cannot have a slice without its IVs"); 71 cst.reset(/*numDims=*/ivs.size(), /*numSymbols=*/0, /*numLocals=*/0, ivs); 72 for (Value iv : ivs) { 73 AffineForOp loop = getForInductionVarOwner(iv); 74 assert(loop && "Expected affine for"); 75 if (failed(cst.addAffineForOpDomain(loop))) 76 return failure(); 77 } 78 return success(); 79 } 80 81 // Populates 'cst' with FlatAffineValueConstraints which represent slice bounds. 82 LogicalResult 83 ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) { 84 assert(!lbOperands.empty()); 85 // Adds src 'ivs' as dimension identifiers in 'cst'. 86 unsigned numDims = ivs.size(); 87 // Adds operands (dst ivs and symbols) as symbols in 'cst'. 88 unsigned numSymbols = lbOperands[0].size(); 89 90 SmallVector<Value, 4> values(ivs); 91 // Append 'ivs' then 'operands' to 'values'. 92 values.append(lbOperands[0].begin(), lbOperands[0].end()); 93 cst->reset(numDims, numSymbols, 0, values); 94 95 // Add loop bound constraints for values which are loop IVs of the destination 96 // of fusion and equality constraints for symbols which are constants. 97 for (unsigned i = numDims, end = values.size(); i < end; ++i) { 98 Value value = values[i]; 99 assert(cst->containsId(value) && "value expected to be present"); 100 if (isValidSymbol(value)) { 101 // Check if the symbol is a constant. 102 if (auto cOp = value.getDefiningOp<arith::ConstantIndexOp>()) 103 cst->addBound(FlatAffineConstraints::EQ, value, cOp.value()); 104 } else if (auto loop = getForInductionVarOwner(value)) { 105 if (failed(cst->addAffineForOpDomain(loop))) 106 return failure(); 107 } 108 } 109 110 // Add slices bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]' 111 LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]); 112 assert(succeeded(ret) && 113 "should not fail as we never have semi-affine slice maps"); 114 (void)ret; 115 return success(); 116 } 117 118 // Clears state bounds and operand state. 119 void ComputationSliceState::clearBounds() { 120 lbs.clear(); 121 ubs.clear(); 122 lbOperands.clear(); 123 ubOperands.clear(); 124 } 125 126 void ComputationSliceState::dump() const { 127 llvm::errs() << "\tIVs:\n"; 128 for (Value iv : ivs) 129 llvm::errs() << "\t\t" << iv << "\n"; 130 131 llvm::errs() << "\tLBs:\n"; 132 for (auto &en : llvm::enumerate(lbs)) { 133 llvm::errs() << "\t\t" << en.value() << "\n"; 134 llvm::errs() << "\t\tOperands:\n"; 135 for (Value lbOp : lbOperands[en.index()]) 136 llvm::errs() << "\t\t\t" << lbOp << "\n"; 137 } 138 139 llvm::errs() << "\tUBs:\n"; 140 for (auto &en : llvm::enumerate(ubs)) { 141 llvm::errs() << "\t\t" << en.value() << "\n"; 142 llvm::errs() << "\t\tOperands:\n"; 143 for (Value ubOp : ubOperands[en.index()]) 144 llvm::errs() << "\t\t\t" << ubOp << "\n"; 145 } 146 } 147 148 /// Fast check to determine if the computation slice is maximal. Returns true if 149 /// each slice dimension maps to an existing dst dimension and both the src 150 /// and the dst loops for those dimensions have the same bounds. Returns false 151 /// if both the src and the dst loops don't have the same bounds. Returns 152 /// llvm::None if none of the above can be proven. 153 Optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const { 154 assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() && 155 "Unexpected number of lbs, ubs and ivs in slice"); 156 157 for (unsigned i = 0, end = lbs.size(); i < end; ++i) { 158 AffineMap lbMap = lbs[i]; 159 AffineMap ubMap = ubs[i]; 160 161 // Check if this slice is just an equality along this dimension. 162 if (!lbMap || !ubMap || lbMap.getNumResults() != 1 || 163 ubMap.getNumResults() != 1 || 164 lbMap.getResult(0) + 1 != ubMap.getResult(0) || 165 // The condition above will be true for maps describing a single 166 // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1). 167 // Make sure we skip those cases by checking that the lb result is not 168 // just a constant. 169 lbMap.getResult(0).isa<AffineConstantExpr>()) 170 return llvm::None; 171 172 // Limited support: we expect the lb result to be just a loop dimension for 173 // now. 174 AffineDimExpr result = lbMap.getResult(0).dyn_cast<AffineDimExpr>(); 175 if (!result) 176 return llvm::None; 177 178 // Retrieve dst loop bounds. 179 AffineForOp dstLoop = 180 getForInductionVarOwner(lbOperands[i][result.getPosition()]); 181 if (!dstLoop) 182 return llvm::None; 183 AffineMap dstLbMap = dstLoop.getLowerBoundMap(); 184 AffineMap dstUbMap = dstLoop.getUpperBoundMap(); 185 186 // Retrieve src loop bounds. 187 AffineForOp srcLoop = getForInductionVarOwner(ivs[i]); 188 assert(srcLoop && "Expected affine for"); 189 AffineMap srcLbMap = srcLoop.getLowerBoundMap(); 190 AffineMap srcUbMap = srcLoop.getUpperBoundMap(); 191 192 // Limited support: we expect simple src and dst loops with a single 193 // constant component per bound for now. 194 if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 || 195 dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1) 196 return llvm::None; 197 198 AffineExpr srcLbResult = srcLbMap.getResult(0); 199 AffineExpr dstLbResult = dstLbMap.getResult(0); 200 AffineExpr srcUbResult = srcUbMap.getResult(0); 201 AffineExpr dstUbResult = dstUbMap.getResult(0); 202 if (!srcLbResult.isa<AffineConstantExpr>() || 203 !srcUbResult.isa<AffineConstantExpr>() || 204 !dstLbResult.isa<AffineConstantExpr>() || 205 !dstUbResult.isa<AffineConstantExpr>()) 206 return llvm::None; 207 208 // Check if src and dst loop bounds are the same. If not, we can guarantee 209 // that the slice is not maximal. 210 if (srcLbResult != dstLbResult || srcUbResult != dstUbResult || 211 srcLoop.getStep() != dstLoop.getStep()) 212 return false; 213 } 214 215 return true; 216 } 217 218 /// Returns true if it is deterministically verified that the original iteration 219 /// space of the slice is contained within the new iteration space that is 220 /// created after fusing 'this' slice into its destination. 221 Optional<bool> ComputationSliceState::isSliceValid() { 222 // Fast check to determine if the slice is valid. If the following conditions 223 // are verified to be true, slice is declared valid by the fast check: 224 // 1. Each slice loop is a single iteration loop bound in terms of a single 225 // destination loop IV. 226 // 2. Loop bounds of the destination loop IV (from above) and those of the 227 // source loop IV are exactly the same. 228 // If the fast check is inconclusive or false, we proceed with a more 229 // expensive analysis. 230 // TODO: Store the result of the fast check, as it might be used again in 231 // `canRemoveSrcNodeAfterFusion`. 232 Optional<bool> isValidFastCheck = isSliceMaximalFastCheck(); 233 if (isValidFastCheck.hasValue() && isValidFastCheck.getValue()) 234 return true; 235 236 // Create constraints for the source loop nest using which slice is computed. 237 FlatAffineValueConstraints srcConstraints; 238 // TODO: Store the source's domain to avoid computation at each depth. 239 if (failed(getSourceAsConstraints(srcConstraints))) { 240 LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n"); 241 return llvm::None; 242 } 243 // As the set difference utility currently cannot handle symbols in its 244 // operands, validity of the slice cannot be determined. 245 if (srcConstraints.getNumSymbolIds() > 0) { 246 LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n"); 247 return llvm::None; 248 } 249 // TODO: Handle local ids in the source domains while using the 'projectOut' 250 // utility below. Currently, aligning is not done assuming that there will be 251 // no local ids in the source domain. 252 if (srcConstraints.getNumLocalIds() != 0) { 253 LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n"); 254 return llvm::None; 255 } 256 257 // Create constraints for the slice loop nest that would be created if the 258 // fusion succeeds. 259 FlatAffineValueConstraints sliceConstraints; 260 if (failed(getAsConstraints(&sliceConstraints))) { 261 LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n"); 262 return llvm::None; 263 } 264 265 // Projecting out every dimension other than the 'ivs' to express slice's 266 // domain completely in terms of source's IVs. 267 sliceConstraints.projectOut(ivs.size(), 268 sliceConstraints.getNumIds() - ivs.size()); 269 270 LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n"); 271 LLVM_DEBUG(srcConstraints.dump()); 272 LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds " 273 "(expressed in terms of its source's IVs):\n"); 274 LLVM_DEBUG(sliceConstraints.dump()); 275 276 // TODO: Store 'srcSet' to avoid recalculating for each depth. 277 PresburgerSet srcSet(srcConstraints); 278 PresburgerSet sliceSet(sliceConstraints); 279 PresburgerSet diffSet = sliceSet.subtract(srcSet); 280 281 if (!diffSet.isIntegerEmpty()) { 282 LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n"); 283 return false; 284 } 285 return true; 286 } 287 288 /// Returns true if the computation slice encloses all the iterations of the 289 /// sliced loop nest. Returns false if it does not. Returns llvm::None if it 290 /// cannot determine if the slice is maximal or not. 291 Optional<bool> ComputationSliceState::isMaximal() const { 292 // Fast check to determine if the computation slice is maximal. If the result 293 // is inconclusive, we proceed with a more expensive analysis. 294 Optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck(); 295 if (isMaximalFastCheck.hasValue()) 296 return isMaximalFastCheck; 297 298 // Create constraints for the src loop nest being sliced. 299 FlatAffineValueConstraints srcConstraints; 300 srcConstraints.reset(/*numDims=*/ivs.size(), /*numSymbols=*/0, 301 /*numLocals=*/0, ivs); 302 for (Value iv : ivs) { 303 AffineForOp loop = getForInductionVarOwner(iv); 304 assert(loop && "Expected affine for"); 305 if (failed(srcConstraints.addAffineForOpDomain(loop))) 306 return llvm::None; 307 } 308 309 // Create constraints for the slice using the dst loop nest information. We 310 // retrieve existing dst loops from the lbOperands. 311 SmallVector<Value, 8> consumerIVs; 312 for (Value lbOp : lbOperands[0]) 313 if (getForInductionVarOwner(lbOp)) 314 consumerIVs.push_back(lbOp); 315 316 // Add empty IV Values for those new loops that are not equalities and, 317 // therefore, are not yet materialized in the IR. 318 for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i) 319 consumerIVs.push_back(Value()); 320 321 FlatAffineValueConstraints sliceConstraints; 322 sliceConstraints.reset(/*numDims=*/consumerIVs.size(), /*numSymbols=*/0, 323 /*numLocals=*/0, consumerIVs); 324 325 if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0]))) 326 return llvm::None; 327 328 if (srcConstraints.getNumDimIds() != sliceConstraints.getNumDimIds()) 329 // Constraint dims are different. The integer set difference can't be 330 // computed so we don't know if the slice is maximal. 331 return llvm::None; 332 333 // Compute the difference between the src loop nest and the slice integer 334 // sets. 335 PresburgerSet srcSet(srcConstraints); 336 PresburgerSet sliceSet(sliceConstraints); 337 PresburgerSet diffSet = srcSet.subtract(sliceSet); 338 return diffSet.isIntegerEmpty(); 339 } 340 341 unsigned MemRefRegion::getRank() const { 342 return memref.getType().cast<MemRefType>().getRank(); 343 } 344 345 Optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape( 346 SmallVectorImpl<int64_t> *shape, std::vector<SmallVector<int64_t, 4>> *lbs, 347 SmallVectorImpl<int64_t> *lbDivisors) const { 348 auto memRefType = memref.getType().cast<MemRefType>(); 349 unsigned rank = memRefType.getRank(); 350 if (shape) 351 shape->reserve(rank); 352 353 assert(rank == cst.getNumDimIds() && "inconsistent memref region"); 354 355 // Use a copy of the region constraints that has upper/lower bounds for each 356 // memref dimension with static size added to guard against potential 357 // over-approximation from projection or union bounding box. We may not add 358 // this on the region itself since they might just be redundant constraints 359 // that will need non-trivials means to eliminate. 360 FlatAffineConstraints cstWithShapeBounds(cst); 361 for (unsigned r = 0; r < rank; r++) { 362 cstWithShapeBounds.addBound(FlatAffineConstraints::LB, r, 0); 363 int64_t dimSize = memRefType.getDimSize(r); 364 if (ShapedType::isDynamic(dimSize)) 365 continue; 366 cstWithShapeBounds.addBound(FlatAffineConstraints::UB, r, dimSize - 1); 367 } 368 369 // Find a constant upper bound on the extent of this memref region along each 370 // dimension. 371 int64_t numElements = 1; 372 int64_t diffConstant; 373 int64_t lbDivisor; 374 for (unsigned d = 0; d < rank; d++) { 375 SmallVector<int64_t, 4> lb; 376 Optional<int64_t> diff = 377 cstWithShapeBounds.getConstantBoundOnDimSize(d, &lb, &lbDivisor); 378 if (diff.hasValue()) { 379 diffConstant = diff.getValue(); 380 assert(diffConstant >= 0 && "Dim size bound can't be negative"); 381 assert(lbDivisor > 0); 382 } else { 383 // If no constant bound is found, then it can always be bound by the 384 // memref's dim size if the latter has a constant size along this dim. 385 auto dimSize = memRefType.getDimSize(d); 386 if (dimSize == -1) 387 return None; 388 diffConstant = dimSize; 389 // Lower bound becomes 0. 390 lb.resize(cstWithShapeBounds.getNumSymbolIds() + 1, 0); 391 lbDivisor = 1; 392 } 393 numElements *= diffConstant; 394 if (lbs) { 395 lbs->push_back(lb); 396 assert(lbDivisors && "both lbs and lbDivisor or none"); 397 lbDivisors->push_back(lbDivisor); 398 } 399 if (shape) { 400 shape->push_back(diffConstant); 401 } 402 } 403 return numElements; 404 } 405 406 void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, 407 AffineMap &ubMap) const { 408 assert(pos < cst.getNumDimIds() && "invalid position"); 409 auto memRefType = memref.getType().cast<MemRefType>(); 410 unsigned rank = memRefType.getRank(); 411 412 assert(rank == cst.getNumDimIds() && "inconsistent memref region"); 413 414 auto boundPairs = cst.getLowerAndUpperBound( 415 pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolIds(), 416 /*localExprs=*/{}, memRefType.getContext()); 417 lbMap = boundPairs.first; 418 ubMap = boundPairs.second; 419 assert(lbMap && "lower bound for a region must exist"); 420 assert(ubMap && "upper bound for a region must exist"); 421 assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolIds() - rank); 422 assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolIds() - rank); 423 } 424 425 LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) { 426 assert(memref == other.memref); 427 return cst.unionBoundingBox(*other.getConstraints()); 428 } 429 430 /// Computes the memory region accessed by this memref with the region 431 /// represented as constraints symbolic/parametric in 'loopDepth' loops 432 /// surrounding opInst and any additional Function symbols. 433 // For example, the memref region for this load operation at loopDepth = 1 will 434 // be as below: 435 // 436 // affine.for %i = 0 to 32 { 437 // affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { 438 // load %A[%ii] 439 // } 440 // } 441 // 442 // region: {memref = %A, write = false, {%i <= m0 <= %i + 7} } 443 // The last field is a 2-d FlatAffineConstraints symbolic in %i. 444 // 445 // TODO: extend this to any other memref dereferencing ops 446 // (dma_start, dma_wait). 447 LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, 448 const ComputationSliceState *sliceState, 449 bool addMemRefDimBounds) { 450 assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) && 451 "affine read/write op expected"); 452 453 MemRefAccess access(op); 454 memref = access.memref; 455 write = access.isStore(); 456 457 unsigned rank = access.getRank(); 458 459 LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op 460 << "depth: " << loopDepth << "\n";); 461 462 // 0-d memrefs. 463 if (rank == 0) { 464 SmallVector<AffineForOp, 4> ivs; 465 getLoopIVs(*op, &ivs); 466 assert(loopDepth <= ivs.size() && "invalid 'loopDepth'"); 467 // The first 'loopDepth' IVs are symbols for this region. 468 ivs.resize(loopDepth); 469 SmallVector<Value, 4> regionSymbols; 470 extractForInductionVars(ivs, ®ionSymbols); 471 // A 0-d memref has a 0-d region. 472 cst.reset(rank, loopDepth, /*numLocals=*/0, regionSymbols); 473 return success(); 474 } 475 476 // Build the constraints for this region. 477 AffineValueMap accessValueMap; 478 access.getAccessMap(&accessValueMap); 479 AffineMap accessMap = accessValueMap.getAffineMap(); 480 481 unsigned numDims = accessMap.getNumDims(); 482 unsigned numSymbols = accessMap.getNumSymbols(); 483 unsigned numOperands = accessValueMap.getNumOperands(); 484 // Merge operands with slice operands. 485 SmallVector<Value, 4> operands; 486 operands.resize(numOperands); 487 for (unsigned i = 0; i < numOperands; ++i) 488 operands[i] = accessValueMap.getOperand(i); 489 490 if (sliceState != nullptr) { 491 operands.reserve(operands.size() + sliceState->lbOperands[0].size()); 492 // Append slice operands to 'operands' as symbols. 493 for (auto extraOperand : sliceState->lbOperands[0]) { 494 if (!llvm::is_contained(operands, extraOperand)) { 495 operands.push_back(extraOperand); 496 numSymbols++; 497 } 498 } 499 } 500 // We'll first associate the dims and symbols of the access map to the dims 501 // and symbols resp. of cst. This will change below once cst is 502 // fully constructed out. 503 cst.reset(numDims, numSymbols, 0, operands); 504 505 // Add equality constraints. 506 // Add inequalities for loop lower/upper bounds. 507 for (unsigned i = 0; i < numDims + numSymbols; ++i) { 508 auto operand = operands[i]; 509 if (auto loop = getForInductionVarOwner(operand)) { 510 // Note that cst can now have more dimensions than accessMap if the 511 // bounds expressions involve outer loops or other symbols. 512 // TODO: rewrite this to use getInstIndexSet; this way 513 // conditionals will be handled when the latter supports it. 514 if (failed(cst.addAffineForOpDomain(loop))) 515 return failure(); 516 } else { 517 // Has to be a valid symbol. 518 auto symbol = operand; 519 assert(isValidSymbol(symbol)); 520 // Check if the symbol is a constant. 521 if (auto *op = symbol.getDefiningOp()) { 522 if (auto constOp = dyn_cast<arith::ConstantIndexOp>(op)) { 523 cst.addBound(FlatAffineConstraints::EQ, symbol, constOp.value()); 524 } 525 } 526 } 527 } 528 529 // Add lower/upper bounds on loop IVs using bounds from 'sliceState'. 530 if (sliceState != nullptr) { 531 // Add dim and symbol slice operands. 532 for (auto operand : sliceState->lbOperands[0]) { 533 cst.addInductionVarOrTerminalSymbol(operand); 534 } 535 // Add upper/lower bounds from 'sliceState' to 'cst'. 536 LogicalResult ret = 537 cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs, 538 sliceState->lbOperands[0]); 539 assert(succeeded(ret) && 540 "should not fail as we never have semi-affine slice maps"); 541 (void)ret; 542 } 543 544 // Add access function equalities to connect loop IVs to data dimensions. 545 if (failed(cst.composeMap(&accessValueMap))) { 546 op->emitError("getMemRefRegion: compose affine map failed"); 547 LLVM_DEBUG(accessValueMap.getAffineMap().dump()); 548 return failure(); 549 } 550 551 // Set all identifiers appearing after the first 'rank' identifiers as 552 // symbolic identifiers - so that the ones corresponding to the memref 553 // dimensions are the dimensional identifiers for the memref region. 554 cst.setDimSymbolSeparation(cst.getNumDimAndSymbolIds() - rank); 555 556 // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which 557 // this memref region is symbolic. 558 SmallVector<AffineForOp, 4> enclosingIVs; 559 getLoopIVs(*op, &enclosingIVs); 560 assert(loopDepth <= enclosingIVs.size() && "invalid loop depth"); 561 enclosingIVs.resize(loopDepth); 562 SmallVector<Value, 4> ids; 563 cst.getValues(cst.getNumDimIds(), cst.getNumDimAndSymbolIds(), &ids); 564 for (auto id : ids) { 565 AffineForOp iv; 566 if ((iv = getForInductionVarOwner(id)) && 567 !llvm::is_contained(enclosingIVs, iv)) { 568 cst.projectOut(id); 569 } 570 } 571 572 // Project out any local variables (these would have been added for any 573 // mod/divs). 574 cst.projectOut(cst.getNumDimAndSymbolIds(), cst.getNumLocalIds()); 575 576 // Constant fold any symbolic identifiers. 577 cst.constantFoldIdRange(/*pos=*/cst.getNumDimIds(), 578 /*num=*/cst.getNumSymbolIds()); 579 580 assert(cst.getNumDimIds() == rank && "unexpected MemRefRegion format"); 581 582 // Add upper/lower bounds for each memref dimension with static size 583 // to guard against potential over-approximation from projection. 584 // TODO: Support dynamic memref dimensions. 585 if (addMemRefDimBounds) { 586 auto memRefType = memref.getType().cast<MemRefType>(); 587 for (unsigned r = 0; r < rank; r++) { 588 cst.addBound(FlatAffineConstraints::LB, /*pos=*/r, /*value=*/0); 589 if (memRefType.isDynamicDim(r)) 590 continue; 591 cst.addBound(FlatAffineConstraints::UB, /*pos=*/r, 592 memRefType.getDimSize(r) - 1); 593 } 594 } 595 cst.removeTrivialRedundancy(); 596 597 LLVM_DEBUG(llvm::dbgs() << "Memory region:\n"); 598 LLVM_DEBUG(cst.dump()); 599 return success(); 600 } 601 602 static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { 603 auto elementType = memRefType.getElementType(); 604 605 unsigned sizeInBits; 606 if (elementType.isIntOrFloat()) { 607 sizeInBits = elementType.getIntOrFloatBitWidth(); 608 } else { 609 auto vectorType = elementType.cast<VectorType>(); 610 sizeInBits = 611 vectorType.getElementTypeBitWidth() * vectorType.getNumElements(); 612 } 613 return llvm::divideCeil(sizeInBits, 8); 614 } 615 616 // Returns the size of the region. 617 Optional<int64_t> MemRefRegion::getRegionSize() { 618 auto memRefType = memref.getType().cast<MemRefType>(); 619 620 if (!memRefType.getLayout().isIdentity()) { 621 LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n"); 622 return false; 623 } 624 625 // Indices to use for the DmaStart op. 626 // Indices for the original memref being DMAed from/to. 627 SmallVector<Value, 4> memIndices; 628 // Indices for the faster buffer being DMAed into/from. 629 SmallVector<Value, 4> bufIndices; 630 631 // Compute the extents of the buffer. 632 Optional<int64_t> numElements = getConstantBoundingSizeAndShape(); 633 if (!numElements.hasValue()) { 634 LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n"); 635 return None; 636 } 637 return getMemRefEltSizeInBytes(memRefType) * numElements.getValue(); 638 } 639 640 /// Returns the size of memref data in bytes if it's statically shaped, None 641 /// otherwise. If the element of the memref has vector type, takes into account 642 /// size of the vector as well. 643 // TODO: improve/complete this when we have target data. 644 Optional<uint64_t> mlir::getMemRefSizeInBytes(MemRefType memRefType) { 645 if (!memRefType.hasStaticShape()) 646 return None; 647 auto elementType = memRefType.getElementType(); 648 if (!elementType.isIntOrFloat() && !elementType.isa<VectorType>()) 649 return None; 650 651 uint64_t sizeInBytes = getMemRefEltSizeInBytes(memRefType); 652 for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) { 653 sizeInBytes = sizeInBytes * memRefType.getDimSize(i); 654 } 655 return sizeInBytes; 656 } 657 658 template <typename LoadOrStoreOp> 659 LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp, 660 bool emitError) { 661 static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface, 662 AffineWriteOpInterface>::value, 663 "argument should be either a AffineReadOpInterface or a " 664 "AffineWriteOpInterface"); 665 666 Operation *op = loadOrStoreOp.getOperation(); 667 MemRefRegion region(op->getLoc()); 668 if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr, 669 /*addMemRefDimBounds=*/false))) 670 return success(); 671 672 LLVM_DEBUG(llvm::dbgs() << "Memory region"); 673 LLVM_DEBUG(region.getConstraints()->dump()); 674 675 bool outOfBounds = false; 676 unsigned rank = loadOrStoreOp.getMemRefType().getRank(); 677 678 // For each dimension, check for out of bounds. 679 for (unsigned r = 0; r < rank; r++) { 680 FlatAffineConstraints ucst(*region.getConstraints()); 681 682 // Intersect memory region with constraint capturing out of bounds (both out 683 // of upper and out of lower), and check if the constraint system is 684 // feasible. If it is, there is at least one point out of bounds. 685 SmallVector<int64_t, 4> ineq(rank + 1, 0); 686 int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r); 687 // TODO: handle dynamic dim sizes. 688 if (dimSize == -1) 689 continue; 690 691 // Check for overflow: d_i >= memref dim size. 692 ucst.addBound(FlatAffineConstraints::LB, r, dimSize); 693 outOfBounds = !ucst.isEmpty(); 694 if (outOfBounds && emitError) { 695 loadOrStoreOp.emitOpError() 696 << "memref out of upper bound access along dimension #" << (r + 1); 697 } 698 699 // Check for a negative index. 700 FlatAffineConstraints lcst(*region.getConstraints()); 701 std::fill(ineq.begin(), ineq.end(), 0); 702 // d_i <= -1; 703 lcst.addBound(FlatAffineConstraints::UB, r, -1); 704 outOfBounds = !lcst.isEmpty(); 705 if (outOfBounds && emitError) { 706 loadOrStoreOp.emitOpError() 707 << "memref out of lower bound access along dimension #" << (r + 1); 708 } 709 } 710 return failure(outOfBounds); 711 } 712 713 // Explicitly instantiate the template so that the compiler knows we need them! 714 template LogicalResult 715 mlir::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp, bool emitError); 716 template LogicalResult 717 mlir::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp, bool emitError); 718 719 // Returns in 'positions' the Block positions of 'op' in each ancestor 720 // Block from the Block containing operation, stopping at 'limitBlock'. 721 static void findInstPosition(Operation *op, Block *limitBlock, 722 SmallVectorImpl<unsigned> *positions) { 723 Block *block = op->getBlock(); 724 while (block != limitBlock) { 725 // FIXME: This algorithm is unnecessarily O(n) and should be improved to not 726 // rely on linear scans. 727 int instPosInBlock = std::distance(block->begin(), op->getIterator()); 728 positions->push_back(instPosInBlock); 729 op = block->getParentOp(); 730 block = op->getBlock(); 731 } 732 std::reverse(positions->begin(), positions->end()); 733 } 734 735 // Returns the Operation in a possibly nested set of Blocks, where the 736 // position of the operation is represented by 'positions', which has a 737 // Block position for each level of nesting. 738 static Operation *getInstAtPosition(ArrayRef<unsigned> positions, 739 unsigned level, Block *block) { 740 unsigned i = 0; 741 for (auto &op : *block) { 742 if (i != positions[level]) { 743 ++i; 744 continue; 745 } 746 if (level == positions.size() - 1) 747 return &op; 748 if (auto childAffineForOp = dyn_cast<AffineForOp>(op)) 749 return getInstAtPosition(positions, level + 1, 750 childAffineForOp.getBody()); 751 752 for (auto ®ion : op.getRegions()) { 753 for (auto &b : region) 754 if (auto *ret = getInstAtPosition(positions, level + 1, &b)) 755 return ret; 756 } 757 return nullptr; 758 } 759 return nullptr; 760 } 761 762 // Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'. 763 static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs, 764 FlatAffineValueConstraints *cst) { 765 for (unsigned i = 0, e = cst->getNumDimIds(); i < e; ++i) { 766 auto value = cst->getValue(i); 767 if (ivs.count(value) == 0) { 768 assert(isForInductionVar(value)); 769 auto loop = getForInductionVarOwner(value); 770 if (failed(cst->addAffineForOpDomain(loop))) 771 return failure(); 772 } 773 } 774 return success(); 775 } 776 777 /// Returns the innermost common loop depth for the set of operations in 'ops'. 778 // TODO: Move this to LoopUtils. 779 unsigned mlir::getInnermostCommonLoopDepth( 780 ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) { 781 unsigned numOps = ops.size(); 782 assert(numOps > 0 && "Expected at least one operation"); 783 784 std::vector<SmallVector<AffineForOp, 4>> loops(numOps); 785 unsigned loopDepthLimit = std::numeric_limits<unsigned>::max(); 786 for (unsigned i = 0; i < numOps; ++i) { 787 getLoopIVs(*ops[i], &loops[i]); 788 loopDepthLimit = 789 std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size())); 790 } 791 792 unsigned loopDepth = 0; 793 for (unsigned d = 0; d < loopDepthLimit; ++d) { 794 unsigned i; 795 for (i = 1; i < numOps; ++i) { 796 if (loops[i - 1][d] != loops[i][d]) 797 return loopDepth; 798 } 799 if (surroundingLoops) 800 surroundingLoops->push_back(loops[i - 1][d]); 801 ++loopDepth; 802 } 803 return loopDepth; 804 } 805 806 /// Computes in 'sliceUnion' the union of all slice bounds computed at 807 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and 808 /// then verifies if it is valid. Returns 'SliceComputationResult::Success' if 809 /// union was computed correctly, an appropriate failure otherwise. 810 SliceComputationResult 811 mlir::computeSliceUnion(ArrayRef<Operation *> opsA, ArrayRef<Operation *> opsB, 812 unsigned loopDepth, unsigned numCommonLoops, 813 bool isBackwardSlice, 814 ComputationSliceState *sliceUnion) { 815 // Compute the union of slice bounds between all pairs in 'opsA' and 816 // 'opsB' in 'sliceUnionCst'. 817 FlatAffineValueConstraints sliceUnionCst; 818 assert(sliceUnionCst.getNumDimAndSymbolIds() == 0); 819 std::vector<std::pair<Operation *, Operation *>> dependentOpPairs; 820 for (auto *i : opsA) { 821 MemRefAccess srcAccess(i); 822 for (auto *j : opsB) { 823 MemRefAccess dstAccess(j); 824 if (srcAccess.memref != dstAccess.memref) 825 continue; 826 // Check if 'loopDepth' exceeds nesting depth of src/dst ops. 827 if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) || 828 (isBackwardSlice && loopDepth > getNestingDepth(j))) { 829 LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n"); 830 return SliceComputationResult::GenericFailure; 831 } 832 833 bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) && 834 isa<AffineReadOpInterface>(dstAccess.opInst); 835 FlatAffineValueConstraints dependenceConstraints; 836 // Check dependence between 'srcAccess' and 'dstAccess'. 837 DependenceResult result = checkMemrefAccessDependence( 838 srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1, 839 &dependenceConstraints, /*dependenceComponents=*/nullptr, 840 /*allowRAR=*/readReadAccesses); 841 if (result.value == DependenceResult::Failure) { 842 LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n"); 843 return SliceComputationResult::GenericFailure; 844 } 845 if (result.value == DependenceResult::NoDependence) 846 continue; 847 dependentOpPairs.emplace_back(i, j); 848 849 // Compute slice bounds for 'srcAccess' and 'dstAccess'. 850 ComputationSliceState tmpSliceState; 851 mlir::getComputationSliceState(i, j, &dependenceConstraints, loopDepth, 852 isBackwardSlice, &tmpSliceState); 853 854 if (sliceUnionCst.getNumDimAndSymbolIds() == 0) { 855 // Initialize 'sliceUnionCst' with the bounds computed in previous step. 856 if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) { 857 LLVM_DEBUG(llvm::dbgs() 858 << "Unable to compute slice bound constraints\n"); 859 return SliceComputationResult::GenericFailure; 860 } 861 assert(sliceUnionCst.getNumDimAndSymbolIds() > 0); 862 continue; 863 } 864 865 // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'. 866 FlatAffineValueConstraints tmpSliceCst; 867 if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) { 868 LLVM_DEBUG(llvm::dbgs() 869 << "Unable to compute slice bound constraints\n"); 870 return SliceComputationResult::GenericFailure; 871 } 872 873 // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed. 874 if (!sliceUnionCst.areIdsAlignedWithOther(tmpSliceCst)) { 875 876 // Pre-constraint id alignment: record loop IVs used in each constraint 877 // system. 878 SmallPtrSet<Value, 8> sliceUnionIVs; 879 for (unsigned k = 0, l = sliceUnionCst.getNumDimIds(); k < l; ++k) 880 sliceUnionIVs.insert(sliceUnionCst.getValue(k)); 881 SmallPtrSet<Value, 8> tmpSliceIVs; 882 for (unsigned k = 0, l = tmpSliceCst.getNumDimIds(); k < l; ++k) 883 tmpSliceIVs.insert(tmpSliceCst.getValue(k)); 884 885 sliceUnionCst.mergeAndAlignIdsWithOther(/*offset=*/0, &tmpSliceCst); 886 887 // Post-constraint id alignment: add loop IV bounds missing after 888 // id alignment to constraint systems. This can occur if one constraint 889 // system uses an loop IV that is not used by the other. The call 890 // to unionBoundingBox below expects constraints for each Loop IV, even 891 // if they are the unsliced full loop bounds added here. 892 if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst))) 893 return SliceComputationResult::GenericFailure; 894 if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst))) 895 return SliceComputationResult::GenericFailure; 896 } 897 // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'. 898 if (sliceUnionCst.getNumLocalIds() > 0 || 899 tmpSliceCst.getNumLocalIds() > 0 || 900 failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) { 901 LLVM_DEBUG(llvm::dbgs() 902 << "Unable to compute union bounding box of slice bounds\n"); 903 return SliceComputationResult::GenericFailure; 904 } 905 } 906 } 907 908 // Empty union. 909 if (sliceUnionCst.getNumDimAndSymbolIds() == 0) 910 return SliceComputationResult::GenericFailure; 911 912 // Gather loops surrounding ops from loop nest where slice will be inserted. 913 SmallVector<Operation *, 4> ops; 914 for (auto &dep : dependentOpPairs) { 915 ops.push_back(isBackwardSlice ? dep.second : dep.first); 916 } 917 SmallVector<AffineForOp, 4> surroundingLoops; 918 unsigned innermostCommonLoopDepth = 919 getInnermostCommonLoopDepth(ops, &surroundingLoops); 920 if (loopDepth > innermostCommonLoopDepth) { 921 LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n"); 922 return SliceComputationResult::GenericFailure; 923 } 924 925 // Store 'numSliceLoopIVs' before converting dst loop IVs to dims. 926 unsigned numSliceLoopIVs = sliceUnionCst.getNumDimIds(); 927 928 // Convert any dst loop IVs which are symbol identifiers to dim identifiers. 929 sliceUnionCst.convertLoopIVSymbolsToDims(); 930 sliceUnion->clearBounds(); 931 sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap()); 932 sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap()); 933 934 // Get slice bounds from slice union constraints 'sliceUnionCst'. 935 sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs, 936 opsA[0]->getContext(), &sliceUnion->lbs, 937 &sliceUnion->ubs); 938 939 // Add slice bound operands of union. 940 SmallVector<Value, 4> sliceBoundOperands; 941 sliceUnionCst.getValues(numSliceLoopIVs, 942 sliceUnionCst.getNumDimAndSymbolIds(), 943 &sliceBoundOperands); 944 945 // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'. 946 sliceUnion->ivs.clear(); 947 sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs); 948 949 // Set loop nest insertion point to block start at 'loopDepth'. 950 sliceUnion->insertPoint = 951 isBackwardSlice 952 ? surroundingLoops[loopDepth - 1].getBody()->begin() 953 : std::prev(surroundingLoops[loopDepth - 1].getBody()->end()); 954 955 // Give each bound its own copy of 'sliceBoundOperands' for subsequent 956 // canonicalization. 957 sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands); 958 sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands); 959 960 // Check if the slice computed is valid. Return success only if it is verified 961 // that the slice is valid, otherwise return appropriate failure status. 962 Optional<bool> isSliceValid = sliceUnion->isSliceValid(); 963 if (!isSliceValid.hasValue()) { 964 LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n"); 965 return SliceComputationResult::GenericFailure; 966 } 967 if (!isSliceValid.getValue()) 968 return SliceComputationResult::IncorrectSliceFailure; 969 970 return SliceComputationResult::Success; 971 } 972 973 // TODO: extend this to handle multiple result maps. 974 static Optional<uint64_t> getConstDifference(AffineMap lbMap, AffineMap ubMap) { 975 assert(lbMap.getNumResults() == 1 && "expected single result bound map"); 976 assert(ubMap.getNumResults() == 1 && "expected single result bound map"); 977 assert(lbMap.getNumDims() == ubMap.getNumDims()); 978 assert(lbMap.getNumSymbols() == ubMap.getNumSymbols()); 979 AffineExpr lbExpr(lbMap.getResult(0)); 980 AffineExpr ubExpr(ubMap.getResult(0)); 981 auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(), 982 lbMap.getNumSymbols()); 983 auto cExpr = loopSpanExpr.dyn_cast<AffineConstantExpr>(); 984 if (!cExpr) 985 return None; 986 return cExpr.getValue(); 987 } 988 989 // Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop 990 // nest surrounding represented by slice loop bounds in 'slice'. Returns true 991 // on success, false otherwise (if a non-constant trip count was encountered). 992 // TODO: Make this work with non-unit step loops. 993 bool mlir::buildSliceTripCountMap( 994 const ComputationSliceState &slice, 995 llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) { 996 unsigned numSrcLoopIVs = slice.ivs.size(); 997 // Populate map from AffineForOp -> trip count 998 for (unsigned i = 0; i < numSrcLoopIVs; ++i) { 999 AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]); 1000 auto *op = forOp.getOperation(); 1001 AffineMap lbMap = slice.lbs[i]; 1002 AffineMap ubMap = slice.ubs[i]; 1003 // If lower or upper bound maps are null or provide no results, it implies 1004 // that source loop was not at all sliced, and the entire loop will be a 1005 // part of the slice. 1006 if (!lbMap || lbMap.getNumResults() == 0 || !ubMap || 1007 ubMap.getNumResults() == 0) { 1008 // The iteration of src loop IV 'i' was not sliced. Use full loop bounds. 1009 if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) { 1010 (*tripCountMap)[op] = 1011 forOp.getConstantUpperBound() - forOp.getConstantLowerBound(); 1012 continue; 1013 } 1014 Optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp); 1015 if (maybeConstTripCount.hasValue()) { 1016 (*tripCountMap)[op] = maybeConstTripCount.getValue(); 1017 continue; 1018 } 1019 return false; 1020 } 1021 Optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap); 1022 // Slice bounds are created with a constant ub - lb difference. 1023 if (!tripCount.hasValue()) 1024 return false; 1025 (*tripCountMap)[op] = tripCount.getValue(); 1026 } 1027 return true; 1028 } 1029 1030 // Return the number of iterations in the given slice. 1031 uint64_t mlir::getSliceIterationCount( 1032 const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) { 1033 uint64_t iterCount = 1; 1034 for (const auto &count : sliceTripCountMap) { 1035 iterCount *= count.second; 1036 } 1037 return iterCount; 1038 } 1039 1040 const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier"; 1041 // Computes slice bounds by projecting out any loop IVs from 1042 // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice 1043 // bounds in 'sliceState' which represent the one loop nest's IVs in terms of 1044 // the other loop nest's IVs, symbols and constants (using 'isBackwardsSlice'). 1045 void mlir::getComputationSliceState( 1046 Operation *depSourceOp, Operation *depSinkOp, 1047 FlatAffineValueConstraints *dependenceConstraints, unsigned loopDepth, 1048 bool isBackwardSlice, ComputationSliceState *sliceState) { 1049 // Get loop nest surrounding src operation. 1050 SmallVector<AffineForOp, 4> srcLoopIVs; 1051 getLoopIVs(*depSourceOp, &srcLoopIVs); 1052 unsigned numSrcLoopIVs = srcLoopIVs.size(); 1053 1054 // Get loop nest surrounding dst operation. 1055 SmallVector<AffineForOp, 4> dstLoopIVs; 1056 getLoopIVs(*depSinkOp, &dstLoopIVs); 1057 unsigned numDstLoopIVs = dstLoopIVs.size(); 1058 1059 assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) || 1060 (isBackwardSlice && loopDepth <= numDstLoopIVs)); 1061 1062 // Project out dimensions other than those up to 'loopDepth'. 1063 unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth; 1064 unsigned num = 1065 isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth; 1066 dependenceConstraints->projectOut(pos, num); 1067 1068 // Add slice loop IV values to 'sliceState'. 1069 unsigned offset = isBackwardSlice ? 0 : loopDepth; 1070 unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs; 1071 dependenceConstraints->getValues(offset, offset + numSliceLoopIVs, 1072 &sliceState->ivs); 1073 1074 // Set up lower/upper bound affine maps for the slice. 1075 sliceState->lbs.resize(numSliceLoopIVs, AffineMap()); 1076 sliceState->ubs.resize(numSliceLoopIVs, AffineMap()); 1077 1078 // Get bounds for slice IVs in terms of other IVs, symbols, and constants. 1079 dependenceConstraints->getSliceBounds(offset, numSliceLoopIVs, 1080 depSourceOp->getContext(), 1081 &sliceState->lbs, &sliceState->ubs); 1082 1083 // Set up bound operands for the slice's lower and upper bounds. 1084 SmallVector<Value, 4> sliceBoundOperands; 1085 unsigned numDimsAndSymbols = dependenceConstraints->getNumDimAndSymbolIds(); 1086 for (unsigned i = 0; i < numDimsAndSymbols; ++i) { 1087 if (i < offset || i >= offset + numSliceLoopIVs) { 1088 sliceBoundOperands.push_back(dependenceConstraints->getValue(i)); 1089 } 1090 } 1091 1092 // Give each bound its own copy of 'sliceBoundOperands' for subsequent 1093 // canonicalization. 1094 sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands); 1095 sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands); 1096 1097 // Set destination loop nest insertion point to block start at 'dstLoopDepth'. 1098 sliceState->insertPoint = 1099 isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin() 1100 : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end()); 1101 1102 llvm::SmallDenseSet<Value, 8> sequentialLoops; 1103 if (isa<AffineReadOpInterface>(depSourceOp) && 1104 isa<AffineReadOpInterface>(depSinkOp)) { 1105 // For read-read access pairs, clear any slice bounds on sequential loops. 1106 // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'. 1107 getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0], 1108 &sequentialLoops); 1109 } 1110 auto getSliceLoop = [&](unsigned i) { 1111 return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i]; 1112 }; 1113 auto isInnermostInsertion = [&]() { 1114 return (isBackwardSlice ? loopDepth >= srcLoopIVs.size() 1115 : loopDepth >= dstLoopIVs.size()); 1116 }; 1117 llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap; 1118 auto srcIsUnitSlice = [&]() { 1119 return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) && 1120 (getSliceIterationCount(sliceTripCountMap) == 1)); 1121 }; 1122 // Clear all sliced loop bounds beginning at the first sequential loop, or 1123 // first loop with a slice fusion barrier attribute.. 1124 1125 for (unsigned i = 0; i < numSliceLoopIVs; ++i) { 1126 Value iv = getSliceLoop(i).getInductionVar(); 1127 if (sequentialLoops.count(iv) == 0 && 1128 getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr) 1129 continue; 1130 // Skip reset of bounds of reduction loop inserted in the destination loop 1131 // that meets the following conditions: 1132 // 1. Slice is single trip count. 1133 // 2. Loop bounds of the source and destination match. 1134 // 3. Is being inserted at the innermost insertion point. 1135 Optional<bool> isMaximal = sliceState->isMaximal(); 1136 if (isLoopParallelAndContainsReduction(getSliceLoop(i)) && 1137 isInnermostInsertion() && srcIsUnitSlice() && isMaximal.hasValue() && 1138 isMaximal.getValue()) 1139 continue; 1140 for (unsigned j = i; j < numSliceLoopIVs; ++j) { 1141 sliceState->lbs[j] = AffineMap(); 1142 sliceState->ubs[j] = AffineMap(); 1143 } 1144 break; 1145 } 1146 } 1147 1148 /// Creates a computation slice of the loop nest surrounding 'srcOpInst', 1149 /// updates the slice loop bounds with any non-null bound maps specified in 1150 /// 'sliceState', and inserts this slice into the loop nest surrounding 1151 /// 'dstOpInst' at loop depth 'dstLoopDepth'. 1152 // TODO: extend the slicing utility to compute slices that 1153 // aren't necessarily a one-to-one relation b/w the source and destination. The 1154 // relation between the source and destination could be many-to-many in general. 1155 // TODO: the slice computation is incorrect in the cases 1156 // where the dependence from the source to the destination does not cover the 1157 // entire destination index set. Subtract out the dependent destination 1158 // iterations from destination index set and check for emptiness --- this is one 1159 // solution. 1160 AffineForOp 1161 mlir::insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, 1162 unsigned dstLoopDepth, 1163 ComputationSliceState *sliceState) { 1164 // Get loop nest surrounding src operation. 1165 SmallVector<AffineForOp, 4> srcLoopIVs; 1166 getLoopIVs(*srcOpInst, &srcLoopIVs); 1167 unsigned numSrcLoopIVs = srcLoopIVs.size(); 1168 1169 // Get loop nest surrounding dst operation. 1170 SmallVector<AffineForOp, 4> dstLoopIVs; 1171 getLoopIVs(*dstOpInst, &dstLoopIVs); 1172 unsigned dstLoopIVsSize = dstLoopIVs.size(); 1173 if (dstLoopDepth > dstLoopIVsSize) { 1174 dstOpInst->emitError("invalid destination loop depth"); 1175 return AffineForOp(); 1176 } 1177 1178 // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'. 1179 SmallVector<unsigned, 4> positions; 1180 // TODO: This code is incorrect since srcLoopIVs can be 0-d. 1181 findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions); 1182 1183 // Clone src loop nest and insert it a the beginning of the operation block 1184 // of the loop at 'dstLoopDepth' in 'dstLoopIVs'. 1185 auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1]; 1186 OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin()); 1187 auto sliceLoopNest = 1188 cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation())); 1189 1190 Operation *sliceInst = 1191 getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody()); 1192 // Get loop nest surrounding 'sliceInst'. 1193 SmallVector<AffineForOp, 4> sliceSurroundingLoops; 1194 getLoopIVs(*sliceInst, &sliceSurroundingLoops); 1195 1196 // Sanity check. 1197 unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size(); 1198 (void)sliceSurroundingLoopsSize; 1199 assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize); 1200 unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs; 1201 (void)sliceLoopLimit; 1202 assert(sliceLoopLimit >= sliceSurroundingLoopsSize); 1203 1204 // Update loop bounds for loops in 'sliceLoopNest'. 1205 for (unsigned i = 0; i < numSrcLoopIVs; ++i) { 1206 auto forOp = sliceSurroundingLoops[dstLoopDepth + i]; 1207 if (AffineMap lbMap = sliceState->lbs[i]) 1208 forOp.setLowerBound(sliceState->lbOperands[i], lbMap); 1209 if (AffineMap ubMap = sliceState->ubs[i]) 1210 forOp.setUpperBound(sliceState->ubOperands[i], ubMap); 1211 } 1212 return sliceLoopNest; 1213 } 1214 1215 // Constructs MemRefAccess populating it with the memref, its indices and 1216 // opinst from 'loadOrStoreOpInst'. 1217 MemRefAccess::MemRefAccess(Operation *loadOrStoreOpInst) { 1218 if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) { 1219 memref = loadOp.getMemRef(); 1220 opInst = loadOrStoreOpInst; 1221 auto loadMemrefType = loadOp.getMemRefType(); 1222 indices.reserve(loadMemrefType.getRank()); 1223 for (auto index : loadOp.getMapOperands()) { 1224 indices.push_back(index); 1225 } 1226 } else { 1227 assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) && 1228 "Affine read/write op expected"); 1229 auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst); 1230 opInst = loadOrStoreOpInst; 1231 memref = storeOp.getMemRef(); 1232 auto storeMemrefType = storeOp.getMemRefType(); 1233 indices.reserve(storeMemrefType.getRank()); 1234 for (auto index : storeOp.getMapOperands()) { 1235 indices.push_back(index); 1236 } 1237 } 1238 } 1239 1240 unsigned MemRefAccess::getRank() const { 1241 return memref.getType().cast<MemRefType>().getRank(); 1242 } 1243 1244 bool MemRefAccess::isStore() const { 1245 return isa<AffineWriteOpInterface>(opInst); 1246 } 1247 1248 /// Returns the nesting depth of this statement, i.e., the number of loops 1249 /// surrounding this statement. 1250 unsigned mlir::getNestingDepth(Operation *op) { 1251 Operation *currOp = op; 1252 unsigned depth = 0; 1253 while ((currOp = currOp->getParentOp())) { 1254 if (isa<AffineForOp>(currOp)) 1255 depth++; 1256 } 1257 return depth; 1258 } 1259 1260 /// Equal if both affine accesses are provably equivalent (at compile 1261 /// time) when considering the memref, the affine maps and their respective 1262 /// operands. The equality of access functions + operands is checked by 1263 /// subtracting fully composed value maps, and then simplifying the difference 1264 /// using the expression flattener. 1265 /// TODO: this does not account for aliasing of memrefs. 1266 bool MemRefAccess::operator==(const MemRefAccess &rhs) const { 1267 if (memref != rhs.memref) 1268 return false; 1269 1270 AffineValueMap diff, thisMap, rhsMap; 1271 getAccessMap(&thisMap); 1272 rhs.getAccessMap(&rhsMap); 1273 AffineValueMap::difference(thisMap, rhsMap, &diff); 1274 return llvm::all_of(diff.getAffineMap().getResults(), 1275 [](AffineExpr e) { return e == 0; }); 1276 } 1277 1278 /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB', 1279 /// where each lists loops from outer-most to inner-most in loop nest. 1280 unsigned mlir::getNumCommonSurroundingLoops(Operation &a, Operation &b) { 1281 SmallVector<AffineForOp, 4> loopsA, loopsB; 1282 getLoopIVs(a, &loopsA); 1283 getLoopIVs(b, &loopsB); 1284 1285 unsigned minNumLoops = std::min(loopsA.size(), loopsB.size()); 1286 unsigned numCommonLoops = 0; 1287 for (unsigned i = 0; i < minNumLoops; ++i) { 1288 if (loopsA[i].getOperation() != loopsB[i].getOperation()) 1289 break; 1290 ++numCommonLoops; 1291 } 1292 return numCommonLoops; 1293 } 1294 1295 static Optional<int64_t> getMemoryFootprintBytes(Block &block, 1296 Block::iterator start, 1297 Block::iterator end, 1298 int memorySpace) { 1299 SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions; 1300 1301 // Walk this 'affine.for' operation to gather all memory regions. 1302 auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult { 1303 if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) { 1304 // Neither load nor a store op. 1305 return WalkResult::advance(); 1306 } 1307 1308 // Compute the memref region symbolic in any IVs enclosing this block. 1309 auto region = std::make_unique<MemRefRegion>(opInst->getLoc()); 1310 if (failed( 1311 region->compute(opInst, 1312 /*loopDepth=*/getNestingDepth(&*block.begin())))) { 1313 return opInst->emitError("error obtaining memory region\n"); 1314 } 1315 1316 auto it = regions.find(region->memref); 1317 if (it == regions.end()) { 1318 regions[region->memref] = std::move(region); 1319 } else if (failed(it->second->unionBoundingBox(*region))) { 1320 return opInst->emitWarning( 1321 "getMemoryFootprintBytes: unable to perform a union on a memory " 1322 "region"); 1323 } 1324 return WalkResult::advance(); 1325 }); 1326 if (result.wasInterrupted()) 1327 return None; 1328 1329 int64_t totalSizeInBytes = 0; 1330 for (const auto ®ion : regions) { 1331 Optional<int64_t> size = region.second->getRegionSize(); 1332 if (!size.hasValue()) 1333 return None; 1334 totalSizeInBytes += size.getValue(); 1335 } 1336 return totalSizeInBytes; 1337 } 1338 1339 Optional<int64_t> mlir::getMemoryFootprintBytes(AffineForOp forOp, 1340 int memorySpace) { 1341 auto *forInst = forOp.getOperation(); 1342 return ::getMemoryFootprintBytes( 1343 *forInst->getBlock(), Block::iterator(forInst), 1344 std::next(Block::iterator(forInst)), memorySpace); 1345 } 1346 1347 /// Returns whether a loop is parallel and contains a reduction loop. 1348 bool mlir::isLoopParallelAndContainsReduction(AffineForOp forOp) { 1349 SmallVector<LoopReduction> reductions; 1350 if (!isLoopParallel(forOp, &reductions)) 1351 return false; 1352 return !reductions.empty(); 1353 } 1354 1355 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted 1356 /// at 'forOp'. 1357 void mlir::getSequentialLoops(AffineForOp forOp, 1358 llvm::SmallDenseSet<Value, 8> *sequentialLoops) { 1359 forOp->walk([&](Operation *op) { 1360 if (auto innerFor = dyn_cast<AffineForOp>(op)) 1361 if (!isLoopParallel(innerFor)) 1362 sequentialLoops->insert(innerFor.getInductionVar()); 1363 }); 1364 } 1365 1366 IntegerSet mlir::simplifyIntegerSet(IntegerSet set) { 1367 FlatAffineConstraints fac(set); 1368 if (fac.isEmpty()) 1369 return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(), 1370 set.getContext()); 1371 fac.removeTrivialRedundancy(); 1372 1373 auto simplifiedSet = fac.getAsIntegerSet(set.getContext()); 1374 assert(simplifiedSet && "guaranteed to succeed while roundtripping"); 1375 return simplifiedSet; 1376 } 1377