//===- HoistPadding.cpp - Hoisting transformation for PadTensorOp ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements functions concerned with hoisting padding operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SCF/Utils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"

using llvm::dbgs;

#define DEBUG_TYPE "hoist-padding"

#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")

using namespace mlir;
using namespace mlir::linalg;

/// Analysis class to support PadTensorOp hoisting across multiple enclosing
/// loops. The failure conditions are:
///   1. Pad op has a use that is not an input of a LinalgOp.
///   2. Pad op does not have a constant padding value.
///   3. There is no immediately enclosing scf::ForOp.
///   4. The backward slice from the pad op to the scf::ForOp to hoist above
///      contains an unknown op with non index type operands, a region, or a
///      memory effect.
///   5. The backward slice from the pad op to the scf::ForOp to hoist above is
///      empty.
///   6. The source tensor of pad op is not defined by an extract slice op.
///   7. The source tensor of the extract slice op is not defined outside of
///      the outermost enclosing scf::ForOp.
///   8. There is no enclosing scf::ForOp that indexes the padded data.
/// Other cases succeed and will trigger hoisting of the pad op.
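///
/// A schematic illustration of a successful hoisting by one loop (simplified,
/// not the verbatim IR produced by this transformation; all names are
/// illustrative):
/// ```
///   scf.for %i = %lb to %ub step %step
///     %slice = tensor.extract_slice %source [%i] [%sz] [1]
///     %padded = linalg.pad_tensor %slice ... : tensor<?xf32> to tensor<4xf32>
///     ... uses of %padded ...
/// ```
/// becomes, roughly, a packing loop that pre-pads all tiles into a packed
/// tensor plus an extract_slice of the pre-padded tile inside the original
/// loop:
/// ```
///   %init = linalg.init_tensor [%n, 4] : tensor<?x4xf32>
///   %packed = scf.for %i = %lb to %ub step %step iter_args(%p = %init)
///     %slice = tensor.extract_slice %source [%i] [%sz] [1]
///     %padded = linalg.pad_tensor %slice ...
///     %iter = (%i - %lb) ceildiv %step
///     %inserted = tensor.insert_slice %padded into %p [%iter, 0] [1, 4] [1, 1]
///     scf.yield %inserted
///   scf.for %i = %lb to %ub step %step
///     %iter = (%i - %lb) ceildiv %step
///     %padded = tensor.extract_slice %packed [%iter, 0] [1, 4] [1, 1]
///     ... uses of %padded ...
/// ```
/// where %n is an upper bound for the number of iterations of the packing
/// loop.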
struct HoistingAnalysis {
  HoistingAnalysis(PadTensorOp padTensorOp, int numLoops);

  bool isValid() { return valid; }

  /// Footprint of the packedTensor, computed from the packingLoops.
  SmallVector<Value> getPackedTensorSizes(ImplicitLocOpBuilder &b);

  /// The outermost loop, determined by `numLoops`, above which `padTensorOp`
  /// will be hoisted.
  scf::ForOp outermostEnclosingForOp;

  /// Backward slice rooted at `padTensorOp` and nested under
  /// `outermostEnclosingForOp`.
  SetVector<Operation *> backwardSlice;

  /// The scf::ForOps immediately enclosing `padTensorOp` such that:
  ///  1. they are nested under `outermostEnclosingForOp` (inclusive), and
  ///  2. their induction variable is used, directly or indirectly, in the
  ///     computation of `padTensorOp`.
  /// The span of these loops determines the footprint of the packed tensor.
  SmallVector<scf::ForOp> packingLoops;

private:
  /// Drop any non-index dependencies of `padTensorOp` and `sliceOp` from
  /// `backwardSlice`. The method follows the use-def chains of the index
  /// operands consumed by `padTensorOp` and `sliceOp` and drops the operations
  /// not part of this index computation. Afterwards, the filtered
  /// `backwardSlice` contains only the loops whose induction variable is used,
  /// directly or indirectly, to index the padded tensor. The method returns
  /// failure if the filtered backward slice contains an unexpected operation.
  ///
  /// Example:
  /// ```
  /// %source = linalg.fill(%cst, %arg0)
  /// scf.for %i
  ///   %unrelated = linalg.fill(%cst, %arg1)  // not used to index %source!
  ///   scf.for %j (%arg2 = %unrelated)
  ///     scf.for %k                           // not used to index %source!
  ///       %ubi = affine.min #map(%i)
  ///       %ubj = affine.min #map(%j)
  ///       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
  ///       %padded_slice = linalg.pad_tensor %slice
  /// ```
  /// dropNonIndexDependencies(%padded_slice, %slice)
  /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
  LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp,
                                         tensor::ExtractSliceOp sliceOp);

  /// Encodes whether the analysis is valid and hoisting can proceed.
  bool valid;
};

/// Return true if all uses of `padTensorOp` are input tensors of some
/// LinalgOp.
static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) {
  for (OpOperand &use : padTensorOp.result().getUses()) {
    auto linalgUser = dyn_cast<linalg::LinalgOp>(use.getOwner());
    if (!linalgUser || !linalgUser.isInputTensor(&use)) {
      LLVM_DEBUG(DBGS() << "Found a use of " << *(padTensorOp)
                        << "\nthat is not an input tensor of a LinalgOp, "
                        << "cannot hoist\n"
                        << *(use.getOwner()) << "\n");
      return false;
    }
  }
  return true;
}

/// Return at most nLevels of immediately enclosing scf::ForOp loops.
/// Stops at the first parent that is not an scf::ForOp.
/// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm.
/// Control-flow and other containing ops with regions are not modeled atm.
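/// For example (illustrative): if `padTensorOp` is nested under a loop
/// `scf.for %k`, itself nested under `scf.for %j` and `scf.for %i`, calling
/// this helper with `nLevels` = 2 fills `reverseEnclosingLoops` with the %k
/// and %j loops, innermost first.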
static void
getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
                         SmallVector<scf::ForOp> &reverseEnclosingLoops) {
  AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
  (void)state;
  scf::ForOp outermostEnclosingForOp = nullptr;
  Operation *nextEnclosingOp = padTensorOp->getParentOp();
  while (nLevels-- > 0 &&
         (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
    LLVM_DEBUG(
        DBGS() << "loops: ";
        outermostEnclosingForOp.getInductionVar().printAsOperand(dbgs(), state);
        dbgs() << "\n");
    reverseEnclosingLoops.push_back(outermostEnclosingForOp);
    nextEnclosingOp = outermostEnclosingForOp->getParentOp();
  }
}

HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
  valid = false;

  // Bail on any use that isn't an input of a Linalg op.
  // Hoisting of inplace updates happens after vectorization.
  if (!isOnlyUsedAsInputOfLinalgOp(padTensorOp))
    return;

  // Get at most `numLoops` of immediately enclosing loops.
  SmallVector<scf::ForOp> reverseEnclosingLoops;
  getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n");
    return;
  }

  outermostEnclosingForOp = reverseEnclosingLoops.back();

  // Get the `sliceOp` that defines the source tensor of `padTensorOp` and
  // check its source is defined outside of the outermost loop. This check
  // ensures the padded data is available for packing before entering the
  // outermost enclosing loop.
  //
  // Example:
  // ```
  // %source = linalg.fill(%cst, %arg0)
  // // %source is available for packing here!
  // scf.for %i
  //   scf.for %j
  //     scf.for %k
  //       %slice = tensor.extract_slice %source [%i, %j]
  //       %padded_slice = linalg.pad_tensor %slice
  // ```
  auto sliceOp = padTensorOp.source().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "Cannot find the extract slice op -> skip\n");
    return;
  }
  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.source())) {
    LLVM_DEBUG(DBGS() << "Source not defined outside of loops -> skip\n");
    return;
  }

  // Check the region of `padTensorOp` depends on a constant only. Adding
  // hoisting support for arbitrary padding regions would require cloning all
  // dependencies captured by the padding region.
  Value paddingValue = padTensorOp.getConstantPaddingValue();
  if (!paddingValue ||
      !isa_and_nonnull<arith::ConstantOp>(paddingValue.getDefiningOp())) {
    LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> skip\n");
    return;
  }

  // Get all the ops in the backward slice starting from `padTensorOp` that
  // are dominated by the outermost enclosing loop.
  DominanceInfo domInfo(outermostEnclosingForOp);
  getBackwardSlice(padTensorOp.getOperation(), &backwardSlice,
                   [&](Operation *op) {
                     return domInfo.dominates(outermostEnclosingForOp, op);
                   });
  if (backwardSlice.empty())
    return;
  // Add `padTensorOp` itself to the backward slice.
  backwardSlice.insert(padTensorOp.getOperation());

  // Remove all ops in the backward slice that are not used to index the padded
  // tensor. In particular, keep `padTensorOp`, `sliceOp`, and the loop and
  // affine operations used for the index computation.
  if (failed(dropNonIndexDependencies(padTensorOp, sliceOp)))
    return;

  // Add only the loops that are part of the filtered `backwardSlice` to the
  // packing loops. All other loops are not used to index the padded data and
  // consequently access the same data in every loop iteration. Adding them to
  // the packing loops would increase the cache footprint of the packed data
  // by storing the same data multiple times.
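  // For the example given in the documentation of `dropNonIndexDependencies`,
  // this yields packingLoops = [scf.for %i, scf.for %j] (outermost first);
  // scf.for %k is not a packing loop since it does not index the padded data.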
  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
    if (backwardSlice.contains(forOp))
      packingLoops.push_back(forOp);
  if (packingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "Cannot find a packing loop -> skip\n");
    return;
  }

  // The analysis is valid and hoisting can occur.
  valid = true;
}

LogicalResult
HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp,
                                           tensor::ExtractSliceOp sliceOp) {
  // Set of all values used for index computation.
  SetVector<Value> indexEdges;

  // Add all index operands of `operation` to `indexEdges`. An index operand is
  // an operand of type index.
  auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
    for (Value operand : operation->getOperands())
      if (operand.getType().isIndex())
        indexEdges.insert(operand);
  };

  // Check if any operation result is contained in `indexEdges`.
  auto hasIndexResult = [&](Operation *operation) {
    return llvm::any_of(operation->getResults(), [&](Value result) {
      return indexEdges.contains(result);
    });
  };

  // Starting from `padTensorOp` and `sliceOp` walk the use-def edges of index
  // type in `backwardSlice`. Add the index operands of an operation to
  // `indexEdges` and remove all operations from `backwardSlice` that are not
  // part of the index computation.
  //
  // Example:
  // ```
  // %source = linalg.fill(%cst, %arg0)
  // scf.for %i
  //   %unrelated = linalg.fill(%cst, %arg1)  // not used to index %source!
  //   scf.for %j (%arg2 = %unrelated)
  //     scf.for %k                           // not used to index %source!
  //       %ubi = affine.min #map(%i)
  //       %ubj = affine.min #map(%j)
  //       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
  //       %padded_slice = linalg.pad_tensor %slice
  // ```
  // After iterating `backwardSlice` we obtain:
  // indexEdges = [%i, %j, %ubi, %ubj]
  // backwardSlice = backwardSlice / [linalg.fill(%cst, %arg1), scf.for %k]
  SetVector<Operation *> operationsToRemove;
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Add the index operands of `padTensorOp` and `sliceOp` to start the
    // exploration of the index computation.
    if (op == padTensorOp || op == sliceOp) {
      addIndexOperandsToIndexEdges(op);
      continue;
    }
    // Add the index operands of the loop if its induction variable is
    // used for index computation.
    if (auto forOp = dyn_cast<scf::ForOp>(op)) {
      if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
        addIndexOperandsToIndexEdges(op);
        continue;
      }
    }
    // Add the index operands of all other operations if at least one result is
    // used for index computation.
    if (hasIndexResult(op)) {
      addIndexOperandsToIndexEdges(op);
      // Check the operands of the remaining operations all have index type.
      if (llvm::any_of(op->getOperandTypes(),
                       [](Type type) { return !type.isIndex(); })) {
        LLVM_DEBUG(DBGS() << "Unsupported op with non index type operands: "
                          << *op << " -> skip\n");
        return failure();
      }
      // Check the remaining operations do not have regions or memory effects.
      auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
      bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
      if (hasMemoryEffect || op->getNumRegions() != 0) {
        LLVM_DEBUG(DBGS() << "Unsupported op with region or memory effect: "
                          << *op << " -> skip\n");
        return failure();
      }
      continue;
    }
    // Remove all other operations not used by the index computation. Constant
    // operations are an exception since they may be used by `padTensorOp`.
    if (!isa<arith::ConstantOp>(op))
      operationsToRemove.insert(op);
  }
  backwardSlice.set_subtract(operationsToRemove);
  return success();
}

SmallVector<Value>
HoistingAnalysis::getPackedTensorSizes(ImplicitLocOpBuilder &b) {
  SmallVector<Value> dynamicTensorSizes;

  // Upper bound the packing loop lengths to size the packed tensor. Taking
  // upper bounds can make the sizes of the packed tensor independent of the
  // enclosing loops. This independence is a prerequisite for reusing the same
  // buffer for all enclosing loop iterations and hoisting its allocation out
  // of the enclosing loops.
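  // For example (illustrative): a packing loop
  //   scf.for %i = %c0 to %n step %c4
  // whose upper bound %n is itself bounded above by 13 contributes a packed
  // tensor dimension of size ceildiv(13 - 0, 4) = 4, independently of the
  // runtime value of %n.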
  for (auto forOp : packingLoops) {
    // Compute an upper bound `ubVal` for the upper bound of `forOp`.
    AffineMap boundMap;
    SmallVector<Value> boundOperands;
    getUpperBoundForIndex(forOp.upperBound(), boundMap, boundOperands);
    Value ubVal = b.createOrFold<AffineMinOp>(boundMap, boundOperands);
    // Compute the maximal packing loop length as (ub - lb).ceilDiv(step) and
    // store the result to `dynamicTensorSizes`.
    // TODO: instead of using the lower bound of `forOp` directly, implement a
    // lower bound computation similar to the upper bound computation.
    AffineExpr lb, ub, step;
    bindDims(b.getContext(), lb, ub);
    bindSymbols(b.getContext(), step);
    Value res = b.createOrFold<AffineApplyOp>(
        (ub - lb).ceilDiv(step),
        ValueRange{forOp.lowerBound(), ubVal, forOp.step()});
    dynamicTensorSizes.push_back(res);
  }

  return dynamicTensorSizes;
}

static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) {
  return outer.isDefinedOutsideOfLoop(v) || v.getDefiningOp<ConstantOp>();
}

/// Return the current iteration number in the loop (iv - lb).ceilDiv(step).
/// The returned Value is guaranteed not to depend on any loop comprised in
/// [`outer`, `forOp`].
/// Return null if such a loop-independent quantity cannot be computed.
static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
                                     scf::ForOp forOp) {
  MLIRContext *ctx = forOp->getContext();
  AffineExpr iv, lb, step;
  bindDims(ctx, iv, lb);
  bindSymbols(ctx, step);
  if (!isDefinedOutsideOrConstant(outer, forOp.lowerBound()) ||
      !isDefinedOutsideOrConstant(outer, forOp.step()))
    return Value();
  Value ivVal = forOp.getInductionVar(), lbVal = forOp.lowerBound(),
        stepVal = forOp.step();
  auto loc = forOp->getLoc();
  return b.createOrFold<AffineApplyOp>(loc, (iv - lb).ceilDiv(step),
                                       ValueRange{ivVal, lbVal, stepVal});
}

FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
                                                     int numLoops,
                                                     PadTensorOp &hoistedOp) {
  LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops
                    << " loops\n");
  HoistingAnalysis analysis(opToHoist, numLoops);
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "Analysis failed -> Skip\n");
    return failure();
  }

  scf::ForOp outer = analysis.outermostEnclosingForOp;
  ImplicitLocOpBuilder b(outer->getLoc(), outer);

  SmallVector<Value> dynamicTensorSizes = analysis.getPackedTensorSizes(b);

  // Update actual number of loops, which may be smaller.
  int nPackedLoops = analysis.packingLoops.size();

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  // Create the packed tensor<?x?x..?xpadded_shape> into which we amortize
  // padding.
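  // For example (illustrative): hoisting a pad that produces a static
  // tensor<4x8xf32> across two packing loops yields a packed tensor of type
  // tensor<?x?x4x8xf32>, whose two leading dynamic dimensions are sized by
  // `dynamicTensorSizes`.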
  SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamicSize);
  // TODO: go grab dims when necessary, for now PadTensorOp returns a static
  // tensor.
  llvm::append_range(packedShape, paddedTensorType.getShape());
  auto packedTensorType =
      RankedTensorType::get(packedShape, paddedTensorType.getElementType());
  Value packedTensor = b.create<linalg::InitTensorOp>(
      loc, dynamicTensorSizes, packedTensorType.getShape(),
      packedTensorType.getElementType());

  // Clone the operations involved in the backward slice, iteratively stepping
  // into the loops that we encounter.
  // The implementation proceeds in a stack-like fashion:
  //   1. Iteratively clone and step into the loops, pushing the `packedTensor`
  //      deeper in the stack.
  //   2. Create an InsertSliceOp at the top of the stack.
  //   3. Iteratively pop and yield the result of the InsertSliceOp across
  //      the cloned loops.
  SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
  clonedLoopIvs.reserve(nPackedLoops);
  leadingPackedTensorIndexings.reserve(nPackedLoops);
  BlockAndValueMapping bvm;
  // Stack step 1. iteratively clone loops and push `packedTensor`.
  for (Operation *op : analysis.backwardSlice) {
    // Specifically skip the extract_slice(packedTensor) case: this is the
    // piece we seek to replace.
    if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op))
      if (bvm.lookupOrDefault(sliceOp.source()) == packedTensor)
        continue;
    // Clone all operations except loops.
    auto forOp = dyn_cast<scf::ForOp>(op);
    if (!forOp) {
      b.clone(*op, bvm);
      continue;
    }
    // Create a packing loop that takes `packedTensor` as iteration argument.
    auto clonedForOp =
        b.create<scf::ForOp>(loc, bvm.lookupOrDefault(forOp.lowerBound()),
                             bvm.lookupOrDefault(forOp.upperBound()),
                             bvm.lookupOrDefault(forOp.step()), packedTensor);
    // Map the induction var, region args and results to the `clonedForOp`.
    bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
    bvm.map(forOp.getResults(), clonedForOp.getResults());
    assert(clonedForOp->getNumRegions() == 1);
    clonedLoopIvs.push_back(clonedForOp.getInductionVar());

    b.setInsertionPointToStart(&clonedForOp->getRegion(0).front());
    Value loopIndependentIterationCount =
        buildLoopIterationCount(b, outer, clonedForOp);
    // Assert the loop-independent iteration count can be computed.
    if (!loopIndependentIterationCount)
      llvm_unreachable("loop independence prerequisite not met");
    leadingPackedTensorIndexings.push_back(loopIndependentIterationCount);
    packedTensor = clonedForOp.getRegionIterArgs().front();
  }

  // Stack step 2. create InsertSliceOp at the top of the stack.
  // offsets = [leadingPackedTensorIndexings, 0 .. 0].
  SmallVector<OpFoldResult> offsets(leadingPackedTensorIndexings.begin(),
                                    leadingPackedTensorIndexings.end());
  offsets.append(paddedRank, b.getIndexAttr(0));
  // sizes = [1 .. 1, paddedShape].
  SmallVector<OpFoldResult> sizes(nPackedLoops, b.getIndexAttr(1));
  for (int64_t sz : paddedTensorType.getShape()) {
    // TODO: go grab dims when necessary, for now PadTensorOp returns a static
    // tensor.
    assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes");
    sizes.push_back(b.getIndexAttr(sz));
  }
  // strides = [1 .. 1].
  SmallVector<OpFoldResult> strides(nPackedLoops + paddedRank,
                                    b.getIndexAttr(1));

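  // For example (illustrative): with two packing loops and a padded type
  // tensor<4x8xf32>, the insert below writes the padded tile at offsets
  // [%iter0, %iter1, 0, 0] with sizes [1, 1, 4, 8] and strides [1, 1, 1, 1],
  // where %iter0 and %iter1 are the loop-independent iteration counts computed
  // above.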
  Value inserted =
      b.create<tensor::InsertSliceOp>(loc, bvm.lookup(opToHoist.result()),
                                      packedTensor, offsets, sizes, strides);

  // Stack step 3. iteratively pop the stack and propagate the yield.
  Value valueToYield = inserted;
  for (Value iv : llvm::reverse(clonedLoopIvs)) {
    auto forOp = scf::getForInductionVarOwner(iv);
    b.setInsertionPointToEnd(&forOp.getRegion().front());
    b.create<scf::YieldOp>(loc, valueToYield);
    valueToYield = forOp.getResult(0);
  }

  // Now the packed tensor is ready, replace the original padding op by a
  // 1x..x1 slice [loopIterationCounts, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
  b.setInsertionPoint(opToHoist);
  SmallVector<Value> loopIterationCounts = llvm::to_vector<4>(
      llvm::map_range(analysis.packingLoops, [&](scf::ForOp loop) {
        return buildLoopIterationCount(b, outer, loop);
      }));
  // Assert all loop iteration counts can be computed.
  if (llvm::any_of(loopIterationCounts, [](Value v) { return !v; }))
    llvm_unreachable("loop independence prerequisite not met");
  // offsets = [loopIterationCounts, 0 .. 0].
  offsets.assign(loopIterationCounts.begin(), loopIterationCounts.end());
  offsets.append(paddedRank, b.getIndexAttr(0));
  // sizes = [1 .. 1, paddedShape] (defined above).
  // strides = [1 .. 1] (defined above).
  packedTensor =
      scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0);
  Value newResult = b.create<tensor::ExtractSliceOp>(
      loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides);

  // Make the newly cloned `opToHoist` available to the caller.
  hoistedOp = cast<PadTensorOp>(bvm.lookup(opToHoist.result()).getDefiningOp());
  return newResult;
}