//===- VectorToSCF.cpp - Convert vector to SCF dialect ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements lowering of vector transfer operations to SCF.
//
//===----------------------------------------------------------------------===//

#include <type_traits>

#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"

#include "../PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using vector::TransferReadOp;
using vector::TransferWriteOp;

namespace {

/// Attribute name used for labeling transfer ops during progressive lowering.
static const char kPassLabel[] = "__vector_to_scf_lowering__";

/// Patterns that inherit from this struct have access to
/// VectorTransferToSCFOptions.
template <typename OpTy>
struct VectorToSCFPattern : public OpRewritePattern<OpTy> {
  explicit VectorToSCFPattern(MLIRContext *context,
                              VectorTransferToSCFOptions opt)
      : OpRewritePattern<OpTy>(context), options(opt) {}

  VectorTransferToSCFOptions options;
};

/// Given a vector transfer op, calculate which dimension of the `source`
/// memref should be unpacked in the next application of TransferOpConversion.
/// A return value of None indicates a broadcast.
template <typename OpTy>
static Optional<int64_t> unpackedDim(OpTy xferOp) {
  auto map = xferOp.permutation_map();
  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
    return expr.getPosition();
  }
  assert(xferOp.isBroadcastDim(0) &&
         "Expected AffineDimExpr or AffineConstantExpr");
  return None;
}

/// Compute the permutation map for the new (N-1)-D vector transfer op. This
/// map is identical to the current permutation map, but the first result is
/// omitted.
template <typename OpTy>
static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
  auto map = xferOp.permutation_map();
  return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
                        builder.getContext());
}

/// Calculate the indices for the new vector transfer op.
///
/// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
///       --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
///                               ^^^^^^
///       `iv` is the iteration variable of the (new) surrounding loop.
template <typename OpTy>
static void getXferIndices(OpTy xferOp, Value iv,
                           SmallVector<Value, 8> &indices) {
  typename OpTy::Adaptor adaptor(xferOp);
  // Corresponding memref dim of the vector dim that is unpacked.
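  // If `dim` is None, the unpacked vector dim is a broadcast and has no
  // corresponding memref dim; in that case the indices remain unchanged.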
  auto dim = unpackedDim(xferOp);
  auto prevIndices = adaptor.indices();
  indices.append(prevIndices.begin(), prevIndices.end());

  bool isBroadcast = !dim.hasValue();
  if (!isBroadcast) {
    using edsc::op::operator+;
    indices[dim.getValue()] = adaptor.indices()[dim.getValue()] + iv;
  }
}

static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
                            Value value) {
  if (hasRetVal) {
    builder.create<scf::YieldOp>(loc, value);
  } else {
    builder.create<scf::YieldOp>(loc);
  }
}

/// Generates a boolean Value that is true if the iv-th bit in xferOp's mask
/// is set to true. No such check is generated under following circumstances:
/// * xferOp does not have a mask.
/// * xferOp's mask is not 1D. (In case of (N>1)-D, a subvector of the mask is
///   computed and attached to the new transfer op in the pattern.)
/// * The to-be-unpacked dim of xferOp is a broadcast.
template <typename OpTy>
static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
  if (!xferOp.mask())
    return Value();
  if (xferOp.getMaskType().getRank() != 1)
    return Value();
  if (xferOp.isBroadcastDim(0))
    return Value();

  auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
  return vector_extract_element(xferOp.mask(), ivI32).value;
}

/// Helper function for TransferOpConversion and TransferOp1dConversion.
/// Generate an in-bounds check if the transfer op may go out-of-bounds on the
/// specified dimension `dim` with the loop iteration variable `iv`.
/// E.g., when unpacking dimension 0 from:
/// ```
/// %vec = vector.transfer_read %A[%a, %b], %cst
///     : memref<?x?xf32>, vector<5x4xf32>
/// ```
/// An if check similar to this will be generated inside the loop:
/// ```
/// %d = memref.dim %A, %c0 : memref<?x?xf32>
/// if (%a + iv < %d) {
///   (in-bounds case)
/// } else {
///   (out-of-bounds case)
/// }
/// ```
///
/// If the transfer is 1D and has a mask, this function generates a more
/// complex check that also accounts for potentially masked out elements.
///
/// This function variant returns the value returned by `inBoundsCase` or
/// `outOfBoundsCase`. The MLIR type of the return value must be specified in
/// `resultTypes`.
template <typename OpTy>
static Value generateInBoundsCheck(
    OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
    TypeRange resultTypes,
    function_ref<Value(OpBuilder &, Location)> inBoundsCase,
    function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
  bool hasRetVal = !resultTypes.empty();
  Value cond; // Condition to be built...

  // Condition check 1: Access in-bounds?
  bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
  if (!xferOp.isDimInBounds(0) && !isBroadcast) {
    auto memrefDim =
        memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
    using edsc::op::operator+;
    auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
    cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
  }

  // Condition check 2: Masked in?
  if (auto maskCond = generateMaskCheck(builder, xferOp, iv)) {
    if (cond) {
      cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
    } else {
      cond = maskCond;
    }
  }

  // If the condition is non-empty, generate an SCF::IfOp.
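  // The then-branch yields the result of `inBoundsCase`, the else-branch the
  // result of `outOfBoundsCase` (or nothing, if no callback was provided).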
  if (cond) {
    auto check = builder.create<scf::IfOp>(
        xferOp.getLoc(), resultTypes, cond,
        /*thenBuilder=*/
        [&](OpBuilder &builder, Location loc) {
          maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
        },
        /*elseBuilder=*/
        [&](OpBuilder &builder, Location loc) {
          if (outOfBoundsCase) {
            maybeYieldValue(hasRetVal, builder, loc,
                            outOfBoundsCase(builder, loc));
          } else {
            builder.create<scf::YieldOp>(loc);
          }
        });

    return hasRetVal ? check.getResult(0) : Value();
  }

  // Condition is empty, no need for an SCF::IfOp.
  return inBoundsCase(builder, xferOp.getLoc());
}

/// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
/// a return value. Consequently, this function does not have a return value.
template <typename OpTy>
static void generateInBoundsCheck(
    OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
    function_ref<void(OpBuilder &, Location)> inBoundsCase,
    function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
  generateInBoundsCheck(
      xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
      /*inBoundsCase=*/
      [&](OpBuilder &builder, Location loc) {
        inBoundsCase(builder, loc);
        return Value();
      },
      /*outOfBoundsCase=*/
      [&](OpBuilder &builder, Location loc) {
        if (outOfBoundsCase)
          outOfBoundsCase(builder, loc);
        return Value();
      });
}

/// Given an ArrayAttr, return a copy where the first element is dropped.
static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
  if (!attr)
    return attr;
  return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
}

/// Add the pass label to a vector transfer op if its vector rank is greater
/// than the target rank.
template <typename OpTy>
static void maybeApplyPassLabel(OpBuilder &builder, OpTy newXferOp,
                                unsigned targetRank) {
  if (newXferOp.getVectorType().getRank() > targetRank)
    newXferOp->setAttr(kPassLabel, builder.getUnitAttr());
}

namespace lowering_n_d {

/// Helper data structure for data and mask buffers.
struct BufferAllocs {
  Value dataBuffer;
  Value maskBuffer;
};

/// Allocate temporary buffers for data (vector) and mask (if present).
/// TODO: Parallelism and threadlocal considerations.
template <typename OpTy>
static BufferAllocs allocBuffers(OpTy xferOp) {
  auto &b = ScopedContext::getBuilderRef();
  OpBuilder::InsertionGuard guard(b);
  Operation *scope =
      xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
  assert(scope && "Expected op to be inside automatic allocation scope");
  b.setInsertionPointToStart(&scope->getRegion(0).front());

  BufferAllocs result;
  auto bufferType = MemRefType::get({}, xferOp.getVectorType());
  result.dataBuffer = memref_alloca(bufferType).value;

  if (xferOp.mask()) {
    auto maskType = MemRefType::get({}, xferOp.mask().getType());
    auto maskBuffer = memref_alloca(maskType).value;
    b.setInsertionPoint(xferOp);
    memref_store(xferOp.mask(), maskBuffer);
    result.maskBuffer = memref_load(maskBuffer);
  }

  return result;
}

/// Given a MemRefType with VectorType element type, unpack one dimension from
/// the VectorType into the MemRefType.
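/// The unpacked vector dimension becomes the innermost dimension of the new
/// memref shape.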
///
/// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
static MemRefType unpackOneDim(MemRefType type) {
  auto vectorType = type.getElementType().dyn_cast<VectorType>();
  auto memrefShape = type.getShape();
  SmallVector<int64_t, 8> newMemrefShape;
  newMemrefShape.append(memrefShape.begin(), memrefShape.end());
  newMemrefShape.push_back(vectorType.getDimSize(0));
  return MemRefType::get(newMemrefShape,
                         VectorType::get(vectorType.getShape().drop_front(),
                                         vectorType.getElementType()));
}

/// Given a transfer op, find the memref from which the mask is loaded. This
/// is similar to Strategy<TransferWriteOp>::getBuffer.
template <typename OpTy>
static Value getMaskBuffer(OpTy xferOp) {
  assert(xferOp.mask() && "Expected that transfer op has mask");
  auto loadOp = xferOp.mask().template getDefiningOp<memref::LoadOp>();
  assert(loadOp && "Expected transfer op mask produced by LoadOp");
  return loadOp.getMemRef();
}

/// Codegen strategy, depending on the operation.
template <typename OpTy>
struct Strategy;

/// Codegen strategy for vector TransferReadOp.
template <>
struct Strategy<TransferReadOp> {
  /// Find the StoreOp that is used for writing the current TransferReadOp's
  /// result to the temporary buffer allocation.
  static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
    assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
    auto storeOp = dyn_cast<memref::StoreOp>((*xferOp->use_begin()).getOwner());
    assert(storeOp && "Expected TransferReadOp result used by StoreOp");
    return storeOp;
  }

  /// Find the temporary buffer allocation. All labeled TransferReadOps are
  /// used like this, where %buf is either the buffer allocation or a type cast
  /// of the buffer allocation:
  /// ```
  /// %vec = vector.transfer_read ... { __vector_to_scf_lowering__ } ...
  /// memref.store %vec, %buf[...] ...
  /// ```
  static Value getBuffer(TransferReadOp xferOp) {
    return getStoreOp(xferOp).getMemRef();
  }

  /// Retrieve the indices of the current StoreOp that stores into the buffer.
  static void getBufferIndices(TransferReadOp xferOp,
                               SmallVector<Value, 8> &indices) {
    auto storeOp = getStoreOp(xferOp);
    auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
    indices.append(prevIndices.begin(), prevIndices.end());
  }

  /// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
  /// accesses on the to-be-unpacked dimension.
  ///
  /// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
  ///    variable `iv`.
  /// 2. Store the result into the (already `vector.type_cast`ed) buffer.
  ///
  /// E.g.:
  /// ```
  /// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
  ///     : memref<?x?x?xf32>, vector<4x3xf32>
  /// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
  /// ```
  /// Is rewritten to:
  /// ```
  /// %casted = vector.type_cast %buf
  ///     : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
  /// for %j = 0 to 4 {
  ///   %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
  ///       : memref<?x?x?xf32>, vector<3xf32>
  ///   memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
  /// }
  /// ```
  ///
  /// Note: The loop and type cast are generated in TransferOpConversion.
  ///       The original TransferReadOp and store op are deleted in `cleanup`.
  /// Note: The `mask` operand is set in TransferOpConversion.
  static TransferReadOp rewriteOp(OpBuilder &builder,
                                  VectorTransferToSCFOptions options,
                                  TransferReadOp xferOp, Value buffer,
                                  Value iv) {
    SmallVector<Value, 8> storeIndices;
    getBufferIndices(xferOp, storeIndices);
    storeIndices.push_back(iv);

    SmallVector<Value, 8> xferIndices;
    getXferIndices(xferOp, iv, xferIndices);

    auto bufferType = buffer.getType().dyn_cast<ShapedType>();
    auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
    auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
    auto newXfer =
        vector_transfer_read(
            vecType, xferOp.source(), xferIndices,
            AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
            xferOp.padding(), Value(), inBoundsAttr)
            .value;

    maybeApplyPassLabel(builder,
                        dyn_cast<TransferReadOp>(newXfer.getDefiningOp()),
                        options.targetRank);

    memref_store(newXfer, buffer, storeIndices);
    return newXfer.getDefiningOp<TransferReadOp>();
  }

  /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
  /// padding value to the temporary buffer.
  static void handleOutOfBoundsDim(OpBuilder & /*builder*/,
                                   TransferReadOp xferOp, Value buffer,
                                   Value iv) {
    SmallVector<Value, 8> storeIndices;
    getBufferIndices(xferOp, storeIndices);
    storeIndices.push_back(iv);

    auto bufferType = buffer.getType().dyn_cast<ShapedType>();
    auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
    auto vec = std_splat(vecType, xferOp.padding());
    memref_store(vec, buffer, storeIndices);
  }

  /// Cleanup after rewriting the op.
  static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp) {
    rewriter.eraseOp(getStoreOp(xferOp));
    rewriter.eraseOp(xferOp);
  }
};

/// Codegen strategy for vector TransferWriteOp.
template <>
struct Strategy<TransferWriteOp> {
  /// Find the temporary buffer allocation. All labeled TransferWriteOps are
  /// used like this, where %buf is either the buffer allocation or a type cast
  /// of the buffer allocation:
  /// ```
  /// %vec = memref.load %buf[...] ...
  /// vector.transfer_write %vec ... { __vector_to_scf_lowering__ } ...
  /// ```
  static Value getBuffer(TransferWriteOp xferOp) {
    auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
    assert(loadOp && "Expected transfer op vector produced by LoadOp");
    return loadOp.getMemRef();
  }

  /// Retrieve the indices of the current LoadOp that loads from the buffer.
  static void getBufferIndices(TransferWriteOp xferOp,
                               SmallVector<Value, 8> &indices) {
    auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
    auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
    indices.append(prevIndices.begin(), prevIndices.end());
  }

  /// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
  /// accesses on the to-be-unpacked dimension.
  ///
  /// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
  ///    using the loop iteration variable `iv`.
  /// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
  ///    to memory.
  ///
  /// Note: For more details, see comments on Strategy<TransferReadOp>.
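  ///
  /// For illustration, a sketch of the analogous rewrite, reusing the names
  /// from the TransferReadOp example above (loop and type cast again generated
  /// in TransferOpConversion):
  /// ```
  /// %vec = memref.load %buf[%i] : memref<5xvector<4x3xf32>>
  /// vector.transfer_write %vec, %A[%a+%i, %b, %c]
  ///     : vector<4x3xf32>, memref<?x?x?xf32>
  /// ```
  /// becomes approximately:
  /// ```
  /// %casted = vector.type_cast %buf
  ///     : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
  /// for %j = 0 to 4 {
  ///   %vec = memref.load %casted[%i, %j] : memref<5x4xvector<3xf32>>
  ///   vector.transfer_write %vec, %A[%a+%i, %b+%j, %c]
  ///       : vector<3xf32>, memref<?x?x?xf32>
  /// }
  /// ```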
  static TransferWriteOp rewriteOp(OpBuilder &builder,
                                   VectorTransferToSCFOptions options,
                                   TransferWriteOp xferOp, Value buffer,
                                   Value iv) {
    SmallVector<Value, 8> loadIndices;
    getBufferIndices(xferOp, loadIndices);
    loadIndices.push_back(iv);

    SmallVector<Value, 8> xferIndices;
    getXferIndices(xferOp, iv, xferIndices);

    auto vec = memref_load(buffer, loadIndices);
    auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
    auto newXfer = vector_transfer_write(
        Type(), vec, xferOp.source(), xferIndices,
        AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)), Value(),
        inBoundsAttr);

    maybeApplyPassLabel(builder, newXfer.op, options.targetRank);

    return newXfer;
  }

  /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
  static void handleOutOfBoundsDim(OpBuilder &builder, TransferWriteOp xferOp,
                                   Value buffer, Value iv) {}

  /// Cleanup after rewriting the op.
  static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) {
    rewriter.eraseOp(xferOp);
  }
};

template <typename OpTy>
LogicalResult checkPrepareXferOp(OpTy xferOp,
                                 VectorTransferToSCFOptions options) {
  if (xferOp->hasAttr(kPassLabel))
    return failure();
  if (xferOp.getVectorType().getRank() <= options.targetRank)
    return failure();
  return success();
}

/// Prepare a TransferReadOp for progressive lowering.
///
/// 1. Allocate a temporary buffer.
/// 2. Label the TransferReadOp, marking it eligible for progressive lowering.
/// 3. Store the result of the TransferReadOp into the temporary buffer.
/// 4. Load the result from the temporary buffer and replace all uses of the
///    original TransferReadOp with this load.
///
/// E.g.:
/// ```
/// %vec = vector.transfer_read %A[%a, %b, %c], %cst
///     : memref<?x?x?xf32>, vector<5x4xf32>
/// ```
/// is rewritten to:
/// ```
/// %0 = memref.alloca() : memref<vector<5x4xf32>>
/// %1 = vector.transfer_read %A[%a, %b, %c], %cst
///     { __vector_to_scf_lowering__ } : memref<?x?x?xf32>, vector<5x4xf32>
/// memref.store %1, %0[] : memref<vector<5x4xf32>>
/// %vec = memref.load %0[] : memref<vector<5x4xf32>>
/// ```
///
/// Note: A second temporary buffer may be allocated for the `mask` operand.
struct PrepareTransferReadConversion
    : public VectorToSCFPattern<TransferReadOp> {
  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(TransferReadOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (checkPrepareXferOp(xferOp, options).failed())
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto buffers = allocBuffers(xferOp);
    auto *newXfer = rewriter.clone(*xferOp.getOperation());
    newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
    if (xferOp.mask()) {
      dyn_cast<TransferReadOp>(newXfer).maskMutable().assign(
          buffers.maskBuffer);
    }

    memref_store(newXfer->getResult(0), buffers.dataBuffer);
    rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);

    return success();
  }
};

/// Prepare a TransferWriteOp for progressive lowering.
///
/// 1. Allocate a temporary buffer.
/// 2. Store the vector into the buffer.
/// 3. Load the vector from the buffer again.
/// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
///    marking it eligible for progressive lowering via TransferOpConversion.
///
/// E.g.:
/// ```
/// vector.transfer_write %vec, %A[%a, %b, %c]
///     : vector<5x4xf32>, memref<?x?x?xf32>
/// ```
/// is rewritten to:
/// ```
/// %0 = memref.alloca() : memref<vector<5x4xf32>>
/// memref.store %vec, %0[] : memref<vector<5x4xf32>>
/// %1 = memref.load %0[] : memref<vector<5x4xf32>>
/// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
///     : vector<5x4xf32>, memref<?x?x?xf32>
/// ```
///
/// Note: A second temporary buffer may be allocated for the `mask` operand.
struct PrepareTransferWriteConversion
    : public VectorToSCFPattern<TransferWriteOp> {
  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (checkPrepareXferOp(xferOp, options).failed())
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto buffers = allocBuffers(xferOp);
    memref_store(xferOp.vector(), buffers.dataBuffer);
    auto loadedVec = memref_load(buffers.dataBuffer);
    rewriter.updateRootInPlace(xferOp, [&]() {
      xferOp.vectorMutable().assign(loadedVec);
      xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
    });

    if (xferOp.mask()) {
      rewriter.updateRootInPlace(
          xferOp, [&]() { xferOp.maskMutable().assign(buffers.maskBuffer); });
    }

    return success();
  }
};

/// Progressive lowering of vector transfer ops: Unpack one dimension.
///
/// 1. Unpack one dimension from the current buffer type and cast the buffer
///    to that new type. E.g.:
///    ```
///    %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
///    vector.transfer_write %vec ...
///    ```
///    The following cast is generated:
///    ```
///    %casted = vector.type_cast %0
///        : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
///    ```
/// 2. Generate a for loop and rewrite the transfer op according to the
///    corresponding Strategy<OpTy>. If the to-be-unpacked dimension can be
///    out-of-bounds, generate an if-check and handle both cases separately.
/// 3. Clean up according to the corresponding Strategy<OpTy>.
template <typename OpTy>
struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(OpTy xferOp,
                                PatternRewriter &rewriter) const override {
    if (!xferOp->hasAttr(kPassLabel))
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());

    // Find and cast data buffer. How the buffer can be found depends on OpTy.
    auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
    auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
    auto castedDataType = unpackOneDim(dataBufferType);
    auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);

    // If the xferOp has a mask: Find and cast mask buffer.
    Value castedMaskBuffer;
    if (xferOp.mask()) {
      auto maskBuffer = getMaskBuffer(xferOp);
      auto maskBufferType =
          maskBuffer.getType().template dyn_cast<MemRefType>();
      if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
        // Do not unpack a dimension of the mask, if:
        // * To-be-unpacked transfer op dimension is a broadcast.
        // * Mask is 1D, i.e., the mask cannot be further unpacked.
        //   (That means that all remaining dimensions of the transfer op must
        //   be broadcasted.)
        castedMaskBuffer = maskBuffer;
      } else {
        auto castedMaskType = unpackOneDim(maskBufferType);
        castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
      }
    }

    // Loop bounds and step.
    auto lb = std_constant_index(0).value;
    auto ub = std_constant_index(
                  castedDataType.getDimSize(castedDataType.getRank() - 1))
                  .value;
    auto step = std_constant_index(1).value;

    // Generate for loop.
    rewriter.create<scf::ForOp>(
        xferOp.getLoc(), lb, ub, step, ValueRange(),
        [&](OpBuilder &b, Location loc, Value iv, ValueRange /*loopState*/) {
          ScopedContext scope(b, loc);
          generateInBoundsCheck(
              xferOp, iv, b, unpackedDim(xferOp),
              /*inBoundsCase=*/
              [&](OpBuilder &b, Location /*loc*/) {
                // Create new transfer op.
                OpTy newXfer = Strategy<OpTy>::rewriteOp(
                    b, this->options, xferOp, castedDataBuffer, iv);

                // If old transfer op has a mask: Set mask on new transfer op.
                // Special case: If the mask of the old transfer op is 1D and
                // the unpacked dim is not a broadcast, no mask is needed on
                // the new transfer op.
                if (xferOp.mask() && (xferOp.isBroadcastDim(0) ||
                                      xferOp.getMaskType().getRank() > 1)) {
                  OpBuilder::InsertionGuard guard(b);
                  b.setInsertionPoint(newXfer); // Insert load before newXfer.

                  SmallVector<Value, 8> loadIndices;
                  Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
                  // In case of broadcast: Use same indices to load from memref
                  // as before.
                  if (!xferOp.isBroadcastDim(0))
                    loadIndices.push_back(iv);

                  auto mask = memref_load(castedMaskBuffer, loadIndices);
                  rewriter.updateRootInPlace(
                      newXfer, [&]() { newXfer.maskMutable().assign(mask); });
                }
              },
              /*outOfBoundsCase=*/
              [&](OpBuilder &b, Location /*loc*/) {
                Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp,
                                                     castedDataBuffer, iv);
              });
          b.create<scf::YieldOp>(loc);
        });

    Strategy<OpTy>::cleanup(rewriter, xferOp);
    return success();
  }
};

} // namespace lowering_n_d

namespace lowering_n_d_unrolled {

/// If the original transfer op has a mask, compute the mask of the new transfer
/// op (for the current iteration `i`) and assign it.
template <typename OpTy>
static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
                            int64_t i) {
  if (!xferOp.mask())
    return;

  if (xferOp.isBroadcastDim(0)) {
    // To-be-unpacked dimension is a broadcast, which does not have a
    // corresponding mask dimension. Mask attribute remains unchanged.
    newXferOp.maskMutable().assign(xferOp.mask());
    return;
  }

  if (xferOp.getMaskType().getRank() > 1) {
    // Unpack one dimension of the mask.
    OpBuilder::InsertionGuard guard(builder);
    builder.setInsertionPoint(newXferOp); // Insert the extract before newXfer.

    llvm::SmallVector<int64_t, 1> indices({i});
    auto newMask = vector_extract(xferOp.mask(), indices).value;
    newXferOp.maskMutable().assign(newMask);
  }

  // If we end up here: The mask of the old transfer op is 1D and the unpacked
  // dim is not a broadcast, so no mask is needed on the new transfer op.
  // `generateInBoundsCheck` will have evaluated the mask already.
}

/// Progressive lowering of vector TransferReadOp with unrolling: Unpack one
/// dimension.
/// This is similar to TransferOpConversion<TransferReadOp>, but no
/// memref buffer is allocated and the SCF loop is fully unrolled.
///
/// E.g.:
/// ```
/// %vec = vector.transfer_read %A[%a, %b, %c], %padding
///     : memref<?x?x?xf32>, vector<5x4xf32>
/// ```
/// is rewritten to IR such as (simplified):
/// ```
/// %v_init = splat %padding : vector<5x4xf32>
/// %tmp0 = vector.transfer_read %A[%a, %b, %c], %padding
///     : memref<?x?x?xf32>, vector<4xf32>
/// %v0 = vector.insert %tmp0, %v_init[0] : vector<4xf32> into vector<5x4xf32>
/// %tmp1 = vector.transfer_read %A[%a, %b + 1, %c], %padding
///     : memref<?x?x?xf32>, vector<4xf32>
/// %v1 = vector.insert %tmp1, %v0[1] : vector<4xf32> into vector<5x4xf32>
/// ...
/// %tmp4 = vector.transfer_read %A[%a, %b + 4, %c], %padding
///     : memref<?x?x?xf32>, vector<4xf32>
/// %vec = vector.insert %tmp4, %v3[4] : vector<4xf32> into vector<5x4xf32>
/// ```
///
/// Note: As an optimization, if the result of the original TransferReadOp
/// was directly inserted into another vector, no new %v_init vector is created.
/// Instead, the new TransferReadOp results are inserted into that vector.
struct UnrollTransferReadConversion
    : public VectorToSCFPattern<TransferReadOp> {
  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;

  /// Return the vector into which the newly created TransferReadOp results
  /// are inserted.
  Value getResultVector(TransferReadOp xferOp,
                        PatternRewriter &rewriter) const {
    if (auto insertOp = getInsertOp(xferOp))
      return insertOp.dest();
    return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
  }

  /// If the result of the TransferReadOp has exactly one user, which is a
  /// vector::InsertOp, return that operation.
  vector::InsertOp getInsertOp(TransferReadOp xferOp) const {
    if (xferOp->hasOneUse()) {
      Operation *xferOpUser = *xferOp->getUsers().begin();
      if (auto insertOp = dyn_cast<vector::InsertOp>(xferOpUser))
        return insertOp;
    }

    return vector::InsertOp();
  }

  /// If the result of the TransferReadOp has exactly one user, which is a
  /// vector::InsertOp, return that operation's indices.
  void getInsertionIndices(TransferReadOp xferOp,
                           SmallVector<int64_t, 8> &indices) const {
    if (auto insertOp = getInsertOp(xferOp)) {
      llvm::for_each(insertOp.position(), [&](Attribute attr) {
        indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
      });
    }
  }

  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
  /// accesses, and broadcasts and transposes in permutation maps.
  LogicalResult matchAndRewrite(TransferReadOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (xferOp.getVectorType().getRank() <= options.targetRank)
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto insertOp = getInsertOp(xferOp);
    auto vec = getResultVector(xferOp, rewriter);
    auto vecType = vec.getType().dyn_cast<VectorType>();
    auto xferVecType = xferOp.getVectorType();
    auto newXferVecType = VectorType::get(xferVecType.getShape().drop_front(),
                                          xferVecType.getElementType());
    int64_t dimSize = xferVecType.getShape()[0];

    // Generate fully unrolled loop of transfer ops.
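    // Each iteration reads one (N-1)-D slice at offset `i` along the unpacked
    // dim and inserts it into `vec`; out-of-bounds iterations yield `vec`
    // unchanged (i.e., the padding value).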
    for (int64_t i = 0; i < dimSize; ++i) {
      Value iv = std_constant_index(i);

      vec = generateInBoundsCheck(
          xferOp, iv, rewriter, unpackedDim(xferOp), TypeRange(vecType),
          /*inBoundsCase=*/
          [&](OpBuilder &b, Location loc) {
            ScopedContext scope(b, loc);

            // Indices for the new transfer op.
            SmallVector<Value, 8> xferIndices;
            getXferIndices(xferOp, iv, xferIndices);

            // Indices for the new vector.insert op.
            SmallVector<int64_t, 8> insertionIndices;
            getInsertionIndices(xferOp, insertionIndices);
            insertionIndices.push_back(i);

            auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
            auto newXferOpVal =
                vector_transfer_read(
                    newXferVecType, xferOp.source(), xferIndices,
                    AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
                    xferOp.padding(), Value(), inBoundsAttr)
                    .value;
            auto newXferOp =
                dyn_cast<TransferReadOp>(newXferOpVal.getDefiningOp());

            maybeAssignMask(b, xferOp, newXferOp, i);

            return vector_insert(newXferOp, vec, insertionIndices).value;
          },
          /*outOfBoundsCase=*/
          [&](OpBuilder &b, Location loc) {
            // Out-of-bounds: Leave the original (unmodified) vector as is.
            return vec;
          });
    }

    if (insertOp) {
      // Rewrite single user of the old TransferReadOp, which was an InsertOp.
      rewriter.replaceOp(insertOp, vec);
      rewriter.eraseOp(xferOp);
    } else {
      rewriter.replaceOp(xferOp, vec);
    }

    return success();
  }
};

/// Progressive lowering of vector TransferWriteOp with unrolling: Unpack one
/// dimension. This is similar to TransferOpConversion<TransferWriteOp>, but no
/// memref buffer is allocated and the SCF loop is fully unrolled.
///
/// E.g.:
/// ```
/// vector.transfer_write %vec, %A[%a, %b, %c]
///     : vector<5x4xf32>, memref<?x?x?xf32>
/// ```
/// is rewritten to IR such as (simplified):
/// ```
/// %v0 = vector.extract %vec[0] : vector<5x4xf32>
/// vector.transfer_write %v0, %A[%a, %b, %c] : vector<4xf32>, memref<...>
/// %v1 = vector.extract %vec[1] : vector<5x4xf32>
/// vector.transfer_write %v1, %A[%a, %b + 1, %c] : vector<4xf32>, memref<...>
/// ...
/// %v4 = vector.extract %vec[4] : vector<5x4xf32>
/// vector.transfer_write %v4, %A[%a, %b + 4, %c] : vector<4xf32>, memref<...>
/// ```
///
/// Note: As an optimization, if the vector of the original TransferWriteOp
/// was directly extracted from another vector via an ExtractOp `a`, extract
/// the vectors for the newly generated TransferWriteOps from `a`'s input. By
/// doing so, `a` may become dead, and the number of ExtractOps generated during
/// recursive application of this pattern will be minimal.
struct UnrollTransferWriteConversion
    : public VectorToSCFPattern<TransferWriteOp> {
  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;

  /// Return the vector from which newly generated ExtractOps will extract.
  Value getDataVector(TransferWriteOp xferOp) const {
    if (auto extractOp = getExtractOp(xferOp))
      return extractOp.vector();
    return xferOp.vector();
  }

  /// If the input of the given TransferWriteOp is an ExtractOp, return it.
  vector::ExtractOp getExtractOp(TransferWriteOp xferOp) const {
    if (auto *op = xferOp.vector().getDefiningOp())
      return dyn_cast<vector::ExtractOp>(op);
    return vector::ExtractOp();
  }

  /// If the input of the given TransferWriteOp is an ExtractOp, return its
  /// indices.
  void getExtractionIndices(TransferWriteOp xferOp,
                            SmallVector<int64_t, 8> &indices) const {
    if (auto extractOp = getExtractOp(xferOp)) {
      llvm::for_each(extractOp.position(), [&](Attribute attr) {
        indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
      });
    }
  }

  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
  /// accesses, and broadcasts and transposes in permutation maps.
  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
                                PatternRewriter &rewriter) const override {
    if (xferOp.getVectorType().getRank() <= options.targetRank)
      return failure();

    ScopedContext scope(rewriter, xferOp.getLoc());
    auto vec = getDataVector(xferOp);
    auto xferVecType = xferOp.getVectorType();
    int64_t dimSize = xferVecType.getShape()[0];

    // Generate fully unrolled loop of transfer ops.
    for (int64_t i = 0; i < dimSize; ++i) {
      Value iv = std_constant_index(i);

      generateInBoundsCheck(
          xferOp, iv, rewriter, unpackedDim(xferOp),
          /*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
            ScopedContext scope(b, loc);

            // Indices for the new transfer op.
            SmallVector<Value, 8> xferIndices;
            getXferIndices(xferOp, iv, xferIndices);

            // Indices for the new vector.extract op.
            SmallVector<int64_t, 8> extractionIndices;
            getExtractionIndices(xferOp, extractionIndices);
            extractionIndices.push_back(i);

            auto extracted = vector_extract(vec, extractionIndices).value;
            auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());

            auto newXferOp =
                vector_transfer_write(
                    Type(), extracted, xferOp.source(), xferIndices,
                    AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
                    Value(), inBoundsAttr)
                    .op;

            maybeAssignMask(b, xferOp, newXferOp, i);
          });
    }

    rewriter.eraseOp(xferOp);
    return success();
  }
};

} // namespace lowering_n_d_unrolled

namespace lowering_1_d {

/// Compute the indices into the memref for the LoadOp/StoreOp generated as
/// part of TransferOp1dConversion. Return the memref dimension on which
/// the transfer is operating. A return value of None indicates a broadcast.
template <typename OpTy>
static Optional<int64_t>
get1dMemrefIndices(OpTy xferOp, Value iv,
                   SmallVector<Value, 8> &memrefIndices) {
  auto indices = xferOp.indices();
  auto map = xferOp.permutation_map();

  memrefIndices.append(indices.begin(), indices.end());
  assert(map.getNumResults() == 1 &&
         "Expected 1 permutation map result for 1D transfer");
  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
    auto dim = expr.getPosition();
    using edsc::op::operator+;
    memrefIndices[dim] = memrefIndices[dim] + iv;
    return dim;
  }

  assert(xferOp.isBroadcastDim(0) &&
         "Expected AffineDimExpr or AffineConstantExpr");
  return None;
}

/// Codegen strategy for TransferOp1dConversion, depending on the
/// operation.
template <typename OpTy>
struct Strategy1d;

/// Codegen strategy for TransferReadOp.
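/// Each loop iteration loads one scalar element from the source memref and
/// inserts it into the vector that is carried as the loop's iteration
/// argument.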
template <>
struct Strategy1d<TransferReadOp> {
  static void generateForLoopBody(OpBuilder &builder, Location loc,
                                  TransferReadOp xferOp, Value iv,
                                  ValueRange loopState) {
    SmallVector<Value, 8> indices;
    auto dim = get1dMemrefIndices(xferOp, iv, indices);
    auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
    auto vec = loopState[0];

    // In case of out-of-bounds access, leave `vec` as is (was initialized with
    // padding value).
    auto nextVec = generateInBoundsCheck(
        xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
        /*inBoundsCase=*/
        [&](OpBuilder & /*b*/, Location loc) {
          auto val = memref_load(xferOp.source(), indices);
          return vector_insert_element(val, vec, ivI32.value).value;
        },
        /*outOfBoundsCase=*/
        [&](OpBuilder & /*b*/, Location loc) { return vec; });
    builder.create<scf::YieldOp>(loc, nextVec);
  }

  static Value initialLoopState(TransferReadOp xferOp) {
    // Initialize vector with padding value.
    return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
  }
};

/// Codegen strategy for TransferWriteOp.
template <>
struct Strategy1d<TransferWriteOp> {
  static void generateForLoopBody(OpBuilder &builder, Location loc,
                                  TransferWriteOp xferOp, Value iv,
                                  ValueRange /*loopState*/) {
    SmallVector<Value, 8> indices;
    auto dim = get1dMemrefIndices(xferOp, iv, indices);
    auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);

    // Nothing to do in case of out-of-bounds access.
    generateInBoundsCheck(
        xferOp, iv, builder, dim,
        /*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) {
          auto val = vector_extract_element(xferOp.vector(), ivI32.value);
          memref_store(val, xferOp.source(), indices);
        });
    builder.create<scf::YieldOp>(loc);
  }

  static Value initialLoopState(TransferWriteOp xferOp) { return Value(); }
};

/// Return true if the last dimension of the MemRefType has unit stride.
static bool isLastMemrefDimUnitStride(MemRefType type) {
  int64_t offset;
  SmallVector<int64_t, 4> strides;
  auto successStrides = getStridesAndOffset(type, strides, offset);
  return succeeded(successStrides) && strides.back() == 1;
}

/// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
/// necessary in cases where a 1D vector transfer op cannot be lowered into
/// vector load/stores due to non-unit strides or broadcasts:
///
/// * Transfer dimension is not the last memref dimension
/// * Transfer dimension is a broadcast (i.e., scalar load + broadcast)
/// * Memref has a layout map with non-unit stride on the last dimension
///
/// This pattern generates IR as follows:
///
/// 1. Generate a for loop iterating over each vector element.
/// 2. Inside the loop, generate an InsertElementOp or ExtractElementOp,
///    depending on OpTy.
///
/// TODO: In some cases (no masking, etc.), LLVM::MatrixColumnMajorLoadOp
/// can be generated instead of TransferOp1dConversion. Add such a pattern
/// to ConvertVectorToLLVM.
///
/// E.g.:
/// ```
/// vector.transfer_write %vec, %A[%a, %b]
///    {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
///    : vector<9xf32>, memref<?x?xf32>
/// ```
/// Is rewritten to approximately the following pseudo-IR:
/// ```
/// for i = 0 to 9 {
///   %t = vector.extractelement %vec[i] : vector<9xf32>
///   memref.store %t, %A[%a + i, %b] : memref<?x?xf32>
/// }
/// ```
template <typename OpTy>
struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;

  LogicalResult matchAndRewrite(OpTy xferOp,
                                PatternRewriter &rewriter) const override {
    ScopedContext scope(rewriter, xferOp.getLoc());
    auto map = xferOp.permutation_map();
    auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();

    if (!memRefType)
      return failure();
    if (xferOp.getVectorType().getRank() != 1)
      return failure();
    if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
      return failure(); // Handled by ConvertVectorToLLVM

    // Loop bounds, step, state...
    auto vecType = xferOp.getVectorType();
    auto lb = std_constant_index(0);
    auto ub = std_constant_index(vecType.getDimSize(0));
    auto step = std_constant_index(1);
    auto loopState = Strategy1d<OpTy>::initialLoopState(xferOp);

    // Generate for loop.
    rewriter.replaceOpWithNewOp<scf::ForOp>(
        xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
        [&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
          ScopedContext nestedScope(builder, loc);
          Strategy1d<OpTy>::generateForLoopBody(builder, loc, xferOp, iv,
                                                loopState);
        });

    return success();
  }
};

} // namespace lowering_1_d
} // namespace

namespace mlir {

void populateVectorToSCFConversionPatterns(
    RewritePatternSet &patterns, const VectorTransferToSCFOptions &options) {
  if (options.unroll) {
    patterns.add<lowering_n_d_unrolled::UnrollTransferReadConversion,
                 lowering_n_d_unrolled::UnrollTransferWriteConversion>(
        patterns.getContext(), options);
  } else {
    patterns.add<lowering_n_d::PrepareTransferReadConversion,
                 lowering_n_d::PrepareTransferWriteConversion,
                 lowering_n_d::TransferOpConversion<TransferReadOp>,
                 lowering_n_d::TransferOpConversion<TransferWriteOp>>(
        patterns.getContext(), options);
  }

  if (options.targetRank == 1) {
    patterns.add<lowering_1_d::TransferOp1dConversion<TransferReadOp>,
                 lowering_1_d::TransferOp1dConversion<TransferWriteOp>>(
        patterns.getContext(), options);
  }
}

} // namespace mlir

namespace {

struct ConvertVectorToSCFPass
    : public ConvertVectorToSCFBase<ConvertVectorToSCFPass> {
  ConvertVectorToSCFPass() = default;
  ConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
    this->fullUnroll = options.unroll;
    this->targetRank = options.targetRank;
    this->lowerPermutationMaps = options.lowerPermutationMaps;
  }

  void runOnFunction() override {
    VectorTransferToSCFOptions options;
    options.unroll = fullUnroll;
    options.targetRank = targetRank;
    options.lowerPermutationMaps = lowerPermutationMaps;

    // Lower permutation maps first.
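    // This materializes transposes/broadcasts that are encoded in permutation
    // maps as explicit vector ops, so the VectorToSCF patterns below mostly
    // see transfers with simpler permutation maps.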
    if (lowerPermutationMaps) {
      RewritePatternSet lowerTransferPatterns(getFunction().getContext());
      mlir::vector::populateVectorTransferPermutationMapLoweringPatterns(
          lowerTransferPatterns);
      (void)applyPatternsAndFoldGreedily(getFunction(),
                                         std::move(lowerTransferPatterns));
    }

    RewritePatternSet patterns(getFunction().getContext());
    populateVectorToSCFConversionPatterns(patterns, options);
    (void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
  }
};

} // namespace

std::unique_ptr<Pass>
mlir::createConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
  return std::make_unique<ConvertVectorToSCFPass>(options);
}