1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" 10 #include "../PassDetail.h" 11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h" 12 #include "mlir/Dialect/Affine/IR/AffineOps.h" 13 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" 14 #include "mlir/Dialect/Complex/IR/Complex.h" 15 #include "mlir/Dialect/GPU/GPUDialect.h" 16 #include "mlir/Dialect/SCF/SCF.h" 17 #include "mlir/Dialect/StandardOps/IR/Ops.h" 18 #include "mlir/Transforms/DialectConversion.h" 19 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/Support/CommandLine.h" 22 23 using namespace mlir; 24 using namespace mlir::scf; 25 26 namespace { 27 // A pass that traverses top-level loops in the function and converts them to 28 // GPU launch operations. Nested launches are not allowed, so this does not 29 // walk the function recursively to avoid considering nested loops. 30 struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> { 31 ForLoopMapper() = default; 32 ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) { 33 this->numBlockDims = numBlockDims; 34 this->numThreadDims = numThreadDims; 35 } 36 37 void runOnFunction() override { 38 for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) { 39 if (auto forOp = dyn_cast<AffineForOp>(&op)) { 40 if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, 41 numThreadDims))) 42 signalPassFailure(); 43 } 44 } 45 } 46 }; 47 48 struct ParallelLoopToGpuPass 49 : public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> { 50 void runOnOperation() override { 51 RewritePatternSet patterns(&getContext()); 52 populateParallelLoopToGPUPatterns(patterns); 53 ConversionTarget target(getContext()); 54 target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); 55 configureParallelLoopToGPULegality(target); 56 if (failed(applyPartialConversion(getOperation(), target, 57 std::move(patterns)))) 58 signalPassFailure(); 59 finalizeParallelLoopToGPUConversion(getOperation()); 60 } 61 }; 62 63 } // namespace 64 65 std::unique_ptr<OperationPass<FuncOp>> 66 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) { 67 return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims); 68 } 69 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineForToGPUPass() { 70 return std::make_unique<ForLoopMapper>(); 71 } 72 73 std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() { 74 return std::make_unique<ParallelLoopToGpuPass>(); 75 } 76