1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
10 #include "../PassDetail.h"
11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
13 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
14 #include "mlir/Dialect/Complex/IR/Complex.h"
15 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
16 #include "mlir/Dialect/SCF/IR/SCF.h"
17 #include "mlir/Transforms/DialectConversion.h"
18
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/Support/CommandLine.h"
21
22 using namespace mlir;
23 using namespace mlir::scf;
24
25 namespace {
26 // A pass that traverses top-level loops in the function and converts them to
27 // GPU launch operations. Nested launches are not allowed, so this does not
28 // walk the function recursively to avoid considering nested loops.
29 struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> {
30 ForLoopMapper() = default;
ForLoopMapper__anonb2e6168a0111::ForLoopMapper31 ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
32 this->numBlockDims = numBlockDims;
33 this->numThreadDims = numThreadDims;
34 }
35
runOnOperation__anonb2e6168a0111::ForLoopMapper36 void runOnOperation() override {
37 for (Operation &op :
38 llvm::make_early_inc_range(getOperation().getBody().getOps())) {
39 if (auto forOp = dyn_cast<AffineForOp>(&op)) {
40 if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
41 numThreadDims)))
42 signalPassFailure();
43 }
44 }
45 }
46 };
47
48 struct ParallelLoopToGpuPass
49 : public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
runOnOperation__anonb2e6168a0111::ParallelLoopToGpuPass50 void runOnOperation() override {
51 RewritePatternSet patterns(&getContext());
52 populateParallelLoopToGPUPatterns(patterns);
53 ConversionTarget target(getContext());
54 target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
55 configureParallelLoopToGPULegality(target);
56 if (failed(applyPartialConversion(getOperation(), target,
57 std::move(patterns))))
58 signalPassFailure();
59 finalizeParallelLoopToGPUConversion(getOperation());
60 }
61 };
62
63 } // namespace
64
65 std::unique_ptr<InterfacePass<FunctionOpInterface>>
createAffineForToGPUPass(unsigned numBlockDims,unsigned numThreadDims)66 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
67 return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
68 }
69 std::unique_ptr<InterfacePass<FunctionOpInterface>>
createAffineForToGPUPass()70 mlir::createAffineForToGPUPass() {
71 return std::make_unique<ForLoopMapper>();
72 }
73
createParallelLoopToGpuPass()74 std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
75 return std::make_unique<ParallelLoopToGpuPass>();
76 }
77