14ead2cf7SAlex Zinenko //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
24ead2cf7SAlex Zinenko //
34ead2cf7SAlex Zinenko // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44ead2cf7SAlex Zinenko // See https://llvm.org/LICENSE.txt for license information.
54ead2cf7SAlex Zinenko // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64ead2cf7SAlex Zinenko //
74ead2cf7SAlex Zinenko //===----------------------------------------------------------------------===//
84ead2cf7SAlex Zinenko 
94ead2cf7SAlex Zinenko #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
104ead2cf7SAlex Zinenko #include "../PassDetail.h"
114ead2cf7SAlex Zinenko #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
124ead2cf7SAlex Zinenko #include "mlir/Dialect/Affine/IR/AffineOps.h"
134ead2cf7SAlex Zinenko #include "mlir/Dialect/GPU/GPUDialect.h"
144ead2cf7SAlex Zinenko #include "mlir/Dialect/SCF/SCF.h"
154ead2cf7SAlex Zinenko #include "mlir/Dialect/StandardOps/IR/Ops.h"
164ead2cf7SAlex Zinenko #include "mlir/Transforms/DialectConversion.h"
174ead2cf7SAlex Zinenko 
184ead2cf7SAlex Zinenko #include "llvm/ADT/ArrayRef.h"
194ead2cf7SAlex Zinenko #include "llvm/Support/CommandLine.h"
204ead2cf7SAlex Zinenko 
214ead2cf7SAlex Zinenko using namespace mlir;
224ead2cf7SAlex Zinenko using namespace mlir::scf;
234ead2cf7SAlex Zinenko 
244ead2cf7SAlex Zinenko namespace {
254ead2cf7SAlex Zinenko // A pass that traverses top-level loops in the function and converts them to
264ead2cf7SAlex Zinenko // GPU launch operations.  Nested launches are not allowed, so this does not
274ead2cf7SAlex Zinenko // walk the function recursively to avoid considering nested loops.
282bcd1927SMaheshRavishankar struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> {
294ead2cf7SAlex Zinenko   ForLoopMapper() = default;
304ead2cf7SAlex Zinenko   ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
314ead2cf7SAlex Zinenko     this->numBlockDims = numBlockDims;
324ead2cf7SAlex Zinenko     this->numThreadDims = numThreadDims;
334ead2cf7SAlex Zinenko   }
344ead2cf7SAlex Zinenko 
354ead2cf7SAlex Zinenko   void runOnFunction() override {
364ead2cf7SAlex Zinenko     for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) {
374ead2cf7SAlex Zinenko       if (auto forOp = dyn_cast<AffineForOp>(&op)) {
384ead2cf7SAlex Zinenko         if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
394ead2cf7SAlex Zinenko                                                     numThreadDims)))
404ead2cf7SAlex Zinenko           signalPassFailure();
414ead2cf7SAlex Zinenko       }
424ead2cf7SAlex Zinenko     }
434ead2cf7SAlex Zinenko   }
444ead2cf7SAlex Zinenko };
454ead2cf7SAlex Zinenko 
464ead2cf7SAlex Zinenko struct ParallelLoopToGpuPass
474ead2cf7SAlex Zinenko     : public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
484ead2cf7SAlex Zinenko   void runOnOperation() override {
494ead2cf7SAlex Zinenko     OwningRewritePatternList patterns;
504ead2cf7SAlex Zinenko     populateParallelLoopToGPUPatterns(patterns, &getContext());
514ead2cf7SAlex Zinenko     ConversionTarget target(getContext());
524ead2cf7SAlex Zinenko     target.addLegalDialect<StandardOpsDialect>();
534ead2cf7SAlex Zinenko     target.addLegalDialect<AffineDialect>();
544ead2cf7SAlex Zinenko     target.addLegalDialect<gpu::GPUDialect>();
554ead2cf7SAlex Zinenko     target.addLegalDialect<scf::SCFDialect>();
564ead2cf7SAlex Zinenko     target.addIllegalOp<scf::ParallelOp>();
57*3fffffa8SRiver Riddle     if (failed(applyPartialConversion(getOperation(), target,
58*3fffffa8SRiver Riddle                                       std::move(patterns))))
594ead2cf7SAlex Zinenko       signalPassFailure();
604ead2cf7SAlex Zinenko   }
614ead2cf7SAlex Zinenko };
624ead2cf7SAlex Zinenko 
634ead2cf7SAlex Zinenko } // namespace
644ead2cf7SAlex Zinenko 
654ead2cf7SAlex Zinenko std::unique_ptr<OperationPass<FuncOp>>
662bcd1927SMaheshRavishankar mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
674ead2cf7SAlex Zinenko   return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
684ead2cf7SAlex Zinenko }
692bcd1927SMaheshRavishankar std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineForToGPUPass() {
704ead2cf7SAlex Zinenko   return std::make_unique<ForLoopMapper>();
714ead2cf7SAlex Zinenko }
724ead2cf7SAlex Zinenko 
734ead2cf7SAlex Zinenko std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
744ead2cf7SAlex Zinenko   return std::make_unique<ParallelLoopToGpuPass>();
754ead2cf7SAlex Zinenko }
76