1 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities to generate mappings for parallel loops to
10 // GPU devices.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
15
16 #include "PassDetail.h"
17 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
18 #include "mlir/Dialect/GPU/Transforms/Passes.h"
19 #include "mlir/Dialect/SCF/IR/SCF.h"
20 #include "mlir/IR/AffineMap.h"
21
22 namespace mlir {
23
24 using scf::ParallelOp;
25
getMappingAttrName()26 StringRef gpu::getMappingAttrName() { return "mapping"; }
27
28 LogicalResult
setMappingAttr(ParallelOp ploopOp,ArrayRef<ParallelLoopDimMappingAttr> mapping)29 gpu::setMappingAttr(ParallelOp ploopOp,
30 ArrayRef<ParallelLoopDimMappingAttr> mapping) {
31 // Verify that each processor is mapped to only once.
32 llvm::DenseSet<gpu::Processor> specifiedMappings;
33 for (auto dimAttr : mapping) {
34 gpu::Processor processor = dimAttr.getProcessor();
35 if (processor != gpu::Processor::Sequential &&
36 specifiedMappings.count(processor))
37 return ploopOp.emitError(
38 "invalid mapping multiple loops to same processor");
39 }
40 ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
41 ploopOp->setAttr(getMappingAttrName(),
42 ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
43 return success();
44 }
45
46 namespace gpu {
namespace {
/// Levels at which a parallel loop nest can be mapped: first onto the grid
/// (blocks), then within a block (threads), and sequentially beyond that.
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
} // namespace

/// Number of hardware ids (x, y, z) available at each mapping level.
static constexpr int kNumHardwareIds = 3;

/// Bounded increment on MappingLevel: advances to the next level and
/// saturates at Sequential.
static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel >= Sequential)
    return mappingLevel;
  mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  return mappingLevel;
}
61
62 /// Computed the hardware id to use for a given mapping level. Will
63 /// assign x,y and z hardware ids for the first 3 dimensions and use
64 /// sequential after.
65 /// TODO: Make this use x for the inner-most loop that is
66 /// distributed to map to x, the next innermost to y and the next innermost to
67 /// z.
getHardwareIdForMapping(MappingLevel level,int dimension)68 static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
69
70 if (dimension >= kNumHardwareIds || level == Sequential)
71 return Processor::Sequential;
72 switch (level) {
73 case MapGrid:
74 switch (dimension) {
75 case 0:
76 return Processor::BlockX;
77 case 1:
78 return Processor::BlockY;
79 case 2:
80 return Processor::BlockZ;
81 default:
82 return Processor::Sequential;
83 }
84 break;
85 case MapBlock:
86 switch (dimension) {
87 case 0:
88 return Processor::ThreadX;
89 case 1:
90 return Processor::ThreadY;
91 case 2:
92 return Processor::ThreadZ;
93 default:
94 return Processor::Sequential;
95 }
96 default:;
97 }
98 return Processor::Sequential;
99 }
100
101 /// Add mapping information to the given parallel loop. Do not add
102 /// mapping information if the loop already has it. Also, don't
103 /// start a mapping at a nested loop.
mapParallelOp(ParallelOp parallelOp,MappingLevel mappingLevel=MapGrid)104 static void mapParallelOp(ParallelOp parallelOp,
105 MappingLevel mappingLevel = MapGrid) {
106 // Do not try to add a mapping to already mapped loops or nested loops.
107 if (parallelOp->getAttr(getMappingAttrName()) ||
108 ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
109 return;
110
111 MLIRContext *ctx = parallelOp.getContext();
112 Builder b(ctx);
113 SmallVector<ParallelLoopDimMappingAttr, 4> attrs;
114 attrs.reserve(parallelOp.getNumLoops());
115 for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
116 attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
117 getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
118 b.getDimIdentityMap()));
119 }
120 (void)setMappingAttr(parallelOp, attrs);
121 ++mappingLevel;
122 // Parallel loop operations are immediately nested, so do not use
123 // walk but just iterate over the operations.
124 for (Operation &op : *parallelOp.getBody()) {
125 if (ParallelOp nested = dyn_cast<ParallelOp>(op))
126 mapParallelOp(nested, mappingLevel);
127 }
128 }
129
130 namespace {
131 struct GpuMapParallelLoopsPass
132 : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
runOnOperationmlir::gpu::__anond8c028e30211::GpuMapParallelLoopsPass133 void runOnOperation() override {
134 for (Region ®ion : getOperation()->getRegions()) {
135 region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
136 }
137 }
138 };
139
140 } // namespace
141 } // namespace gpu
142 } // namespace mlir
143
144 std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
createGpuMapParallelLoopsPass()145 mlir::createGpuMapParallelLoopsPass() {
146 return std::make_unique<gpu::GpuMapParallelLoopsPass>();
147 }
148