//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//
137a7eacc7SStephan Herhut 
147a7eacc7SStephan Herhut #include "mlir/Dialect/GPU/ParallelLoopMapper.h"
157a7eacc7SStephan Herhut 
167a7eacc7SStephan Herhut #include "mlir/Dialect/GPU/GPUDialect.h"
177a7eacc7SStephan Herhut #include "mlir/Dialect/GPU/Passes.h"
187a7eacc7SStephan Herhut #include "mlir/Dialect/LoopOps/LoopOps.h"
197a7eacc7SStephan Herhut #include "mlir/IR/AffineMap.h"
207a7eacc7SStephan Herhut #include "mlir/Pass/Pass.h"
217a7eacc7SStephan Herhut 
227a7eacc7SStephan Herhut using namespace mlir;
237a7eacc7SStephan Herhut using namespace mlir::gpu;
247a7eacc7SStephan Herhut using namespace mlir::loop;
257a7eacc7SStephan Herhut 
26*46bb6613SMaheshRavishankar #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
27*46bb6613SMaheshRavishankar namespace mlir {
28*46bb6613SMaheshRavishankar 
29*46bb6613SMaheshRavishankar #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
30*46bb6613SMaheshRavishankar namespace gpu {
31*46bb6613SMaheshRavishankar 
32*46bb6613SMaheshRavishankar StringRef getMappingAttrName() { return "mapping"; }
33*46bb6613SMaheshRavishankar 
34*46bb6613SMaheshRavishankar ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
35*46bb6613SMaheshRavishankar                                                      AffineMap map,
36*46bb6613SMaheshRavishankar                                                      AffineMap bound) {
37*46bb6613SMaheshRavishankar   MLIRContext *context = map.getContext();
38*46bb6613SMaheshRavishankar   OpBuilder builder(context);
39*46bb6613SMaheshRavishankar   return ParallelLoopDimMapping::get(
40*46bb6613SMaheshRavishankar       builder.getI64IntegerAttr(static_cast<int32_t>(processor)),
41*46bb6613SMaheshRavishankar       AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
42*46bb6613SMaheshRavishankar }
43*46bb6613SMaheshRavishankar 
44*46bb6613SMaheshRavishankar LogicalResult setMappingAttr(loop::ParallelOp ploopOp,
45*46bb6613SMaheshRavishankar                              ArrayRef<ParallelLoopDimMapping> mapping) {
46*46bb6613SMaheshRavishankar   // Verify that each processor is mapped to only once.
47*46bb6613SMaheshRavishankar   llvm::DenseSet<gpu::Processor> specifiedMappings;
48*46bb6613SMaheshRavishankar   for (auto dimAttr : mapping) {
49*46bb6613SMaheshRavishankar     gpu::Processor processor = getProcessor(dimAttr);
50*46bb6613SMaheshRavishankar     if (processor != gpu::Processor::Sequential &&
51*46bb6613SMaheshRavishankar         specifiedMappings.count(processor))
52*46bb6613SMaheshRavishankar       return ploopOp.emitError(
53*46bb6613SMaheshRavishankar           "invalid mapping multiple loops to same processor");
54*46bb6613SMaheshRavishankar   }
55*46bb6613SMaheshRavishankar   ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
56*46bb6613SMaheshRavishankar   ploopOp.setAttr(getMappingAttrName(),
57*46bb6613SMaheshRavishankar                   ArrayAttr::get(mappingAsAttrs, ploopOp.getContext()));
58*46bb6613SMaheshRavishankar   return success();
59*46bb6613SMaheshRavishankar }
60*46bb6613SMaheshRavishankar } // namespace gpu
61*46bb6613SMaheshRavishankar } // namespace mlir
62*46bb6613SMaheshRavishankar 
namespace {

/// Level of the hardware hierarchy that a parallel loop is mapped to. The
/// numeric values are ordered so that incrementing a level moves one step
/// further down the hierarchy.
enum MappingLevel {
  /// Dimensions are mapped to block ids.
  MapGrid = 0,
  /// Dimensions are mapped to thread ids.
  MapBlock = 1,
  /// Dimensions are executed sequentially.
  Sequential = 2
};

/// Number of hardware ids available per mapping level. Loop dimensions at or
/// beyond this index are mapped sequentially.
static constexpr int kNumHardwareIds = 3;

} // namespace
707a7eacc7SStephan Herhut 
717a7eacc7SStephan Herhut /// Bounded increment on MappingLevel. Increments to the next
727a7eacc7SStephan Herhut /// level unless Sequential was already reached.
737a7eacc7SStephan Herhut MappingLevel &operator++(MappingLevel &mappingLevel) {
747a7eacc7SStephan Herhut   if (mappingLevel < Sequential) {
757a7eacc7SStephan Herhut     mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
767a7eacc7SStephan Herhut   }
777a7eacc7SStephan Herhut   return mappingLevel;
787a7eacc7SStephan Herhut }
797a7eacc7SStephan Herhut 
807a7eacc7SStephan Herhut /// Computed the hardware id to use for a given mapping level. Will
817a7eacc7SStephan Herhut /// assign x,y and z hardware ids for the first 3 dimensions and use
827a7eacc7SStephan Herhut /// sequential after.
83*46bb6613SMaheshRavishankar /// TODO(ravishankarm/herhut) : Make this use x for the inner-most loop that is
84*46bb6613SMaheshRavishankar /// distributed to map to x, the next innermost to y and the next innermost to
85*46bb6613SMaheshRavishankar /// z.
86*46bb6613SMaheshRavishankar static gpu::Processor getHardwareIdForMapping(MappingLevel level,
87*46bb6613SMaheshRavishankar                                               int dimension) {
88*46bb6613SMaheshRavishankar 
897a7eacc7SStephan Herhut   if (dimension >= kNumHardwareIds || level == Sequential)
90*46bb6613SMaheshRavishankar     return Processor::Sequential;
91*46bb6613SMaheshRavishankar   switch (level) {
92*46bb6613SMaheshRavishankar   case MapGrid:
93*46bb6613SMaheshRavishankar     switch (dimension) {
94*46bb6613SMaheshRavishankar     case 0:
95*46bb6613SMaheshRavishankar       return Processor::BlockX;
96*46bb6613SMaheshRavishankar     case 1:
97*46bb6613SMaheshRavishankar       return Processor::BlockY;
98*46bb6613SMaheshRavishankar     case 2:
99*46bb6613SMaheshRavishankar       return Processor::BlockZ;
100*46bb6613SMaheshRavishankar     default:
101*46bb6613SMaheshRavishankar       return Processor::Sequential;
102*46bb6613SMaheshRavishankar     }
103*46bb6613SMaheshRavishankar     break;
104*46bb6613SMaheshRavishankar   case MapBlock:
105*46bb6613SMaheshRavishankar     switch (dimension) {
106*46bb6613SMaheshRavishankar     case 0:
107*46bb6613SMaheshRavishankar       return Processor::ThreadX;
108*46bb6613SMaheshRavishankar     case 1:
109*46bb6613SMaheshRavishankar       return Processor::ThreadY;
110*46bb6613SMaheshRavishankar     case 2:
111*46bb6613SMaheshRavishankar       return Processor::ThreadZ;
112*46bb6613SMaheshRavishankar     default:
113*46bb6613SMaheshRavishankar       return Processor::Sequential;
114*46bb6613SMaheshRavishankar     }
115*46bb6613SMaheshRavishankar   default:;
116*46bb6613SMaheshRavishankar   }
117*46bb6613SMaheshRavishankar   return Processor::Sequential;
1187a7eacc7SStephan Herhut }
1197a7eacc7SStephan Herhut 
1207a7eacc7SStephan Herhut /// Add mapping information to the given parallel loop. Do not add
1217a7eacc7SStephan Herhut /// mapping information if the loop already has it. Also, don't
1227a7eacc7SStephan Herhut /// start a mapping at a nested loop.
1237a7eacc7SStephan Herhut static void mapParallelOp(ParallelOp parallelOp,
1247a7eacc7SStephan Herhut                           MappingLevel mappingLevel = MapGrid) {
1257a7eacc7SStephan Herhut   // Do not try to add a mapping to already mapped loops or nested loops.
126*46bb6613SMaheshRavishankar   if (parallelOp.getAttr(getMappingAttrName()) ||
1277a7eacc7SStephan Herhut       ((mappingLevel == MapGrid) && parallelOp.getParentOfType<ParallelOp>()))
1287a7eacc7SStephan Herhut     return;
1297a7eacc7SStephan Herhut 
1307a7eacc7SStephan Herhut   MLIRContext *ctx = parallelOp.getContext();
1317a7eacc7SStephan Herhut   Builder b(ctx);
132*46bb6613SMaheshRavishankar   SmallVector<ParallelLoopDimMapping, 4> attrs;
1337a7eacc7SStephan Herhut   attrs.reserve(parallelOp.getNumInductionVars());
1347a7eacc7SStephan Herhut   for (int i = 0, e = parallelOp.getNumInductionVars(); i < e; ++i) {
135*46bb6613SMaheshRavishankar     attrs.push_back(getParallelLoopDimMappingAttr(
136*46bb6613SMaheshRavishankar         getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
137*46bb6613SMaheshRavishankar         b.getDimIdentityMap()));
1387a7eacc7SStephan Herhut   }
139*46bb6613SMaheshRavishankar   setMappingAttr(parallelOp, attrs);
1407a7eacc7SStephan Herhut   ++mappingLevel;
1417a7eacc7SStephan Herhut   // Parallel loop operations are immediately nested, so do not use
1427a7eacc7SStephan Herhut   // walk but just iterate over the operations.
1437a7eacc7SStephan Herhut   for (Operation &op : *parallelOp.getBody()) {
1447a7eacc7SStephan Herhut     if (ParallelOp nested = dyn_cast<ParallelOp>(op))
1457a7eacc7SStephan Herhut       mapParallelOp(nested, mappingLevel);
1467a7eacc7SStephan Herhut   }
1477a7eacc7SStephan Herhut }
1487a7eacc7SStephan Herhut 
1497a7eacc7SStephan Herhut void mlir::greedilyMapParallelLoopsToGPU(Region &region) {
1507a7eacc7SStephan Herhut   region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
1517a7eacc7SStephan Herhut }
152