//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/ParallelLoopMapper.h"

#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/Pass/Pass.h"

using namespace mlir;
using namespace mlir::gpu;
using namespace mlir::loop;

#include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
namespace mlir {

#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
namespace gpu {

StringRef getMappingAttrName() { return "mapping"; }

ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
                                                     AffineMap map,
                                                     AffineMap bound) {
  MLIRContext *context = map.getContext();
  OpBuilder builder(context);
  return ParallelLoopDimMapping::get(
      builder.getI64IntegerAttr(static_cast<int32_t>(processor)),
      AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
}

LogicalResult setMappingAttr(loop::ParallelOp ploopOp,
                             ArrayRef<ParallelLoopDimMapping> mapping) {
  // Verify that each processor is mapped to at most once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = getProcessor(dimAttr);
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    // Record the processor so duplicates in later dimensions are rejected.
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp.setAttr(getMappingAttrName(),
                  ArrayAttr::get(mappingAsAttrs, ploopOp.getContext()));
  return success();
}
} // namespace gpu
} // namespace mlir

namespace {

enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };

static constexpr int kNumHardwareIds = 3;

} // namespace

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns the x,
/// y and z hardware ids to the first three dimensions and maps all remaining
/// dimensions to sequential.
/// TODO(ravishankarm/herhut): Map the innermost distributed loop to x, the
/// next innermost to y, and the next innermost to z.
static gpu::Processor getHardwareIdForMapping(MappingLevel level,
                                              int dimension) {
  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp.getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) && parallelOp.getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMapping, 4> attrs;
  attrs.reserve(parallelOp.getNumInductionVars());
  for (int i = 0, e = parallelOp.getNumInductionVars(); i < e; ++i) {
    attrs.push_back(getParallelLoopDimMappingAttr(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Parallel loop operations are immediately nested, so do not use
  // walk but just iterate over the operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}

void mlir::greedilyMapParallelLoopsToGPU(Region &region) {
  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
}