17a7eacc7SStephan Herhut //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =// 27a7eacc7SStephan Herhut // 37a7eacc7SStephan Herhut // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 47a7eacc7SStephan Herhut // See https://llvm.org/LICENSE.txt for license information. 57a7eacc7SStephan Herhut // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 67a7eacc7SStephan Herhut // 77a7eacc7SStephan Herhut //===----------------------------------------------------------------------===// 87a7eacc7SStephan Herhut // 97a7eacc7SStephan Herhut // This file implements utilities to generate mappings for parallel loops to 107a7eacc7SStephan Herhut // GPU devices. 117a7eacc7SStephan Herhut // 127a7eacc7SStephan Herhut //===----------------------------------------------------------------------===// 137a7eacc7SStephan Herhut 147a7eacc7SStephan Herhut #include "mlir/Dialect/GPU/ParallelLoopMapper.h" 157a7eacc7SStephan Herhut 16*bcf3d524SChristian Sigg #include "PassDetail.h" 177a7eacc7SStephan Herhut #include "mlir/Dialect/GPU/GPUDialect.h" 187a7eacc7SStephan Herhut #include "mlir/Dialect/GPU/Passes.h" 19c25b20c0SAlex Zinenko #include "mlir/Dialect/SCF/SCF.h" 207a7eacc7SStephan Herhut #include "mlir/IR/AffineMap.h" 217a7eacc7SStephan Herhut #include "mlir/Pass/Pass.h" 227a7eacc7SStephan Herhut 237d1ed69cSFederico Lebrón #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc" 2446bb6613SMaheshRavishankar #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc" 25*bcf3d524SChristian Sigg 2646bb6613SMaheshRavishankar namespace mlir { 2746bb6613SMaheshRavishankar 28*bcf3d524SChristian Sigg using scf::ParallelOp; 2946bb6613SMaheshRavishankar 30*bcf3d524SChristian Sigg StringRef gpu::getMappingAttrName() { return "mapping"; } 31*bcf3d524SChristian Sigg 32*bcf3d524SChristian Sigg gpu::ParallelLoopDimMapping 33*bcf3d524SChristian Sigg gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map, 3446bb6613SMaheshRavishankar AffineMap bound) { 3546bb6613SMaheshRavishankar MLIRContext *context = map.getContext(); 3646bb6613SMaheshRavishankar OpBuilder builder(context); 3746bb6613SMaheshRavishankar return ParallelLoopDimMapping::get( 38fee90542SVladislav Vinogradov ProcessorAttr::get(builder.getContext(), processor), 3946bb6613SMaheshRavishankar AffineMapAttr::get(map), AffineMapAttr::get(bound), context); 4046bb6613SMaheshRavishankar } 4146bb6613SMaheshRavishankar 42*bcf3d524SChristian Sigg LogicalResult gpu::setMappingAttr(ParallelOp ploopOp, 4346bb6613SMaheshRavishankar ArrayRef<ParallelLoopDimMapping> mapping) { 4446bb6613SMaheshRavishankar // Verify that each processor is mapped to only once. 4546bb6613SMaheshRavishankar llvm::DenseSet<gpu::Processor> specifiedMappings; 4646bb6613SMaheshRavishankar for (auto dimAttr : mapping) { 4746bb6613SMaheshRavishankar gpu::Processor processor = getProcessor(dimAttr); 4846bb6613SMaheshRavishankar if (processor != gpu::Processor::Sequential && 4946bb6613SMaheshRavishankar specifiedMappings.count(processor)) 5046bb6613SMaheshRavishankar return ploopOp.emitError( 5146bb6613SMaheshRavishankar "invalid mapping multiple loops to same processor"); 5246bb6613SMaheshRavishankar } 5346bb6613SMaheshRavishankar ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size()); 541ffc1aaaSChristian Sigg ploopOp->setAttr(getMappingAttrName(), 55c2c83e97STres Popp ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs)); 5646bb6613SMaheshRavishankar return success(); 5746bb6613SMaheshRavishankar } 5846bb6613SMaheshRavishankar 59*bcf3d524SChristian Sigg namespace gpu { 607a7eacc7SStephan Herhut namespace { 617a7eacc7SStephan Herhut enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 }; 62*bcf3d524SChristian Sigg } // namespace 637a7eacc7SStephan Herhut 647a7eacc7SStephan Herhut static constexpr int kNumHardwareIds = 3; 657a7eacc7SStephan Herhut 667a7eacc7SStephan Herhut /// Bounded increment on MappingLevel. Increments to the next 677a7eacc7SStephan Herhut /// level unless Sequential was already reached. 68*bcf3d524SChristian Sigg static MappingLevel &operator++(MappingLevel &mappingLevel) { 697a7eacc7SStephan Herhut if (mappingLevel < Sequential) { 707a7eacc7SStephan Herhut mappingLevel = static_cast<MappingLevel>(mappingLevel + 1); 717a7eacc7SStephan Herhut } 727a7eacc7SStephan Herhut return mappingLevel; 737a7eacc7SStephan Herhut } 747a7eacc7SStephan Herhut 757a7eacc7SStephan Herhut /// Computed the hardware id to use for a given mapping level. Will 767a7eacc7SStephan Herhut /// assign x,y and z hardware ids for the first 3 dimensions and use 777a7eacc7SStephan Herhut /// sequential after. 789db53a18SRiver Riddle /// TODO: Make this use x for the inner-most loop that is 7946bb6613SMaheshRavishankar /// distributed to map to x, the next innermost to y and the next innermost to 8046bb6613SMaheshRavishankar /// z. 81*bcf3d524SChristian Sigg static Processor getHardwareIdForMapping(MappingLevel level, int dimension) { 8246bb6613SMaheshRavishankar 837a7eacc7SStephan Herhut if (dimension >= kNumHardwareIds || level == Sequential) 8446bb6613SMaheshRavishankar return Processor::Sequential; 8546bb6613SMaheshRavishankar switch (level) { 8646bb6613SMaheshRavishankar case MapGrid: 8746bb6613SMaheshRavishankar switch (dimension) { 8846bb6613SMaheshRavishankar case 0: 8946bb6613SMaheshRavishankar return Processor::BlockX; 9046bb6613SMaheshRavishankar case 1: 9146bb6613SMaheshRavishankar return Processor::BlockY; 9246bb6613SMaheshRavishankar case 2: 9346bb6613SMaheshRavishankar return Processor::BlockZ; 9446bb6613SMaheshRavishankar default: 9546bb6613SMaheshRavishankar return Processor::Sequential; 9646bb6613SMaheshRavishankar } 9746bb6613SMaheshRavishankar break; 9846bb6613SMaheshRavishankar case MapBlock: 9946bb6613SMaheshRavishankar switch (dimension) { 10046bb6613SMaheshRavishankar case 0: 10146bb6613SMaheshRavishankar return Processor::ThreadX; 10246bb6613SMaheshRavishankar case 1: 10346bb6613SMaheshRavishankar return Processor::ThreadY; 10446bb6613SMaheshRavishankar case 2: 10546bb6613SMaheshRavishankar return Processor::ThreadZ; 10646bb6613SMaheshRavishankar default: 10746bb6613SMaheshRavishankar return Processor::Sequential; 10846bb6613SMaheshRavishankar } 10946bb6613SMaheshRavishankar default:; 11046bb6613SMaheshRavishankar } 11146bb6613SMaheshRavishankar return Processor::Sequential; 1127a7eacc7SStephan Herhut } 1137a7eacc7SStephan Herhut 1147a7eacc7SStephan Herhut /// Add mapping information to the given parallel loop. Do not add 1157a7eacc7SStephan Herhut /// mapping information if the loop already has it. Also, don't 1167a7eacc7SStephan Herhut /// start a mapping at a nested loop. 1177a7eacc7SStephan Herhut static void mapParallelOp(ParallelOp parallelOp, 1187a7eacc7SStephan Herhut MappingLevel mappingLevel = MapGrid) { 1197a7eacc7SStephan Herhut // Do not try to add a mapping to already mapped loops or nested loops. 1201ffc1aaaSChristian Sigg if (parallelOp->getAttr(getMappingAttrName()) || 1210bf4a82aSChristian Sigg ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>())) 1227a7eacc7SStephan Herhut return; 1237a7eacc7SStephan Herhut 1247a7eacc7SStephan Herhut MLIRContext *ctx = parallelOp.getContext(); 1257a7eacc7SStephan Herhut Builder b(ctx); 12646bb6613SMaheshRavishankar SmallVector<ParallelLoopDimMapping, 4> attrs; 127c2d03e4eSAlexander Belyaev attrs.reserve(parallelOp.getNumLoops()); 128c2d03e4eSAlexander Belyaev for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) { 12946bb6613SMaheshRavishankar attrs.push_back(getParallelLoopDimMappingAttr( 13046bb6613SMaheshRavishankar getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(), 13146bb6613SMaheshRavishankar b.getDimIdentityMap())); 1327a7eacc7SStephan Herhut } 133e21adfa3SRiver Riddle (void)setMappingAttr(parallelOp, attrs); 1347a7eacc7SStephan Herhut ++mappingLevel; 1357a7eacc7SStephan Herhut // Parallel loop operations are immediately nested, so do not use 1367a7eacc7SStephan Herhut // walk but just iterate over the operations. 1377a7eacc7SStephan Herhut for (Operation &op : *parallelOp.getBody()) { 1387a7eacc7SStephan Herhut if (ParallelOp nested = dyn_cast<ParallelOp>(op)) 1397a7eacc7SStephan Herhut mapParallelOp(nested, mappingLevel); 1407a7eacc7SStephan Herhut } 1417a7eacc7SStephan Herhut } 1427a7eacc7SStephan Herhut 143*bcf3d524SChristian Sigg namespace { 144*bcf3d524SChristian Sigg struct GpuMapParallelLoopsPass 145*bcf3d524SChristian Sigg : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> { 146*bcf3d524SChristian Sigg void runOnOperation() override { 147*bcf3d524SChristian Sigg for (Region ®ion : getOperation()->getRegions()) { 1487a7eacc7SStephan Herhut region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); 1497a7eacc7SStephan Herhut } 150*bcf3d524SChristian Sigg } 151*bcf3d524SChristian Sigg }; 152*bcf3d524SChristian Sigg 153*bcf3d524SChristian Sigg } // namespace 154*bcf3d524SChristian Sigg } // namespace gpu 155*bcf3d524SChristian Sigg } // namespace mlir 156*bcf3d524SChristian Sigg 157*bcf3d524SChristian Sigg std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>> 158*bcf3d524SChristian Sigg mlir::createGpuMapParallelLoopsPass() { 159*bcf3d524SChristian Sigg return std::make_unique<gpu::GpuMapParallelLoopsPass>(); 160*bcf3d524SChristian Sigg } 161