1 //===- GPUToSPIRV.cpp - GPU to SPIR-V Patterns ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements patterns to convert GPU dialect to SPIR-V dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRV.h"
14 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
15 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
16 #include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
17 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
18 #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
19 #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"
20 #include "mlir/IR/BuiltinOps.h"
21 #include "mlir/Transforms/DialectConversion.h"
22 
23 using namespace mlir;
24 
/// Prefix prepended to the original gpu.module name when creating the
/// spv.module, so the two symbols do not collide in the parent module.
static constexpr const char kSPIRVModule[] = "__spv__";
26 
27 namespace {
/// Pattern lowering GPU block/thread size/id to loading SPIR-V invocation
/// builtin variables.
///
/// `builtin` selects which SPIR-V builtin variable (e.g. WorkgroupId,
/// LocalInvocationId) backs the given `SourceOp`. The definition of
/// matchAndRewrite is provided out-of-line below.
template <typename SourceOp, spirv::BuiltIn builtin>
class LaunchConfigConversion : public OpConversionPattern<SourceOp> {
public:
  using OpConversionPattern<SourceOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
39 
/// Pattern lowering subgroup size/id to loading SPIR-V invocation
/// builtin variables.
///
/// Unlike LaunchConfigConversion, the builtins used here are scalar values,
/// so the loaded value replaces the op directly with no component extraction.
/// The definition of matchAndRewrite is provided out-of-line below.
template <typename SourceOp, spirv::BuiltIn builtin>
class SingleDimLaunchConfigConversion : public OpConversionPattern<SourceOp> {
public:
  using OpConversionPattern<SourceOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
51 
52 /// This is separate because in Vulkan workgroup size is exposed to shaders via
53 /// a constant with WorkgroupSize decoration. So here we cannot generate a
54 /// builtin variable; instead the information in the `spv.entry_point_abi`
55 /// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
56 class WorkGroupSizeConversion : public OpConversionPattern<gpu::BlockDimOp> {
57 public:
WorkGroupSizeConversion(TypeConverter & typeConverter,MLIRContext * context)58   WorkGroupSizeConversion(TypeConverter &typeConverter, MLIRContext *context)
59       : OpConversionPattern(typeConverter, context, /*benefit*/ 10) {}
60 
61   LogicalResult
62   matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
63                   ConversionPatternRewriter &rewriter) const override;
64 };
65 
66 /// Pattern to convert a kernel function in GPU dialect within a spv.module.
67 class GPUFuncOpConversion final : public OpConversionPattern<gpu::GPUFuncOp> {
68 public:
69   using OpConversionPattern<gpu::GPUFuncOp>::OpConversionPattern;
70 
71   LogicalResult
72   matchAndRewrite(gpu::GPUFuncOp funcOp, OpAdaptor adaptor,
73                   ConversionPatternRewriter &rewriter) const override;
74 
75 private:
76   SmallVector<int32_t, 3> workGroupSizeAsInt32;
77 };
78 
/// Pattern to convert a gpu.module to a spv.module.
///
/// The addressing and memory models for the resulting spv.module are derived
/// from the `spv.target_env` attribute; see the out-of-line matchAndRewrite.
class GPUModuleConversion final : public OpConversionPattern<gpu::GPUModuleOp> {
public:
  using OpConversionPattern<gpu::GPUModuleOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(gpu::GPUModuleOp moduleOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
88 
89 class GPUModuleEndConversion final
90     : public OpConversionPattern<gpu::ModuleEndOp> {
91 public:
92   using OpConversionPattern::OpConversionPattern;
93 
94   LogicalResult
matchAndRewrite(gpu::ModuleEndOp endOp,OpAdaptor adaptor,ConversionPatternRewriter & rewriter) const95   matchAndRewrite(gpu::ModuleEndOp endOp, OpAdaptor adaptor,
96                   ConversionPatternRewriter &rewriter) const override {
97     rewriter.eraseOp(endOp);
98     return success();
99   }
100 };
101 
/// Pattern to convert a gpu.return into a SPIR-V return.
// TODO: This can go to DRR when GPU return has operands.
class GPUReturnOpConversion final : public OpConversionPattern<gpu::ReturnOp> {
public:
  using OpConversionPattern<gpu::ReturnOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp returnOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
112 
/// Pattern to convert a gpu.barrier op into a spv.ControlBarrier op.
///
/// Scope and memory-semantics operands are fixed to workgroup-level
/// synchronization; see the out-of-line matchAndRewrite.
class GPUBarrierConversion final : public OpConversionPattern<gpu::BarrierOp> {
public:
  using OpConversionPattern::OpConversionPattern;

  LogicalResult
  matchAndRewrite(gpu::BarrierOp barrierOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
122 
123 } // namespace
124 
125 //===----------------------------------------------------------------------===//
126 // Builtins.
127 //===----------------------------------------------------------------------===//
128 
129 template <typename SourceOp, spirv::BuiltIn builtin>
matchAndRewrite(SourceOp op,typename SourceOp::Adaptor adaptor,ConversionPatternRewriter & rewriter) const130 LogicalResult LaunchConfigConversion<SourceOp, builtin>::matchAndRewrite(
131     SourceOp op, typename SourceOp::Adaptor adaptor,
132     ConversionPatternRewriter &rewriter) const {
133   auto *typeConverter = this->template getTypeConverter<SPIRVTypeConverter>();
134   auto indexType = typeConverter->getIndexType();
135 
136   // SPIR-V invocation builtin variables are a vector of type <3xi32>
137   auto spirvBuiltin =
138       spirv::getBuiltinVariableValue(op, builtin, indexType, rewriter);
139   rewriter.replaceOpWithNewOp<spirv::CompositeExtractOp>(
140       op, indexType, spirvBuiltin,
141       rewriter.getI32ArrayAttr({static_cast<int32_t>(op.dimension())}));
142   return success();
143 }
144 
145 template <typename SourceOp, spirv::BuiltIn builtin>
146 LogicalResult
matchAndRewrite(SourceOp op,typename SourceOp::Adaptor adaptor,ConversionPatternRewriter & rewriter) const147 SingleDimLaunchConfigConversion<SourceOp, builtin>::matchAndRewrite(
148     SourceOp op, typename SourceOp::Adaptor adaptor,
149     ConversionPatternRewriter &rewriter) const {
150   auto *typeConverter = this->template getTypeConverter<SPIRVTypeConverter>();
151   auto indexType = typeConverter->getIndexType();
152 
153   auto spirvBuiltin =
154       spirv::getBuiltinVariableValue(op, builtin, indexType, rewriter);
155   rewriter.replaceOp(op, spirvBuiltin);
156   return success();
157 }
158 
matchAndRewrite(gpu::BlockDimOp op,OpAdaptor adaptor,ConversionPatternRewriter & rewriter) const159 LogicalResult WorkGroupSizeConversion::matchAndRewrite(
160     gpu::BlockDimOp op, OpAdaptor adaptor,
161     ConversionPatternRewriter &rewriter) const {
162   auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
163   if (!workGroupSizeAttr)
164     return failure();
165 
166   auto val = workGroupSizeAttr
167                  .getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
168   auto convertedType =
169       getTypeConverter()->convertType(op.getResult().getType());
170   if (!convertedType)
171     return failure();
172   rewriter.replaceOpWithNewOp<spirv::ConstantOp>(
173       op, convertedType, IntegerAttr::get(convertedType, val));
174   return success();
175 }
176 
177 //===----------------------------------------------------------------------===//
178 // GPUFuncOp
179 //===----------------------------------------------------------------------===//
180 
181 // Legalizes a GPU function as an entry SPIR-V function.
182 static spirv::FuncOp
lowerAsEntryFunction(gpu::GPUFuncOp funcOp,TypeConverter & typeConverter,ConversionPatternRewriter & rewriter,spirv::EntryPointABIAttr entryPointInfo,ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo)183 lowerAsEntryFunction(gpu::GPUFuncOp funcOp, TypeConverter &typeConverter,
184                      ConversionPatternRewriter &rewriter,
185                      spirv::EntryPointABIAttr entryPointInfo,
186                      ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
187   auto fnType = funcOp.getFunctionType();
188   if (fnType.getNumResults()) {
189     funcOp.emitError("SPIR-V lowering only supports entry functions"
190                      "with no return values right now");
191     return nullptr;
192   }
193   if (!argABIInfo.empty() && fnType.getNumInputs() != argABIInfo.size()) {
194     funcOp.emitError(
195         "lowering as entry functions requires ABI info for all arguments "
196         "or none of them");
197     return nullptr;
198   }
199   // Update the signature to valid SPIR-V types and add the ABI
200   // attributes. These will be "materialized" by using the
201   // LowerABIAttributesPass.
202   TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs());
203   {
204     for (const auto &argType :
205          enumerate(funcOp.getFunctionType().getInputs())) {
206       auto convertedType = typeConverter.convertType(argType.value());
207       signatureConverter.addInputs(argType.index(), convertedType);
208     }
209   }
210   auto newFuncOp = rewriter.create<spirv::FuncOp>(
211       funcOp.getLoc(), funcOp.getName(),
212       rewriter.getFunctionType(signatureConverter.getConvertedTypes(),
213                                llvm::None));
214   for (const auto &namedAttr : funcOp->getAttrs()) {
215     if (namedAttr.getName() == FunctionOpInterface::getTypeAttrName() ||
216         namedAttr.getName() == SymbolTable::getSymbolAttrName())
217       continue;
218     newFuncOp->setAttr(namedAttr.getName(), namedAttr.getValue());
219   }
220 
221   rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
222                               newFuncOp.end());
223   if (failed(rewriter.convertRegionTypes(&newFuncOp.getBody(), typeConverter,
224                                          &signatureConverter)))
225     return nullptr;
226   rewriter.eraseOp(funcOp);
227 
228   // Set the attributes for argument and the function.
229   StringRef argABIAttrName = spirv::getInterfaceVarABIAttrName();
230   for (auto argIndex : llvm::seq<unsigned>(0, argABIInfo.size())) {
231     newFuncOp.setArgAttr(argIndex, argABIAttrName, argABIInfo[argIndex]);
232   }
233   newFuncOp->setAttr(spirv::getEntryPointABIAttrName(), entryPointInfo);
234 
235   return newFuncOp;
236 }
237 
238 /// Populates `argABI` with spv.interface_var_abi attributes for lowering
239 /// gpu.func to spv.func if no arguments have the attributes set
240 /// already. Returns failure if any argument has the ABI attribute set already.
241 static LogicalResult
getDefaultABIAttrs(MLIRContext * context,gpu::GPUFuncOp funcOp,SmallVectorImpl<spirv::InterfaceVarABIAttr> & argABI)242 getDefaultABIAttrs(MLIRContext *context, gpu::GPUFuncOp funcOp,
243                    SmallVectorImpl<spirv::InterfaceVarABIAttr> &argABI) {
244   spirv::TargetEnvAttr targetEnv = spirv::lookupTargetEnvOrDefault(funcOp);
245   if (!spirv::needsInterfaceVarABIAttrs(targetEnv))
246     return success();
247 
248   for (auto argIndex : llvm::seq<unsigned>(0, funcOp.getNumArguments())) {
249     if (funcOp.getArgAttrOfType<spirv::InterfaceVarABIAttr>(
250             argIndex, spirv::getInterfaceVarABIAttrName()))
251       return failure();
252     // Vulkan's interface variable requirements needs scalars to be wrapped in a
253     // struct. The struct held in storage buffer.
254     Optional<spirv::StorageClass> sc;
255     if (funcOp.getArgument(argIndex).getType().isIntOrIndexOrFloat())
256       sc = spirv::StorageClass::StorageBuffer;
257     argABI.push_back(spirv::getInterfaceVarABIAttr(0, argIndex, sc, context));
258   }
259   return success();
260 }
261 
matchAndRewrite(gpu::GPUFuncOp funcOp,OpAdaptor adaptor,ConversionPatternRewriter & rewriter) const262 LogicalResult GPUFuncOpConversion::matchAndRewrite(
263     gpu::GPUFuncOp funcOp, OpAdaptor adaptor,
264     ConversionPatternRewriter &rewriter) const {
265   if (!gpu::GPUDialect::isKernel(funcOp))
266     return failure();
267 
268   SmallVector<spirv::InterfaceVarABIAttr, 4> argABI;
269   if (failed(getDefaultABIAttrs(rewriter.getContext(), funcOp, argABI))) {
270     argABI.clear();
271     for (auto argIndex : llvm::seq<unsigned>(0, funcOp.getNumArguments())) {
272       // If the ABI is already specified, use it.
273       auto abiAttr = funcOp.getArgAttrOfType<spirv::InterfaceVarABIAttr>(
274           argIndex, spirv::getInterfaceVarABIAttrName());
275       if (!abiAttr) {
276         funcOp.emitRemark(
277             "match failure: missing 'spv.interface_var_abi' attribute at "
278             "argument ")
279             << argIndex;
280         return failure();
281       }
282       argABI.push_back(abiAttr);
283     }
284   }
285 
286   auto entryPointAttr = spirv::lookupEntryPointABI(funcOp);
287   if (!entryPointAttr) {
288     funcOp.emitRemark("match failure: missing 'spv.entry_point_abi' attribute");
289     return failure();
290   }
291   spirv::FuncOp newFuncOp = lowerAsEntryFunction(
292       funcOp, *getTypeConverter(), rewriter, entryPointAttr, argABI);
293   if (!newFuncOp)
294     return failure();
295   newFuncOp->removeAttr(
296       rewriter.getStringAttr(gpu::GPUDialect::getKernelFuncAttrName()));
297   return success();
298 }
299 
300 //===----------------------------------------------------------------------===//
301 // ModuleOp with gpu.module.
302 //===----------------------------------------------------------------------===//
303 
matchAndRewrite(gpu::GPUModuleOp moduleOp,OpAdaptor adaptor,ConversionPatternRewriter & rewriter) const304 LogicalResult GPUModuleConversion::matchAndRewrite(
305     gpu::GPUModuleOp moduleOp, OpAdaptor adaptor,
306     ConversionPatternRewriter &rewriter) const {
307   spirv::TargetEnvAttr targetEnv = spirv::lookupTargetEnvOrDefault(moduleOp);
308   spirv::AddressingModel addressingModel = spirv::getAddressingModel(targetEnv);
309   FailureOr<spirv::MemoryModel> memoryModel = spirv::getMemoryModel(targetEnv);
310   if (failed(memoryModel))
311     return moduleOp.emitRemark("match failure: could not selected memory model "
312                                "based on 'spv.target_env'");
313 
314   // Add a keyword to the module name to avoid symbolic conflict.
315   std::string spvModuleName = (kSPIRVModule + moduleOp.getName()).str();
316   auto spvModule = rewriter.create<spirv::ModuleOp>(
317       moduleOp.getLoc(), addressingModel, *memoryModel, llvm::None,
318       StringRef(spvModuleName));
319 
320   // Move the region from the module op into the SPIR-V module.
321   Region &spvModuleRegion = spvModule.getRegion();
322   rewriter.inlineRegionBefore(moduleOp.body(), spvModuleRegion,
323                               spvModuleRegion.begin());
324   // The spv.module build method adds a block. Remove that.
325   rewriter.eraseBlock(&spvModuleRegion.back());
326   rewriter.eraseOp(moduleOp);
327   return success();
328 }
329 
330 //===----------------------------------------------------------------------===//
331 // GPU return inside kernel functions to SPIR-V return.
332 //===----------------------------------------------------------------------===//
333 
matchAndRewrite(gpu::ReturnOp returnOp,OpAdaptor adaptor,ConversionPatternRewriter & rewriter) const334 LogicalResult GPUReturnOpConversion::matchAndRewrite(
335     gpu::ReturnOp returnOp, OpAdaptor adaptor,
336     ConversionPatternRewriter &rewriter) const {
337   if (!adaptor.getOperands().empty())
338     return failure();
339 
340   rewriter.replaceOpWithNewOp<spirv::ReturnOp>(returnOp);
341   return success();
342 }
343 
344 //===----------------------------------------------------------------------===//
345 // Barrier.
346 //===----------------------------------------------------------------------===//
347 
matchAndRewrite(gpu::BarrierOp barrierOp,OpAdaptor adaptor,ConversionPatternRewriter & rewriter) const348 LogicalResult GPUBarrierConversion::matchAndRewrite(
349     gpu::BarrierOp barrierOp, OpAdaptor adaptor,
350     ConversionPatternRewriter &rewriter) const {
351   MLIRContext *context = getContext();
352   // Both execution and memory scope should be workgroup.
353   auto scope = spirv::ScopeAttr::get(context, spirv::Scope::Workgroup);
354   // Require acquire and release memory semantics for workgroup memory.
355   auto memorySemantics = spirv::MemorySemanticsAttr::get(
356       context, spirv::MemorySemantics::WorkgroupMemory |
357                    spirv::MemorySemantics::AcquireRelease);
358   rewriter.replaceOpWithNewOp<spirv::ControlBarrierOp>(barrierOp, scope, scope,
359                                                        memorySemantics);
360   return success();
361 }
362 
363 //===----------------------------------------------------------------------===//
364 // GPU To SPIRV Patterns.
365 //===----------------------------------------------------------------------===//
366 
populateGPUToSPIRVPatterns(SPIRVTypeConverter & typeConverter,RewritePatternSet & patterns)367 void mlir::populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
368                                       RewritePatternSet &patterns) {
369   patterns.add<
370       GPUBarrierConversion, GPUFuncOpConversion, GPUModuleConversion,
371       GPUModuleEndConversion, GPUReturnOpConversion,
372       LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
373       LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
374       LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
375       LaunchConfigConversion<gpu::ThreadIdOp,
376                              spirv::BuiltIn::LocalInvocationId>,
377       LaunchConfigConversion<gpu::GlobalIdOp,
378                              spirv::BuiltIn::GlobalInvocationId>,
379       SingleDimLaunchConfigConversion<gpu::SubgroupIdOp,
380                                       spirv::BuiltIn::SubgroupId>,
381       SingleDimLaunchConfigConversion<gpu::NumSubgroupsOp,
382                                       spirv::BuiltIn::NumSubgroups>,
383       SingleDimLaunchConfigConversion<gpu::SubgroupSizeOp,
384                                       spirv::BuiltIn::SubgroupSize>,
385       WorkGroupSizeConversion>(typeConverter, patterns.getContext());
386 }
387