1 //===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a base class for a pass to serialize a gpu module
10 // into a binary blob that can be executed on a GPU. The binary blob is added
11 // as a string attribute to the gpu module.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "mlir/Dialect/GPU/Passes.h"
16 #include "mlir/Pass/Pass.h"
17 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
18 #include "mlir/Target/LLVMIR/Export.h"
19 #include "llvm/IR/LegacyPassManager.h"
20 #include "llvm/MC/TargetRegistry.h"
21 #include "llvm/Support/TargetSelect.h"
22 #include "llvm/Target/TargetMachine.h"
23 
24 using namespace mlir;
25 
26 std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
27 
28 gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
29     : OperationPass<gpu::GPUModuleOp>(passID) {}
30 
31 gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
32     : OperationPass<gpu::GPUModuleOp>(other) {}
33 
34 Optional<std::string>
35 gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
36                                          llvm::TargetMachine &targetMachine) {
37   llvmModule.setDataLayout(targetMachine.createDataLayout());
38 
39   if (failed(optimizeLlvm(llvmModule, targetMachine)))
40     return llvm::None;
41 
42   std::string targetISA;
43   llvm::raw_string_ostream stream(targetISA);
44 
45   { // Drop pstream after this to prevent the ISA from being stuck buffering
46     llvm::buffer_ostream pstream(stream);
47     llvm::legacy::PassManager codegenPasses;
48 
49     if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
50                                           llvm::CGFT_AssemblyFile))
51       return llvm::None;
52 
53     codegenPasses.run(llvmModule);
54   }
55   return stream.str();
56 }
57 
58 void gpu::SerializeToBlobPass::runOnOperation() {
59   // Lower the module to an LLVM IR module using a separate context to enable
60   // multi-threaded processing.
61   llvm::LLVMContext llvmContext;
62   std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
63   if (!llvmModule)
64     return signalPassFailure();
65 
66   // Lower the LLVM IR module to target ISA.
67   std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
68   if (!targetMachine)
69     return signalPassFailure();
70 
71   Optional<std::string> maybeTargetISA =
72       translateToISA(*llvmModule, *targetMachine);
73 
74   if (!maybeTargetISA.hasValue())
75     return signalPassFailure();
76 
77   std::string targetISA = std::move(maybeTargetISA.getValue());
78 
79   // Serialize the target ISA.
80   std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
81   if (!blob)
82     return signalPassFailure();
83 
84   // Add the blob as module attribute.
85   auto attr =
86       StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
87   getOperation()->setAttr(gpuBinaryAnnotation, attr);
88 }
89 
90 LogicalResult
91 gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
92                                        llvm::TargetMachine &targetMachine) {
93   // TODO: If serializeToCubin ends up defining optimizations, factor them
94   // into here from SerializeToHsaco
95   return success();
96 }
97 
98 void gpu::SerializeToBlobPass::getDependentDialects(
99     DialectRegistry &registry) const {
100   registerLLVMDialectTranslation(registry);
101   OperationPass<gpu::GPUModuleOp>::getDependentDialects(registry);
102 }
103 
104 std::unique_ptr<llvm::TargetMachine>
105 gpu::SerializeToBlobPass::createTargetMachine() {
106   Location loc = getOperation().getLoc();
107   std::string error;
108   const llvm::Target *target =
109       llvm::TargetRegistry::lookupTarget(triple, error);
110   if (!target) {
111     emitError(loc, Twine("failed to lookup target: ") + error);
112     return {};
113   }
114   llvm::TargetMachine *machine =
115       target->createTargetMachine(triple, chip, features, {}, {});
116   if (!machine) {
117     emitError(loc, "failed to create target machine");
118     return {};
119   }
120 
121   return std::unique_ptr<llvm::TargetMachine>{machine};
122 }
123 
124 std::unique_ptr<llvm::Module>
125 gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
126   return translateModuleToLLVMIR(getOperation(), llvmContext,
127                                  "LLVMDialectModule");
128 }
129