1 //===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a base class for a pass to serialize a gpu module
10 // into a binary blob that can be executed on a GPU. The binary blob is added
11 // as a string attribute to the gpu module.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "mlir/Dialect/GPU/Transforms/Passes.h"
16 #include "mlir/Pass/Pass.h"
17 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
18 #include "mlir/Target/LLVMIR/Export.h"
19 #include "llvm/IR/LegacyPassManager.h"
20 #include "llvm/MC/TargetRegistry.h"
21 #include "llvm/Support/TargetSelect.h"
22 #include "llvm/Target/TargetMachine.h"
23
24 #include <string>
25
26 #define DEBUG_TYPE "serialize-to-blob"
27
28 using namespace mlir;
29
getDefaultGpuBinaryAnnotation()30 std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
31
SerializeToBlobPass(TypeID passID)32 gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
33 : OperationPass<gpu::GPUModuleOp>(passID) {}
34
SerializeToBlobPass(const SerializeToBlobPass & other)35 gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
36 : OperationPass<gpu::GPUModuleOp>(other) {}
37
38 Optional<std::string>
translateToISA(llvm::Module & llvmModule,llvm::TargetMachine & targetMachine)39 gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
40 llvm::TargetMachine &targetMachine) {
41 llvmModule.setDataLayout(targetMachine.createDataLayout());
42
43 if (failed(optimizeLlvm(llvmModule, targetMachine)))
44 return llvm::None;
45
46 std::string targetISA;
47 llvm::raw_string_ostream stream(targetISA);
48
49 { // Drop pstream after this to prevent the ISA from being stuck buffering
50 llvm::buffer_ostream pstream(stream);
51 llvm::legacy::PassManager codegenPasses;
52
53 if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
54 llvm::CGFT_AssemblyFile))
55 return llvm::None;
56
57 codegenPasses.run(llvmModule);
58 }
59 return stream.str();
60 }
61
runOnOperation()62 void gpu::SerializeToBlobPass::runOnOperation() {
63 // Lower the module to an LLVM IR module using a separate context to enable
64 // multi-threaded processing.
65 llvm::LLVMContext llvmContext;
66 std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
67 if (!llvmModule)
68 return signalPassFailure();
69
70 // Lower the LLVM IR module to target ISA.
71 std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
72 if (!targetMachine)
73 return signalPassFailure();
74
75 Optional<std::string> maybeTargetISA =
76 translateToISA(*llvmModule, *targetMachine);
77
78 if (!maybeTargetISA.has_value())
79 return signalPassFailure();
80
81 std::string targetISA = std::move(maybeTargetISA.value());
82
83 LLVM_DEBUG({
84 llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
85 llvm::dbgs() << targetISA << "\n";
86 llvm::dbgs().flush();
87 });
88
89 // Serialize the target ISA.
90 std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
91 if (!blob)
92 return signalPassFailure();
93
94 // Add the blob as module attribute.
95 auto attr =
96 StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
97 getOperation()->setAttr(gpuBinaryAnnotation, attr);
98 }
99
100 LogicalResult
optimizeLlvm(llvm::Module & llvmModule,llvm::TargetMachine & targetMachine)101 gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
102 llvm::TargetMachine &targetMachine) {
103 // TODO: If serializeToCubin ends up defining optimizations, factor them
104 // into here from SerializeToHsaco
105 return success();
106 }
107
getDependentDialects(DialectRegistry & registry) const108 void gpu::SerializeToBlobPass::getDependentDialects(
109 DialectRegistry ®istry) const {
110 registerLLVMDialectTranslation(registry);
111 OperationPass<gpu::GPUModuleOp>::getDependentDialects(registry);
112 }
113
114 std::unique_ptr<llvm::TargetMachine>
createTargetMachine()115 gpu::SerializeToBlobPass::createTargetMachine() {
116 Location loc = getOperation().getLoc();
117 std::string error;
118 const llvm::Target *target =
119 llvm::TargetRegistry::lookupTarget(triple, error);
120 if (!target) {
121 emitError(loc, Twine("failed to lookup target: ") + error);
122 return {};
123 }
124 llvm::TargetMachine *machine =
125 target->createTargetMachine(triple, chip, features, {}, {});
126 if (!machine) {
127 emitError(loc, "failed to create target machine");
128 return {};
129 }
130
131 return std::unique_ptr<llvm::TargetMachine>{machine};
132 }
133
134 std::unique_ptr<llvm::Module>
translateToLLVMIR(llvm::LLVMContext & llvmContext)135 gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
136 return translateModuleToLLVMIR(getOperation(), llvmContext,
137 "LLVMDialectModule");
138 }
139