1 //===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a base class for a pass to serialize a gpu module 10 // into a binary blob that can be executed on a GPU. The binary blob is added 11 // as a string attribute to the gpu module. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "mlir/Dialect/GPU/Transforms/Passes.h" 16 #include "mlir/Pass/Pass.h" 17 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" 18 #include "mlir/Target/LLVMIR/Export.h" 19 #include "llvm/IR/LegacyPassManager.h" 20 #include "llvm/MC/TargetRegistry.h" 21 #include "llvm/Support/TargetSelect.h" 22 #include "llvm/Target/TargetMachine.h" 23 24 #include <string> 25 26 #define DEBUG_TYPE "serialize-to-blob" 27 28 using namespace mlir; 29 30 std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } 31 32 gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID) 33 : OperationPass<gpu::GPUModuleOp>(passID) {} 34 35 gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other) 36 : OperationPass<gpu::GPUModuleOp>(other) {} 37 38 Optional<std::string> 39 gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule, 40 llvm::TargetMachine &targetMachine) { 41 llvmModule.setDataLayout(targetMachine.createDataLayout()); 42 43 if (failed(optimizeLlvm(llvmModule, targetMachine))) 44 return llvm::None; 45 46 std::string targetISA; 47 llvm::raw_string_ostream stream(targetISA); 48 49 { // Drop pstream after this to prevent the ISA from being stuck buffering 50 llvm::buffer_ostream pstream(stream); 51 llvm::legacy::PassManager codegenPasses; 52 53 if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, 54 llvm::CGFT_AssemblyFile)) 55 return llvm::None; 56 57 codegenPasses.run(llvmModule); 58 } 59 return stream.str(); 60 } 61 62 void gpu::SerializeToBlobPass::runOnOperation() { 63 // Lower the module to an LLVM IR module using a separate context to enable 64 // multi-threaded processing. 65 llvm::LLVMContext llvmContext; 66 std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext); 67 if (!llvmModule) 68 return signalPassFailure(); 69 70 // Lower the LLVM IR module to target ISA. 71 std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine(); 72 if (!targetMachine) 73 return signalPassFailure(); 74 75 Optional<std::string> maybeTargetISA = 76 translateToISA(*llvmModule, *targetMachine); 77 78 if (!maybeTargetISA.has_value()) 79 return signalPassFailure(); 80 81 std::string targetISA = std::move(maybeTargetISA.value()); 82 83 LLVM_DEBUG({ 84 llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"; 85 llvm::dbgs() << targetISA << "\n"; 86 llvm::dbgs().flush(); 87 }); 88 89 // Serialize the target ISA. 90 std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA); 91 if (!blob) 92 return signalPassFailure(); 93 94 // Add the blob as module attribute. 95 auto attr = 96 StringAttr::get(&getContext(), StringRef(blob->data(), blob->size())); 97 getOperation()->setAttr(gpuBinaryAnnotation, attr); 98 } 99 100 LogicalResult 101 gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, 102 llvm::TargetMachine &targetMachine) { 103 // TODO: If serializeToCubin ends up defining optimizations, factor them 104 // into here from SerializeToHsaco 105 return success(); 106 } 107 108 void gpu::SerializeToBlobPass::getDependentDialects( 109 DialectRegistry ®istry) const { 110 registerLLVMDialectTranslation(registry); 111 OperationPass<gpu::GPUModuleOp>::getDependentDialects(registry); 112 } 113 114 std::unique_ptr<llvm::TargetMachine> 115 gpu::SerializeToBlobPass::createTargetMachine() { 116 Location loc = getOperation().getLoc(); 117 std::string error; 118 const llvm::Target *target = 119 llvm::TargetRegistry::lookupTarget(triple, error); 120 if (!target) { 121 emitError(loc, Twine("failed to lookup target: ") + error); 122 return {}; 123 } 124 llvm::TargetMachine *machine = 125 target->createTargetMachine(triple, chip, features, {}, {}); 126 if (!machine) { 127 emitError(loc, "failed to create target machine"); 128 return {}; 129 } 130 131 return std::unique_ptr<llvm::TargetMachine>{machine}; 132 } 133 134 std::unique_ptr<llvm::Module> 135 gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { 136 return translateModuleToLLVMIR(getOperation(), llvmContext, 137 "LLVMDialectModule"); 138 } 139