1 //===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a base class for a pass to serialize a gpu module 10 // into a binary blob that can be executed on a GPU. The binary blob is added 11 // as a string attribute to the gpu module. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "mlir/Dialect/GPU/Passes.h" 16 #include "mlir/Pass/Pass.h" 17 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" 18 #include "mlir/Target/LLVMIR/Export.h" 19 #include "llvm/IR/LegacyPassManager.h" 20 #include "llvm/MC/TargetRegistry.h" 21 #include "llvm/Support/TargetSelect.h" 22 #include "llvm/Target/TargetMachine.h" 23 24 using namespace mlir; 25 26 std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } 27 28 gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID) 29 : OperationPass<gpu::GPUModuleOp>(passID) {} 30 31 gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other) 32 : OperationPass<gpu::GPUModuleOp>(other) {} 33 34 Optional<std::string> 35 gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule, 36 llvm::TargetMachine &targetMachine) { 37 llvmModule.setDataLayout(targetMachine.createDataLayout()); 38 39 if (failed(optimizeLlvm(llvmModule, targetMachine))) 40 return llvm::None; 41 42 std::string targetISA; 43 llvm::raw_string_ostream stream(targetISA); 44 45 { // Drop pstream after this to prevent the ISA from being stuck buffering 46 llvm::buffer_ostream pstream(stream); 47 llvm::legacy::PassManager codegenPasses; 48 49 if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, 50 llvm::CGFT_AssemblyFile)) 51 return llvm::None; 52 53 codegenPasses.run(llvmModule); 54 } 55 return stream.str(); 56 } 57 58 void gpu::SerializeToBlobPass::runOnOperation() { 59 // Lower the module to an LLVM IR module using a separate context to enable 60 // multi-threaded processing. 61 llvm::LLVMContext llvmContext; 62 std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext); 63 if (!llvmModule) 64 return signalPassFailure(); 65 66 // Lower the LLVM IR module to target ISA. 67 std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine(); 68 if (!targetMachine) 69 return signalPassFailure(); 70 71 Optional<std::string> maybeTargetISA = 72 translateToISA(*llvmModule, *targetMachine); 73 74 if (!maybeTargetISA.hasValue()) 75 return signalPassFailure(); 76 77 std::string targetISA = std::move(maybeTargetISA.getValue()); 78 79 // Serialize the target ISA. 80 std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA); 81 if (!blob) 82 return signalPassFailure(); 83 84 // Add the blob as module attribute. 85 auto attr = 86 StringAttr::get(&getContext(), StringRef(blob->data(), blob->size())); 87 getOperation()->setAttr(gpuBinaryAnnotation, attr); 88 } 89 90 LogicalResult 91 gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, 92 llvm::TargetMachine &targetMachine) { 93 // TODO: If serializeToCubin ends up defining optimizations, factor them 94 // into here from SerializeToHsaco 95 return success(); 96 } 97 98 void gpu::SerializeToBlobPass::getDependentDialects( 99 DialectRegistry ®istry) const { 100 registerLLVMDialectTranslation(registry); 101 OperationPass<gpu::GPUModuleOp>::getDependentDialects(registry); 102 } 103 104 std::unique_ptr<llvm::TargetMachine> 105 gpu::SerializeToBlobPass::createTargetMachine() { 106 Location loc = getOperation().getLoc(); 107 std::string error; 108 const llvm::Target *target = 109 llvm::TargetRegistry::lookupTarget(triple, error); 110 if (!target) { 111 emitError(loc, Twine("failed to lookup target: ") + error); 112 return {}; 113 } 114 llvm::TargetMachine *machine = 115 target->createTargetMachine(triple, chip, features, {}, {}); 116 if (!machine) { 117 emitError(loc, "failed to create target machine"); 118 return {}; 119 } 120 121 return std::unique_ptr<llvm::TargetMachine>{machine}; 122 } 123 124 std::unique_ptr<llvm::Module> 125 gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { 126 return translateModuleToLLVMIR(getOperation(), llvmContext, 127 "LLVMDialectModule"); 128 } 129