1 //===- AMDGPUOpenCLEnqueuedBlockLowering.cpp - Lower enqueued block -------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // \file 11 // \brief This post-linking pass replaces the function pointer of enqueued 12 // block kernel with a global variable (runtime handle) and adds 13 // "runtime-handle" attribute to the enqueued block kernel. 14 // 15 // In LLVM CodeGen the runtime-handle metadata will be translated to 16 // RuntimeHandle metadata in code object. Runtime allocates a global buffer 17 // for each kernel with RuntimeHandle metadata and saves the kernel address 18 // required for the AQL packet into the buffer. __enqueue_kernel function 19 // in device library knows that the invoke function pointer in the block 20 // literal is actually a runtime handle and loads the kernel address from it 21 // and puts it into the AQL packet for dispatching. 22 // 23 // This cannot be done in FE since FE cannot create a unique global variable 24 // with external linkage across LLVM modules. The global variable with internal 25 // linkage does not work since optimization passes will try to replace loads 26 // of the global variable with its initialization value. 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "AMDGPU.h" 31 #include "llvm/ADT/StringRef.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/Module.h" 34 #include "llvm/Pass.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 #define DEBUG_TYPE "amdgpu-lower-enqueued-block" 39 40 using namespace llvm; 41 42 namespace { 43 44 /// \brief Lower enqueued blocks. 
45 class AMDGPUOpenCLEnqueuedBlockLowering : public ModulePass { 46 public: 47 static char ID; 48 49 explicit AMDGPUOpenCLEnqueuedBlockLowering() : ModulePass(ID) {} 50 51 private: 52 bool runOnModule(Module &M) override; 53 }; 54 55 } // end anonymous namespace 56 57 char AMDGPUOpenCLEnqueuedBlockLowering::ID = 0; 58 59 char &llvm::AMDGPUOpenCLEnqueuedBlockLoweringID = 60 AMDGPUOpenCLEnqueuedBlockLowering::ID; 61 62 INITIALIZE_PASS(AMDGPUOpenCLEnqueuedBlockLowering, DEBUG_TYPE, 63 "Lower OpenCL enqueued blocks", false, false) 64 65 ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() { 66 return new AMDGPUOpenCLEnqueuedBlockLowering(); 67 } 68 69 bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { 70 auto &C = M.getContext(); 71 auto AS = AMDGPU::getAMDGPUAS(M); 72 bool Changed = false; 73 for (auto &F : M.functions()) { 74 if (F.hasFnAttribute("enqueued-block")) { 75 if (!F.hasOneUse() || !F.user_begin()->hasOneUse() || 76 !isa<ConstantExpr>(*F.user_begin()) || 77 !isa<ConstantExpr>(*F.user_begin()->user_begin())) { 78 continue; 79 } 80 auto *BitCast = cast<ConstantExpr>(*F.user_begin()); 81 auto *AddrCast = cast<ConstantExpr>(*BitCast->user_begin()); 82 auto RuntimeHandle = (F.getName() + "_runtime_handle").str(); 83 auto *GV = new GlobalVariable( 84 M, Type::getInt8Ty(C)->getPointerTo(AS.GLOBAL_ADDRESS), 85 /*IsConstant=*/true, GlobalValue::ExternalLinkage, 86 /*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr, 87 GlobalValue::NotThreadLocal, AS.GLOBAL_ADDRESS, 88 /*IsExternallyInitialized=*/true); 89 DEBUG(dbgs() << "runtime handle created: " << *GV << '\n'); 90 auto *NewPtr = ConstantExpr::getPointerCast(GV, AddrCast->getType()); 91 AddrCast->replaceAllUsesWith(NewPtr); 92 F.addFnAttr("runtime-handle", RuntimeHandle); 93 F.setLinkage(GlobalValue::ExternalLinkage); 94 Changed = true; 95 } 96 } 97 return Changed; 98 } 99