1cdda3daaSArpith Chacko Jacob //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===//
2cdda3daaSArpith Chacko Jacob //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cdda3daaSArpith Chacko Jacob //
7cdda3daaSArpith Chacko Jacob //===----------------------------------------------------------------------===//
8cdda3daaSArpith Chacko Jacob //
9cdda3daaSArpith Chacko Jacob // Generates code for built-in GPU calls which are not runtime-specific.
10cdda3daaSArpith Chacko Jacob // (Runtime-specific codegen lives in programming model specific files.)
11cdda3daaSArpith Chacko Jacob //
12cdda3daaSArpith Chacko Jacob //===----------------------------------------------------------------------===//
13cdda3daaSArpith Chacko Jacob
14cdda3daaSArpith Chacko Jacob #include "CodeGenFunction.h"
15cdda3daaSArpith Chacko Jacob #include "clang/Basic/Builtins.h"
16cdda3daaSArpith Chacko Jacob #include "llvm/IR/DataLayout.h"
17cdda3daaSArpith Chacko Jacob #include "llvm/IR/Instruction.h"
18cdda3daaSArpith Chacko Jacob #include "llvm/Support/MathExtras.h"
19ed181efaSSameer Sahasrabuddhe #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
20cdda3daaSArpith Chacko Jacob
21cdda3daaSArpith Chacko Jacob using namespace clang;
22cdda3daaSArpith Chacko Jacob using namespace CodeGen;
23cdda3daaSArpith Chacko Jacob
24*27177b82SJon Chesterfield namespace {
GetVprintfDeclaration(llvm::Module & M)25*27177b82SJon Chesterfield llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
26cdda3daaSArpith Chacko Jacob llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
27cdda3daaSArpith Chacko Jacob llvm::Type::getInt8PtrTy(M.getContext())};
28cdda3daaSArpith Chacko Jacob llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
29cdda3daaSArpith Chacko Jacob llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
30cdda3daaSArpith Chacko Jacob
31cdda3daaSArpith Chacko Jacob if (auto *F = M.getFunction("vprintf")) {
32cdda3daaSArpith Chacko Jacob // Our CUDA system header declares vprintf with the right signature, so
33cdda3daaSArpith Chacko Jacob // nobody else should have been able to declare vprintf with a bogus
34cdda3daaSArpith Chacko Jacob // signature.
35cdda3daaSArpith Chacko Jacob assert(F->getFunctionType() == VprintfFuncType);
36cdda3daaSArpith Chacko Jacob return F;
37cdda3daaSArpith Chacko Jacob }
38cdda3daaSArpith Chacko Jacob
39cdda3daaSArpith Chacko Jacob // vprintf doesn't already exist; create a declaration and insert it into the
40cdda3daaSArpith Chacko Jacob // module.
41cdda3daaSArpith Chacko Jacob return llvm::Function::Create(
42cdda3daaSArpith Chacko Jacob VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
43cdda3daaSArpith Chacko Jacob }
44cdda3daaSArpith Chacko Jacob
GetOpenMPVprintfDeclaration(CodeGenModule & CGM)45*27177b82SJon Chesterfield llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
46*27177b82SJon Chesterfield const char *Name = "__llvm_omp_vprintf";
47*27177b82SJon Chesterfield llvm::Module &M = CGM.getModule();
48*27177b82SJon Chesterfield llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
49*27177b82SJon Chesterfield llvm::Type::getInt8PtrTy(M.getContext()),
50*27177b82SJon Chesterfield llvm::Type::getInt32Ty(M.getContext())};
51*27177b82SJon Chesterfield llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
52*27177b82SJon Chesterfield llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
53*27177b82SJon Chesterfield
54*27177b82SJon Chesterfield if (auto *F = M.getFunction(Name)) {
55*27177b82SJon Chesterfield if (F->getFunctionType() != VprintfFuncType) {
56*27177b82SJon Chesterfield CGM.Error(SourceLocation(),
57*27177b82SJon Chesterfield "Invalid type declaration for __llvm_omp_vprintf");
58*27177b82SJon Chesterfield return nullptr;
59*27177b82SJon Chesterfield }
60*27177b82SJon Chesterfield return F;
61*27177b82SJon Chesterfield }
62*27177b82SJon Chesterfield
63*27177b82SJon Chesterfield return llvm::Function::Create(
64*27177b82SJon Chesterfield VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
65*27177b82SJon Chesterfield }
66*27177b82SJon Chesterfield
67cdda3daaSArpith Chacko Jacob // Transforms a call to printf into a call to the NVPTX vprintf syscall (which
68cdda3daaSArpith Chacko Jacob // isn't particularly special; it's invoked just like a regular function).
69cdda3daaSArpith Chacko Jacob // vprintf takes two args: A format string, and a pointer to a buffer containing
70cdda3daaSArpith Chacko Jacob // the varargs.
71cdda3daaSArpith Chacko Jacob //
72cdda3daaSArpith Chacko Jacob // For example, the call
73cdda3daaSArpith Chacko Jacob //
74cdda3daaSArpith Chacko Jacob // printf("format string", arg1, arg2, arg3);
75cdda3daaSArpith Chacko Jacob //
76cdda3daaSArpith Chacko Jacob // is converted into something resembling
77cdda3daaSArpith Chacko Jacob //
78cdda3daaSArpith Chacko Jacob // struct Tmp {
79cdda3daaSArpith Chacko Jacob // Arg1 a1;
80cdda3daaSArpith Chacko Jacob // Arg2 a2;
81cdda3daaSArpith Chacko Jacob // Arg3 a3;
82cdda3daaSArpith Chacko Jacob // };
83cdda3daaSArpith Chacko Jacob // char* buf = alloca(sizeof(Tmp));
84cdda3daaSArpith Chacko Jacob // *(Tmp*)buf = {a1, a2, a3};
85cdda3daaSArpith Chacko Jacob // vprintf("format string", buf);
86cdda3daaSArpith Chacko Jacob //
87cdda3daaSArpith Chacko Jacob // buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the
88cdda3daaSArpith Chacko Jacob // args is itself aligned to its preferred alignment.
89cdda3daaSArpith Chacko Jacob //
90cdda3daaSArpith Chacko Jacob // Note that by the time this function runs, E's args have already undergone the
91cdda3daaSArpith Chacko Jacob // standard C vararg promotion (short -> int, float -> double, etc.).
922c37ae6dSJon Chesterfield
93*27177b82SJon Chesterfield std::pair<llvm::Value *, llvm::TypeSize>
packArgsIntoNVPTXFormatBuffer(CodeGenFunction * CGF,const CallArgList & Args)94*27177b82SJon Chesterfield packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
952c37ae6dSJon Chesterfield const llvm::DataLayout &DL = CGF->CGM.getDataLayout();
962c37ae6dSJon Chesterfield llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext();
972c37ae6dSJon Chesterfield CGBuilderTy &Builder = CGF->Builder;
982c37ae6dSJon Chesterfield
992c37ae6dSJon Chesterfield // Construct and fill the args buffer that we'll pass to vprintf.
1002c37ae6dSJon Chesterfield if (Args.size() <= 1) {
101*27177b82SJon Chesterfield // If there are no args, pass a null pointer and size 0
102*27177b82SJon Chesterfield llvm::Value * BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
103*27177b82SJon Chesterfield return {BufferPtr, llvm::TypeSize::Fixed(0)};
1042c37ae6dSJon Chesterfield } else {
1052c37ae6dSJon Chesterfield llvm::SmallVector<llvm::Type *, 8> ArgTypes;
1062c37ae6dSJon Chesterfield for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
1072c37ae6dSJon Chesterfield ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType());
1082c37ae6dSJon Chesterfield
1092c37ae6dSJon Chesterfield // Using llvm::StructType is correct only because printf doesn't accept
1102c37ae6dSJon Chesterfield // aggregates. If we had to handle aggregates here, we'd have to manually
1112c37ae6dSJon Chesterfield // compute the offsets within the alloca -- we wouldn't be able to assume
1122c37ae6dSJon Chesterfield // that the alignment of the llvm type was the same as the alignment of the
1132c37ae6dSJon Chesterfield // clang type.
1142c37ae6dSJon Chesterfield llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args");
1152c37ae6dSJon Chesterfield llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy);
1162c37ae6dSJon Chesterfield
1172c37ae6dSJon Chesterfield for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
1182c37ae6dSJon Chesterfield llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
1192c37ae6dSJon Chesterfield llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal();
1202c37ae6dSJon Chesterfield Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
1212c37ae6dSJon Chesterfield }
122*27177b82SJon Chesterfield llvm::Value *BufferPtr =
123*27177b82SJon Chesterfield Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
124*27177b82SJon Chesterfield return {BufferPtr, DL.getTypeAllocSize(AllocaTy)};
1252c37ae6dSJon Chesterfield }
1262c37ae6dSJon Chesterfield }
1272c37ae6dSJon Chesterfield
containsNonScalarVarargs(CodeGenFunction * CGF,CallArgList Args)128*27177b82SJon Chesterfield bool containsNonScalarVarargs(CodeGenFunction *CGF, CallArgList Args) {
129*27177b82SJon Chesterfield return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) {
130*27177b82SJon Chesterfield return !A.getRValue(*CGF).isScalar();
131*27177b82SJon Chesterfield });
132*27177b82SJon Chesterfield }
133*27177b82SJon Chesterfield
EmitDevicePrintfCallExpr(const CallExpr * E,CodeGenFunction * CGF,llvm::Function * Decl,bool WithSizeArg)134*27177b82SJon Chesterfield RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
135*27177b82SJon Chesterfield llvm::Function *Decl, bool WithSizeArg) {
136*27177b82SJon Chesterfield CodeGenModule &CGM = CGF->CGM;
137*27177b82SJon Chesterfield CGBuilderTy &Builder = CGF->Builder;
138cdda3daaSArpith Chacko Jacob assert(E->getBuiltinCallee() == Builtin::BIprintf);
139cdda3daaSArpith Chacko Jacob assert(E->getNumArgs() >= 1); // printf always has at least one arg.
140cdda3daaSArpith Chacko Jacob
141*27177b82SJon Chesterfield // Uses the same format as nvptx for the argument packing, but also passes
142*27177b82SJon Chesterfield // an i32 for the total size of the passed pointer
143cdda3daaSArpith Chacko Jacob CallArgList Args;
144*27177b82SJon Chesterfield CGF->EmitCallArgs(Args,
145cdda3daaSArpith Chacko Jacob E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
146cdda3daaSArpith Chacko Jacob E->arguments(), E->getDirectCallee(),
147cdda3daaSArpith Chacko Jacob /* ParamsToSkip = */ 0);
148cdda3daaSArpith Chacko Jacob
149cdda3daaSArpith Chacko Jacob // We don't know how to emit non-scalar varargs.
150*27177b82SJon Chesterfield if (containsNonScalarVarargs(CGF, Args)) {
151cdda3daaSArpith Chacko Jacob CGM.ErrorUnsupported(E, "non-scalar arg to printf");
152*27177b82SJon Chesterfield return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0));
153cdda3daaSArpith Chacko Jacob }
154cdda3daaSArpith Chacko Jacob
155*27177b82SJon Chesterfield auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args);
156*27177b82SJon Chesterfield llvm::Value *BufferPtr = r.first;
157cdda3daaSArpith Chacko Jacob
158*27177b82SJon Chesterfield llvm::SmallVector<llvm::Value *, 3> Vec = {
159*27177b82SJon Chesterfield Args[0].getRValue(*CGF).getScalarVal(), BufferPtr};
160*27177b82SJon Chesterfield if (WithSizeArg) {
161*27177b82SJon Chesterfield // Passing > 32bit of data as a local alloca doesn't work for nvptx or
162*27177b82SJon Chesterfield // amdgpu
163*27177b82SJon Chesterfield llvm::Constant *Size =
164*27177b82SJon Chesterfield llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()),
165*27177b82SJon Chesterfield static_cast<uint32_t>(r.second.getFixedSize()));
166*27177b82SJon Chesterfield
167*27177b82SJon Chesterfield Vec.push_back(Size);
168*27177b82SJon Chesterfield }
169*27177b82SJon Chesterfield return RValue::get(Builder.CreateCall(Decl, Vec));
170*27177b82SJon Chesterfield }
171*27177b82SJon Chesterfield } // namespace
172*27177b82SJon Chesterfield
EmitNVPTXDevicePrintfCallExpr(const CallExpr * E)173*27177b82SJon Chesterfield RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
174*27177b82SJon Chesterfield assert(getTarget().getTriple().isNVPTX());
175*27177b82SJon Chesterfield return EmitDevicePrintfCallExpr(
176*27177b82SJon Chesterfield E, this, GetVprintfDeclaration(CGM.getModule()), false);
177cdda3daaSArpith Chacko Jacob }
178ed181efaSSameer Sahasrabuddhe
EmitAMDGPUDevicePrintfCallExpr(const CallExpr * E)179*27177b82SJon Chesterfield RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
180ed181efaSSameer Sahasrabuddhe assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
181ed181efaSSameer Sahasrabuddhe assert(E->getBuiltinCallee() == Builtin::BIprintf ||
182ed181efaSSameer Sahasrabuddhe E->getBuiltinCallee() == Builtin::BI__builtin_printf);
183ed181efaSSameer Sahasrabuddhe assert(E->getNumArgs() >= 1); // printf always has at least one arg.
184ed181efaSSameer Sahasrabuddhe
185ed181efaSSameer Sahasrabuddhe CallArgList CallArgs;
186ed181efaSSameer Sahasrabuddhe EmitCallArgs(CallArgs,
187ed181efaSSameer Sahasrabuddhe E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
188ed181efaSSameer Sahasrabuddhe E->arguments(), E->getDirectCallee(),
189ed181efaSSameer Sahasrabuddhe /* ParamsToSkip = */ 0);
190ed181efaSSameer Sahasrabuddhe
191ed181efaSSameer Sahasrabuddhe SmallVector<llvm::Value *, 8> Args;
192ed181efaSSameer Sahasrabuddhe for (auto A : CallArgs) {
193ed181efaSSameer Sahasrabuddhe // We don't know how to emit non-scalar varargs.
194ed181efaSSameer Sahasrabuddhe if (!A.getRValue(*this).isScalar()) {
195ed181efaSSameer Sahasrabuddhe CGM.ErrorUnsupported(E, "non-scalar arg to printf");
196ed181efaSSameer Sahasrabuddhe return RValue::get(llvm::ConstantInt::get(IntTy, -1));
197ed181efaSSameer Sahasrabuddhe }
198ed181efaSSameer Sahasrabuddhe
199ed181efaSSameer Sahasrabuddhe llvm::Value *Arg = A.getRValue(*this).getScalarVal();
200ed181efaSSameer Sahasrabuddhe Args.push_back(Arg);
201ed181efaSSameer Sahasrabuddhe }
202ed181efaSSameer Sahasrabuddhe
203ed181efaSSameer Sahasrabuddhe llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
204ed181efaSSameer Sahasrabuddhe IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
205ed181efaSSameer Sahasrabuddhe auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args);
206ed181efaSSameer Sahasrabuddhe Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
207ed181efaSSameer Sahasrabuddhe return RValue::get(Printf);
208ed181efaSSameer Sahasrabuddhe }
209*27177b82SJon Chesterfield
EmitOpenMPDevicePrintfCallExpr(const CallExpr * E)210*27177b82SJon Chesterfield RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
211*27177b82SJon Chesterfield assert(getTarget().getTriple().isNVPTX() ||
212*27177b82SJon Chesterfield getTarget().getTriple().isAMDGCN());
213*27177b82SJon Chesterfield return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM),
214*27177b82SJon Chesterfield true);
215*27177b82SJon Chesterfield }
216