1cdda3daaSArpith Chacko Jacob //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===//
2cdda3daaSArpith Chacko Jacob //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cdda3daaSArpith Chacko Jacob //
7cdda3daaSArpith Chacko Jacob //===----------------------------------------------------------------------===//
8cdda3daaSArpith Chacko Jacob //
9cdda3daaSArpith Chacko Jacob // Generates code for built-in GPU calls which are not runtime-specific.
10cdda3daaSArpith Chacko Jacob // (Runtime-specific codegen lives in programming model specific files.)
11cdda3daaSArpith Chacko Jacob //
12cdda3daaSArpith Chacko Jacob //===----------------------------------------------------------------------===//
13cdda3daaSArpith Chacko Jacob 
14cdda3daaSArpith Chacko Jacob #include "CodeGenFunction.h"
15cdda3daaSArpith Chacko Jacob #include "clang/Basic/Builtins.h"
16cdda3daaSArpith Chacko Jacob #include "llvm/IR/DataLayout.h"
17cdda3daaSArpith Chacko Jacob #include "llvm/IR/Instruction.h"
18cdda3daaSArpith Chacko Jacob #include "llvm/Support/MathExtras.h"
19ed181efaSSameer Sahasrabuddhe #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
20cdda3daaSArpith Chacko Jacob 
21cdda3daaSArpith Chacko Jacob using namespace clang;
22cdda3daaSArpith Chacko Jacob using namespace CodeGen;
23cdda3daaSArpith Chacko Jacob 
24*27177b82SJon Chesterfield namespace {
GetVprintfDeclaration(llvm::Module & M)25*27177b82SJon Chesterfield llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
26cdda3daaSArpith Chacko Jacob   llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
27cdda3daaSArpith Chacko Jacob                             llvm::Type::getInt8PtrTy(M.getContext())};
28cdda3daaSArpith Chacko Jacob   llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
29cdda3daaSArpith Chacko Jacob       llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
30cdda3daaSArpith Chacko Jacob 
31cdda3daaSArpith Chacko Jacob   if (auto *F = M.getFunction("vprintf")) {
32cdda3daaSArpith Chacko Jacob     // Our CUDA system header declares vprintf with the right signature, so
33cdda3daaSArpith Chacko Jacob     // nobody else should have been able to declare vprintf with a bogus
34cdda3daaSArpith Chacko Jacob     // signature.
35cdda3daaSArpith Chacko Jacob     assert(F->getFunctionType() == VprintfFuncType);
36cdda3daaSArpith Chacko Jacob     return F;
37cdda3daaSArpith Chacko Jacob   }
38cdda3daaSArpith Chacko Jacob 
39cdda3daaSArpith Chacko Jacob   // vprintf doesn't already exist; create a declaration and insert it into the
40cdda3daaSArpith Chacko Jacob   // module.
41cdda3daaSArpith Chacko Jacob   return llvm::Function::Create(
42cdda3daaSArpith Chacko Jacob       VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
43cdda3daaSArpith Chacko Jacob }
44cdda3daaSArpith Chacko Jacob 
GetOpenMPVprintfDeclaration(CodeGenModule & CGM)45*27177b82SJon Chesterfield llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
46*27177b82SJon Chesterfield   const char *Name = "__llvm_omp_vprintf";
47*27177b82SJon Chesterfield   llvm::Module &M = CGM.getModule();
48*27177b82SJon Chesterfield   llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
49*27177b82SJon Chesterfield                             llvm::Type::getInt8PtrTy(M.getContext()),
50*27177b82SJon Chesterfield                             llvm::Type::getInt32Ty(M.getContext())};
51*27177b82SJon Chesterfield   llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
52*27177b82SJon Chesterfield       llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
53*27177b82SJon Chesterfield 
54*27177b82SJon Chesterfield   if (auto *F = M.getFunction(Name)) {
55*27177b82SJon Chesterfield     if (F->getFunctionType() != VprintfFuncType) {
56*27177b82SJon Chesterfield       CGM.Error(SourceLocation(),
57*27177b82SJon Chesterfield                 "Invalid type declaration for __llvm_omp_vprintf");
58*27177b82SJon Chesterfield       return nullptr;
59*27177b82SJon Chesterfield     }
60*27177b82SJon Chesterfield     return F;
61*27177b82SJon Chesterfield   }
62*27177b82SJon Chesterfield 
63*27177b82SJon Chesterfield   return llvm::Function::Create(
64*27177b82SJon Chesterfield       VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
65*27177b82SJon Chesterfield }
66*27177b82SJon Chesterfield 
67cdda3daaSArpith Chacko Jacob // Transforms a call to printf into a call to the NVPTX vprintf syscall (which
68cdda3daaSArpith Chacko Jacob // isn't particularly special; it's invoked just like a regular function).
69cdda3daaSArpith Chacko Jacob // vprintf takes two args: A format string, and a pointer to a buffer containing
70cdda3daaSArpith Chacko Jacob // the varargs.
71cdda3daaSArpith Chacko Jacob //
72cdda3daaSArpith Chacko Jacob // For example, the call
73cdda3daaSArpith Chacko Jacob //
74cdda3daaSArpith Chacko Jacob //   printf("format string", arg1, arg2, arg3);
75cdda3daaSArpith Chacko Jacob //
76cdda3daaSArpith Chacko Jacob // is converted into something resembling
77cdda3daaSArpith Chacko Jacob //
78cdda3daaSArpith Chacko Jacob //   struct Tmp {
79cdda3daaSArpith Chacko Jacob //     Arg1 a1;
80cdda3daaSArpith Chacko Jacob //     Arg2 a2;
81cdda3daaSArpith Chacko Jacob //     Arg3 a3;
82cdda3daaSArpith Chacko Jacob //   };
83cdda3daaSArpith Chacko Jacob //   char* buf = alloca(sizeof(Tmp));
84cdda3daaSArpith Chacko Jacob //   *(Tmp*)buf = {a1, a2, a3};
85cdda3daaSArpith Chacko Jacob //   vprintf("format string", buf);
86cdda3daaSArpith Chacko Jacob //
87cdda3daaSArpith Chacko Jacob // buf is aligned to the max of {alignof(Arg1), ...}.  Furthermore, each of the
88cdda3daaSArpith Chacko Jacob // args is itself aligned to its preferred alignment.
89cdda3daaSArpith Chacko Jacob //
90cdda3daaSArpith Chacko Jacob // Note that by the time this function runs, E's args have already undergone the
91cdda3daaSArpith Chacko Jacob // standard C vararg promotion (short -> int, float -> double, etc.).
922c37ae6dSJon Chesterfield 
93*27177b82SJon Chesterfield std::pair<llvm::Value *, llvm::TypeSize>
packArgsIntoNVPTXFormatBuffer(CodeGenFunction * CGF,const CallArgList & Args)94*27177b82SJon Chesterfield packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
952c37ae6dSJon Chesterfield   const llvm::DataLayout &DL = CGF->CGM.getDataLayout();
962c37ae6dSJon Chesterfield   llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext();
972c37ae6dSJon Chesterfield   CGBuilderTy &Builder = CGF->Builder;
982c37ae6dSJon Chesterfield 
992c37ae6dSJon Chesterfield   // Construct and fill the args buffer that we'll pass to vprintf.
1002c37ae6dSJon Chesterfield   if (Args.size() <= 1) {
101*27177b82SJon Chesterfield     // If there are no args, pass a null pointer and size 0
102*27177b82SJon Chesterfield     llvm::Value * BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
103*27177b82SJon Chesterfield     return {BufferPtr, llvm::TypeSize::Fixed(0)};
1042c37ae6dSJon Chesterfield   } else {
1052c37ae6dSJon Chesterfield     llvm::SmallVector<llvm::Type *, 8> ArgTypes;
1062c37ae6dSJon Chesterfield     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
1072c37ae6dSJon Chesterfield       ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType());
1082c37ae6dSJon Chesterfield 
1092c37ae6dSJon Chesterfield     // Using llvm::StructType is correct only because printf doesn't accept
1102c37ae6dSJon Chesterfield     // aggregates.  If we had to handle aggregates here, we'd have to manually
1112c37ae6dSJon Chesterfield     // compute the offsets within the alloca -- we wouldn't be able to assume
1122c37ae6dSJon Chesterfield     // that the alignment of the llvm type was the same as the alignment of the
1132c37ae6dSJon Chesterfield     // clang type.
1142c37ae6dSJon Chesterfield     llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args");
1152c37ae6dSJon Chesterfield     llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy);
1162c37ae6dSJon Chesterfield 
1172c37ae6dSJon Chesterfield     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
1182c37ae6dSJon Chesterfield       llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
1192c37ae6dSJon Chesterfield       llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal();
1202c37ae6dSJon Chesterfield       Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
1212c37ae6dSJon Chesterfield     }
122*27177b82SJon Chesterfield     llvm::Value *BufferPtr =
123*27177b82SJon Chesterfield         Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
124*27177b82SJon Chesterfield     return {BufferPtr, DL.getTypeAllocSize(AllocaTy)};
1252c37ae6dSJon Chesterfield   }
1262c37ae6dSJon Chesterfield }
1272c37ae6dSJon Chesterfield 
containsNonScalarVarargs(CodeGenFunction * CGF,CallArgList Args)128*27177b82SJon Chesterfield bool containsNonScalarVarargs(CodeGenFunction *CGF, CallArgList Args) {
129*27177b82SJon Chesterfield   return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) {
130*27177b82SJon Chesterfield     return !A.getRValue(*CGF).isScalar();
131*27177b82SJon Chesterfield   });
132*27177b82SJon Chesterfield }
133*27177b82SJon Chesterfield 
EmitDevicePrintfCallExpr(const CallExpr * E,CodeGenFunction * CGF,llvm::Function * Decl,bool WithSizeArg)134*27177b82SJon Chesterfield RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
135*27177b82SJon Chesterfield                                 llvm::Function *Decl, bool WithSizeArg) {
136*27177b82SJon Chesterfield   CodeGenModule &CGM = CGF->CGM;
137*27177b82SJon Chesterfield   CGBuilderTy &Builder = CGF->Builder;
138cdda3daaSArpith Chacko Jacob   assert(E->getBuiltinCallee() == Builtin::BIprintf);
139cdda3daaSArpith Chacko Jacob   assert(E->getNumArgs() >= 1); // printf always has at least one arg.
140cdda3daaSArpith Chacko Jacob 
141*27177b82SJon Chesterfield   // Uses the same format as nvptx for the argument packing, but also passes
142*27177b82SJon Chesterfield   // an i32 for the total size of the passed pointer
143cdda3daaSArpith Chacko Jacob   CallArgList Args;
144*27177b82SJon Chesterfield   CGF->EmitCallArgs(Args,
145cdda3daaSArpith Chacko Jacob                     E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
146cdda3daaSArpith Chacko Jacob                     E->arguments(), E->getDirectCallee(),
147cdda3daaSArpith Chacko Jacob                     /* ParamsToSkip = */ 0);
148cdda3daaSArpith Chacko Jacob 
149cdda3daaSArpith Chacko Jacob   // We don't know how to emit non-scalar varargs.
150*27177b82SJon Chesterfield   if (containsNonScalarVarargs(CGF, Args)) {
151cdda3daaSArpith Chacko Jacob     CGM.ErrorUnsupported(E, "non-scalar arg to printf");
152*27177b82SJon Chesterfield     return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0));
153cdda3daaSArpith Chacko Jacob   }
154cdda3daaSArpith Chacko Jacob 
155*27177b82SJon Chesterfield   auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args);
156*27177b82SJon Chesterfield   llvm::Value *BufferPtr = r.first;
157cdda3daaSArpith Chacko Jacob 
158*27177b82SJon Chesterfield   llvm::SmallVector<llvm::Value *, 3> Vec = {
159*27177b82SJon Chesterfield       Args[0].getRValue(*CGF).getScalarVal(), BufferPtr};
160*27177b82SJon Chesterfield   if (WithSizeArg) {
161*27177b82SJon Chesterfield     // Passing > 32bit of data as a local alloca doesn't work for nvptx or
162*27177b82SJon Chesterfield     // amdgpu
163*27177b82SJon Chesterfield     llvm::Constant *Size =
164*27177b82SJon Chesterfield         llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()),
165*27177b82SJon Chesterfield                                static_cast<uint32_t>(r.second.getFixedSize()));
166*27177b82SJon Chesterfield 
167*27177b82SJon Chesterfield     Vec.push_back(Size);
168*27177b82SJon Chesterfield   }
169*27177b82SJon Chesterfield   return RValue::get(Builder.CreateCall(Decl, Vec));
170*27177b82SJon Chesterfield }
171*27177b82SJon Chesterfield } // namespace
172*27177b82SJon Chesterfield 
EmitNVPTXDevicePrintfCallExpr(const CallExpr * E)173*27177b82SJon Chesterfield RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
174*27177b82SJon Chesterfield   assert(getTarget().getTriple().isNVPTX());
175*27177b82SJon Chesterfield   return EmitDevicePrintfCallExpr(
176*27177b82SJon Chesterfield       E, this, GetVprintfDeclaration(CGM.getModule()), false);
177cdda3daaSArpith Chacko Jacob }
178ed181efaSSameer Sahasrabuddhe 
EmitAMDGPUDevicePrintfCallExpr(const CallExpr * E)179*27177b82SJon Chesterfield RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
180ed181efaSSameer Sahasrabuddhe   assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
181ed181efaSSameer Sahasrabuddhe   assert(E->getBuiltinCallee() == Builtin::BIprintf ||
182ed181efaSSameer Sahasrabuddhe          E->getBuiltinCallee() == Builtin::BI__builtin_printf);
183ed181efaSSameer Sahasrabuddhe   assert(E->getNumArgs() >= 1); // printf always has at least one arg.
184ed181efaSSameer Sahasrabuddhe 
185ed181efaSSameer Sahasrabuddhe   CallArgList CallArgs;
186ed181efaSSameer Sahasrabuddhe   EmitCallArgs(CallArgs,
187ed181efaSSameer Sahasrabuddhe                E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
188ed181efaSSameer Sahasrabuddhe                E->arguments(), E->getDirectCallee(),
189ed181efaSSameer Sahasrabuddhe                /* ParamsToSkip = */ 0);
190ed181efaSSameer Sahasrabuddhe 
191ed181efaSSameer Sahasrabuddhe   SmallVector<llvm::Value *, 8> Args;
192ed181efaSSameer Sahasrabuddhe   for (auto A : CallArgs) {
193ed181efaSSameer Sahasrabuddhe     // We don't know how to emit non-scalar varargs.
194ed181efaSSameer Sahasrabuddhe     if (!A.getRValue(*this).isScalar()) {
195ed181efaSSameer Sahasrabuddhe       CGM.ErrorUnsupported(E, "non-scalar arg to printf");
196ed181efaSSameer Sahasrabuddhe       return RValue::get(llvm::ConstantInt::get(IntTy, -1));
197ed181efaSSameer Sahasrabuddhe     }
198ed181efaSSameer Sahasrabuddhe 
199ed181efaSSameer Sahasrabuddhe     llvm::Value *Arg = A.getRValue(*this).getScalarVal();
200ed181efaSSameer Sahasrabuddhe     Args.push_back(Arg);
201ed181efaSSameer Sahasrabuddhe   }
202ed181efaSSameer Sahasrabuddhe 
203ed181efaSSameer Sahasrabuddhe   llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
204ed181efaSSameer Sahasrabuddhe   IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
205ed181efaSSameer Sahasrabuddhe   auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args);
206ed181efaSSameer Sahasrabuddhe   Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
207ed181efaSSameer Sahasrabuddhe   return RValue::get(Printf);
208ed181efaSSameer Sahasrabuddhe }
209*27177b82SJon Chesterfield 
EmitOpenMPDevicePrintfCallExpr(const CallExpr * E)210*27177b82SJon Chesterfield RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
211*27177b82SJon Chesterfield   assert(getTarget().getTriple().isNVPTX() ||
212*27177b82SJon Chesterfield          getTarget().getTriple().isAMDGCN());
213*27177b82SJon Chesterfield   return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM),
214*27177b82SJon Chesterfield                                   true);
215*27177b82SJon Chesterfield }
216