1 //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Utility function to lower a printf call into a series of device
10 // library calls on the AMDGPU target.
11 //
12 // WARNING: This file knows about certain library functions. It recognizes them
13 // by name, and hardwires knowledge of their semantics.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
18 #include "llvm/ADT/SparseBitVector.h"
19 #include "llvm/Analysis/ValueTracking.h"
20 
21 using namespace llvm;
22 
23 #define DEBUG_TYPE "amdgpu-emit-printf"
24 
25 static bool isCString(const Value *Arg) {
26   auto Ty = Arg->getType();
27   auto PtrTy = dyn_cast<PointerType>(Ty);
28   if (!PtrTy)
29     return false;
30 
31   auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
32   if (!IntTy)
33     return false;
34 
35   return IntTy->getBitWidth() == 8;
36 }
37 
38 static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
39   auto Int64Ty = Builder.getInt64Ty();
40   auto Ty = Arg->getType();
41 
42   if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
43     switch (IntTy->getBitWidth()) {
44     case 32:
45       return Builder.CreateZExt(Arg, Int64Ty);
46     case 64:
47       return Arg;
48     }
49   }
50 
51   if (Ty->getTypeID() == Type::DoubleTyID) {
52     return Builder.CreateBitCast(Arg, Int64Ty);
53   }
54 
55   if (isa<PointerType>(Ty)) {
56     return Builder.CreatePtrToInt(Arg, Int64Ty);
57   }
58 
59   llvm_unreachable("unexpected type");
60 }
61 
62 static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
63   auto Int64Ty = Builder.getInt64Ty();
64   auto M = Builder.GetInsertBlock()->getModule();
65   auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
66   M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
67   return Builder.CreateCall(Fn, Version);
68 }
69 
70 static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
71                              Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
72                              Value *Arg4, Value *Arg5, Value *Arg6,
73                              bool IsLast) {
74   auto Int64Ty = Builder.getInt64Ty();
75   auto Int32Ty = Builder.getInt32Ty();
76   auto M = Builder.GetInsertBlock()->getModule();
77   auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
78                                    Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
79                                    Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
80   auto IsLastValue = Builder.getInt32(IsLast);
81   auto NumArgsValue = Builder.getInt32(NumArgs);
82   return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
83                                  Arg4, Arg5, Arg6, IsLastValue});
84 }
85 
86 static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
87                         bool IsLast) {
88   auto Arg0 = fitArgInto64Bits(Builder, Arg);
89   auto Zero = Builder.getInt64(0);
90   return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
91                         Zero, IsLast);
92 }
93 
94 // The device library does not provide strlen, so we build our own loop
95 // here. While we are at it, we also include the terminating null in the length.
96 static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
97   auto *Prev = Builder.GetInsertBlock();
98   Module *M = Prev->getModule();
99 
100   auto CharZero = Builder.getInt8(0);
101   auto One = Builder.getInt64(1);
102   auto Zero = Builder.getInt64(0);
103   auto Int64Ty = Builder.getInt64Ty();
104 
105   // The length is either zero for a null pointer, or the computed value for an
106   // actual string. We need a join block for a phi that represents the final
107   // value.
108   //
109   //  Strictly speaking, the zero does not matter since
110   // __ockl_printf_append_string_n ignores the length if the pointer is null.
111   BasicBlock *Join = nullptr;
112   if (Prev->getTerminator()) {
113     Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
114                                  "strlen.join");
115     Prev->getTerminator()->eraseFromParent();
116   } else {
117     Join = BasicBlock::Create(M->getContext(), "strlen.join",
118                               Prev->getParent());
119   }
120   BasicBlock *While =
121       BasicBlock::Create(M->getContext(), "strlen.while",
122                          Prev->getParent(), Join);
123   BasicBlock *WhileDone = BasicBlock::Create(
124       M->getContext(), "strlen.while.done",
125       Prev->getParent(), Join);
126 
127   // Emit an early return for when the pointer is null.
128   Builder.SetInsertPoint(Prev);
129   auto CmpNull =
130       Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
131   BranchInst::Create(Join, While, CmpNull, Prev);
132 
133   // Entry to the while loop.
134   Builder.SetInsertPoint(While);
135 
136   auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
137   PtrPhi->addIncoming(Str, Prev);
138   auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
139   PtrPhi->addIncoming(PtrNext, While);
140 
141   // Condition for the while loop.
142   auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
143   auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
144   Builder.CreateCondBr(Cmp, WhileDone, While);
145 
146   // Add one to the computed length.
147   Builder.SetInsertPoint(WhileDone, WhileDone->begin());
148   auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
149   auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
150   auto Len = Builder.CreateSub(End, Begin);
151   Len = Builder.CreateAdd(Len, One);
152 
153   // Final join.
154   BranchInst::Create(Join, WhileDone);
155   Builder.SetInsertPoint(Join, Join->begin());
156   auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
157   LenPhi->addIncoming(Len, WhileDone);
158   LenPhi->addIncoming(Zero, Prev);
159 
160   return LenPhi;
161 }
162 
163 static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
164                                 Value *Length, bool isLast) {
165   auto Int64Ty = Builder.getInt64Ty();
166   auto CharPtrTy = Builder.getInt8PtrTy();
167   auto Int32Ty = Builder.getInt32Ty();
168   auto M = Builder.GetInsertBlock()->getModule();
169   auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
170                                    Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
171   auto IsLastInt32 = Builder.getInt32(isLast);
172   return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
173 }
174 
175 static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
176                            bool IsLast) {
177   auto Length = getStrlenWithNull(Builder, Arg);
178   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
179 }
180 
181 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
182                          bool SpecIsCString, bool IsLast) {
183   if (SpecIsCString && isCString(Arg)) {
184     return appendString(Builder, Desc, Arg, IsLast);
185   }
186   // If the format specifies a string but the argument is not, the frontend will
187   // have printed a warning. We just rely on undefined behaviour and send the
188   // argument anyway.
189   return appendArg(Builder, Desc, Arg, IsLast);
190 }
191 
192 // Scan the format string to locate all specifiers, and mark the ones that
193 // specify a string, i.e, the "%s" specifier with optional '*' characters.
194 static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
195   StringRef Str;
196   if (!getConstantStringInfo(Fmt, Str) || Str.empty())
197     return;
198 
199   static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
200   size_t SpecPos = 0;
201   // Skip the first argument, the format string.
202   unsigned ArgIdx = 1;
203 
204   while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
205     if (Str[SpecPos + 1] == '%') {
206       SpecPos += 2;
207       continue;
208     }
209     auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
210     if (SpecEnd == StringRef::npos)
211       return;
212     auto Spec = Str.slice(SpecPos, SpecEnd + 1);
213     ArgIdx += Spec.count('*');
214     if (Str[SpecEnd] == 's') {
215       BV.set(ArgIdx);
216     }
217     SpecPos = SpecEnd + 1;
218     ++ArgIdx;
219   }
220 }
221 
222 Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
223                                   ArrayRef<Value *> Args) {
224   auto NumOps = Args.size();
225   assert(NumOps >= 1);
226 
227   auto Fmt = Args[0];
228   SparseBitVector<8> SpecIsCString;
229   locateCStrings(SpecIsCString, Fmt);
230 
231   auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
232   Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
233 
234   // FIXME: This invokes hostcall once for each argument. We can pack up to
235   // seven scalar printf arguments in a single hostcall. See the signature of
236   // callAppendArgs().
237   for (unsigned int i = 1; i != NumOps; ++i) {
238     bool IsLast = i == NumOps - 1;
239     bool IsCString = SpecIsCString.test(i);
240     Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
241   }
242 
243   return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
244 }
245