1 //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Utility function to lower a printf call into a series of device 10 // library calls on the AMDGPU target. 11 // 12 // WARNING: This file knows about certain library functions. It recognizes them 13 // by name, and hardwires knowledge of their semantics. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" 18 #include "llvm/ADT/SparseBitVector.h" 19 #include "llvm/Analysis/ValueTracking.h" 20 21 using namespace llvm; 22 23 #define DEBUG_TYPE "amdgpu-emit-printf" 24 25 static bool isCString(const Value *Arg) { 26 auto Ty = Arg->getType(); 27 auto PtrTy = dyn_cast<PointerType>(Ty); 28 if (!PtrTy) 29 return false; 30 31 auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType()); 32 if (!IntTy) 33 return false; 34 35 return IntTy->getBitWidth() == 8; 36 } 37 38 static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) { 39 auto Int64Ty = Builder.getInt64Ty(); 40 auto Ty = Arg->getType(); 41 42 if (auto IntTy = dyn_cast<IntegerType>(Ty)) { 43 switch (IntTy->getBitWidth()) { 44 case 32: 45 return Builder.CreateZExt(Arg, Int64Ty); 46 case 64: 47 return Arg; 48 } 49 } 50 51 if (Ty->getTypeID() == Type::DoubleTyID) { 52 return Builder.CreateBitCast(Arg, Int64Ty); 53 } 54 55 if (isa<PointerType>(Ty)) { 56 return Builder.CreatePtrToInt(Arg, Int64Ty); 57 } 58 59 llvm_unreachable("unexpected type"); 60 } 61 62 static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) { 63 auto Int64Ty = Builder.getInt64Ty(); 64 auto M = Builder.GetInsertBlock()->getModule(); 65 auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty); 66 M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1); 67 return Builder.CreateCall(Fn, Version); 68 } 69 70 static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs, 71 Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3, 72 Value *Arg4, Value *Arg5, Value *Arg6, 73 bool IsLast) { 74 auto Int64Ty = Builder.getInt64Ty(); 75 auto Int32Ty = Builder.getInt32Ty(); 76 auto M = Builder.GetInsertBlock()->getModule(); 77 auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty, 78 Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty, 79 Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty); 80 auto IsLastValue = Builder.getInt32(IsLast); 81 auto NumArgsValue = Builder.getInt32(NumArgs); 82 return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3, 83 Arg4, Arg5, Arg6, IsLastValue}); 84 } 85 86 static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, 87 bool IsLast) { 88 auto Arg0 = fitArgInto64Bits(Builder, Arg); 89 auto Zero = Builder.getInt64(0); 90 return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero, 91 Zero, IsLast); 92 } 93 94 // The device library does not provide strlen, so we build our own loop 95 // here. While we are at it, we also include the terminating null in the length. 96 static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) { 97 auto *Prev = Builder.GetInsertBlock(); 98 Module *M = Prev->getModule(); 99 100 auto CharZero = Builder.getInt8(0); 101 auto One = Builder.getInt64(1); 102 auto Zero = Builder.getInt64(0); 103 auto Int64Ty = Builder.getInt64Ty(); 104 105 // The length is either zero for a null pointer, or the computed value for an 106 // actual string. We need a join block for a phi that represents the final 107 // value. 108 // 109 // Strictly speaking, the zero does not matter since 110 // __ockl_printf_append_string_n ignores the length if the pointer is null. 111 BasicBlock *Join = nullptr; 112 if (Prev->getTerminator()) { 113 Join = Prev->splitBasicBlock(Builder.GetInsertPoint(), 114 "strlen.join"); 115 Prev->getTerminator()->eraseFromParent(); 116 } else { 117 Join = BasicBlock::Create(M->getContext(), "strlen.join", 118 Prev->getParent()); 119 } 120 BasicBlock *While = 121 BasicBlock::Create(M->getContext(), "strlen.while", 122 Prev->getParent(), Join); 123 BasicBlock *WhileDone = BasicBlock::Create( 124 M->getContext(), "strlen.while.done", 125 Prev->getParent(), Join); 126 127 // Emit an early return for when the pointer is null. 128 Builder.SetInsertPoint(Prev); 129 auto CmpNull = 130 Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType())); 131 BranchInst::Create(Join, While, CmpNull, Prev); 132 133 // Entry to the while loop. 134 Builder.SetInsertPoint(While); 135 136 auto PtrPhi = Builder.CreatePHI(Str->getType(), 2); 137 PtrPhi->addIncoming(Str, Prev); 138 auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One); 139 PtrPhi->addIncoming(PtrNext, While); 140 141 // Condition for the while loop. 142 auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi); 143 auto Cmp = Builder.CreateICmpEQ(Data, CharZero); 144 Builder.CreateCondBr(Cmp, WhileDone, While); 145 146 // Add one to the computed length. 147 Builder.SetInsertPoint(WhileDone, WhileDone->begin()); 148 auto Begin = Builder.CreatePtrToInt(Str, Int64Ty); 149 auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty); 150 auto Len = Builder.CreateSub(End, Begin); 151 Len = Builder.CreateAdd(Len, One); 152 153 // Final join. 154 BranchInst::Create(Join, WhileDone); 155 Builder.SetInsertPoint(Join, Join->begin()); 156 auto LenPhi = Builder.CreatePHI(Len->getType(), 2); 157 LenPhi->addIncoming(Len, WhileDone); 158 LenPhi->addIncoming(Zero, Prev); 159 160 return LenPhi; 161 } 162 163 static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, 164 Value *Length, bool isLast) { 165 auto Int64Ty = Builder.getInt64Ty(); 166 auto CharPtrTy = Builder.getInt8PtrTy(); 167 auto Int32Ty = Builder.getInt32Ty(); 168 auto M = Builder.GetInsertBlock()->getModule(); 169 auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty, 170 Int64Ty, CharPtrTy, Int64Ty, Int32Ty); 171 auto IsLastInt32 = Builder.getInt32(isLast); 172 return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32}); 173 } 174 175 static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, 176 bool IsLast) { 177 auto Length = getStrlenWithNull(Builder, Arg); 178 return callAppendStringN(Builder, Desc, Arg, Length, IsLast); 179 } 180 181 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, 182 bool SpecIsCString, bool IsLast) { 183 if (SpecIsCString && isCString(Arg)) { 184 return appendString(Builder, Desc, Arg, IsLast); 185 } 186 // If the format specifies a string but the argument is not, the frontend will 187 // have printed a warning. We just rely on undefined behaviour and send the 188 // argument anyway. 189 return appendArg(Builder, Desc, Arg, IsLast); 190 } 191 192 // Scan the format string to locate all specifiers, and mark the ones that 193 // specify a string, i.e, the "%s" specifier with optional '*' characters. 194 static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) { 195 StringRef Str; 196 if (!getConstantStringInfo(Fmt, Str) || Str.empty()) 197 return; 198 199 static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn"; 200 size_t SpecPos = 0; 201 // Skip the first argument, the format string. 202 unsigned ArgIdx = 1; 203 204 while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) { 205 if (Str[SpecPos + 1] == '%') { 206 SpecPos += 2; 207 continue; 208 } 209 auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos); 210 if (SpecEnd == StringRef::npos) 211 return; 212 auto Spec = Str.slice(SpecPos, SpecEnd + 1); 213 ArgIdx += Spec.count('*'); 214 if (Str[SpecEnd] == 's') { 215 BV.set(ArgIdx); 216 } 217 SpecPos = SpecEnd + 1; 218 ++ArgIdx; 219 } 220 } 221 222 Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, 223 ArrayRef<Value *> Args) { 224 auto NumOps = Args.size(); 225 assert(NumOps >= 1); 226 227 auto Fmt = Args[0]; 228 SparseBitVector<8> SpecIsCString; 229 locateCStrings(SpecIsCString, Fmt); 230 231 auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0)); 232 Desc = appendString(Builder, Desc, Fmt, NumOps == 1); 233 234 // FIXME: This invokes hostcall once for each argument. We can pack up to 235 // seven scalar printf arguments in a single hostcall. See the signature of 236 // callAppendArgs(). 237 for (unsigned int i = 1; i != NumOps; ++i) { 238 bool IsLast = i == NumOps - 1; 239 bool IsCString = SpecIsCString.test(i); 240 Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast); 241 } 242 243 return Builder.CreateTrunc(Desc, Builder.getInt32Ty()); 244 } 245