1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // \file 9 // 10 // The pass bind printfs to a kernel arg pointer that will be bound to a buffer 11 // later by the runtime. 12 // 13 // This pass traverses the functions in the module and converts 14 // each call to printf to a sequence of operations that 15 // store the following into the printf buffer: 16 // - format string (passed as a module's metadata unique ID) 17 // - bitwise copies of printf arguments 18 // The backend passes will need to store metadata in the kernel 19 //===----------------------------------------------------------------------===// 20 21 #include "AMDGPU.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/ADT/Triple.h" 25 #include "llvm/Analysis/InstructionSimplify.h" 26 #include "llvm/Analysis/TargetLibraryInfo.h" 27 #include "llvm/CodeGen/Passes.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/GlobalVariable.h" 32 #include "llvm/IR/IRBuilder.h" 33 #include "llvm/IR/Instructions.h" 34 #include "llvm/IR/Module.h" 35 #include "llvm/IR/PassManager.h" 36 #include "llvm/IR/Type.h" 37 #include "llvm/InitializePasses.h" 38 #include "llvm/Support/CommandLine.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 43 using namespace llvm; 44 45 #define DEBUG_TYPE "printfToRuntime" 46 #define DWORD_ALIGN 4 47 48 namespace { 49 class AMDGPUPrintfRuntimeBinding final : public ModulePass { 50 51 public: 52 static char ID; 53 54 explicit AMDGPUPrintfRuntimeBinding(); 55 56 private: 57 bool runOnModule(Module &M) override; 58 59 void getAnalysisUsage(AnalysisUsage &AU) const override { 60 AU.addRequired<TargetLibraryInfoWrapperPass>(); 61 AU.addRequired<DominatorTreeWrapperPass>(); 62 } 63 }; 64 65 class AMDGPUPrintfRuntimeBindingImpl { 66 public: 67 AMDGPUPrintfRuntimeBindingImpl( 68 function_ref<const DominatorTree &(Function &)> GetDT, 69 function_ref<const TargetLibraryInfo &(Function &)> GetTLI) 70 : GetDT(GetDT), GetTLI(GetTLI) {} 71 bool run(Module &M); 72 73 private: 74 void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, 75 StringRef fmt, size_t num_ops) const; 76 77 bool shouldPrintAsStr(char Specifier, Type *OpType) const; 78 bool lowerPrintfForGpu(Module &M); 79 80 Value *simplify(Instruction *I, const TargetLibraryInfo *TLI, 81 const DominatorTree *DT) { 82 return SimplifyInstruction(I, {*TD, TLI, DT}); 83 } 84 85 const DataLayout *TD; 86 function_ref<const DominatorTree &(Function &)> GetDT; 87 function_ref<const TargetLibraryInfo &(Function &)> GetTLI; 88 SmallVector<CallInst *, 32> Printfs; 89 }; 90 } // namespace 91 92 char AMDGPUPrintfRuntimeBinding::ID = 0; 93 94 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, 95 "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", 96 false, false) 97 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 98 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 99 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", 100 "AMDGPU Printf lowering", false, false) 101 102 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; 103 104 namespace llvm { 105 ModulePass *createAMDGPUPrintfRuntimeBinding() { 106 return new AMDGPUPrintfRuntimeBinding(); 107 } 108 } // namespace llvm 109 110 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) { 111 initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); 112 } 113 114 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers( 115 SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt, 116 size_t NumOps) const { 117 // not all format characters are collected. 118 // At this time the format characters of interest 119 // are %p and %s, which use to know if we 120 // are either storing a literal string or a 121 // pointer to the printf buffer. 122 static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; 123 size_t CurFmtSpecifierIdx = 0; 124 size_t PrevFmtSpecifierIdx = 0; 125 126 while ((CurFmtSpecifierIdx = Fmt.find_first_of( 127 ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { 128 bool ArgDump = false; 129 StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, 130 CurFmtSpecifierIdx - PrevFmtSpecifierIdx); 131 size_t pTag = CurFmt.find_last_of("%"); 132 if (pTag != StringRef::npos) { 133 ArgDump = true; 134 while (pTag && CurFmt[--pTag] == '%') { 135 ArgDump = !ArgDump; 136 } 137 } 138 139 if (ArgDump) 140 OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); 141 142 PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; 143 } 144 } 145 146 bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier, 147 Type *OpType) const { 148 if (Specifier != 's') 149 return false; 150 const PointerType *PT = dyn_cast<PointerType>(OpType); 151 if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) 152 return false; 153 Type *ElemType = PT->getContainedType(0); 154 if (ElemType->getTypeID() != Type::IntegerTyID) 155 return false; 156 IntegerType *ElemIType = cast<IntegerType>(ElemType); 157 return ElemIType->getBitWidth() == 8; 158 } 159 160 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) { 161 LLVMContext &Ctx = M.getContext(); 162 IRBuilder<> Builder(Ctx); 163 Type *I32Ty = Type::getInt32Ty(Ctx); 164 unsigned UniqID = 0; 165 // NB: This is important for this string size to be divizable by 4 166 const char NonLiteralStr[4] = "???"; 167 168 for (auto CI : Printfs) { 169 unsigned NumOps = CI->getNumArgOperands(); 170 171 SmallString<16> OpConvSpecifiers; 172 Value *Op = CI->getArgOperand(0); 173 174 if (auto LI = dyn_cast<LoadInst>(Op)) { 175 Op = LI->getPointerOperand(); 176 for (auto Use : Op->users()) { 177 if (auto SI = dyn_cast<StoreInst>(Use)) { 178 Op = SI->getValueOperand(); 179 break; 180 } 181 } 182 } 183 184 if (auto I = dyn_cast<Instruction>(Op)) { 185 Value *Op_simplified = 186 simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction())); 187 if (Op_simplified) 188 Op = Op_simplified; 189 } 190 191 ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op); 192 193 if (ConstExpr) { 194 GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 195 196 StringRef Str("unknown"); 197 if (GVar && GVar->hasInitializer()) { 198 auto Init = GVar->getInitializer(); 199 if (auto CA = dyn_cast<ConstantDataArray>(Init)) { 200 if (CA->isString()) 201 Str = CA->getAsCString(); 202 } else if (isa<ConstantAggregateZero>(Init)) { 203 Str = ""; 204 } 205 // 206 // we need this call to ascertain 207 // that we are printing a string 208 // or a pointer. It takes out the 209 // specifiers and fills up the first 210 // arg 211 getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); 212 } 213 // Add metadata for the string 214 std::string AStreamHolder; 215 raw_string_ostream Sizes(AStreamHolder); 216 int Sum = DWORD_ALIGN; 217 Sizes << CI->getNumArgOperands() - 1; 218 Sizes << ':'; 219 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && 220 ArgCount <= OpConvSpecifiers.size(); 221 ArgCount++) { 222 Value *Arg = CI->getArgOperand(ArgCount); 223 Type *ArgType = Arg->getType(); 224 unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); 225 ArgSize = ArgSize / 8; 226 // 227 // ArgSize by design should be a multiple of DWORD_ALIGN, 228 // expand the arguments that do not follow this rule. 229 // 230 if (ArgSize % DWORD_ALIGN != 0) { 231 llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); 232 auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType); 233 int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1; 234 if (LLVMVecType && NumElem > 1) 235 ResType = llvm::FixedVectorType::get(ResType, NumElem); 236 Builder.SetInsertPoint(CI); 237 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 238 if (OpConvSpecifiers[ArgCount - 1] == 'x' || 239 OpConvSpecifiers[ArgCount - 1] == 'X' || 240 OpConvSpecifiers[ArgCount - 1] == 'u' || 241 OpConvSpecifiers[ArgCount - 1] == 'o') 242 Arg = Builder.CreateZExt(Arg, ResType); 243 else 244 Arg = Builder.CreateSExt(Arg, ResType); 245 ArgType = Arg->getType(); 246 ArgSize = TD->getTypeAllocSizeInBits(ArgType); 247 ArgSize = ArgSize / 8; 248 CI->setOperand(ArgCount, Arg); 249 } 250 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 251 ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg); 252 if (FpCons) 253 ArgSize = 4; 254 else { 255 FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); 256 if (FpExt && FpExt->getType()->isDoubleTy() && 257 FpExt->getOperand(0)->getType()->isFloatTy()) 258 ArgSize = 4; 259 } 260 } 261 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { 262 if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { 263 GlobalVariable *GV = 264 dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 265 if (GV && GV->hasInitializer()) { 266 Constant *Init = GV->getInitializer(); 267 ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); 268 if (Init->isZeroValue() || CA->isString()) { 269 size_t SizeStr = Init->isZeroValue() 270 ? 1 271 : (strlen(CA->getAsCString().data()) + 1); 272 size_t Rem = SizeStr % DWORD_ALIGN; 273 size_t NSizeStr = 0; 274 LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr 275 << '\n'); 276 if (Rem) { 277 NSizeStr = SizeStr + (DWORD_ALIGN - Rem); 278 } else { 279 NSizeStr = SizeStr; 280 } 281 ArgSize = NSizeStr; 282 } 283 } else { 284 ArgSize = sizeof(NonLiteralStr); 285 } 286 } else { 287 ArgSize = sizeof(NonLiteralStr); 288 } 289 } 290 LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize 291 << " for type: " << *ArgType << '\n'); 292 Sizes << ArgSize << ':'; 293 Sum += ArgSize; 294 } 295 LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() 296 << '\n'); 297 for (size_t I = 0; I < Str.size(); ++I) { 298 // Rest of the C escape sequences (e.g. \') are handled correctly 299 // by the MDParser 300 switch (Str[I]) { 301 case '\a': 302 Sizes << "\\a"; 303 break; 304 case '\b': 305 Sizes << "\\b"; 306 break; 307 case '\f': 308 Sizes << "\\f"; 309 break; 310 case '\n': 311 Sizes << "\\n"; 312 break; 313 case '\r': 314 Sizes << "\\r"; 315 break; 316 case '\v': 317 Sizes << "\\v"; 318 break; 319 case ':': 320 // ':' cannot be scanned by Flex, as it is defined as a delimiter 321 // Replace it with it's octal representation \72 322 Sizes << "\\72"; 323 break; 324 default: 325 Sizes << Str[I]; 326 break; 327 } 328 } 329 330 // Insert the printf_alloc call 331 Builder.SetInsertPoint(CI); 332 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 333 334 AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, 335 Attribute::NoUnwind); 336 337 Type *SizetTy = Type::getInt32Ty(Ctx); 338 339 Type *Tys_alloc[1] = {SizetTy}; 340 Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); 341 FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); 342 FunctionCallee PrintfAllocFn = 343 M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); 344 345 LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); 346 std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); 347 MDString *fmtStrArray = MDString::get(Ctx, fmtstr); 348 349 // Instead of creating global variables, the 350 // printf format strings are extracted 351 // and passed as metadata. This avoids 352 // polluting llvm's symbol tables in this module. 353 // Metadata is going to be extracted 354 // by the backend passes and inserted 355 // into the OpenCL binary as appropriate. 356 StringRef amd("llvm.printf.fmts"); 357 NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); 358 MDNode *myMD = MDNode::get(Ctx, fmtStrArray); 359 metaD->addOperand(myMD); 360 Value *sumC = ConstantInt::get(SizetTy, Sum, false); 361 SmallVector<Value *, 1> alloc_args; 362 alloc_args.push_back(sumC); 363 CallInst *pcall = 364 CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); 365 366 // 367 // Insert code to split basicblock with a 368 // piece of hammock code. 369 // basicblock splits after buffer overflow check 370 // 371 ConstantPointerNull *zeroIntPtr = 372 ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); 373 ICmpInst *cmp = 374 dyn_cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); 375 if (!CI->use_empty()) { 376 Value *result = 377 Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); 378 CI->replaceAllUsesWith(result); 379 } 380 SplitBlock(CI->getParent(), cmp); 381 Instruction *Brnch = 382 SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); 383 384 Builder.SetInsertPoint(Brnch); 385 386 // store unique printf id in the buffer 387 // 388 SmallVector<Value *, 1> ZeroIdxList; 389 ConstantInt *zeroInt = 390 ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); 391 ZeroIdxList.push_back(zeroInt); 392 393 GetElementPtrInst *BufferIdx = GetElementPtrInst::Create( 394 nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch); 395 396 Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); 397 Value *id_gep_cast = 398 new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); 399 400 new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch); 401 402 SmallVector<Value *, 2> FourthIdxList; 403 ConstantInt *fourInt = 404 ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10)); 405 406 FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id 407 // the following GEP is the buffer pointer 408 BufferIdx = GetElementPtrInst::Create(nullptr, pcall, FourthIdxList, 409 "PrintBuffGep", Brnch); 410 411 Type *Int32Ty = Type::getInt32Ty(Ctx); 412 Type *Int64Ty = Type::getInt64Ty(Ctx); 413 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && 414 ArgCount <= OpConvSpecifiers.size(); 415 ArgCount++) { 416 Value *Arg = CI->getArgOperand(ArgCount); 417 Type *ArgType = Arg->getType(); 418 SmallVector<Value *, 32> WhatToStore; 419 if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) { 420 Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; 421 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 422 if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) { 423 APFloat Val(FpCons->getValueAPF()); 424 bool Lost = false; 425 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, 426 &Lost); 427 Arg = ConstantFP::get(Ctx, Val); 428 IType = Int32Ty; 429 } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) { 430 if (FpExt->getType()->isDoubleTy() && 431 FpExt->getOperand(0)->getType()->isFloatTy()) { 432 Arg = FpExt->getOperand(0); 433 IType = Int32Ty; 434 } 435 } 436 } 437 Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); 438 WhatToStore.push_back(Arg); 439 } else if (ArgType->getTypeID() == Type::PointerTyID) { 440 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { 441 const char *S = NonLiteralStr; 442 if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { 443 auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 444 if (GV && GV->hasInitializer()) { 445 Constant *Init = GV->getInitializer(); 446 ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); 447 if (Init->isZeroValue() || CA->isString()) { 448 S = Init->isZeroValue() ? "" : CA->getAsCString().data(); 449 } 450 } 451 } 452 size_t SizeStr = strlen(S) + 1; 453 size_t Rem = SizeStr % DWORD_ALIGN; 454 size_t NSizeStr = 0; 455 if (Rem) { 456 NSizeStr = SizeStr + (DWORD_ALIGN - Rem); 457 } else { 458 NSizeStr = SizeStr; 459 } 460 if (S[0]) { 461 char *MyNewStr = new char[NSizeStr](); 462 strcpy(MyNewStr, S); 463 int NumInts = NSizeStr / 4; 464 int CharC = 0; 465 while (NumInts) { 466 int ANum = *(int *)(MyNewStr + CharC); 467 CharC += 4; 468 NumInts--; 469 Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); 470 WhatToStore.push_back(ANumV); 471 } 472 delete[] MyNewStr; 473 } else { 474 // Empty string, give a hint to RT it is no NULL 475 Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); 476 WhatToStore.push_back(ANumV); 477 } 478 } else { 479 uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); 480 assert((Size == 32 || Size == 64) && "unsupported size"); 481 Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; 482 Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); 483 WhatToStore.push_back(Arg); 484 } 485 } else if (isa<FixedVectorType>(ArgType)) { 486 Type *IType = NULL; 487 uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements(); 488 uint32_t EleSize = ArgType->getScalarSizeInBits(); 489 uint32_t TotalSize = EleCount * EleSize; 490 if (EleCount == 3) { 491 ShuffleVectorInst *Shuffle = 492 new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2}); 493 Shuffle->insertBefore(Brnch); 494 Arg = Shuffle; 495 ArgType = Arg->getType(); 496 TotalSize += EleSize; 497 } 498 switch (EleSize) { 499 default: 500 EleCount = TotalSize / 64; 501 IType = Type::getInt64Ty(ArgType->getContext()); 502 break; 503 case 8: 504 if (EleCount >= 8) { 505 EleCount = TotalSize / 64; 506 IType = Type::getInt64Ty(ArgType->getContext()); 507 } else if (EleCount >= 3) { 508 EleCount = 1; 509 IType = Type::getInt32Ty(ArgType->getContext()); 510 } else { 511 EleCount = 1; 512 IType = Type::getInt16Ty(ArgType->getContext()); 513 } 514 break; 515 case 16: 516 if (EleCount >= 3) { 517 EleCount = TotalSize / 64; 518 IType = Type::getInt64Ty(ArgType->getContext()); 519 } else { 520 EleCount = 1; 521 IType = Type::getInt32Ty(ArgType->getContext()); 522 } 523 break; 524 } 525 if (EleCount > 1) { 526 IType = FixedVectorType::get(IType, EleCount); 527 } 528 Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); 529 WhatToStore.push_back(Arg); 530 } else { 531 WhatToStore.push_back(Arg); 532 } 533 for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { 534 Value *TheBtCast = WhatToStore[I]; 535 unsigned ArgSize = 536 TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; 537 SmallVector<Value *, 1> BuffOffset; 538 BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); 539 540 Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); 541 Value *CastedGEP = 542 new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); 543 StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); 544 LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" 545 << *StBuff << '\n'); 546 (void)StBuff; 547 if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) 548 break; 549 BufferIdx = GetElementPtrInst::Create(nullptr, BufferIdx, BuffOffset, 550 "PrintBuffNextPtr", Brnch); 551 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" 552 << *BufferIdx << '\n'); 553 } 554 } 555 } 556 } 557 558 // erase the printf calls 559 for (auto CI : Printfs) 560 CI->eraseFromParent(); 561 562 Printfs.clear(); 563 return true; 564 } 565 566 bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) { 567 Triple TT(M.getTargetTriple()); 568 if (TT.getArch() == Triple::r600) 569 return false; 570 571 auto PrintfFunction = M.getFunction("printf"); 572 if (!PrintfFunction) 573 return false; 574 575 for (auto &U : PrintfFunction->uses()) { 576 if (auto *CI = dyn_cast<CallInst>(U.getUser())) { 577 if (CI->isCallee(&U)) 578 Printfs.push_back(CI); 579 } 580 } 581 582 if (Printfs.empty()) 583 return false; 584 585 if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { 586 for (auto &U : HostcallFunction->uses()) { 587 if (auto *CI = dyn_cast<CallInst>(U.getUser())) { 588 M.getContext().emitError( 589 CI, "Cannot use both printf and hostcall in the same module"); 590 } 591 } 592 } 593 594 TD = &M.getDataLayout(); 595 596 return lowerPrintfForGpu(M); 597 } 598 599 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { 600 auto GetDT = [this](Function &F) -> DominatorTree & { 601 return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 602 }; 603 auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { 604 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 605 }; 606 607 return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M); 608 } 609 610 PreservedAnalyses 611 AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) { 612 FunctionAnalysisManager &FAM = 613 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 614 auto GetDT = [&FAM](Function &F) -> DominatorTree & { 615 return FAM.getResult<DominatorTreeAnalysis>(F); 616 }; 617 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 618 return FAM.getResult<TargetLibraryAnalysis>(F); 619 }; 620 bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M); 621 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 622 } 623