1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 using namespace llvm; 28 29 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 30 const DataLayout &DL) { 31 Type *Ty = Arg.getType(); 32 MaybeAlign ArgAlign; 33 if (Arg.hasByRefAttr()) { 34 Ty = Arg.getParamByRefType(); 35 ArgAlign = Arg.getParamAlign(); 36 } 37 38 if (!ArgAlign) 39 ArgAlign = DL.getABITypeAlign(Ty); 40 41 return std::make_pair(Ty, *ArgAlign); 42 } 43 44 namespace llvm { 45 46 static cl::opt<bool> DumpHSAMetadata( 47 "amdgpu-dump-hsa-metadata", 48 cl::desc("Dump AMDGPU HSA Metadata")); 49 static cl::opt<bool> VerifyHSAMetadata( 50 "amdgpu-verify-hsa-metadata", 51 cl::desc("Verify AMDGPU HSA Metadata")); 52 53 namespace AMDGPU { 54 namespace HSAMD { 55 56 //===----------------------------------------------------------------------===// 57 // HSAMetadataStreamerV2 58 //===----------------------------------------------------------------------===// 59 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 60 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 61 } 62 63 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 64 errs() << "AMDGPU HSA Metadata Parser Test: "; 65 66 HSAMD::Metadata FromHSAMetadataString; 67 if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) { 68 errs() << "FAIL\n"; 69 return; 70 } 71 72 std::string ToHSAMetadataString; 73 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 74 errs() << "FAIL\n"; 75 return; 76 } 77 78 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 79 << '\n'; 80 if (HSAMetadataString != ToHSAMetadataString) { 81 errs() << "Original input: " << HSAMetadataString << '\n' 82 << "Produced output: " << ToHSAMetadataString << '\n'; 83 } 84 } 85 86 AccessQualifier 87 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 88 if (AccQual.empty()) 89 return AccessQualifier::Unknown; 90 91 return StringSwitch<AccessQualifier>(AccQual) 92 .Case("read_only", AccessQualifier::ReadOnly) 93 .Case("write_only", AccessQualifier::WriteOnly) 94 .Case("read_write", AccessQualifier::ReadWrite) 95 .Default(AccessQualifier::Default); 96 } 97 98 AddressSpaceQualifier 99 MetadataStreamerV2::getAddressSpaceQualifier( 100 unsigned AddressSpace) const { 101 switch (AddressSpace) { 102 case AMDGPUAS::PRIVATE_ADDRESS: 103 return AddressSpaceQualifier::Private; 104 case AMDGPUAS::GLOBAL_ADDRESS: 105 return AddressSpaceQualifier::Global; 106 case AMDGPUAS::CONSTANT_ADDRESS: 107 return AddressSpaceQualifier::Constant; 108 case AMDGPUAS::LOCAL_ADDRESS: 109 return AddressSpaceQualifier::Local; 110 case AMDGPUAS::FLAT_ADDRESS: 111 return AddressSpaceQualifier::Generic; 112 case AMDGPUAS::REGION_ADDRESS: 113 return AddressSpaceQualifier::Region; 114 default: 115 return AddressSpaceQualifier::Unknown; 116 } 117 } 118 119 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 120 StringRef BaseTypeName) const { 121 if (TypeQual.find("pipe") != StringRef::npos) 122 return ValueKind::Pipe; 123 124 return StringSwitch<ValueKind>(BaseTypeName) 125 .Case("image1d_t", ValueKind::Image) 126 .Case("image1d_array_t", ValueKind::Image) 127 .Case("image1d_buffer_t", ValueKind::Image) 128 .Case("image2d_t", ValueKind::Image) 129 .Case("image2d_array_t", ValueKind::Image) 130 .Case("image2d_array_depth_t", ValueKind::Image) 131 .Case("image2d_array_msaa_t", ValueKind::Image) 132 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 133 .Case("image2d_depth_t", ValueKind::Image) 134 .Case("image2d_msaa_t", ValueKind::Image) 135 .Case("image2d_msaa_depth_t", ValueKind::Image) 136 .Case("image3d_t", ValueKind::Image) 137 .Case("sampler_t", ValueKind::Sampler) 138 .Case("queue_t", ValueKind::Queue) 139 .Default(isa<PointerType>(Ty) ? 140 (Ty->getPointerAddressSpace() == 141 AMDGPUAS::LOCAL_ADDRESS ? 142 ValueKind::DynamicSharedPointer : 143 ValueKind::GlobalBuffer) : 144 ValueKind::ByValue); 145 } 146 147 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 148 switch (Ty->getTypeID()) { 149 case Type::IntegerTyID: { 150 if (!Signed) 151 return (Twine('u') + getTypeName(Ty, true)).str(); 152 153 auto BitWidth = Ty->getIntegerBitWidth(); 154 switch (BitWidth) { 155 case 8: 156 return "char"; 157 case 16: 158 return "short"; 159 case 32: 160 return "int"; 161 case 64: 162 return "long"; 163 default: 164 return (Twine('i') + Twine(BitWidth)).str(); 165 } 166 } 167 case Type::HalfTyID: 168 return "half"; 169 case Type::FloatTyID: 170 return "float"; 171 case Type::DoubleTyID: 172 return "double"; 173 case Type::FixedVectorTyID: { 174 auto VecTy = cast<FixedVectorType>(Ty); 175 auto ElTy = VecTy->getElementType(); 176 auto NumElements = VecTy->getNumElements(); 177 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 178 } 179 default: 180 return "unknown"; 181 } 182 } 183 184 std::vector<uint32_t> 185 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 186 std::vector<uint32_t> Dims; 187 if (Node->getNumOperands() != 3) 188 return Dims; 189 190 for (auto &Op : Node->operands()) 191 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 192 return Dims; 193 } 194 195 Kernel::CodeProps::Metadata 196 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 197 const SIProgramInfo &ProgramInfo) const { 198 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 199 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 200 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 201 const Function &F = MF.getFunction(); 202 203 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 204 F.getCallingConv() == CallingConv::SPIR_KERNEL); 205 206 Align MaxKernArgAlign; 207 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 208 MaxKernArgAlign); 209 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 210 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 211 HSACodeProps.mKernargSegmentAlign = 212 std::max(MaxKernArgAlign, Align(4)).value(); 213 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 214 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 215 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 216 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 217 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 218 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 219 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 220 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 221 222 return HSACodeProps; 223 } 224 225 Kernel::DebugProps::Metadata 226 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 227 const SIProgramInfo &ProgramInfo) const { 228 return HSAMD::Kernel::DebugProps::Metadata(); 229 } 230 231 void MetadataStreamerV2::emitVersion() { 232 auto &Version = HSAMetadata.mVersion; 233 234 Version.push_back(VersionMajor); 235 Version.push_back(VersionMinor); 236 } 237 238 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 239 auto &Printf = HSAMetadata.mPrintf; 240 241 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 242 if (!Node) 243 return; 244 245 for (auto Op : Node->operands()) 246 if (Op->getNumOperands()) 247 Printf.push_back( 248 std::string(cast<MDString>(Op->getOperand(0))->getString())); 249 } 250 251 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 252 auto &Kernel = HSAMetadata.mKernels.back(); 253 254 // TODO: What about other languages? 255 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 256 if (!Node || !Node->getNumOperands()) 257 return; 258 auto Op0 = Node->getOperand(0); 259 if (Op0->getNumOperands() <= 1) 260 return; 261 262 Kernel.mLanguage = "OpenCL C"; 263 Kernel.mLanguageVersion.push_back( 264 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 265 Kernel.mLanguageVersion.push_back( 266 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 267 } 268 269 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 270 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 271 272 if (auto Node = Func.getMetadata("reqd_work_group_size")) 273 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 274 if (auto Node = Func.getMetadata("work_group_size_hint")) 275 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 276 if (auto Node = Func.getMetadata("vec_type_hint")) { 277 Attrs.mVecTypeHint = getTypeName( 278 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 279 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 280 } 281 if (Func.hasFnAttribute("runtime-handle")) { 282 Attrs.mRuntimeHandle = 283 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 284 } 285 } 286 287 void MetadataStreamerV2::emitKernelArgs(const Function &Func) { 288 for (auto &Arg : Func.args()) 289 emitKernelArg(Arg); 290 291 emitHiddenKernelArgs(Func); 292 } 293 294 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 295 auto Func = Arg.getParent(); 296 auto ArgNo = Arg.getArgNo(); 297 const MDNode *Node; 298 299 StringRef Name; 300 Node = Func->getMetadata("kernel_arg_name"); 301 if (Node && ArgNo < Node->getNumOperands()) 302 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 303 else if (Arg.hasName()) 304 Name = Arg.getName(); 305 306 StringRef TypeName; 307 Node = Func->getMetadata("kernel_arg_type"); 308 if (Node && ArgNo < Node->getNumOperands()) 309 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 310 311 StringRef BaseTypeName; 312 Node = Func->getMetadata("kernel_arg_base_type"); 313 if (Node && ArgNo < Node->getNumOperands()) 314 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 315 316 StringRef AccQual; 317 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 318 Arg.hasNoAliasAttr()) { 319 AccQual = "read_only"; 320 } else { 321 Node = Func->getMetadata("kernel_arg_access_qual"); 322 if (Node && ArgNo < Node->getNumOperands()) 323 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 324 } 325 326 StringRef TypeQual; 327 Node = Func->getMetadata("kernel_arg_type_qual"); 328 if (Node && ArgNo < Node->getNumOperands()) 329 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 330 331 const DataLayout &DL = Func->getParent()->getDataLayout(); 332 333 MaybeAlign PointeeAlign; 334 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) { 335 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 336 // FIXME: Should report this for all address spaces 337 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(), 338 PtrTy->getElementType()); 339 } 340 } 341 342 Type *ArgTy; 343 Align ArgAlign; 344 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 345 346 emitKernelArg(DL, ArgTy, ArgAlign, 347 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name, 348 TypeName, BaseTypeName, AccQual, TypeQual); 349 } 350 351 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 352 Align Alignment, ValueKind ValueKind, 353 MaybeAlign PointeeAlign, StringRef Name, 354 StringRef TypeName, 355 StringRef BaseTypeName, 356 StringRef AccQual, StringRef TypeQual) { 357 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 358 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 359 360 Arg.mName = std::string(Name); 361 Arg.mTypeName = std::string(TypeName); 362 Arg.mSize = DL.getTypeAllocSize(Ty); 363 Arg.mAlign = Alignment.value(); 364 Arg.mValueKind = ValueKind; 365 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; 366 367 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 368 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 369 370 Arg.mAccQual = getAccessQualifier(AccQual); 371 372 // TODO: Emit Arg.mActualAccQual. 373 374 SmallVector<StringRef, 1> SplitTypeQuals; 375 TypeQual.split(SplitTypeQuals, " ", -1, false); 376 for (StringRef Key : SplitTypeQuals) { 377 auto P = StringSwitch<bool*>(Key) 378 .Case("const", &Arg.mIsConst) 379 .Case("restrict", &Arg.mIsRestrict) 380 .Case("volatile", &Arg.mIsVolatile) 381 .Case("pipe", &Arg.mIsPipe) 382 .Default(nullptr); 383 if (P) 384 *P = true; 385 } 386 } 387 388 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { 389 int HiddenArgNumBytes = 390 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 391 392 if (!HiddenArgNumBytes) 393 return; 394 395 auto &DL = Func.getParent()->getDataLayout(); 396 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 397 398 if (HiddenArgNumBytes >= 8) 399 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX); 400 if (HiddenArgNumBytes >= 16) 401 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY); 402 if (HiddenArgNumBytes >= 24) 403 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ); 404 405 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 406 AMDGPUAS::GLOBAL_ADDRESS); 407 408 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 409 // "none" argument. 410 if (HiddenArgNumBytes >= 32) { 411 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 412 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); 413 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 414 // The printf runtime binding pass should have ensured that hostcall and 415 // printf are not used in the same module. 416 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 417 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); 418 } else 419 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 420 } 421 422 // Emit "default queue" and "completion action" arguments if enqueue kernel is 423 // used, otherwise emit dummy "none" arguments. 424 if (HiddenArgNumBytes >= 48) { 425 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 426 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); 427 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); 428 } else { 429 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 430 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 431 } 432 } 433 434 // Emit the pointer argument for multi-grid object. 435 if (HiddenArgNumBytes >= 56) 436 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); 437 } 438 439 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 440 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 441 } 442 443 void MetadataStreamerV2::begin(const Module &Mod) { 444 emitVersion(); 445 emitPrintf(Mod); 446 } 447 448 void MetadataStreamerV2::end() { 449 std::string HSAMetadataString; 450 if (toString(HSAMetadata, HSAMetadataString)) 451 return; 452 453 if (DumpHSAMetadata) 454 dump(HSAMetadataString); 455 if (VerifyHSAMetadata) 456 verify(HSAMetadataString); 457 } 458 459 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 460 const SIProgramInfo &ProgramInfo) { 461 auto &Func = MF.getFunction(); 462 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 463 return; 464 465 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 466 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 467 468 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 469 auto &Kernel = HSAMetadata.mKernels.back(); 470 471 Kernel.mName = std::string(Func.getName()); 472 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 473 emitKernelLanguage(Func); 474 emitKernelAttrs(Func); 475 emitKernelArgs(Func); 476 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 477 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 478 } 479 480 //===----------------------------------------------------------------------===// 481 // HSAMetadataStreamerV3 482 //===----------------------------------------------------------------------===// 483 484 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 485 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 486 } 487 488 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 489 errs() << "AMDGPU HSA Metadata Parser Test: "; 490 491 msgpack::Document FromHSAMetadataString; 492 493 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 494 errs() << "FAIL\n"; 495 return; 496 } 497 498 std::string ToHSAMetadataString; 499 raw_string_ostream StrOS(ToHSAMetadataString); 500 FromHSAMetadataString.toYAML(StrOS); 501 502 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 503 if (HSAMetadataString != ToHSAMetadataString) { 504 errs() << "Original input: " << HSAMetadataString << '\n' 505 << "Produced output: " << StrOS.str() << '\n'; 506 } 507 } 508 509 Optional<StringRef> 510 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 511 return StringSwitch<Optional<StringRef>>(AccQual) 512 .Case("read_only", StringRef("read_only")) 513 .Case("write_only", StringRef("write_only")) 514 .Case("read_write", StringRef("read_write")) 515 .Default(None); 516 } 517 518 Optional<StringRef> 519 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 520 switch (AddressSpace) { 521 case AMDGPUAS::PRIVATE_ADDRESS: 522 return StringRef("private"); 523 case AMDGPUAS::GLOBAL_ADDRESS: 524 return StringRef("global"); 525 case AMDGPUAS::CONSTANT_ADDRESS: 526 return StringRef("constant"); 527 case AMDGPUAS::LOCAL_ADDRESS: 528 return StringRef("local"); 529 case AMDGPUAS::FLAT_ADDRESS: 530 return StringRef("generic"); 531 case AMDGPUAS::REGION_ADDRESS: 532 return StringRef("region"); 533 default: 534 return None; 535 } 536 } 537 538 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 539 StringRef BaseTypeName) const { 540 if (TypeQual.find("pipe") != StringRef::npos) 541 return "pipe"; 542 543 return StringSwitch<StringRef>(BaseTypeName) 544 .Case("image1d_t", "image") 545 .Case("image1d_array_t", "image") 546 .Case("image1d_buffer_t", "image") 547 .Case("image2d_t", "image") 548 .Case("image2d_array_t", "image") 549 .Case("image2d_array_depth_t", "image") 550 .Case("image2d_array_msaa_t", "image") 551 .Case("image2d_array_msaa_depth_t", "image") 552 .Case("image2d_depth_t", "image") 553 .Case("image2d_msaa_t", "image") 554 .Case("image2d_msaa_depth_t", "image") 555 .Case("image3d_t", "image") 556 .Case("sampler_t", "sampler") 557 .Case("queue_t", "queue") 558 .Default(isa<PointerType>(Ty) 559 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 560 ? "dynamic_shared_pointer" 561 : "global_buffer") 562 : "by_value"); 563 } 564 565 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 566 switch (Ty->getTypeID()) { 567 case Type::IntegerTyID: { 568 if (!Signed) 569 return (Twine('u') + getTypeName(Ty, true)).str(); 570 571 auto BitWidth = Ty->getIntegerBitWidth(); 572 switch (BitWidth) { 573 case 8: 574 return "char"; 575 case 16: 576 return "short"; 577 case 32: 578 return "int"; 579 case 64: 580 return "long"; 581 default: 582 return (Twine('i') + Twine(BitWidth)).str(); 583 } 584 } 585 case Type::HalfTyID: 586 return "half"; 587 case Type::FloatTyID: 588 return "float"; 589 case Type::DoubleTyID: 590 return "double"; 591 case Type::FixedVectorTyID: { 592 auto VecTy = cast<FixedVectorType>(Ty); 593 auto ElTy = VecTy->getElementType(); 594 auto NumElements = VecTy->getNumElements(); 595 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 596 } 597 default: 598 return "unknown"; 599 } 600 } 601 602 msgpack::ArrayDocNode 603 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 604 auto Dims = HSAMetadataDoc->getArrayNode(); 605 if (Node->getNumOperands() != 3) 606 return Dims; 607 608 for (auto &Op : Node->operands()) 609 Dims.push_back(Dims.getDocument()->getNode( 610 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 611 return Dims; 612 } 613 614 void MetadataStreamerV3::emitVersion() { 615 auto Version = HSAMetadataDoc->getArrayNode(); 616 Version.push_back(Version.getDocument()->getNode(VersionMajor)); 617 Version.push_back(Version.getDocument()->getNode(VersionMinor)); 618 getRootMetadata("amdhsa.version") = Version; 619 } 620 621 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 622 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 623 if (!Node) 624 return; 625 626 auto Printf = HSAMetadataDoc->getArrayNode(); 627 for (auto Op : Node->operands()) 628 if (Op->getNumOperands()) 629 Printf.push_back(Printf.getDocument()->getNode( 630 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 631 getRootMetadata("amdhsa.printf") = Printf; 632 } 633 634 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 635 msgpack::MapDocNode Kern) { 636 // TODO: What about other languages? 637 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 638 if (!Node || !Node->getNumOperands()) 639 return; 640 auto Op0 = Node->getOperand(0); 641 if (Op0->getNumOperands() <= 1) 642 return; 643 644 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 645 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 646 LanguageVersion.push_back(Kern.getDocument()->getNode( 647 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 648 LanguageVersion.push_back(Kern.getDocument()->getNode( 649 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 650 Kern[".language_version"] = LanguageVersion; 651 } 652 653 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 654 msgpack::MapDocNode Kern) { 655 656 if (auto Node = Func.getMetadata("reqd_work_group_size")) 657 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 658 if (auto Node = Func.getMetadata("work_group_size_hint")) 659 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 660 if (auto Node = Func.getMetadata("vec_type_hint")) { 661 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 662 getTypeName( 663 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 664 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 665 /*Copy=*/true); 666 } 667 if (Func.hasFnAttribute("runtime-handle")) { 668 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 669 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 670 /*Copy=*/true); 671 } 672 } 673 674 void MetadataStreamerV3::emitKernelArgs(const Function &Func, 675 msgpack::MapDocNode Kern) { 676 unsigned Offset = 0; 677 auto Args = HSAMetadataDoc->getArrayNode(); 678 for (auto &Arg : Func.args()) 679 emitKernelArg(Arg, Offset, Args); 680 681 emitHiddenKernelArgs(Func, Offset, Args); 682 683 Kern[".args"] = Args; 684 } 685 686 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 687 msgpack::ArrayDocNode Args) { 688 auto Func = Arg.getParent(); 689 auto ArgNo = Arg.getArgNo(); 690 const MDNode *Node; 691 692 StringRef Name; 693 Node = Func->getMetadata("kernel_arg_name"); 694 if (Node && ArgNo < Node->getNumOperands()) 695 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 696 else if (Arg.hasName()) 697 Name = Arg.getName(); 698 699 StringRef TypeName; 700 Node = Func->getMetadata("kernel_arg_type"); 701 if (Node && ArgNo < Node->getNumOperands()) 702 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 703 704 StringRef BaseTypeName; 705 Node = Func->getMetadata("kernel_arg_base_type"); 706 if (Node && ArgNo < Node->getNumOperands()) 707 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 708 709 StringRef AccQual; 710 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 711 Arg.hasNoAliasAttr()) { 712 AccQual = "read_only"; 713 } else { 714 Node = Func->getMetadata("kernel_arg_access_qual"); 715 if (Node && ArgNo < Node->getNumOperands()) 716 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 717 } 718 719 StringRef TypeQual; 720 Node = Func->getMetadata("kernel_arg_type_qual"); 721 if (Node && ArgNo < Node->getNumOperands()) 722 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 723 724 const DataLayout &DL = Func->getParent()->getDataLayout(); 725 726 MaybeAlign PointeeAlign; 727 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 728 729 // FIXME: Need to distinguish in memory alignment from pointer alignment. 730 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 731 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 732 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(), 733 PtrTy->getElementType()); 734 } 735 } 736 737 // There's no distinction between byval aggregates and raw aggregates. 738 Type *ArgTy; 739 Align ArgAlign; 740 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 741 742 emitKernelArg(DL, ArgTy, ArgAlign, 743 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 744 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 745 } 746 747 void MetadataStreamerV3::emitKernelArg( 748 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 749 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 750 StringRef Name, StringRef TypeName, StringRef BaseTypeName, 751 StringRef AccQual, StringRef TypeQual) { 752 auto Arg = Args.getDocument()->getMapNode(); 753 754 if (!Name.empty()) 755 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 756 if (!TypeName.empty()) 757 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 758 auto Size = DL.getTypeAllocSize(Ty); 759 Arg[".size"] = Arg.getDocument()->getNode(Size); 760 Offset = alignTo(Offset, Alignment); 761 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 762 Offset += Size; 763 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 764 if (PointeeAlign) 765 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 766 767 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 768 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 769 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); 770 771 if (auto AQ = getAccessQualifier(AccQual)) 772 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 773 774 // TODO: Emit Arg[".actual_access"]. 775 776 SmallVector<StringRef, 1> SplitTypeQuals; 777 TypeQual.split(SplitTypeQuals, " ", -1, false); 778 for (StringRef Key : SplitTypeQuals) { 779 if (Key == "const") 780 Arg[".is_const"] = Arg.getDocument()->getNode(true); 781 else if (Key == "restrict") 782 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 783 else if (Key == "volatile") 784 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 785 else if (Key == "pipe") 786 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 787 } 788 789 Args.push_back(Arg); 790 } 791 792 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, 793 unsigned &Offset, 794 msgpack::ArrayDocNode Args) { 795 int HiddenArgNumBytes = 796 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 797 798 if (!HiddenArgNumBytes) 799 return; 800 801 auto &DL = Func.getParent()->getDataLayout(); 802 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 803 804 if (HiddenArgNumBytes >= 8) 805 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 806 Args); 807 if (HiddenArgNumBytes >= 16) 808 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 809 Args); 810 if (HiddenArgNumBytes >= 24) 811 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 812 Args); 813 814 auto Int8PtrTy = 815 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 816 817 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 818 // "none" argument. 819 if (HiddenArgNumBytes >= 32) { 820 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 821 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 822 Args); 823 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 824 // The printf runtime binding pass should have ensured that hostcall and 825 // printf are not used in the same module. 826 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 827 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 828 Args); 829 } else 830 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 831 } 832 833 // Emit "default queue" and "completion action" arguments if enqueue kernel is 834 // used, otherwise emit dummy "none" arguments. 835 if (HiddenArgNumBytes >= 48) { 836 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 837 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 838 Args); 839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 840 Args); 841 } else { 842 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 843 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 844 } 845 } 846 847 // Emit the pointer argument for multi-grid object. 848 if (HiddenArgNumBytes >= 56) 849 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 850 Args); 851 } 852 853 msgpack::MapDocNode 854 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 855 const SIProgramInfo &ProgramInfo) const { 856 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 857 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 858 const Function &F = MF.getFunction(); 859 860 auto Kern = HSAMetadataDoc->getMapNode(); 861 862 Align MaxKernArgAlign; 863 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 864 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 865 Kern[".group_segment_fixed_size"] = 866 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 867 Kern[".private_segment_fixed_size"] = 868 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 869 Kern[".kernarg_segment_align"] = 870 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 871 Kern[".wavefront_size"] = 872 Kern.getDocument()->getNode(STM.getWavefrontSize()); 873 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 874 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 875 Kern[".max_flat_workgroup_size"] = 876 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 877 Kern[".sgpr_spill_count"] = 878 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 879 Kern[".vgpr_spill_count"] = 880 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 881 882 return Kern; 883 } 884 885 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 886 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 887 } 888 889 void MetadataStreamerV3::begin(const Module &Mod) { 890 emitVersion(); 891 emitPrintf(Mod); 892 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 893 } 894 895 void MetadataStreamerV3::end() { 896 std::string HSAMetadataString; 897 raw_string_ostream StrOS(HSAMetadataString); 898 HSAMetadataDoc->toYAML(StrOS); 899 900 if (DumpHSAMetadata) 901 dump(StrOS.str()); 902 if (VerifyHSAMetadata) 903 verify(StrOS.str()); 904 } 905 906 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 907 const SIProgramInfo &ProgramInfo) { 908 auto &Func = MF.getFunction(); 909 auto Kern = getHSAKernelProps(MF, ProgramInfo); 910 911 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 912 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 913 914 auto Kernels = 915 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 916 917 { 918 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 919 Kern[".symbol"] = Kern.getDocument()->getNode( 920 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 921 emitKernelLanguage(Func, Kern); 922 emitKernelAttrs(Func, Kern); 923 emitKernelArgs(Func, Kern); 924 } 925 926 Kernels.push_back(Kern); 927 } 928 929 } // end namespace HSAMD 930 } // end namespace AMDGPU 931 } // end namespace llvm 932