1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "llvm/IR/Module.h" 22 using namespace llvm; 23 24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 25 const DataLayout &DL) { 26 Type *Ty = Arg.getType(); 27 MaybeAlign ArgAlign; 28 if (Arg.hasByRefAttr()) { 29 Ty = Arg.getParamByRefType(); 30 ArgAlign = Arg.getParamAlign(); 31 } 32 33 if (!ArgAlign) 34 ArgAlign = DL.getABITypeAlign(Ty); 35 36 return std::make_pair(Ty, *ArgAlign); 37 } 38 39 namespace llvm { 40 41 static cl::opt<bool> DumpHSAMetadata( 42 "amdgpu-dump-hsa-metadata", 43 cl::desc("Dump AMDGPU HSA Metadata")); 44 static cl::opt<bool> VerifyHSAMetadata( 45 "amdgpu-verify-hsa-metadata", 46 cl::desc("Verify AMDGPU HSA Metadata")); 47 48 namespace AMDGPU { 49 namespace HSAMD { 50 51 //===----------------------------------------------------------------------===// 52 // HSAMetadataStreamerV2 53 //===----------------------------------------------------------------------===// 54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 56 } 57 58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 59 errs() << "AMDGPU HSA Metadata Parser Test: "; 60 61 HSAMD::Metadata FromHSAMetadataString; 62 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 63 errs() << "FAIL\n"; 64 return; 65 } 66 67 std::string ToHSAMetadataString; 68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 69 errs() << "FAIL\n"; 70 return; 71 } 72 73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 74 << '\n'; 75 if (HSAMetadataString != ToHSAMetadataString) { 76 errs() << "Original input: " << HSAMetadataString << '\n' 77 << "Produced output: " << ToHSAMetadataString << '\n'; 78 } 79 } 80 81 AccessQualifier 82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 83 if (AccQual.empty()) 84 return AccessQualifier::Unknown; 85 86 return StringSwitch<AccessQualifier>(AccQual) 87 .Case("read_only", AccessQualifier::ReadOnly) 88 .Case("write_only", AccessQualifier::WriteOnly) 89 .Case("read_write", AccessQualifier::ReadWrite) 90 .Default(AccessQualifier::Default); 91 } 92 93 AddressSpaceQualifier 94 MetadataStreamerV2::getAddressSpaceQualifier( 95 unsigned AddressSpace) const { 96 switch (AddressSpace) { 97 case AMDGPUAS::PRIVATE_ADDRESS: 98 return AddressSpaceQualifier::Private; 99 case AMDGPUAS::GLOBAL_ADDRESS: 100 return AddressSpaceQualifier::Global; 101 case AMDGPUAS::CONSTANT_ADDRESS: 102 return AddressSpaceQualifier::Constant; 103 case AMDGPUAS::LOCAL_ADDRESS: 104 return AddressSpaceQualifier::Local; 105 case AMDGPUAS::FLAT_ADDRESS: 106 return AddressSpaceQualifier::Generic; 107 case AMDGPUAS::REGION_ADDRESS: 108 return AddressSpaceQualifier::Region; 109 default: 110 return AddressSpaceQualifier::Unknown; 111 } 112 } 113 114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 115 StringRef BaseTypeName) const { 116 if (TypeQual.find("pipe") != StringRef::npos) 117 return ValueKind::Pipe; 118 119 return StringSwitch<ValueKind>(BaseTypeName) 120 .Case("image1d_t", ValueKind::Image) 121 .Case("image1d_array_t", ValueKind::Image) 122 .Case("image1d_buffer_t", ValueKind::Image) 123 .Case("image2d_t", ValueKind::Image) 124 .Case("image2d_array_t", ValueKind::Image) 125 .Case("image2d_array_depth_t", ValueKind::Image) 126 .Case("image2d_array_msaa_t", ValueKind::Image) 127 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 128 .Case("image2d_depth_t", ValueKind::Image) 129 .Case("image2d_msaa_t", ValueKind::Image) 130 .Case("image2d_msaa_depth_t", ValueKind::Image) 131 .Case("image3d_t", ValueKind::Image) 132 .Case("sampler_t", ValueKind::Sampler) 133 .Case("queue_t", ValueKind::Queue) 134 .Default(isa<PointerType>(Ty) ? 135 (Ty->getPointerAddressSpace() == 136 AMDGPUAS::LOCAL_ADDRESS ? 137 ValueKind::DynamicSharedPointer : 138 ValueKind::GlobalBuffer) : 139 ValueKind::ByValue); 140 } 141 142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 143 switch (Ty->getTypeID()) { 144 case Type::IntegerTyID: { 145 if (!Signed) 146 return (Twine('u') + getTypeName(Ty, true)).str(); 147 148 auto BitWidth = Ty->getIntegerBitWidth(); 149 switch (BitWidth) { 150 case 8: 151 return "char"; 152 case 16: 153 return "short"; 154 case 32: 155 return "int"; 156 case 64: 157 return "long"; 158 default: 159 return (Twine('i') + Twine(BitWidth)).str(); 160 } 161 } 162 case Type::HalfTyID: 163 return "half"; 164 case Type::FloatTyID: 165 return "float"; 166 case Type::DoubleTyID: 167 return "double"; 168 case Type::FixedVectorTyID: { 169 auto VecTy = cast<FixedVectorType>(Ty); 170 auto ElTy = VecTy->getElementType(); 171 auto NumElements = VecTy->getNumElements(); 172 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 173 } 174 default: 175 return "unknown"; 176 } 177 } 178 179 std::vector<uint32_t> 180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 181 std::vector<uint32_t> Dims; 182 if (Node->getNumOperands() != 3) 183 return Dims; 184 185 for (auto &Op : Node->operands()) 186 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 187 return Dims; 188 } 189 190 Kernel::CodeProps::Metadata 191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 192 const SIProgramInfo &ProgramInfo) const { 193 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 194 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 195 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 196 const Function &F = MF.getFunction(); 197 198 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 199 F.getCallingConv() == CallingConv::SPIR_KERNEL); 200 201 Align MaxKernArgAlign; 202 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 203 MaxKernArgAlign); 204 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 205 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 206 HSACodeProps.mKernargSegmentAlign = 207 std::max(MaxKernArgAlign, Align(4)).value(); 208 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 209 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 210 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 211 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 212 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 213 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 214 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 215 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 216 217 return HSACodeProps; 218 } 219 220 Kernel::DebugProps::Metadata 221 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 222 const SIProgramInfo &ProgramInfo) const { 223 return HSAMD::Kernel::DebugProps::Metadata(); 224 } 225 226 void MetadataStreamerV2::emitVersion() { 227 auto &Version = HSAMetadata.mVersion; 228 229 Version.push_back(VersionMajorV2); 230 Version.push_back(VersionMinorV2); 231 } 232 233 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 234 auto &Printf = HSAMetadata.mPrintf; 235 236 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 237 if (!Node) 238 return; 239 240 for (auto Op : Node->operands()) 241 if (Op->getNumOperands()) 242 Printf.push_back( 243 std::string(cast<MDString>(Op->getOperand(0))->getString())); 244 } 245 246 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 247 auto &Kernel = HSAMetadata.mKernels.back(); 248 249 // TODO: What about other languages? 250 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 251 if (!Node || !Node->getNumOperands()) 252 return; 253 auto Op0 = Node->getOperand(0); 254 if (Op0->getNumOperands() <= 1) 255 return; 256 257 Kernel.mLanguage = "OpenCL C"; 258 Kernel.mLanguageVersion.push_back( 259 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 260 Kernel.mLanguageVersion.push_back( 261 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 262 } 263 264 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 265 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 266 267 if (auto Node = Func.getMetadata("reqd_work_group_size")) 268 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 269 if (auto Node = Func.getMetadata("work_group_size_hint")) 270 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 271 if (auto Node = Func.getMetadata("vec_type_hint")) { 272 Attrs.mVecTypeHint = getTypeName( 273 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 274 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 275 } 276 if (Func.hasFnAttribute("runtime-handle")) { 277 Attrs.mRuntimeHandle = 278 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 279 } 280 } 281 282 void MetadataStreamerV2::emitKernelArgs(const Function &Func) { 283 for (auto &Arg : Func.args()) 284 emitKernelArg(Arg); 285 286 emitHiddenKernelArgs(Func); 287 } 288 289 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 290 auto Func = Arg.getParent(); 291 auto ArgNo = Arg.getArgNo(); 292 const MDNode *Node; 293 294 StringRef Name; 295 Node = Func->getMetadata("kernel_arg_name"); 296 if (Node && ArgNo < Node->getNumOperands()) 297 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 298 else if (Arg.hasName()) 299 Name = Arg.getName(); 300 301 StringRef TypeName; 302 Node = Func->getMetadata("kernel_arg_type"); 303 if (Node && ArgNo < Node->getNumOperands()) 304 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 305 306 StringRef BaseTypeName; 307 Node = Func->getMetadata("kernel_arg_base_type"); 308 if (Node && ArgNo < Node->getNumOperands()) 309 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 310 311 StringRef AccQual; 312 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 313 Arg.hasNoAliasAttr()) { 314 AccQual = "read_only"; 315 } else { 316 Node = Func->getMetadata("kernel_arg_access_qual"); 317 if (Node && ArgNo < Node->getNumOperands()) 318 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 319 } 320 321 StringRef TypeQual; 322 Node = Func->getMetadata("kernel_arg_type_qual"); 323 if (Node && ArgNo < Node->getNumOperands()) 324 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 325 326 const DataLayout &DL = Func->getParent()->getDataLayout(); 327 328 MaybeAlign PointeeAlign; 329 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) { 330 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 331 // FIXME: Should report this for all address spaces 332 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(), 333 PtrTy->getElementType()); 334 } 335 } 336 337 Type *ArgTy; 338 Align ArgAlign; 339 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 340 341 emitKernelArg(DL, ArgTy, ArgAlign, 342 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name, 343 TypeName, BaseTypeName, AccQual, TypeQual); 344 } 345 346 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 347 Align Alignment, ValueKind ValueKind, 348 MaybeAlign PointeeAlign, StringRef Name, 349 StringRef TypeName, 350 StringRef BaseTypeName, 351 StringRef AccQual, StringRef TypeQual) { 352 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 353 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 354 355 Arg.mName = std::string(Name); 356 Arg.mTypeName = std::string(TypeName); 357 Arg.mSize = DL.getTypeAllocSize(Ty); 358 Arg.mAlign = Alignment.value(); 359 Arg.mValueKind = ValueKind; 360 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; 361 362 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 363 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 364 365 Arg.mAccQual = getAccessQualifier(AccQual); 366 367 // TODO: Emit Arg.mActualAccQual. 368 369 SmallVector<StringRef, 1> SplitTypeQuals; 370 TypeQual.split(SplitTypeQuals, " ", -1, false); 371 for (StringRef Key : SplitTypeQuals) { 372 auto P = StringSwitch<bool*>(Key) 373 .Case("const", &Arg.mIsConst) 374 .Case("restrict", &Arg.mIsRestrict) 375 .Case("volatile", &Arg.mIsVolatile) 376 .Case("pipe", &Arg.mIsPipe) 377 .Default(nullptr); 378 if (P) 379 *P = true; 380 } 381 } 382 383 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { 384 int HiddenArgNumBytes = 385 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 386 387 if (!HiddenArgNumBytes) 388 return; 389 390 auto &DL = Func.getParent()->getDataLayout(); 391 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 392 393 if (HiddenArgNumBytes >= 8) 394 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX); 395 if (HiddenArgNumBytes >= 16) 396 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY); 397 if (HiddenArgNumBytes >= 24) 398 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ); 399 400 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 401 AMDGPUAS::GLOBAL_ADDRESS); 402 403 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 404 // "none" argument. 405 if (HiddenArgNumBytes >= 32) { 406 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 407 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); 408 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 409 // The printf runtime binding pass should have ensured that hostcall and 410 // printf are not used in the same module. 411 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 412 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); 413 } else 414 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 415 } 416 417 // Emit "default queue" and "completion action" arguments if enqueue kernel is 418 // used, otherwise emit dummy "none" arguments. 419 if (HiddenArgNumBytes >= 48) { 420 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 421 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); 422 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); 423 } else { 424 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 425 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); 426 } 427 } 428 429 // Emit the pointer argument for multi-grid object. 430 if (HiddenArgNumBytes >= 56) 431 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); 432 } 433 434 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 435 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 436 } 437 438 void MetadataStreamerV2::begin(const Module &Mod, 439 const IsaInfo::AMDGPUTargetID &TargetID) { 440 emitVersion(); 441 emitPrintf(Mod); 442 } 443 444 void MetadataStreamerV2::end() { 445 std::string HSAMetadataString; 446 if (toString(HSAMetadata, HSAMetadataString)) 447 return; 448 449 if (DumpHSAMetadata) 450 dump(HSAMetadataString); 451 if (VerifyHSAMetadata) 452 verify(HSAMetadataString); 453 } 454 455 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 456 const SIProgramInfo &ProgramInfo) { 457 auto &Func = MF.getFunction(); 458 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 459 return; 460 461 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 462 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 463 464 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 465 auto &Kernel = HSAMetadata.mKernels.back(); 466 467 Kernel.mName = std::string(Func.getName()); 468 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 469 emitKernelLanguage(Func); 470 emitKernelAttrs(Func); 471 emitKernelArgs(Func); 472 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 473 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 474 } 475 476 //===----------------------------------------------------------------------===// 477 // HSAMetadataStreamerV3 478 //===----------------------------------------------------------------------===// 479 480 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 481 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 482 } 483 484 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 485 errs() << "AMDGPU HSA Metadata Parser Test: "; 486 487 msgpack::Document FromHSAMetadataString; 488 489 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 490 errs() << "FAIL\n"; 491 return; 492 } 493 494 std::string ToHSAMetadataString; 495 raw_string_ostream StrOS(ToHSAMetadataString); 496 FromHSAMetadataString.toYAML(StrOS); 497 498 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 499 if (HSAMetadataString != ToHSAMetadataString) { 500 errs() << "Original input: " << HSAMetadataString << '\n' 501 << "Produced output: " << StrOS.str() << '\n'; 502 } 503 } 504 505 Optional<StringRef> 506 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 507 return StringSwitch<Optional<StringRef>>(AccQual) 508 .Case("read_only", StringRef("read_only")) 509 .Case("write_only", StringRef("write_only")) 510 .Case("read_write", StringRef("read_write")) 511 .Default(None); 512 } 513 514 Optional<StringRef> 515 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 516 switch (AddressSpace) { 517 case AMDGPUAS::PRIVATE_ADDRESS: 518 return StringRef("private"); 519 case AMDGPUAS::GLOBAL_ADDRESS: 520 return StringRef("global"); 521 case AMDGPUAS::CONSTANT_ADDRESS: 522 return StringRef("constant"); 523 case AMDGPUAS::LOCAL_ADDRESS: 524 return StringRef("local"); 525 case AMDGPUAS::FLAT_ADDRESS: 526 return StringRef("generic"); 527 case AMDGPUAS::REGION_ADDRESS: 528 return StringRef("region"); 529 default: 530 return None; 531 } 532 } 533 534 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 535 StringRef BaseTypeName) const { 536 if (TypeQual.find("pipe") != StringRef::npos) 537 return "pipe"; 538 539 return StringSwitch<StringRef>(BaseTypeName) 540 .Case("image1d_t", "image") 541 .Case("image1d_array_t", "image") 542 .Case("image1d_buffer_t", "image") 543 .Case("image2d_t", "image") 544 .Case("image2d_array_t", "image") 545 .Case("image2d_array_depth_t", "image") 546 .Case("image2d_array_msaa_t", "image") 547 .Case("image2d_array_msaa_depth_t", "image") 548 .Case("image2d_depth_t", "image") 549 .Case("image2d_msaa_t", "image") 550 .Case("image2d_msaa_depth_t", "image") 551 .Case("image3d_t", "image") 552 .Case("sampler_t", "sampler") 553 .Case("queue_t", "queue") 554 .Default(isa<PointerType>(Ty) 555 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 556 ? "dynamic_shared_pointer" 557 : "global_buffer") 558 : "by_value"); 559 } 560 561 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 562 switch (Ty->getTypeID()) { 563 case Type::IntegerTyID: { 564 if (!Signed) 565 return (Twine('u') + getTypeName(Ty, true)).str(); 566 567 auto BitWidth = Ty->getIntegerBitWidth(); 568 switch (BitWidth) { 569 case 8: 570 return "char"; 571 case 16: 572 return "short"; 573 case 32: 574 return "int"; 575 case 64: 576 return "long"; 577 default: 578 return (Twine('i') + Twine(BitWidth)).str(); 579 } 580 } 581 case Type::HalfTyID: 582 return "half"; 583 case Type::FloatTyID: 584 return "float"; 585 case Type::DoubleTyID: 586 return "double"; 587 case Type::FixedVectorTyID: { 588 auto VecTy = cast<FixedVectorType>(Ty); 589 auto ElTy = VecTy->getElementType(); 590 auto NumElements = VecTy->getNumElements(); 591 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 592 } 593 default: 594 return "unknown"; 595 } 596 } 597 598 msgpack::ArrayDocNode 599 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 600 auto Dims = HSAMetadataDoc->getArrayNode(); 601 if (Node->getNumOperands() != 3) 602 return Dims; 603 604 for (auto &Op : Node->operands()) 605 Dims.push_back(Dims.getDocument()->getNode( 606 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 607 return Dims; 608 } 609 610 void MetadataStreamerV3::emitVersion() { 611 auto Version = HSAMetadataDoc->getArrayNode(); 612 Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); 613 Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); 614 getRootMetadata("amdhsa.version") = Version; 615 } 616 617 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 618 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 619 if (!Node) 620 return; 621 622 auto Printf = HSAMetadataDoc->getArrayNode(); 623 for (auto Op : Node->operands()) 624 if (Op->getNumOperands()) 625 Printf.push_back(Printf.getDocument()->getNode( 626 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 627 getRootMetadata("amdhsa.printf") = Printf; 628 } 629 630 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 631 msgpack::MapDocNode Kern) { 632 // TODO: What about other languages? 633 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 634 if (!Node || !Node->getNumOperands()) 635 return; 636 auto Op0 = Node->getOperand(0); 637 if (Op0->getNumOperands() <= 1) 638 return; 639 640 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 641 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 642 LanguageVersion.push_back(Kern.getDocument()->getNode( 643 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 644 LanguageVersion.push_back(Kern.getDocument()->getNode( 645 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 646 Kern[".language_version"] = LanguageVersion; 647 } 648 649 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 650 msgpack::MapDocNode Kern) { 651 652 if (auto Node = Func.getMetadata("reqd_work_group_size")) 653 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 654 if (auto Node = Func.getMetadata("work_group_size_hint")) 655 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 656 if (auto Node = Func.getMetadata("vec_type_hint")) { 657 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 658 getTypeName( 659 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 660 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 661 /*Copy=*/true); 662 } 663 if (Func.hasFnAttribute("runtime-handle")) { 664 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 665 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 666 /*Copy=*/true); 667 } 668 if (Func.hasFnAttribute("device-init")) 669 Kern[".kind"] = Kern.getDocument()->getNode("init"); 670 else if (Func.hasFnAttribute("device-fini")) 671 Kern[".kind"] = Kern.getDocument()->getNode("fini"); 672 } 673 674 void MetadataStreamerV3::emitKernelArgs(const Function &Func, 675 msgpack::MapDocNode Kern) { 676 unsigned Offset = 0; 677 auto Args = HSAMetadataDoc->getArrayNode(); 678 for (auto &Arg : Func.args()) 679 emitKernelArg(Arg, Offset, Args); 680 681 emitHiddenKernelArgs(Func, Offset, Args); 682 683 Kern[".args"] = Args; 684 } 685 686 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 687 msgpack::ArrayDocNode Args) { 688 auto Func = Arg.getParent(); 689 auto ArgNo = Arg.getArgNo(); 690 const MDNode *Node; 691 692 StringRef Name; 693 Node = Func->getMetadata("kernel_arg_name"); 694 if (Node && ArgNo < Node->getNumOperands()) 695 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 696 else if (Arg.hasName()) 697 Name = Arg.getName(); 698 699 StringRef TypeName; 700 Node = Func->getMetadata("kernel_arg_type"); 701 if (Node && ArgNo < Node->getNumOperands()) 702 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 703 704 StringRef BaseTypeName; 705 Node = Func->getMetadata("kernel_arg_base_type"); 706 if (Node && ArgNo < Node->getNumOperands()) 707 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 708 709 StringRef AccQual; 710 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 711 Arg.hasNoAliasAttr()) { 712 AccQual = "read_only"; 713 } else { 714 Node = Func->getMetadata("kernel_arg_access_qual"); 715 if (Node && ArgNo < Node->getNumOperands()) 716 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 717 } 718 719 StringRef TypeQual; 720 Node = Func->getMetadata("kernel_arg_type_qual"); 721 if (Node && ArgNo < Node->getNumOperands()) 722 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 723 724 const DataLayout &DL = Func->getParent()->getDataLayout(); 725 726 MaybeAlign PointeeAlign; 727 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 728 729 // FIXME: Need to distinguish in memory alignment from pointer alignment. 730 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 731 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 732 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(), 733 PtrTy->getElementType()); 734 } 735 } 736 737 // There's no distinction between byval aggregates and raw aggregates. 738 Type *ArgTy; 739 Align ArgAlign; 740 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 741 742 emitKernelArg(DL, ArgTy, ArgAlign, 743 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 744 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 745 } 746 747 void MetadataStreamerV3::emitKernelArg( 748 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 749 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 750 StringRef Name, StringRef TypeName, StringRef BaseTypeName, 751 StringRef AccQual, StringRef TypeQual) { 752 auto Arg = Args.getDocument()->getMapNode(); 753 754 if (!Name.empty()) 755 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 756 if (!TypeName.empty()) 757 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 758 auto Size = DL.getTypeAllocSize(Ty); 759 Arg[".size"] = Arg.getDocument()->getNode(Size); 760 Offset = alignTo(Offset, Alignment); 761 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 762 Offset += Size; 763 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 764 if (PointeeAlign) 765 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 766 767 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 768 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 769 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); 770 771 if (auto AQ = getAccessQualifier(AccQual)) 772 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 773 774 // TODO: Emit Arg[".actual_access"]. 775 776 SmallVector<StringRef, 1> SplitTypeQuals; 777 TypeQual.split(SplitTypeQuals, " ", -1, false); 778 for (StringRef Key : SplitTypeQuals) { 779 if (Key == "const") 780 Arg[".is_const"] = Arg.getDocument()->getNode(true); 781 else if (Key == "restrict") 782 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 783 else if (Key == "volatile") 784 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 785 else if (Key == "pipe") 786 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 787 } 788 789 Args.push_back(Arg); 790 } 791 792 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, 793 unsigned &Offset, 794 msgpack::ArrayDocNode Args) { 795 int HiddenArgNumBytes = 796 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 797 798 if (!HiddenArgNumBytes) 799 return; 800 801 auto &DL = Func.getParent()->getDataLayout(); 802 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 803 804 if (HiddenArgNumBytes >= 8) 805 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 806 Args); 807 if (HiddenArgNumBytes >= 16) 808 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 809 Args); 810 if (HiddenArgNumBytes >= 24) 811 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 812 Args); 813 814 auto Int8PtrTy = 815 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 816 817 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 818 // "none" argument. 819 if (HiddenArgNumBytes >= 32) { 820 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 821 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 822 Args); 823 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 824 // The printf runtime binding pass should have ensured that hostcall and 825 // printf are not used in the same module. 826 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 827 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 828 Args); 829 } else 830 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 831 } 832 833 // Emit "default queue" and "completion action" arguments if enqueue kernel is 834 // used, otherwise emit dummy "none" arguments. 835 if (HiddenArgNumBytes >= 48) { 836 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 837 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 838 Args); 839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 840 Args); 841 } else { 842 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 843 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 844 } 845 } 846 847 // Emit the pointer argument for multi-grid object. 848 if (HiddenArgNumBytes >= 56) 849 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 850 Args); 851 } 852 853 msgpack::MapDocNode 854 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 855 const SIProgramInfo &ProgramInfo) const { 856 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 857 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 858 const Function &F = MF.getFunction(); 859 860 auto Kern = HSAMetadataDoc->getMapNode(); 861 862 Align MaxKernArgAlign; 863 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 864 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 865 Kern[".group_segment_fixed_size"] = 866 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 867 Kern[".private_segment_fixed_size"] = 868 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 869 Kern[".kernarg_segment_align"] = 870 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 871 Kern[".wavefront_size"] = 872 Kern.getDocument()->getNode(STM.getWavefrontSize()); 873 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 874 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 875 Kern[".max_flat_workgroup_size"] = 876 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 877 Kern[".sgpr_spill_count"] = 878 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 879 Kern[".vgpr_spill_count"] = 880 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 881 882 return Kern; 883 } 884 885 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 886 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 887 } 888 889 void MetadataStreamerV3::begin(const Module &Mod, 890 const IsaInfo::AMDGPUTargetID &TargetID) { 891 emitVersion(); 892 emitPrintf(Mod); 893 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 894 } 895 896 void MetadataStreamerV3::end() { 897 std::string HSAMetadataString; 898 raw_string_ostream StrOS(HSAMetadataString); 899 HSAMetadataDoc->toYAML(StrOS); 900 901 if (DumpHSAMetadata) 902 dump(StrOS.str()); 903 if (VerifyHSAMetadata) 904 verify(StrOS.str()); 905 } 906 907 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 908 const SIProgramInfo &ProgramInfo) { 909 auto &Func = MF.getFunction(); 910 auto Kern = getHSAKernelProps(MF, ProgramInfo); 911 912 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 913 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 914 915 auto Kernels = 916 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 917 918 { 919 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 920 Kern[".symbol"] = Kern.getDocument()->getNode( 921 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 922 emitKernelLanguage(Func, Kern); 923 emitKernelAttrs(Func, Kern); 924 emitKernelArgs(Func, Kern); 925 } 926 927 Kernels.push_back(Kern); 928 } 929 930 //===----------------------------------------------------------------------===// 931 // HSAMetadataStreamerV4 932 //===----------------------------------------------------------------------===// 933 934 void MetadataStreamerV4::emitVersion() { 935 auto Version = HSAMetadataDoc->getArrayNode(); 936 Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); 937 Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); 938 getRootMetadata("amdhsa.version") = Version; 939 } 940 941 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) { 942 getRootMetadata("amdhsa.target") = 943 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); 944 } 945 946 void MetadataStreamerV4::begin(const Module &Mod, 947 const IsaInfo::AMDGPUTargetID &TargetID) { 948 emitVersion(); 949 emitTargetID(TargetID); 950 emitPrintf(Mod); 951 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 952 } 953 954 } // end namespace HSAMD 955 } // end namespace AMDGPU 956 } // end namespace llvm 957