1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 static cl::opt<bool> DumpHSAMetadata( 30 "amdgpu-dump-hsa-metadata", 31 cl::desc("Dump AMDGPU HSA Metadata")); 32 static cl::opt<bool> VerifyHSAMetadata( 33 "amdgpu-verify-hsa-metadata", 34 cl::desc("Verify AMDGPU HSA Metadata")); 35 36 namespace AMDGPU { 37 namespace HSAMD { 38 39 //===----------------------------------------------------------------------===// 40 // HSAMetadataStreamerV2 41 //===----------------------------------------------------------------------===// 42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 44 } 45 46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 47 errs() << "AMDGPU HSA Metadata Parser Test: "; 48 49 HSAMD::Metadata FromHSAMetadataString; 50 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 51 errs() << "FAIL\n"; 52 return; 53 } 54 55 std::string ToHSAMetadataString; 56 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 57 errs() << "FAIL\n"; 58 return; 59 } 60 61 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 62 << '\n'; 63 if (HSAMetadataString != ToHSAMetadataString) { 64 errs() << "Original input: " << HSAMetadataString << '\n' 65 << "Produced output: " << ToHSAMetadataString << '\n'; 66 } 67 } 68 69 AccessQualifier 70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 71 if (AccQual.empty()) 72 return AccessQualifier::Unknown; 73 74 return StringSwitch<AccessQualifier>(AccQual) 75 .Case("read_only", AccessQualifier::ReadOnly) 76 .Case("write_only", AccessQualifier::WriteOnly) 77 .Case("read_write", AccessQualifier::ReadWrite) 78 .Default(AccessQualifier::Default); 79 } 80 81 AddressSpaceQualifier 82 MetadataStreamerV2::getAddressSpaceQualifier( 83 unsigned AddressSpace) const { 84 switch (AddressSpace) { 85 case AMDGPUAS::PRIVATE_ADDRESS: 86 return AddressSpaceQualifier::Private; 87 case AMDGPUAS::GLOBAL_ADDRESS: 88 return AddressSpaceQualifier::Global; 89 case AMDGPUAS::CONSTANT_ADDRESS: 90 return AddressSpaceQualifier::Constant; 91 case AMDGPUAS::LOCAL_ADDRESS: 92 return AddressSpaceQualifier::Local; 93 case AMDGPUAS::FLAT_ADDRESS: 94 return AddressSpaceQualifier::Generic; 95 case AMDGPUAS::REGION_ADDRESS: 96 return AddressSpaceQualifier::Region; 97 default: 98 return AddressSpaceQualifier::Unknown; 99 } 100 } 101 102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 103 StringRef BaseTypeName) const { 104 if (TypeQual.find("pipe") != StringRef::npos) 105 return ValueKind::Pipe; 106 107 return StringSwitch<ValueKind>(BaseTypeName) 108 .Case("image1d_t", ValueKind::Image) 109 .Case("image1d_array_t", ValueKind::Image) 110 .Case("image1d_buffer_t", ValueKind::Image) 111 .Case("image2d_t", ValueKind::Image) 112 .Case("image2d_array_t", ValueKind::Image) 113 .Case("image2d_array_depth_t", ValueKind::Image) 114 .Case("image2d_array_msaa_t", ValueKind::Image) 115 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 116 .Case("image2d_depth_t", ValueKind::Image) 117 .Case("image2d_msaa_t", ValueKind::Image) 118 .Case("image2d_msaa_depth_t", ValueKind::Image) 119 .Case("image3d_t", ValueKind::Image) 120 .Case("sampler_t", ValueKind::Sampler) 121 .Case("queue_t", ValueKind::Queue) 122 .Default(isa<PointerType>(Ty) ? 123 (Ty->getPointerAddressSpace() == 124 AMDGPUAS::LOCAL_ADDRESS ? 125 ValueKind::DynamicSharedPointer : 126 ValueKind::GlobalBuffer) : 127 ValueKind::ByValue); 128 } 129 130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { 131 switch (Ty->getTypeID()) { 132 case Type::IntegerTyID: { 133 auto Signed = !TypeName.startswith("u"); 134 switch (Ty->getIntegerBitWidth()) { 135 case 8: 136 return Signed ? ValueType::I8 : ValueType::U8; 137 case 16: 138 return Signed ? ValueType::I16 : ValueType::U16; 139 case 32: 140 return Signed ? ValueType::I32 : ValueType::U32; 141 case 64: 142 return Signed ? ValueType::I64 : ValueType::U64; 143 default: 144 return ValueType::Struct; 145 } 146 } 147 case Type::HalfTyID: 148 return ValueType::F16; 149 case Type::FloatTyID: 150 return ValueType::F32; 151 case Type::DoubleTyID: 152 return ValueType::F64; 153 case Type::PointerTyID: 154 return getValueType(Ty->getPointerElementType(), TypeName); 155 case Type::VectorTyID: 156 return getValueType(Ty->getVectorElementType(), TypeName); 157 default: 158 return ValueType::Struct; 159 } 160 } 161 162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 163 switch (Ty->getTypeID()) { 164 case Type::IntegerTyID: { 165 if (!Signed) 166 return (Twine('u') + getTypeName(Ty, true)).str(); 167 168 auto BitWidth = Ty->getIntegerBitWidth(); 169 switch (BitWidth) { 170 case 8: 171 return "char"; 172 case 16: 173 return "short"; 174 case 32: 175 return "int"; 176 case 64: 177 return "long"; 178 default: 179 return (Twine('i') + Twine(BitWidth)).str(); 180 } 181 } 182 case Type::HalfTyID: 183 return "half"; 184 case Type::FloatTyID: 185 return "float"; 186 case Type::DoubleTyID: 187 return "double"; 188 case Type::VectorTyID: { 189 auto VecTy = cast<VectorType>(Ty); 190 auto ElTy = VecTy->getElementType(); 191 auto NumElements = VecTy->getVectorNumElements(); 192 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 193 } 194 default: 195 return "unknown"; 196 } 197 } 198 199 std::vector<uint32_t> 200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 201 std::vector<uint32_t> Dims; 202 if (Node->getNumOperands() != 3) 203 return Dims; 204 205 for (auto &Op : Node->operands()) 206 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 207 return Dims; 208 } 209 210 Kernel::CodeProps::Metadata 211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 212 const SIProgramInfo &ProgramInfo) const { 213 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 214 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 216 const Function &F = MF.getFunction(); 217 218 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 219 F.getCallingConv() == CallingConv::SPIR_KERNEL); 220 221 unsigned MaxKernArgAlign; 222 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 223 MaxKernArgAlign); 224 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 225 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 226 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); 227 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 228 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 229 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 230 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 231 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 232 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 233 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 234 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 235 236 return HSACodeProps; 237 } 238 239 Kernel::DebugProps::Metadata 240 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 241 const SIProgramInfo &ProgramInfo) const { 242 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 243 HSAMD::Kernel::DebugProps::Metadata HSADebugProps; 244 245 if (!STM.debuggerSupported()) 246 return HSADebugProps; 247 248 HSADebugProps.mDebuggerABIVersion.push_back(1); 249 HSADebugProps.mDebuggerABIVersion.push_back(0); 250 251 if (STM.debuggerEmitPrologue()) { 252 HSADebugProps.mPrivateSegmentBufferSGPR = 253 ProgramInfo.DebuggerPrivateSegmentBufferSGPR; 254 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = 255 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; 256 } 257 258 return HSADebugProps; 259 } 260 261 void MetadataStreamerV2::emitVersion() { 262 auto &Version = HSAMetadata.mVersion; 263 264 Version.push_back(VersionMajor); 265 Version.push_back(VersionMinor); 266 } 267 268 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 269 auto &Printf = HSAMetadata.mPrintf; 270 271 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 272 if (!Node) 273 return; 274 275 for (auto Op : Node->operands()) 276 if (Op->getNumOperands()) 277 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); 278 } 279 280 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 281 auto &Kernel = HSAMetadata.mKernels.back(); 282 283 // TODO: What about other languages? 284 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 285 if (!Node || !Node->getNumOperands()) 286 return; 287 auto Op0 = Node->getOperand(0); 288 if (Op0->getNumOperands() <= 1) 289 return; 290 291 Kernel.mLanguage = "OpenCL C"; 292 Kernel.mLanguageVersion.push_back( 293 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 294 Kernel.mLanguageVersion.push_back( 295 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 296 } 297 298 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 299 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 300 301 if (auto Node = Func.getMetadata("reqd_work_group_size")) 302 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 303 if (auto Node = Func.getMetadata("work_group_size_hint")) 304 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 305 if (auto Node = Func.getMetadata("vec_type_hint")) { 306 Attrs.mVecTypeHint = getTypeName( 307 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 308 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 309 } 310 if (Func.hasFnAttribute("runtime-handle")) { 311 Attrs.mRuntimeHandle = 312 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 313 } 314 } 315 316 void MetadataStreamerV2::emitKernelArgs(const Function &Func) { 317 for (auto &Arg : Func.args()) 318 emitKernelArg(Arg); 319 320 emitHiddenKernelArgs(Func); 321 } 322 323 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 324 auto Func = Arg.getParent(); 325 auto ArgNo = Arg.getArgNo(); 326 const MDNode *Node; 327 328 StringRef Name; 329 Node = Func->getMetadata("kernel_arg_name"); 330 if (Node && ArgNo < Node->getNumOperands()) 331 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 332 else if (Arg.hasName()) 333 Name = Arg.getName(); 334 335 StringRef TypeName; 336 Node = Func->getMetadata("kernel_arg_type"); 337 if (Node && ArgNo < Node->getNumOperands()) 338 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 339 340 StringRef BaseTypeName; 341 Node = Func->getMetadata("kernel_arg_base_type"); 342 if (Node && ArgNo < Node->getNumOperands()) 343 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 344 345 StringRef AccQual; 346 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 347 Arg.hasNoAliasAttr()) { 348 AccQual = "read_only"; 349 } else { 350 Node = Func->getMetadata("kernel_arg_access_qual"); 351 if (Node && ArgNo < Node->getNumOperands()) 352 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 353 } 354 355 StringRef TypeQual; 356 Node = Func->getMetadata("kernel_arg_type_qual"); 357 if (Node && ArgNo < Node->getNumOperands()) 358 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 359 360 Type *Ty = Arg.getType(); 361 const DataLayout &DL = Func->getParent()->getDataLayout(); 362 363 unsigned PointeeAlign = 0; 364 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 365 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 366 PointeeAlign = Arg.getParamAlignment(); 367 if (PointeeAlign == 0) 368 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 369 } 370 } 371 372 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), 373 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 374 } 375 376 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 377 ValueKind ValueKind, 378 unsigned PointeeAlign, StringRef Name, 379 StringRef TypeName, 380 StringRef BaseTypeName, 381 StringRef AccQual, StringRef TypeQual) { 382 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 383 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 384 385 Arg.mName = Name; 386 Arg.mTypeName = TypeName; 387 Arg.mSize = DL.getTypeAllocSize(Ty); 388 Arg.mAlign = DL.getABITypeAlignment(Ty); 389 Arg.mValueKind = ValueKind; 390 Arg.mValueType = getValueType(Ty, BaseTypeName); 391 Arg.mPointeeAlign = PointeeAlign; 392 393 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 394 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 395 396 Arg.mAccQual = getAccessQualifier(AccQual); 397 398 // TODO: Emit Arg.mActualAccQual. 399 400 SmallVector<StringRef, 1> SplitTypeQuals; 401 TypeQual.split(SplitTypeQuals, " ", -1, false); 402 for (StringRef Key : SplitTypeQuals) { 403 auto P = StringSwitch<bool*>(Key) 404 .Case("const", &Arg.mIsConst) 405 .Case("restrict", &Arg.mIsRestrict) 406 .Case("volatile", &Arg.mIsVolatile) 407 .Case("pipe", &Arg.mIsPipe) 408 .Default(nullptr); 409 if (P) 410 *P = true; 411 } 412 } 413 414 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { 415 int HiddenArgNumBytes = 416 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 417 418 if (!HiddenArgNumBytes) 419 return; 420 421 auto &DL = Func.getParent()->getDataLayout(); 422 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 423 424 if (HiddenArgNumBytes >= 8) 425 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); 426 if (HiddenArgNumBytes >= 16) 427 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); 428 if (HiddenArgNumBytes >= 24) 429 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); 430 431 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 432 AMDGPUAS::GLOBAL_ADDRESS); 433 434 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 435 // "none" argument. 436 if (HiddenArgNumBytes >= 32) { 437 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 438 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); 439 else 440 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 441 } 442 443 // Emit "default queue" and "completion action" arguments if enqueue kernel is 444 // used, otherwise emit dummy "none" arguments. 445 if (HiddenArgNumBytes >= 48) { 446 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 447 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); 448 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); 449 } else { 450 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 451 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 452 } 453 } 454 } 455 456 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 457 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 458 } 459 460 void MetadataStreamerV2::begin(const Module &Mod) { 461 emitVersion(); 462 emitPrintf(Mod); 463 } 464 465 void MetadataStreamerV2::end() { 466 std::string HSAMetadataString; 467 if (toString(HSAMetadata, HSAMetadataString)) 468 return; 469 470 if (DumpHSAMetadata) 471 dump(HSAMetadataString); 472 if (VerifyHSAMetadata) 473 verify(HSAMetadataString); 474 } 475 476 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 477 const SIProgramInfo &ProgramInfo) { 478 auto &Func = MF.getFunction(); 479 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 480 return; 481 482 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 483 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 484 485 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 486 auto &Kernel = HSAMetadata.mKernels.back(); 487 488 Kernel.mName = Func.getName(); 489 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 490 emitKernelLanguage(Func); 491 emitKernelAttrs(Func); 492 emitKernelArgs(Func); 493 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 494 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 495 } 496 497 //===----------------------------------------------------------------------===// 498 // HSAMetadataStreamerV3 499 //===----------------------------------------------------------------------===// 500 501 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 502 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 503 } 504 505 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 506 errs() << "AMDGPU HSA Metadata Parser Test: "; 507 508 std::shared_ptr<msgpack::Node> FromHSAMetadataString = 509 std::make_shared<msgpack::MapNode>(); 510 511 yaml::Input YIn(HSAMetadataString); 512 YIn >> FromHSAMetadataString; 513 if (YIn.error()) { 514 errs() << "FAIL\n"; 515 return; 516 } 517 518 std::string ToHSAMetadataString; 519 raw_string_ostream StrOS(ToHSAMetadataString); 520 yaml::Output YOut(StrOS); 521 YOut << FromHSAMetadataString; 522 523 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 524 if (HSAMetadataString != ToHSAMetadataString) { 525 errs() << "Original input: " << HSAMetadataString << '\n' 526 << "Produced output: " << StrOS.str() << '\n'; 527 } 528 } 529 530 Optional<StringRef> 531 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 532 return StringSwitch<Optional<StringRef>>(AccQual) 533 .Case("read_only", StringRef("read_only")) 534 .Case("write_only", StringRef("write_only")) 535 .Case("read_write", StringRef("read_write")) 536 .Default(None); 537 } 538 539 Optional<StringRef> 540 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 541 switch (AddressSpace) { 542 case AMDGPUAS::PRIVATE_ADDRESS: 543 return StringRef("private"); 544 case AMDGPUAS::GLOBAL_ADDRESS: 545 return StringRef("global"); 546 case AMDGPUAS::CONSTANT_ADDRESS: 547 return StringRef("constant"); 548 case AMDGPUAS::LOCAL_ADDRESS: 549 return StringRef("local"); 550 case AMDGPUAS::FLAT_ADDRESS: 551 return StringRef("generic"); 552 case AMDGPUAS::REGION_ADDRESS: 553 return StringRef("region"); 554 default: 555 return None; 556 } 557 } 558 559 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 560 StringRef BaseTypeName) const { 561 if (TypeQual.find("pipe") != StringRef::npos) 562 return "pipe"; 563 564 return StringSwitch<StringRef>(BaseTypeName) 565 .Case("image1d_t", "image") 566 .Case("image1d_array_t", "image") 567 .Case("image1d_buffer_t", "image") 568 .Case("image2d_t", "image") 569 .Case("image2d_array_t", "image") 570 .Case("image2d_array_depth_t", "image") 571 .Case("image2d_array_msaa_t", "image") 572 .Case("image2d_array_msaa_depth_t", "image") 573 .Case("image2d_depth_t", "image") 574 .Case("image2d_msaa_t", "image") 575 .Case("image2d_msaa_depth_t", "image") 576 .Case("image3d_t", "image") 577 .Case("sampler_t", "sampler") 578 .Case("queue_t", "queue") 579 .Default(isa<PointerType>(Ty) 580 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 581 ? "dynamic_shared_pointer" 582 : "global_buffer") 583 : "by_value"); 584 } 585 586 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { 587 switch (Ty->getTypeID()) { 588 case Type::IntegerTyID: { 589 auto Signed = !TypeName.startswith("u"); 590 switch (Ty->getIntegerBitWidth()) { 591 case 8: 592 return Signed ? "i8" : "u8"; 593 case 16: 594 return Signed ? "i16" : "u16"; 595 case 32: 596 return Signed ? "i32" : "u32"; 597 case 64: 598 return Signed ? "i64" : "u64"; 599 default: 600 return "struct"; 601 } 602 } 603 case Type::HalfTyID: 604 return "f16"; 605 case Type::FloatTyID: 606 return "f32"; 607 case Type::DoubleTyID: 608 return "f64"; 609 case Type::PointerTyID: 610 return getValueType(Ty->getPointerElementType(), TypeName); 611 case Type::VectorTyID: 612 return getValueType(Ty->getVectorElementType(), TypeName); 613 default: 614 return "struct"; 615 } 616 } 617 618 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 619 switch (Ty->getTypeID()) { 620 case Type::IntegerTyID: { 621 if (!Signed) 622 return (Twine('u') + getTypeName(Ty, true)).str(); 623 624 auto BitWidth = Ty->getIntegerBitWidth(); 625 switch (BitWidth) { 626 case 8: 627 return "char"; 628 case 16: 629 return "short"; 630 case 32: 631 return "int"; 632 case 64: 633 return "long"; 634 default: 635 return (Twine('i') + Twine(BitWidth)).str(); 636 } 637 } 638 case Type::HalfTyID: 639 return "half"; 640 case Type::FloatTyID: 641 return "float"; 642 case Type::DoubleTyID: 643 return "double"; 644 case Type::VectorTyID: { 645 auto VecTy = cast<VectorType>(Ty); 646 auto ElTy = VecTy->getElementType(); 647 auto NumElements = VecTy->getVectorNumElements(); 648 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 649 } 650 default: 651 return "unknown"; 652 } 653 } 654 655 std::shared_ptr<msgpack::ArrayNode> 656 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 657 auto Dims = std::make_shared<msgpack::ArrayNode>(); 658 if (Node->getNumOperands() != 3) 659 return Dims; 660 661 for (auto &Op : Node->operands()) 662 Dims->push_back(std::make_shared<msgpack::ScalarNode>( 663 mdconst::extract<ConstantInt>(Op)->getZExtValue())); 664 return Dims; 665 } 666 667 void MetadataStreamerV3::emitVersion() { 668 auto Version = std::make_shared<msgpack::ArrayNode>(); 669 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor)); 670 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor)); 671 getRootMetadata("amdhsa.version") = std::move(Version); 672 } 673 674 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 675 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 676 if (!Node) 677 return; 678 679 auto Printf = std::make_shared<msgpack::ArrayNode>(); 680 for (auto Op : Node->operands()) 681 if (Op->getNumOperands()) 682 Printf->push_back(std::make_shared<msgpack::ScalarNode>( 683 cast<MDString>(Op->getOperand(0))->getString())); 684 getRootMetadata("amdhsa.printf") = std::move(Printf); 685 } 686 687 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 688 msgpack::MapNode &Kern) { 689 // TODO: What about other languages? 690 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 691 if (!Node || !Node->getNumOperands()) 692 return; 693 auto Op0 = Node->getOperand(0); 694 if (Op0->getNumOperands() <= 1) 695 return; 696 697 Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C"); 698 auto LanguageVersion = std::make_shared<msgpack::ArrayNode>(); 699 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( 700 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 701 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( 702 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 703 Kern[".language_version"] = std::move(LanguageVersion); 704 } 705 706 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 707 msgpack::MapNode &Kern) { 708 709 if (auto Node = Func.getMetadata("reqd_work_group_size")) 710 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 711 if (auto Node = Func.getMetadata("work_group_size_hint")) 712 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 713 if (auto Node = Func.getMetadata("vec_type_hint")) { 714 Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName( 715 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 716 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue())); 717 } 718 if (Func.hasFnAttribute("runtime-handle")) { 719 Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>( 720 Func.getFnAttribute("runtime-handle").getValueAsString().str()); 721 } 722 } 723 724 void MetadataStreamerV3::emitKernelArgs(const Function &Func, 725 msgpack::MapNode &Kern) { 726 unsigned Offset = 0; 727 auto Args = std::make_shared<msgpack::ArrayNode>(); 728 for (auto &Arg : Func.args()) 729 emitKernelArg(Arg, Offset, *Args); 730 731 emitHiddenKernelArgs(Func, Offset, *Args); 732 733 // TODO: What about other languages? 734 if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) { 735 auto &DL = Func.getParent()->getDataLayout(); 736 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 737 738 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args); 739 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args); 740 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args); 741 742 auto Int8PtrTy = 743 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 744 745 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 746 // "none" argument. 747 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 748 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args); 749 else 750 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 751 752 // Emit "default queue" and "completion action" arguments if enqueue kernel 753 // is used, otherwise emit dummy "none" arguments. 754 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 755 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args); 756 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args); 757 } else { 758 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 759 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 760 } 761 } 762 763 Kern[".args"] = std::move(Args); 764 } 765 766 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 767 msgpack::ArrayNode &Args) { 768 auto Func = Arg.getParent(); 769 auto ArgNo = Arg.getArgNo(); 770 const MDNode *Node; 771 772 StringRef Name; 773 Node = Func->getMetadata("kernel_arg_name"); 774 if (Node && ArgNo < Node->getNumOperands()) 775 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 776 else if (Arg.hasName()) 777 Name = Arg.getName(); 778 779 StringRef TypeName; 780 Node = Func->getMetadata("kernel_arg_type"); 781 if (Node && ArgNo < Node->getNumOperands()) 782 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 783 784 StringRef BaseTypeName; 785 Node = Func->getMetadata("kernel_arg_base_type"); 786 if (Node && ArgNo < Node->getNumOperands()) 787 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 788 789 StringRef AccQual; 790 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 791 Arg.hasNoAliasAttr()) { 792 AccQual = "read_only"; 793 } else { 794 Node = Func->getMetadata("kernel_arg_access_qual"); 795 if (Node && ArgNo < Node->getNumOperands()) 796 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 797 } 798 799 StringRef TypeQual; 800 Node = Func->getMetadata("kernel_arg_type_qual"); 801 if (Node && ArgNo < Node->getNumOperands()) 802 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 803 804 Type *Ty = Arg.getType(); 805 const DataLayout &DL = Func->getParent()->getDataLayout(); 806 807 unsigned PointeeAlign = 0; 808 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 809 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 810 PointeeAlign = Arg.getParamAlignment(); 811 if (PointeeAlign == 0) 812 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 813 } 814 } 815 816 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), 817 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, 818 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, 819 TypeQual); 820 } 821 822 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, 823 StringRef ValueKind, unsigned &Offset, 824 msgpack::ArrayNode &Args, 825 unsigned PointeeAlign, StringRef Name, 826 StringRef TypeName, 827 StringRef BaseTypeName, 828 StringRef AccQual, StringRef TypeQual) { 829 auto ArgPtr = std::make_shared<msgpack::MapNode>(); 830 auto &Arg = *ArgPtr; 831 832 if (!Name.empty()) 833 Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name); 834 if (!TypeName.empty()) 835 Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName); 836 auto Size = DL.getTypeAllocSize(Ty); 837 auto Align = DL.getABITypeAlignment(Ty); 838 Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size); 839 Offset = alignTo(Offset, Align); 840 Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset); 841 Offset += Size; 842 Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind); 843 Arg[".value_type"] = 844 std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName)); 845 if (PointeeAlign) 846 Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign); 847 848 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 849 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 850 Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier); 851 852 if (auto AQ = getAccessQualifier(AccQual)) 853 Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ); 854 855 // TODO: Emit Arg[".actual_access"]. 856 857 SmallVector<StringRef, 1> SplitTypeQuals; 858 TypeQual.split(SplitTypeQuals, " ", -1, false); 859 for (StringRef Key : SplitTypeQuals) { 860 if (Key == "const") 861 Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true); 862 else if (Key == "restrict") 863 Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true); 864 else if (Key == "volatile") 865 Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true); 866 else if (Key == "pipe") 867 Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true); 868 } 869 870 Args.push_back(std::move(ArgPtr)); 871 } 872 873 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, 874 unsigned &Offset, 875 msgpack::ArrayNode &Args) { 876 int HiddenArgNumBytes = 877 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 878 879 if (!HiddenArgNumBytes) 880 return; 881 882 auto &DL = Func.getParent()->getDataLayout(); 883 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 884 885 if (HiddenArgNumBytes >= 8) 886 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); 887 if (HiddenArgNumBytes >= 16) 888 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); 889 if (HiddenArgNumBytes >= 24) 890 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); 891 892 auto Int8PtrTy = 893 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 894 895 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 896 // "none" argument. 897 if (HiddenArgNumBytes >= 32) { 898 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 899 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); 900 else 901 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 902 } 903 904 // Emit "default queue" and "completion action" arguments if enqueue kernel is 905 // used, otherwise emit dummy "none" arguments. 906 if (HiddenArgNumBytes >= 48) { 907 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 908 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); 909 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); 910 } else { 911 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 912 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 913 } 914 } 915 } 916 917 std::shared_ptr<msgpack::MapNode> 918 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 919 const SIProgramInfo &ProgramInfo) const { 920 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 921 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 922 const Function &F = MF.getFunction(); 923 924 auto HSAKernelProps = std::make_shared<msgpack::MapNode>(); 925 auto &Kern = *HSAKernelProps; 926 927 unsigned MaxKernArgAlign; 928 Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>( 929 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 930 Kern[".group_segment_fixed_size"] = 931 std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize); 932 Kern[".private_segment_fixed_size"] = 933 std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize); 934 Kern[".kernarg_segment_align"] = 935 std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign)); 936 Kern[".wavefront_size"] = 937 std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize()); 938 Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR); 939 Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR); 940 Kern[".max_flat_workgroup_size"] = 941 std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize()); 942 Kern[".sgpr_spill_count"] = 943 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs()); 944 Kern[".vgpr_spill_count"] = 945 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs()); 946 947 return HSAKernelProps; 948 } 949 950 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 951 return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true); 952 } 953 954 void MetadataStreamerV3::begin(const Module &Mod) { 955 emitVersion(); 956 emitPrintf(Mod); 957 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode()); 958 } 959 960 void MetadataStreamerV3::end() { 961 std::string HSAMetadataString; 962 raw_string_ostream StrOS(HSAMetadataString); 963 yaml::Output YOut(StrOS); 964 YOut << HSAMetadataRoot; 965 966 if (DumpHSAMetadata) 967 dump(StrOS.str()); 968 if (VerifyHSAMetadata) 969 verify(StrOS.str()); 970 } 971 972 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 973 const SIProgramInfo &ProgramInfo) { 974 auto &Func = MF.getFunction(); 975 auto KernelProps = getHSAKernelProps(MF, ProgramInfo); 976 977 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 978 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 979 980 auto &KernelsNode = getRootMetadata("amdhsa.kernels"); 981 auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get()); 982 983 { 984 auto &Kern = *KernelProps; 985 Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName()); 986 Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>( 987 (Twine(Func.getName()) + Twine(".kd")).str()); 988 emitKernelLanguage(Func, Kern); 989 emitKernelAttrs(Func, Kern); 990 emitKernelArgs(Func, Kern); 991 } 992 993 Kernels->push_back(std::move(KernelProps)); 994 } 995 996 } // end namespace HSAMD 997 } // end namespace AMDGPU 998 } // end namespace llvm 999