1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 static cl::opt<bool> DumpHSAMetadata( 30 "amdgpu-dump-hsa-metadata", 31 cl::desc("Dump AMDGPU HSA Metadata")); 32 static cl::opt<bool> VerifyHSAMetadata( 33 "amdgpu-verify-hsa-metadata", 34 cl::desc("Verify AMDGPU HSA Metadata")); 35 36 namespace AMDGPU { 37 namespace HSAMD { 38 39 //===----------------------------------------------------------------------===// 40 // HSAMetadataStreamerV2 41 //===----------------------------------------------------------------------===// 42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 44 } 45 46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 47 errs() << "AMDGPU HSA Metadata Parser Test: "; 48 49 HSAMD::Metadata FromHSAMetadataString; 50 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 51 errs() << "FAIL\n"; 52 return; 53 } 54 55 std::string ToHSAMetadataString; 56 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 57 errs() << "FAIL\n"; 58 return; 59 } 60 61 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 62 << '\n'; 63 if (HSAMetadataString != ToHSAMetadataString) { 64 errs() << "Original input: " << HSAMetadataString << '\n' 65 << "Produced output: " << ToHSAMetadataString << '\n'; 66 } 67 } 68 69 AccessQualifier 70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 71 if (AccQual.empty()) 72 return AccessQualifier::Unknown; 73 74 return StringSwitch<AccessQualifier>(AccQual) 75 .Case("read_only", AccessQualifier::ReadOnly) 76 .Case("write_only", AccessQualifier::WriteOnly) 77 .Case("read_write", AccessQualifier::ReadWrite) 78 .Default(AccessQualifier::Default); 79 } 80 81 AddressSpaceQualifier 82 MetadataStreamerV2::getAddressSpaceQualifier( 83 unsigned AddressSpace) const { 84 switch (AddressSpace) { 85 case AMDGPUAS::PRIVATE_ADDRESS: 86 return AddressSpaceQualifier::Private; 87 case AMDGPUAS::GLOBAL_ADDRESS: 88 return AddressSpaceQualifier::Global; 89 case AMDGPUAS::CONSTANT_ADDRESS: 90 return AddressSpaceQualifier::Constant; 91 case AMDGPUAS::LOCAL_ADDRESS: 92 return AddressSpaceQualifier::Local; 93 case AMDGPUAS::FLAT_ADDRESS: 94 return AddressSpaceQualifier::Generic; 95 case AMDGPUAS::REGION_ADDRESS: 96 return AddressSpaceQualifier::Region; 97 default: 98 return AddressSpaceQualifier::Unknown; 99 } 100 } 101 102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 103 StringRef BaseTypeName) const { 104 if (TypeQual.find("pipe") != StringRef::npos) 105 return ValueKind::Pipe; 106 107 return StringSwitch<ValueKind>(BaseTypeName) 108 .Case("image1d_t", ValueKind::Image) 109 .Case("image1d_array_t", ValueKind::Image) 110 .Case("image1d_buffer_t", ValueKind::Image) 111 .Case("image2d_t", ValueKind::Image) 112 .Case("image2d_array_t", ValueKind::Image) 113 .Case("image2d_array_depth_t", ValueKind::Image) 114 .Case("image2d_array_msaa_t", ValueKind::Image) 115 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 116 .Case("image2d_depth_t", ValueKind::Image) 117 .Case("image2d_msaa_t", ValueKind::Image) 118 .Case("image2d_msaa_depth_t", ValueKind::Image) 119 .Case("image3d_t", ValueKind::Image) 120 .Case("sampler_t", ValueKind::Sampler) 121 .Case("queue_t", ValueKind::Queue) 122 .Default(isa<PointerType>(Ty) ? 123 (Ty->getPointerAddressSpace() == 124 AMDGPUAS::LOCAL_ADDRESS ? 125 ValueKind::DynamicSharedPointer : 126 ValueKind::GlobalBuffer) : 127 ValueKind::ByValue); 128 } 129 130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { 131 switch (Ty->getTypeID()) { 132 case Type::IntegerTyID: { 133 auto Signed = !TypeName.startswith("u"); 134 switch (Ty->getIntegerBitWidth()) { 135 case 8: 136 return Signed ? ValueType::I8 : ValueType::U8; 137 case 16: 138 return Signed ? ValueType::I16 : ValueType::U16; 139 case 32: 140 return Signed ? ValueType::I32 : ValueType::U32; 141 case 64: 142 return Signed ? ValueType::I64 : ValueType::U64; 143 default: 144 return ValueType::Struct; 145 } 146 } 147 case Type::HalfTyID: 148 return ValueType::F16; 149 case Type::FloatTyID: 150 return ValueType::F32; 151 case Type::DoubleTyID: 152 return ValueType::F64; 153 case Type::PointerTyID: 154 return getValueType(Ty->getPointerElementType(), TypeName); 155 case Type::VectorTyID: 156 return getValueType(Ty->getVectorElementType(), TypeName); 157 default: 158 return ValueType::Struct; 159 } 160 } 161 162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 163 switch (Ty->getTypeID()) { 164 case Type::IntegerTyID: { 165 if (!Signed) 166 return (Twine('u') + getTypeName(Ty, true)).str(); 167 168 auto BitWidth = Ty->getIntegerBitWidth(); 169 switch (BitWidth) { 170 case 8: 171 return "char"; 172 case 16: 173 return "short"; 174 case 32: 175 return "int"; 176 case 64: 177 return "long"; 178 default: 179 return (Twine('i') + Twine(BitWidth)).str(); 180 } 181 } 182 case Type::HalfTyID: 183 return "half"; 184 case Type::FloatTyID: 185 return "float"; 186 case Type::DoubleTyID: 187 return "double"; 188 case Type::VectorTyID: { 189 auto VecTy = cast<VectorType>(Ty); 190 auto ElTy = VecTy->getElementType(); 191 auto NumElements = VecTy->getVectorNumElements(); 192 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 193 } 194 default: 195 return "unknown"; 196 } 197 } 198 199 std::vector<uint32_t> 200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 201 std::vector<uint32_t> Dims; 202 if (Node->getNumOperands() != 3) 203 return Dims; 204 205 for (auto &Op : Node->operands()) 206 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 207 return Dims; 208 } 209 210 Kernel::CodeProps::Metadata 211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 212 const SIProgramInfo &ProgramInfo) const { 213 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 214 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 216 const Function &F = MF.getFunction(); 217 218 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 219 F.getCallingConv() == CallingConv::SPIR_KERNEL); 220 221 unsigned MaxKernArgAlign; 222 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 223 MaxKernArgAlign); 224 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 225 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 226 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); 227 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 228 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 229 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 230 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 231 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 232 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 233 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 234 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 235 236 return HSACodeProps; 237 } 238 239 Kernel::DebugProps::Metadata 240 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 241 const SIProgramInfo &ProgramInfo) const { 242 return HSAMD::Kernel::DebugProps::Metadata(); 243 } 244 245 void MetadataStreamerV2::emitVersion() { 246 auto &Version = HSAMetadata.mVersion; 247 248 Version.push_back(VersionMajor); 249 Version.push_back(VersionMinor); 250 } 251 252 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 253 auto &Printf = HSAMetadata.mPrintf; 254 255 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 256 if (!Node) 257 return; 258 259 for (auto Op : Node->operands()) 260 if (Op->getNumOperands()) 261 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); 262 } 263 264 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 265 auto &Kernel = HSAMetadata.mKernels.back(); 266 267 // TODO: What about other languages? 268 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 269 if (!Node || !Node->getNumOperands()) 270 return; 271 auto Op0 = Node->getOperand(0); 272 if (Op0->getNumOperands() <= 1) 273 return; 274 275 Kernel.mLanguage = "OpenCL C"; 276 Kernel.mLanguageVersion.push_back( 277 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 278 Kernel.mLanguageVersion.push_back( 279 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 280 } 281 282 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 283 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 284 285 if (auto Node = Func.getMetadata("reqd_work_group_size")) 286 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 287 if (auto Node = Func.getMetadata("work_group_size_hint")) 288 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 289 if (auto Node = Func.getMetadata("vec_type_hint")) { 290 Attrs.mVecTypeHint = getTypeName( 291 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 292 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 293 } 294 if (Func.hasFnAttribute("runtime-handle")) { 295 Attrs.mRuntimeHandle = 296 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 297 } 298 } 299 300 void MetadataStreamerV2::emitKernelArgs(const Function &Func) { 301 for (auto &Arg : Func.args()) 302 emitKernelArg(Arg); 303 304 emitHiddenKernelArgs(Func); 305 } 306 307 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 308 auto Func = Arg.getParent(); 309 auto ArgNo = Arg.getArgNo(); 310 const MDNode *Node; 311 312 StringRef Name; 313 Node = Func->getMetadata("kernel_arg_name"); 314 if (Node && ArgNo < Node->getNumOperands()) 315 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 316 else if (Arg.hasName()) 317 Name = Arg.getName(); 318 319 StringRef TypeName; 320 Node = Func->getMetadata("kernel_arg_type"); 321 if (Node && ArgNo < Node->getNumOperands()) 322 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 323 324 StringRef BaseTypeName; 325 Node = Func->getMetadata("kernel_arg_base_type"); 326 if (Node && ArgNo < Node->getNumOperands()) 327 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 328 329 StringRef AccQual; 330 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 331 Arg.hasNoAliasAttr()) { 332 AccQual = "read_only"; 333 } else { 334 Node = Func->getMetadata("kernel_arg_access_qual"); 335 if (Node && ArgNo < Node->getNumOperands()) 336 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 337 } 338 339 StringRef TypeQual; 340 Node = Func->getMetadata("kernel_arg_type_qual"); 341 if (Node && ArgNo < Node->getNumOperands()) 342 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 343 344 Type *Ty = Arg.getType(); 345 const DataLayout &DL = Func->getParent()->getDataLayout(); 346 347 unsigned PointeeAlign = 0; 348 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 349 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 350 PointeeAlign = Arg.getParamAlignment(); 351 if (PointeeAlign == 0) 352 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 353 } 354 } 355 356 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), 357 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 358 } 359 360 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 361 ValueKind ValueKind, 362 unsigned PointeeAlign, StringRef Name, 363 StringRef TypeName, 364 StringRef BaseTypeName, 365 StringRef AccQual, StringRef TypeQual) { 366 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 367 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 368 369 Arg.mName = Name; 370 Arg.mTypeName = TypeName; 371 Arg.mSize = DL.getTypeAllocSize(Ty); 372 Arg.mAlign = DL.getABITypeAlignment(Ty); 373 Arg.mValueKind = ValueKind; 374 Arg.mValueType = getValueType(Ty, BaseTypeName); 375 Arg.mPointeeAlign = PointeeAlign; 376 377 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 378 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 379 380 Arg.mAccQual = getAccessQualifier(AccQual); 381 382 // TODO: Emit Arg.mActualAccQual. 383 384 SmallVector<StringRef, 1> SplitTypeQuals; 385 TypeQual.split(SplitTypeQuals, " ", -1, false); 386 for (StringRef Key : SplitTypeQuals) { 387 auto P = StringSwitch<bool*>(Key) 388 .Case("const", &Arg.mIsConst) 389 .Case("restrict", &Arg.mIsRestrict) 390 .Case("volatile", &Arg.mIsVolatile) 391 .Case("pipe", &Arg.mIsPipe) 392 .Default(nullptr); 393 if (P) 394 *P = true; 395 } 396 } 397 398 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { 399 int HiddenArgNumBytes = 400 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 401 402 if (!HiddenArgNumBytes) 403 return; 404 405 auto &DL = Func.getParent()->getDataLayout(); 406 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 407 408 if (HiddenArgNumBytes >= 8) 409 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); 410 if (HiddenArgNumBytes >= 16) 411 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); 412 if (HiddenArgNumBytes >= 24) 413 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); 414 415 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 416 AMDGPUAS::GLOBAL_ADDRESS); 417 418 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 419 // "none" argument. 420 if (HiddenArgNumBytes >= 32) { 421 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 422 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); 423 else 424 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 425 } 426 427 // Emit "default queue" and "completion action" arguments if enqueue kernel is 428 // used, otherwise emit dummy "none" arguments. 429 if (HiddenArgNumBytes >= 48) { 430 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 431 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); 432 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); 433 } else { 434 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 435 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 436 } 437 } 438 } 439 440 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 441 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 442 } 443 444 void MetadataStreamerV2::begin(const Module &Mod) { 445 emitVersion(); 446 emitPrintf(Mod); 447 } 448 449 void MetadataStreamerV2::end() { 450 std::string HSAMetadataString; 451 if (toString(HSAMetadata, HSAMetadataString)) 452 return; 453 454 if (DumpHSAMetadata) 455 dump(HSAMetadataString); 456 if (VerifyHSAMetadata) 457 verify(HSAMetadataString); 458 } 459 460 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 461 const SIProgramInfo &ProgramInfo) { 462 auto &Func = MF.getFunction(); 463 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 464 return; 465 466 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 467 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 468 469 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 470 auto &Kernel = HSAMetadata.mKernels.back(); 471 472 Kernel.mName = Func.getName(); 473 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 474 emitKernelLanguage(Func); 475 emitKernelAttrs(Func); 476 emitKernelArgs(Func); 477 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 478 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 479 } 480 481 //===----------------------------------------------------------------------===// 482 // HSAMetadataStreamerV3 483 //===----------------------------------------------------------------------===// 484 485 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 486 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 487 } 488 489 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 490 errs() << "AMDGPU HSA Metadata Parser Test: "; 491 492 std::shared_ptr<msgpack::Node> FromHSAMetadataString = 493 std::make_shared<msgpack::MapNode>(); 494 495 yaml::Input YIn(HSAMetadataString); 496 YIn >> FromHSAMetadataString; 497 if (YIn.error()) { 498 errs() << "FAIL\n"; 499 return; 500 } 501 502 std::string ToHSAMetadataString; 503 raw_string_ostream StrOS(ToHSAMetadataString); 504 yaml::Output YOut(StrOS); 505 YOut << FromHSAMetadataString; 506 507 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 508 if (HSAMetadataString != ToHSAMetadataString) { 509 errs() << "Original input: " << HSAMetadataString << '\n' 510 << "Produced output: " << StrOS.str() << '\n'; 511 } 512 } 513 514 Optional<StringRef> 515 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 516 return StringSwitch<Optional<StringRef>>(AccQual) 517 .Case("read_only", StringRef("read_only")) 518 .Case("write_only", StringRef("write_only")) 519 .Case("read_write", StringRef("read_write")) 520 .Default(None); 521 } 522 523 Optional<StringRef> 524 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 525 switch (AddressSpace) { 526 case AMDGPUAS::PRIVATE_ADDRESS: 527 return StringRef("private"); 528 case AMDGPUAS::GLOBAL_ADDRESS: 529 return StringRef("global"); 530 case AMDGPUAS::CONSTANT_ADDRESS: 531 return StringRef("constant"); 532 case AMDGPUAS::LOCAL_ADDRESS: 533 return StringRef("local"); 534 case AMDGPUAS::FLAT_ADDRESS: 535 return StringRef("generic"); 536 case AMDGPUAS::REGION_ADDRESS: 537 return StringRef("region"); 538 default: 539 return None; 540 } 541 } 542 543 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 544 StringRef BaseTypeName) const { 545 if (TypeQual.find("pipe") != StringRef::npos) 546 return "pipe"; 547 548 return StringSwitch<StringRef>(BaseTypeName) 549 .Case("image1d_t", "image") 550 .Case("image1d_array_t", "image") 551 .Case("image1d_buffer_t", "image") 552 .Case("image2d_t", "image") 553 .Case("image2d_array_t", "image") 554 .Case("image2d_array_depth_t", "image") 555 .Case("image2d_array_msaa_t", "image") 556 .Case("image2d_array_msaa_depth_t", "image") 557 .Case("image2d_depth_t", "image") 558 .Case("image2d_msaa_t", "image") 559 .Case("image2d_msaa_depth_t", "image") 560 .Case("image3d_t", "image") 561 .Case("sampler_t", "sampler") 562 .Case("queue_t", "queue") 563 .Default(isa<PointerType>(Ty) 564 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 565 ? "dynamic_shared_pointer" 566 : "global_buffer") 567 : "by_value"); 568 } 569 570 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { 571 switch (Ty->getTypeID()) { 572 case Type::IntegerTyID: { 573 auto Signed = !TypeName.startswith("u"); 574 switch (Ty->getIntegerBitWidth()) { 575 case 8: 576 return Signed ? "i8" : "u8"; 577 case 16: 578 return Signed ? "i16" : "u16"; 579 case 32: 580 return Signed ? "i32" : "u32"; 581 case 64: 582 return Signed ? "i64" : "u64"; 583 default: 584 return "struct"; 585 } 586 } 587 case Type::HalfTyID: 588 return "f16"; 589 case Type::FloatTyID: 590 return "f32"; 591 case Type::DoubleTyID: 592 return "f64"; 593 case Type::PointerTyID: 594 return getValueType(Ty->getPointerElementType(), TypeName); 595 case Type::VectorTyID: 596 return getValueType(Ty->getVectorElementType(), TypeName); 597 default: 598 return "struct"; 599 } 600 } 601 602 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 603 switch (Ty->getTypeID()) { 604 case Type::IntegerTyID: { 605 if (!Signed) 606 return (Twine('u') + getTypeName(Ty, true)).str(); 607 608 auto BitWidth = Ty->getIntegerBitWidth(); 609 switch (BitWidth) { 610 case 8: 611 return "char"; 612 case 16: 613 return "short"; 614 case 32: 615 return "int"; 616 case 64: 617 return "long"; 618 default: 619 return (Twine('i') + Twine(BitWidth)).str(); 620 } 621 } 622 case Type::HalfTyID: 623 return "half"; 624 case Type::FloatTyID: 625 return "float"; 626 case Type::DoubleTyID: 627 return "double"; 628 case Type::VectorTyID: { 629 auto VecTy = cast<VectorType>(Ty); 630 auto ElTy = VecTy->getElementType(); 631 auto NumElements = VecTy->getVectorNumElements(); 632 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 633 } 634 default: 635 return "unknown"; 636 } 637 } 638 639 std::shared_ptr<msgpack::ArrayNode> 640 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 641 auto Dims = std::make_shared<msgpack::ArrayNode>(); 642 if (Node->getNumOperands() != 3) 643 return Dims; 644 645 for (auto &Op : Node->operands()) 646 Dims->push_back(std::make_shared<msgpack::ScalarNode>( 647 mdconst::extract<ConstantInt>(Op)->getZExtValue())); 648 return Dims; 649 } 650 651 void MetadataStreamerV3::emitVersion() { 652 auto Version = std::make_shared<msgpack::ArrayNode>(); 653 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor)); 654 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor)); 655 getRootMetadata("amdhsa.version") = std::move(Version); 656 } 657 658 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 659 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 660 if (!Node) 661 return; 662 663 auto Printf = std::make_shared<msgpack::ArrayNode>(); 664 for (auto Op : Node->operands()) 665 if (Op->getNumOperands()) 666 Printf->push_back(std::make_shared<msgpack::ScalarNode>( 667 cast<MDString>(Op->getOperand(0))->getString())); 668 getRootMetadata("amdhsa.printf") = std::move(Printf); 669 } 670 671 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 672 msgpack::MapNode &Kern) { 673 // TODO: What about other languages? 674 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 675 if (!Node || !Node->getNumOperands()) 676 return; 677 auto Op0 = Node->getOperand(0); 678 if (Op0->getNumOperands() <= 1) 679 return; 680 681 Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C"); 682 auto LanguageVersion = std::make_shared<msgpack::ArrayNode>(); 683 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( 684 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 685 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( 686 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 687 Kern[".language_version"] = std::move(LanguageVersion); 688 } 689 690 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 691 msgpack::MapNode &Kern) { 692 693 if (auto Node = Func.getMetadata("reqd_work_group_size")) 694 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 695 if (auto Node = Func.getMetadata("work_group_size_hint")) 696 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 697 if (auto Node = Func.getMetadata("vec_type_hint")) { 698 Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName( 699 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 700 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue())); 701 } 702 if (Func.hasFnAttribute("runtime-handle")) { 703 Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>( 704 Func.getFnAttribute("runtime-handle").getValueAsString().str()); 705 } 706 } 707 708 void MetadataStreamerV3::emitKernelArgs(const Function &Func, 709 msgpack::MapNode &Kern) { 710 unsigned Offset = 0; 711 auto Args = std::make_shared<msgpack::ArrayNode>(); 712 for (auto &Arg : Func.args()) 713 emitKernelArg(Arg, Offset, *Args); 714 715 emitHiddenKernelArgs(Func, Offset, *Args); 716 717 // TODO: What about other languages? 718 if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) { 719 auto &DL = Func.getParent()->getDataLayout(); 720 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 721 722 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args); 723 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args); 724 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args); 725 726 auto Int8PtrTy = 727 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 728 729 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 730 // "none" argument. 731 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 732 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args); 733 else 734 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 735 736 // Emit "default queue" and "completion action" arguments if enqueue kernel 737 // is used, otherwise emit dummy "none" arguments. 738 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 739 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args); 740 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args); 741 } else { 742 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 743 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 744 } 745 } 746 747 Kern[".args"] = std::move(Args); 748 } 749 750 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 751 msgpack::ArrayNode &Args) { 752 auto Func = Arg.getParent(); 753 auto ArgNo = Arg.getArgNo(); 754 const MDNode *Node; 755 756 StringRef Name; 757 Node = Func->getMetadata("kernel_arg_name"); 758 if (Node && ArgNo < Node->getNumOperands()) 759 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 760 else if (Arg.hasName()) 761 Name = Arg.getName(); 762 763 StringRef TypeName; 764 Node = Func->getMetadata("kernel_arg_type"); 765 if (Node && ArgNo < Node->getNumOperands()) 766 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 767 768 StringRef BaseTypeName; 769 Node = Func->getMetadata("kernel_arg_base_type"); 770 if (Node && ArgNo < Node->getNumOperands()) 771 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 772 773 StringRef AccQual; 774 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 775 Arg.hasNoAliasAttr()) { 776 AccQual = "read_only"; 777 } else { 778 Node = Func->getMetadata("kernel_arg_access_qual"); 779 if (Node && ArgNo < Node->getNumOperands()) 780 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 781 } 782 783 StringRef TypeQual; 784 Node = Func->getMetadata("kernel_arg_type_qual"); 785 if (Node && ArgNo < Node->getNumOperands()) 786 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 787 788 Type *Ty = Arg.getType(); 789 const DataLayout &DL = Func->getParent()->getDataLayout(); 790 791 unsigned PointeeAlign = 0; 792 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 793 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 794 PointeeAlign = Arg.getParamAlignment(); 795 if (PointeeAlign == 0) 796 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 797 } 798 } 799 800 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), 801 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, 802 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, 803 TypeQual); 804 } 805 806 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, 807 StringRef ValueKind, unsigned &Offset, 808 msgpack::ArrayNode &Args, 809 unsigned PointeeAlign, StringRef Name, 810 StringRef TypeName, 811 StringRef BaseTypeName, 812 StringRef AccQual, StringRef TypeQual) { 813 auto ArgPtr = std::make_shared<msgpack::MapNode>(); 814 auto &Arg = *ArgPtr; 815 816 if (!Name.empty()) 817 Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name); 818 if (!TypeName.empty()) 819 Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName); 820 auto Size = DL.getTypeAllocSize(Ty); 821 auto Align = DL.getABITypeAlignment(Ty); 822 Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size); 823 Offset = alignTo(Offset, Align); 824 Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset); 825 Offset += Size; 826 Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind); 827 Arg[".value_type"] = 828 std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName)); 829 if (PointeeAlign) 830 Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign); 831 832 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 833 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 834 Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier); 835 836 if (auto AQ = getAccessQualifier(AccQual)) 837 Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ); 838 839 // TODO: Emit Arg[".actual_access"]. 840 841 SmallVector<StringRef, 1> SplitTypeQuals; 842 TypeQual.split(SplitTypeQuals, " ", -1, false); 843 for (StringRef Key : SplitTypeQuals) { 844 if (Key == "const") 845 Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true); 846 else if (Key == "restrict") 847 Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true); 848 else if (Key == "volatile") 849 Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true); 850 else if (Key == "pipe") 851 Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true); 852 } 853 854 Args.push_back(std::move(ArgPtr)); 855 } 856 857 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, 858 unsigned &Offset, 859 msgpack::ArrayNode &Args) { 860 int HiddenArgNumBytes = 861 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 862 863 if (!HiddenArgNumBytes) 864 return; 865 866 auto &DL = Func.getParent()->getDataLayout(); 867 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 868 869 if (HiddenArgNumBytes >= 8) 870 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); 871 if (HiddenArgNumBytes >= 16) 872 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); 873 if (HiddenArgNumBytes >= 24) 874 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); 875 876 auto Int8PtrTy = 877 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 878 879 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 880 // "none" argument. 881 if (HiddenArgNumBytes >= 32) { 882 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 883 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); 884 else 885 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 886 } 887 888 // Emit "default queue" and "completion action" arguments if enqueue kernel is 889 // used, otherwise emit dummy "none" arguments. 890 if (HiddenArgNumBytes >= 48) { 891 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 892 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); 893 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); 894 } else { 895 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 896 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 897 } 898 } 899 } 900 901 std::shared_ptr<msgpack::MapNode> 902 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 903 const SIProgramInfo &ProgramInfo) const { 904 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 905 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 906 const Function &F = MF.getFunction(); 907 908 auto HSAKernelProps = std::make_shared<msgpack::MapNode>(); 909 auto &Kern = *HSAKernelProps; 910 911 unsigned MaxKernArgAlign; 912 Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>( 913 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 914 Kern[".group_segment_fixed_size"] = 915 std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize); 916 Kern[".private_segment_fixed_size"] = 917 std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize); 918 Kern[".kernarg_segment_align"] = 919 std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign)); 920 Kern[".wavefront_size"] = 921 std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize()); 922 Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR); 923 Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR); 924 Kern[".max_flat_workgroup_size"] = 925 std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize()); 926 Kern[".sgpr_spill_count"] = 927 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs()); 928 Kern[".vgpr_spill_count"] = 929 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs()); 930 931 return HSAKernelProps; 932 } 933 934 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 935 return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true); 936 } 937 938 void MetadataStreamerV3::begin(const Module &Mod) { 939 emitVersion(); 940 emitPrintf(Mod); 941 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode()); 942 } 943 944 void MetadataStreamerV3::end() { 945 std::string HSAMetadataString; 946 raw_string_ostream StrOS(HSAMetadataString); 947 yaml::Output YOut(StrOS); 948 YOut << HSAMetadataRoot; 949 950 if (DumpHSAMetadata) 951 dump(StrOS.str()); 952 if (VerifyHSAMetadata) 953 verify(StrOS.str()); 954 } 955 956 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 957 const SIProgramInfo &ProgramInfo) { 958 auto &Func = MF.getFunction(); 959 auto KernelProps = getHSAKernelProps(MF, ProgramInfo); 960 961 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 962 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 963 964 auto &KernelsNode = getRootMetadata("amdhsa.kernels"); 965 auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get()); 966 967 { 968 auto &Kern = *KernelProps; 969 Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName()); 970 Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>( 971 (Twine(Func.getName()) + Twine(".kd")).str()); 972 emitKernelLanguage(Func, Kern); 973 emitKernelAttrs(Func, Kern); 974 emitKernelArgs(Func, Kern); 975 } 976 977 Kernels->push_back(std::move(KernelProps)); 978 } 979 980 } // end namespace HSAMD 981 } // end namespace AMDGPU 982 } // end namespace llvm 983