1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 static cl::opt<bool> DumpHSAMetadata( 30 "amdgpu-dump-hsa-metadata", 31 cl::desc("Dump AMDGPU HSA Metadata")); 32 static cl::opt<bool> VerifyHSAMetadata( 33 "amdgpu-verify-hsa-metadata", 34 cl::desc("Verify AMDGPU HSA Metadata")); 35 36 namespace AMDGPU { 37 namespace HSAMD { 38 39 //===----------------------------------------------------------------------===// 40 // HSAMetadataStreamerV2 41 //===----------------------------------------------------------------------===// 42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 44 } 45 46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 47 errs() << "AMDGPU HSA Metadata Parser Test: "; 48 49 HSAMD::Metadata FromHSAMetadataString; 50 if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) { 51 errs() << "FAIL\n"; 52 return; 53 } 54 55 std::string ToHSAMetadataString; 56 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 57 errs() << "FAIL\n"; 58 return; 59 } 60 61 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 62 << '\n'; 63 if (HSAMetadataString != ToHSAMetadataString) { 64 errs() << "Original input: " << HSAMetadataString << '\n' 65 << "Produced output: " << ToHSAMetadataString << '\n'; 66 } 67 } 68 69 AccessQualifier 70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 71 if (AccQual.empty()) 72 return AccessQualifier::Unknown; 73 74 return StringSwitch<AccessQualifier>(AccQual) 75 .Case("read_only", AccessQualifier::ReadOnly) 76 .Case("write_only", AccessQualifier::WriteOnly) 77 .Case("read_write", AccessQualifier::ReadWrite) 78 .Default(AccessQualifier::Default); 79 } 80 81 AddressSpaceQualifier 82 MetadataStreamerV2::getAddressSpaceQualifier( 83 unsigned AddressSpace) const { 84 switch (AddressSpace) { 85 case AMDGPUAS::PRIVATE_ADDRESS: 86 return AddressSpaceQualifier::Private; 87 case AMDGPUAS::GLOBAL_ADDRESS: 88 return AddressSpaceQualifier::Global; 89 case AMDGPUAS::CONSTANT_ADDRESS: 90 return AddressSpaceQualifier::Constant; 91 case AMDGPUAS::LOCAL_ADDRESS: 92 return AddressSpaceQualifier::Local; 93 case AMDGPUAS::FLAT_ADDRESS: 94 return AddressSpaceQualifier::Generic; 95 case AMDGPUAS::REGION_ADDRESS: 96 return AddressSpaceQualifier::Region; 97 default: 98 return AddressSpaceQualifier::Unknown; 99 } 100 } 101 102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 103 StringRef BaseTypeName) const { 104 if (TypeQual.find("pipe") != StringRef::npos) 105 return ValueKind::Pipe; 106 107 return StringSwitch<ValueKind>(BaseTypeName) 108 .Case("image1d_t", ValueKind::Image) 109 .Case("image1d_array_t", ValueKind::Image) 110 .Case("image1d_buffer_t", ValueKind::Image) 111 .Case("image2d_t", ValueKind::Image) 112 .Case("image2d_array_t", ValueKind::Image) 113 .Case("image2d_array_depth_t", ValueKind::Image) 114 .Case("image2d_array_msaa_t", ValueKind::Image) 115 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 116 .Case("image2d_depth_t", ValueKind::Image) 117 .Case("image2d_msaa_t", ValueKind::Image) 118 .Case("image2d_msaa_depth_t", ValueKind::Image) 119 .Case("image3d_t", ValueKind::Image) 120 .Case("sampler_t", ValueKind::Sampler) 121 .Case("queue_t", ValueKind::Queue) 122 .Default(isa<PointerType>(Ty) ? 123 (Ty->getPointerAddressSpace() == 124 AMDGPUAS::LOCAL_ADDRESS ? 125 ValueKind::DynamicSharedPointer : 126 ValueKind::GlobalBuffer) : 127 ValueKind::ByValue); 128 } 129 130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { 131 switch (Ty->getTypeID()) { 132 case Type::IntegerTyID: { 133 auto Signed = !TypeName.startswith("u"); 134 switch (Ty->getIntegerBitWidth()) { 135 case 8: 136 return Signed ? ValueType::I8 : ValueType::U8; 137 case 16: 138 return Signed ? ValueType::I16 : ValueType::U16; 139 case 32: 140 return Signed ? ValueType::I32 : ValueType::U32; 141 case 64: 142 return Signed ? ValueType::I64 : ValueType::U64; 143 default: 144 return ValueType::Struct; 145 } 146 } 147 case Type::HalfTyID: 148 return ValueType::F16; 149 case Type::FloatTyID: 150 return ValueType::F32; 151 case Type::DoubleTyID: 152 return ValueType::F64; 153 case Type::PointerTyID: 154 return getValueType(Ty->getPointerElementType(), TypeName); 155 case Type::FixedVectorTyID: 156 return getValueType(cast<VectorType>(Ty)->getElementType(), TypeName); 157 default: 158 return ValueType::Struct; 159 } 160 } 161 162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 163 switch (Ty->getTypeID()) { 164 case Type::IntegerTyID: { 165 if (!Signed) 166 return (Twine('u') + getTypeName(Ty, true)).str(); 167 168 auto BitWidth = Ty->getIntegerBitWidth(); 169 switch (BitWidth) { 170 case 8: 171 return "char"; 172 case 16: 173 return "short"; 174 case 32: 175 return "int"; 176 case 64: 177 return "long"; 178 default: 179 return (Twine('i') + Twine(BitWidth)).str(); 180 } 181 } 182 case Type::HalfTyID: 183 return "half"; 184 case Type::FloatTyID: 185 return "float"; 186 case Type::DoubleTyID: 187 return "double"; 188 case Type::FixedVectorTyID: { 189 auto VecTy = cast<FixedVectorType>(Ty); 190 auto ElTy = VecTy->getElementType(); 191 auto NumElements = VecTy->getNumElements(); 192 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 193 } 194 default: 195 return "unknown"; 196 } 197 } 198 199 std::vector<uint32_t> 200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 201 std::vector<uint32_t> Dims; 202 if (Node->getNumOperands() != 3) 203 return Dims; 204 205 for (auto &Op : Node->operands()) 206 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 207 return Dims; 208 } 209 210 Kernel::CodeProps::Metadata 211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 212 const SIProgramInfo &ProgramInfo) const { 213 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 214 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 216 const Function &F = MF.getFunction(); 217 218 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 219 F.getCallingConv() == CallingConv::SPIR_KERNEL); 220 221 Align MaxKernArgAlign; 222 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 223 MaxKernArgAlign); 224 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 225 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 226 HSACodeProps.mKernargSegmentAlign = 227 std::max(MaxKernArgAlign, Align(4)).value(); 228 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 229 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 230 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 231 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 232 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 233 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 234 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 235 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 236 237 return HSACodeProps; 238 } 239 240 Kernel::DebugProps::Metadata 241 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 242 const SIProgramInfo &ProgramInfo) const { 243 return HSAMD::Kernel::DebugProps::Metadata(); 244 } 245 246 void MetadataStreamerV2::emitVersion() { 247 auto &Version = HSAMetadata.mVersion; 248 249 Version.push_back(VersionMajor); 250 Version.push_back(VersionMinor); 251 } 252 253 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 254 auto &Printf = HSAMetadata.mPrintf; 255 256 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 257 if (!Node) 258 return; 259 260 for (auto Op : Node->operands()) 261 if (Op->getNumOperands()) 262 Printf.push_back( 263 std::string(cast<MDString>(Op->getOperand(0))->getString())); 264 } 265 266 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 267 auto &Kernel = HSAMetadata.mKernels.back(); 268 269 // TODO: What about other languages? 270 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 271 if (!Node || !Node->getNumOperands()) 272 return; 273 auto Op0 = Node->getOperand(0); 274 if (Op0->getNumOperands() <= 1) 275 return; 276 277 Kernel.mLanguage = "OpenCL C"; 278 Kernel.mLanguageVersion.push_back( 279 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 280 Kernel.mLanguageVersion.push_back( 281 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 282 } 283 284 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 285 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 286 287 if (auto Node = Func.getMetadata("reqd_work_group_size")) 288 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 289 if (auto Node = Func.getMetadata("work_group_size_hint")) 290 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 291 if (auto Node = Func.getMetadata("vec_type_hint")) { 292 Attrs.mVecTypeHint = getTypeName( 293 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 294 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 295 } 296 if (Func.hasFnAttribute("runtime-handle")) { 297 Attrs.mRuntimeHandle = 298 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 299 } 300 } 301 302 void MetadataStreamerV2::emitKernelArgs(const Function &Func) { 303 for (auto &Arg : Func.args()) 304 emitKernelArg(Arg); 305 306 emitHiddenKernelArgs(Func); 307 } 308 309 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 310 auto Func = Arg.getParent(); 311 auto ArgNo = Arg.getArgNo(); 312 const MDNode *Node; 313 314 StringRef Name; 315 Node = Func->getMetadata("kernel_arg_name"); 316 if (Node && ArgNo < Node->getNumOperands()) 317 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 318 else if (Arg.hasName()) 319 Name = Arg.getName(); 320 321 StringRef TypeName; 322 Node = Func->getMetadata("kernel_arg_type"); 323 if (Node && ArgNo < Node->getNumOperands()) 324 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 325 326 StringRef BaseTypeName; 327 Node = Func->getMetadata("kernel_arg_base_type"); 328 if (Node && ArgNo < Node->getNumOperands()) 329 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 330 331 StringRef AccQual; 332 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 333 Arg.hasNoAliasAttr()) { 334 AccQual = "read_only"; 335 } else { 336 Node = Func->getMetadata("kernel_arg_access_qual"); 337 if (Node && ArgNo < Node->getNumOperands()) 338 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 339 } 340 341 StringRef TypeQual; 342 Node = Func->getMetadata("kernel_arg_type_qual"); 343 if (Node && ArgNo < Node->getNumOperands()) 344 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 345 346 Type *Ty = Arg.getType(); 347 const DataLayout &DL = Func->getParent()->getDataLayout(); 348 349 unsigned PointeeAlign = 0; 350 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 351 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 352 PointeeAlign = Arg.getParamAlignment(); 353 if (PointeeAlign == 0) 354 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 355 } 356 } 357 358 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), 359 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 360 } 361 362 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 363 ValueKind ValueKind, 364 unsigned PointeeAlign, StringRef Name, 365 StringRef TypeName, 366 StringRef BaseTypeName, 367 StringRef AccQual, StringRef TypeQual) { 368 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 369 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 370 371 Arg.mName = std::string(Name); 372 Arg.mTypeName = std::string(TypeName); 373 Arg.mSize = DL.getTypeAllocSize(Ty); 374 Arg.mAlign = DL.getABITypeAlignment(Ty); 375 Arg.mValueKind = ValueKind; 376 Arg.mValueType = getValueType(Ty, BaseTypeName); 377 Arg.mPointeeAlign = PointeeAlign; 378 379 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 380 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 381 382 Arg.mAccQual = getAccessQualifier(AccQual); 383 384 // TODO: Emit Arg.mActualAccQual. 385 386 SmallVector<StringRef, 1> SplitTypeQuals; 387 TypeQual.split(SplitTypeQuals, " ", -1, false); 388 for (StringRef Key : SplitTypeQuals) { 389 auto P = StringSwitch<bool*>(Key) 390 .Case("const", &Arg.mIsConst) 391 .Case("restrict", &Arg.mIsRestrict) 392 .Case("volatile", &Arg.mIsVolatile) 393 .Case("pipe", &Arg.mIsPipe) 394 .Default(nullptr); 395 if (P) 396 *P = true; 397 } 398 } 399 400 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { 401 int HiddenArgNumBytes = 402 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 403 404 if (!HiddenArgNumBytes) 405 return; 406 407 auto &DL = Func.getParent()->getDataLayout(); 408 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 409 410 if (HiddenArgNumBytes >= 8) 411 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); 412 if (HiddenArgNumBytes >= 16) 413 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); 414 if (HiddenArgNumBytes >= 24) 415 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); 416 417 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 418 AMDGPUAS::GLOBAL_ADDRESS); 419 420 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 421 // "none" argument. 422 if (HiddenArgNumBytes >= 32) { 423 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 424 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); 425 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 426 // The printf runtime binding pass should have ensured that hostcall and 427 // printf are not used in the same module. 428 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 429 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer); 430 } else 431 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 432 } 433 434 // Emit "default queue" and "completion action" arguments if enqueue kernel is 435 // used, otherwise emit dummy "none" arguments. 436 if (HiddenArgNumBytes >= 48) { 437 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 438 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); 439 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); 440 } else { 441 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 442 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 443 } 444 } 445 446 // Emit the pointer argument for multi-grid object. 447 if (HiddenArgNumBytes >= 56) 448 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg); 449 } 450 451 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 452 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 453 } 454 455 void MetadataStreamerV2::begin(const Module &Mod) { 456 emitVersion(); 457 emitPrintf(Mod); 458 } 459 460 void MetadataStreamerV2::end() { 461 std::string HSAMetadataString; 462 if (toString(HSAMetadata, HSAMetadataString)) 463 return; 464 465 if (DumpHSAMetadata) 466 dump(HSAMetadataString); 467 if (VerifyHSAMetadata) 468 verify(HSAMetadataString); 469 } 470 471 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 472 const SIProgramInfo &ProgramInfo) { 473 auto &Func = MF.getFunction(); 474 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 475 return; 476 477 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 478 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 479 480 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 481 auto &Kernel = HSAMetadata.mKernels.back(); 482 483 Kernel.mName = std::string(Func.getName()); 484 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 485 emitKernelLanguage(Func); 486 emitKernelAttrs(Func); 487 emitKernelArgs(Func); 488 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 489 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 490 } 491 492 //===----------------------------------------------------------------------===// 493 // HSAMetadataStreamerV3 494 //===----------------------------------------------------------------------===// 495 496 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 497 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 498 } 499 500 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 501 errs() << "AMDGPU HSA Metadata Parser Test: "; 502 503 msgpack::Document FromHSAMetadataString; 504 505 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 506 errs() << "FAIL\n"; 507 return; 508 } 509 510 std::string ToHSAMetadataString; 511 raw_string_ostream StrOS(ToHSAMetadataString); 512 FromHSAMetadataString.toYAML(StrOS); 513 514 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 515 if (HSAMetadataString != ToHSAMetadataString) { 516 errs() << "Original input: " << HSAMetadataString << '\n' 517 << "Produced output: " << StrOS.str() << '\n'; 518 } 519 } 520 521 Optional<StringRef> 522 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 523 return StringSwitch<Optional<StringRef>>(AccQual) 524 .Case("read_only", StringRef("read_only")) 525 .Case("write_only", StringRef("write_only")) 526 .Case("read_write", StringRef("read_write")) 527 .Default(None); 528 } 529 530 Optional<StringRef> 531 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 532 switch (AddressSpace) { 533 case AMDGPUAS::PRIVATE_ADDRESS: 534 return StringRef("private"); 535 case AMDGPUAS::GLOBAL_ADDRESS: 536 return StringRef("global"); 537 case AMDGPUAS::CONSTANT_ADDRESS: 538 return StringRef("constant"); 539 case AMDGPUAS::LOCAL_ADDRESS: 540 return StringRef("local"); 541 case AMDGPUAS::FLAT_ADDRESS: 542 return StringRef("generic"); 543 case AMDGPUAS::REGION_ADDRESS: 544 return StringRef("region"); 545 default: 546 return None; 547 } 548 } 549 550 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 551 StringRef BaseTypeName) const { 552 if (TypeQual.find("pipe") != StringRef::npos) 553 return "pipe"; 554 555 return StringSwitch<StringRef>(BaseTypeName) 556 .Case("image1d_t", "image") 557 .Case("image1d_array_t", "image") 558 .Case("image1d_buffer_t", "image") 559 .Case("image2d_t", "image") 560 .Case("image2d_array_t", "image") 561 .Case("image2d_array_depth_t", "image") 562 .Case("image2d_array_msaa_t", "image") 563 .Case("image2d_array_msaa_depth_t", "image") 564 .Case("image2d_depth_t", "image") 565 .Case("image2d_msaa_t", "image") 566 .Case("image2d_msaa_depth_t", "image") 567 .Case("image3d_t", "image") 568 .Case("sampler_t", "sampler") 569 .Case("queue_t", "queue") 570 .Default(isa<PointerType>(Ty) 571 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 572 ? "dynamic_shared_pointer" 573 : "global_buffer") 574 : "by_value"); 575 } 576 577 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { 578 switch (Ty->getTypeID()) { 579 case Type::IntegerTyID: { 580 auto Signed = !TypeName.startswith("u"); 581 switch (Ty->getIntegerBitWidth()) { 582 case 8: 583 return Signed ? "i8" : "u8"; 584 case 16: 585 return Signed ? "i16" : "u16"; 586 case 32: 587 return Signed ? "i32" : "u32"; 588 case 64: 589 return Signed ? "i64" : "u64"; 590 default: 591 return "struct"; 592 } 593 } 594 case Type::HalfTyID: 595 return "f16"; 596 case Type::FloatTyID: 597 return "f32"; 598 case Type::DoubleTyID: 599 return "f64"; 600 case Type::PointerTyID: 601 return getValueType(Ty->getPointerElementType(), TypeName); 602 case Type::FixedVectorTyID: 603 return getValueType(cast<VectorType>(Ty)->getElementType(), TypeName); 604 default: 605 return "struct"; 606 } 607 } 608 609 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 610 switch (Ty->getTypeID()) { 611 case Type::IntegerTyID: { 612 if (!Signed) 613 return (Twine('u') + getTypeName(Ty, true)).str(); 614 615 auto BitWidth = Ty->getIntegerBitWidth(); 616 switch (BitWidth) { 617 case 8: 618 return "char"; 619 case 16: 620 return "short"; 621 case 32: 622 return "int"; 623 case 64: 624 return "long"; 625 default: 626 return (Twine('i') + Twine(BitWidth)).str(); 627 } 628 } 629 case Type::HalfTyID: 630 return "half"; 631 case Type::FloatTyID: 632 return "float"; 633 case Type::DoubleTyID: 634 return "double"; 635 case Type::FixedVectorTyID: { 636 auto VecTy = cast<FixedVectorType>(Ty); 637 auto ElTy = VecTy->getElementType(); 638 auto NumElements = VecTy->getNumElements(); 639 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 640 } 641 default: 642 return "unknown"; 643 } 644 } 645 646 msgpack::ArrayDocNode 647 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 648 auto Dims = HSAMetadataDoc->getArrayNode(); 649 if (Node->getNumOperands() != 3) 650 return Dims; 651 652 for (auto &Op : Node->operands()) 653 Dims.push_back(Dims.getDocument()->getNode( 654 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 655 return Dims; 656 } 657 658 void MetadataStreamerV3::emitVersion() { 659 auto Version = HSAMetadataDoc->getArrayNode(); 660 Version.push_back(Version.getDocument()->getNode(VersionMajor)); 661 Version.push_back(Version.getDocument()->getNode(VersionMinor)); 662 getRootMetadata("amdhsa.version") = Version; 663 } 664 665 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 666 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 667 if (!Node) 668 return; 669 670 auto Printf = HSAMetadataDoc->getArrayNode(); 671 for (auto Op : Node->operands()) 672 if (Op->getNumOperands()) 673 Printf.push_back(Printf.getDocument()->getNode( 674 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 675 getRootMetadata("amdhsa.printf") = Printf; 676 } 677 678 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 679 msgpack::MapDocNode Kern) { 680 // TODO: What about other languages? 681 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 682 if (!Node || !Node->getNumOperands()) 683 return; 684 auto Op0 = Node->getOperand(0); 685 if (Op0->getNumOperands() <= 1) 686 return; 687 688 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 689 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 690 LanguageVersion.push_back(Kern.getDocument()->getNode( 691 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 692 LanguageVersion.push_back(Kern.getDocument()->getNode( 693 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 694 Kern[".language_version"] = LanguageVersion; 695 } 696 697 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 698 msgpack::MapDocNode Kern) { 699 700 if (auto Node = Func.getMetadata("reqd_work_group_size")) 701 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 702 if (auto Node = Func.getMetadata("work_group_size_hint")) 703 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 704 if (auto Node = Func.getMetadata("vec_type_hint")) { 705 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 706 getTypeName( 707 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 708 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 709 /*Copy=*/true); 710 } 711 if (Func.hasFnAttribute("runtime-handle")) { 712 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 713 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 714 /*Copy=*/true); 715 } 716 } 717 718 void MetadataStreamerV3::emitKernelArgs(const Function &Func, 719 msgpack::MapDocNode Kern) { 720 unsigned Offset = 0; 721 auto Args = HSAMetadataDoc->getArrayNode(); 722 for (auto &Arg : Func.args()) 723 emitKernelArg(Arg, Offset, Args); 724 725 emitHiddenKernelArgs(Func, Offset, Args); 726 727 Kern[".args"] = Args; 728 } 729 730 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 731 msgpack::ArrayDocNode Args) { 732 auto Func = Arg.getParent(); 733 auto ArgNo = Arg.getArgNo(); 734 const MDNode *Node; 735 736 StringRef Name; 737 Node = Func->getMetadata("kernel_arg_name"); 738 if (Node && ArgNo < Node->getNumOperands()) 739 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 740 else if (Arg.hasName()) 741 Name = Arg.getName(); 742 743 StringRef TypeName; 744 Node = Func->getMetadata("kernel_arg_type"); 745 if (Node && ArgNo < Node->getNumOperands()) 746 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 747 748 StringRef BaseTypeName; 749 Node = Func->getMetadata("kernel_arg_base_type"); 750 if (Node && ArgNo < Node->getNumOperands()) 751 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 752 753 StringRef AccQual; 754 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 755 Arg.hasNoAliasAttr()) { 756 AccQual = "read_only"; 757 } else { 758 Node = Func->getMetadata("kernel_arg_access_qual"); 759 if (Node && ArgNo < Node->getNumOperands()) 760 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 761 } 762 763 StringRef TypeQual; 764 Node = Func->getMetadata("kernel_arg_type_qual"); 765 if (Node && ArgNo < Node->getNumOperands()) 766 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 767 768 Type *Ty = Arg.getType(); 769 const DataLayout &DL = Func->getParent()->getDataLayout(); 770 771 unsigned PointeeAlign = 0; 772 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 773 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 774 PointeeAlign = Arg.getParamAlignment(); 775 if (PointeeAlign == 0) 776 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 777 } 778 } 779 780 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), 781 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, 782 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, 783 TypeQual); 784 } 785 786 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, 787 StringRef ValueKind, unsigned &Offset, 788 msgpack::ArrayDocNode Args, 789 unsigned PointeeAlign, StringRef Name, 790 StringRef TypeName, 791 StringRef BaseTypeName, 792 StringRef AccQual, StringRef TypeQual) { 793 auto Arg = Args.getDocument()->getMapNode(); 794 795 if (!Name.empty()) 796 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 797 if (!TypeName.empty()) 798 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 799 auto Size = DL.getTypeAllocSize(Ty); 800 auto Align = DL.getABITypeAlignment(Ty); 801 Arg[".size"] = Arg.getDocument()->getNode(Size); 802 Offset = alignTo(Offset, Align); 803 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 804 Offset += Size; 805 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 806 Arg[".value_type"] = 807 Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true); 808 if (PointeeAlign) 809 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign); 810 811 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 812 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 813 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); 814 815 if (auto AQ = getAccessQualifier(AccQual)) 816 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 817 818 // TODO: Emit Arg[".actual_access"]. 819 820 SmallVector<StringRef, 1> SplitTypeQuals; 821 TypeQual.split(SplitTypeQuals, " ", -1, false); 822 for (StringRef Key : SplitTypeQuals) { 823 if (Key == "const") 824 Arg[".is_const"] = Arg.getDocument()->getNode(true); 825 else if (Key == "restrict") 826 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 827 else if (Key == "volatile") 828 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 829 else if (Key == "pipe") 830 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 831 } 832 833 Args.push_back(Arg); 834 } 835 836 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, 837 unsigned &Offset, 838 msgpack::ArrayDocNode Args) { 839 int HiddenArgNumBytes = 840 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 841 842 if (!HiddenArgNumBytes) 843 return; 844 845 auto &DL = Func.getParent()->getDataLayout(); 846 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 847 848 if (HiddenArgNumBytes >= 8) 849 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); 850 if (HiddenArgNumBytes >= 16) 851 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); 852 if (HiddenArgNumBytes >= 24) 853 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); 854 855 auto Int8PtrTy = 856 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 857 858 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 859 // "none" argument. 860 if (HiddenArgNumBytes >= 32) { 861 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 862 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); 863 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { 864 // The printf runtime binding pass should have ensured that hostcall and 865 // printf are not used in the same module. 866 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); 867 emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args); 868 } else 869 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 870 } 871 872 // Emit "default queue" and "completion action" arguments if enqueue kernel is 873 // used, otherwise emit dummy "none" arguments. 874 if (HiddenArgNumBytes >= 48) { 875 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 876 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); 877 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); 878 } else { 879 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 880 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 881 } 882 } 883 884 // Emit the pointer argument for multi-grid object. 885 if (HiddenArgNumBytes >= 56) 886 emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args); 887 } 888 889 msgpack::MapDocNode 890 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 891 const SIProgramInfo &ProgramInfo) const { 892 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 893 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 894 const Function &F = MF.getFunction(); 895 896 auto Kern = HSAMetadataDoc->getMapNode(); 897 898 Align MaxKernArgAlign; 899 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 900 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 901 Kern[".group_segment_fixed_size"] = 902 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 903 Kern[".private_segment_fixed_size"] = 904 Kern.getDocument()->getNode(ProgramInfo.ScratchSize); 905 Kern[".kernarg_segment_align"] = 906 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 907 Kern[".wavefront_size"] = 908 Kern.getDocument()->getNode(STM.getWavefrontSize()); 909 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); 910 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); 911 Kern[".max_flat_workgroup_size"] = 912 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 913 Kern[".sgpr_spill_count"] = 914 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 915 Kern[".vgpr_spill_count"] = 916 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 917 918 return Kern; 919 } 920 921 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 922 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 923 } 924 925 void MetadataStreamerV3::begin(const Module &Mod) { 926 emitVersion(); 927 emitPrintf(Mod); 928 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 929 } 930 931 void MetadataStreamerV3::end() { 932 std::string HSAMetadataString; 933 raw_string_ostream StrOS(HSAMetadataString); 934 HSAMetadataDoc->toYAML(StrOS); 935 936 if (DumpHSAMetadata) 937 dump(StrOS.str()); 938 if (VerifyHSAMetadata) 939 verify(StrOS.str()); 940 } 941 942 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 943 const SIProgramInfo &ProgramInfo) { 944 auto &Func = MF.getFunction(); 945 auto Kern = getHSAKernelProps(MF, ProgramInfo); 946 947 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 948 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 949 950 auto Kernels = 951 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 952 953 { 954 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 955 Kern[".symbol"] = Kern.getDocument()->getNode( 956 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 957 emitKernelLanguage(Func, Kern); 958 emitKernelAttrs(Func, Kern); 959 emitKernelArgs(Func, Kern); 960 } 961 962 Kernels.push_back(Kern); 963 } 964 965 } // end namespace HSAMD 966 } // end namespace AMDGPU 967 } // end namespace llvm 968