1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// AMDGPU HSA Metadata Streamer. 12 /// 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUHSAMetadataStreamer.h" 17 #include "AMDGPU.h" 18 #include "AMDGPUSubtarget.h" 19 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "SIProgramInfo.h" 22 #include "Utils/AMDGPUBaseInfo.h" 23 #include "llvm/ADT/StringSwitch.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 namespace llvm { 29 30 static cl::opt<bool> DumpHSAMetadata( 31 "amdgpu-dump-hsa-metadata", 32 cl::desc("Dump AMDGPU HSA Metadata")); 33 static cl::opt<bool> VerifyHSAMetadata( 34 "amdgpu-verify-hsa-metadata", 35 cl::desc("Verify AMDGPU HSA Metadata")); 36 37 namespace AMDGPU { 38 namespace HSAMD { 39 40 //===----------------------------------------------------------------------===// 41 // HSAMetadataStreamerV2 42 //===----------------------------------------------------------------------===// 43 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { 44 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 45 } 46 47 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { 48 errs() << "AMDGPU HSA Metadata Parser Test: "; 49 50 HSAMD::Metadata FromHSAMetadataString; 51 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 52 errs() << "FAIL\n"; 53 return; 54 } 55 56 std::string ToHSAMetadataString; 57 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 58 errs() << "FAIL\n"; 59 return; 60 } 61 62 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 63 << '\n'; 64 if (HSAMetadataString != ToHSAMetadataString) { 65 errs() << "Original input: " << HSAMetadataString << '\n' 66 << "Produced output: " << ToHSAMetadataString << '\n'; 67 } 68 } 69 70 AccessQualifier 71 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { 72 if (AccQual.empty()) 73 return AccessQualifier::Unknown; 74 75 return StringSwitch<AccessQualifier>(AccQual) 76 .Case("read_only", AccessQualifier::ReadOnly) 77 .Case("write_only", AccessQualifier::WriteOnly) 78 .Case("read_write", AccessQualifier::ReadWrite) 79 .Default(AccessQualifier::Default); 80 } 81 82 AddressSpaceQualifier 83 MetadataStreamerV2::getAddressSpaceQualifier( 84 unsigned AddressSpace) const { 85 switch (AddressSpace) { 86 case AMDGPUAS::PRIVATE_ADDRESS: 87 return AddressSpaceQualifier::Private; 88 case AMDGPUAS::GLOBAL_ADDRESS: 89 return AddressSpaceQualifier::Global; 90 case AMDGPUAS::CONSTANT_ADDRESS: 91 return AddressSpaceQualifier::Constant; 92 case AMDGPUAS::LOCAL_ADDRESS: 93 return AddressSpaceQualifier::Local; 94 case AMDGPUAS::FLAT_ADDRESS: 95 return AddressSpaceQualifier::Generic; 96 case AMDGPUAS::REGION_ADDRESS: 97 return AddressSpaceQualifier::Region; 98 default: 99 return AddressSpaceQualifier::Unknown; 100 } 101 } 102 103 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, 104 StringRef BaseTypeName) const { 105 if (TypeQual.find("pipe") != StringRef::npos) 106 return ValueKind::Pipe; 107 108 return StringSwitch<ValueKind>(BaseTypeName) 109 .Case("image1d_t", ValueKind::Image) 110 .Case("image1d_array_t", ValueKind::Image) 111 .Case("image1d_buffer_t", ValueKind::Image) 112 .Case("image2d_t", ValueKind::Image) 113 .Case("image2d_array_t", ValueKind::Image) 114 .Case("image2d_array_depth_t", ValueKind::Image) 115 .Case("image2d_array_msaa_t", ValueKind::Image) 116 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 117 .Case("image2d_depth_t", ValueKind::Image) 118 .Case("image2d_msaa_t", ValueKind::Image) 119 .Case("image2d_msaa_depth_t", ValueKind::Image) 120 .Case("image3d_t", ValueKind::Image) 121 .Case("sampler_t", ValueKind::Sampler) 122 .Case("queue_t", ValueKind::Queue) 123 .Default(isa<PointerType>(Ty) ? 124 (Ty->getPointerAddressSpace() == 125 AMDGPUAS::LOCAL_ADDRESS ? 126 ValueKind::DynamicSharedPointer : 127 ValueKind::GlobalBuffer) : 128 ValueKind::ByValue); 129 } 130 131 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { 132 switch (Ty->getTypeID()) { 133 case Type::IntegerTyID: { 134 auto Signed = !TypeName.startswith("u"); 135 switch (Ty->getIntegerBitWidth()) { 136 case 8: 137 return Signed ? ValueType::I8 : ValueType::U8; 138 case 16: 139 return Signed ? ValueType::I16 : ValueType::U16; 140 case 32: 141 return Signed ? ValueType::I32 : ValueType::U32; 142 case 64: 143 return Signed ? ValueType::I64 : ValueType::U64; 144 default: 145 return ValueType::Struct; 146 } 147 } 148 case Type::HalfTyID: 149 return ValueType::F16; 150 case Type::FloatTyID: 151 return ValueType::F32; 152 case Type::DoubleTyID: 153 return ValueType::F64; 154 case Type::PointerTyID: 155 return getValueType(Ty->getPointerElementType(), TypeName); 156 case Type::VectorTyID: 157 return getValueType(Ty->getVectorElementType(), TypeName); 158 default: 159 return ValueType::Struct; 160 } 161 } 162 163 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { 164 switch (Ty->getTypeID()) { 165 case Type::IntegerTyID: { 166 if (!Signed) 167 return (Twine('u') + getTypeName(Ty, true)).str(); 168 169 auto BitWidth = Ty->getIntegerBitWidth(); 170 switch (BitWidth) { 171 case 8: 172 return "char"; 173 case 16: 174 return "short"; 175 case 32: 176 return "int"; 177 case 64: 178 return "long"; 179 default: 180 return (Twine('i') + Twine(BitWidth)).str(); 181 } 182 } 183 case Type::HalfTyID: 184 return "half"; 185 case Type::FloatTyID: 186 return "float"; 187 case Type::DoubleTyID: 188 return "double"; 189 case Type::VectorTyID: { 190 auto VecTy = cast<VectorType>(Ty); 191 auto ElTy = VecTy->getElementType(); 192 auto NumElements = VecTy->getVectorNumElements(); 193 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 194 } 195 default: 196 return "unknown"; 197 } 198 } 199 200 std::vector<uint32_t> 201 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { 202 std::vector<uint32_t> Dims; 203 if (Node->getNumOperands() != 3) 204 return Dims; 205 206 for (auto &Op : Node->operands()) 207 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 208 return Dims; 209 } 210 211 Kernel::CodeProps::Metadata 212 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, 213 const SIProgramInfo &ProgramInfo) const { 214 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 215 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 216 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 217 const Function &F = MF.getFunction(); 218 219 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 220 F.getCallingConv() == CallingConv::SPIR_KERNEL); 221 222 unsigned MaxKernArgAlign; 223 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 224 MaxKernArgAlign); 225 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 226 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 227 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); 228 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 229 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 230 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 231 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 232 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 233 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 234 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 235 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 236 237 return HSACodeProps; 238 } 239 240 Kernel::DebugProps::Metadata 241 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, 242 const SIProgramInfo &ProgramInfo) const { 243 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 244 HSAMD::Kernel::DebugProps::Metadata HSADebugProps; 245 246 if (!STM.debuggerSupported()) 247 return HSADebugProps; 248 249 HSADebugProps.mDebuggerABIVersion.push_back(1); 250 HSADebugProps.mDebuggerABIVersion.push_back(0); 251 252 if (STM.debuggerEmitPrologue()) { 253 HSADebugProps.mPrivateSegmentBufferSGPR = 254 ProgramInfo.DebuggerPrivateSegmentBufferSGPR; 255 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = 256 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; 257 } 258 259 return HSADebugProps; 260 } 261 262 void MetadataStreamerV2::emitVersion() { 263 auto &Version = HSAMetadata.mVersion; 264 265 Version.push_back(VersionMajor); 266 Version.push_back(VersionMinor); 267 } 268 269 void MetadataStreamerV2::emitPrintf(const Module &Mod) { 270 auto &Printf = HSAMetadata.mPrintf; 271 272 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 273 if (!Node) 274 return; 275 276 for (auto Op : Node->operands()) 277 if (Op->getNumOperands()) 278 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); 279 } 280 281 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { 282 auto &Kernel = HSAMetadata.mKernels.back(); 283 284 // TODO: What about other languages? 285 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 286 if (!Node || !Node->getNumOperands()) 287 return; 288 auto Op0 = Node->getOperand(0); 289 if (Op0->getNumOperands() <= 1) 290 return; 291 292 Kernel.mLanguage = "OpenCL C"; 293 Kernel.mLanguageVersion.push_back( 294 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 295 Kernel.mLanguageVersion.push_back( 296 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 297 } 298 299 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { 300 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 301 302 if (auto Node = Func.getMetadata("reqd_work_group_size")) 303 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 304 if (auto Node = Func.getMetadata("work_group_size_hint")) 305 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 306 if (auto Node = Func.getMetadata("vec_type_hint")) { 307 Attrs.mVecTypeHint = getTypeName( 308 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 309 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 310 } 311 if (Func.hasFnAttribute("runtime-handle")) { 312 Attrs.mRuntimeHandle = 313 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 314 } 315 } 316 317 void MetadataStreamerV2::emitKernelArgs(const Function &Func) { 318 for (auto &Arg : Func.args()) 319 emitKernelArg(Arg); 320 321 emitHiddenKernelArgs(Func); 322 } 323 324 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { 325 auto Func = Arg.getParent(); 326 auto ArgNo = Arg.getArgNo(); 327 const MDNode *Node; 328 329 StringRef Name; 330 Node = Func->getMetadata("kernel_arg_name"); 331 if (Node && ArgNo < Node->getNumOperands()) 332 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 333 else if (Arg.hasName()) 334 Name = Arg.getName(); 335 336 StringRef TypeName; 337 Node = Func->getMetadata("kernel_arg_type"); 338 if (Node && ArgNo < Node->getNumOperands()) 339 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 340 341 StringRef BaseTypeName; 342 Node = Func->getMetadata("kernel_arg_base_type"); 343 if (Node && ArgNo < Node->getNumOperands()) 344 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 345 346 StringRef AccQual; 347 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 348 Arg.hasNoAliasAttr()) { 349 AccQual = "read_only"; 350 } else { 351 Node = Func->getMetadata("kernel_arg_access_qual"); 352 if (Node && ArgNo < Node->getNumOperands()) 353 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 354 } 355 356 StringRef TypeQual; 357 Node = Func->getMetadata("kernel_arg_type_qual"); 358 if (Node && ArgNo < Node->getNumOperands()) 359 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 360 361 Type *Ty = Arg.getType(); 362 const DataLayout &DL = Func->getParent()->getDataLayout(); 363 364 unsigned PointeeAlign = 0; 365 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 366 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 367 PointeeAlign = Arg.getParamAlignment(); 368 if (PointeeAlign == 0) 369 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 370 } 371 } 372 373 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), 374 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 375 } 376 377 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, 378 ValueKind ValueKind, 379 unsigned PointeeAlign, StringRef Name, 380 StringRef TypeName, 381 StringRef BaseTypeName, 382 StringRef AccQual, StringRef TypeQual) { 383 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 384 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 385 386 Arg.mName = Name; 387 Arg.mTypeName = TypeName; 388 Arg.mSize = DL.getTypeAllocSize(Ty); 389 Arg.mAlign = DL.getABITypeAlignment(Ty); 390 Arg.mValueKind = ValueKind; 391 Arg.mValueType = getValueType(Ty, BaseTypeName); 392 Arg.mPointeeAlign = PointeeAlign; 393 394 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 395 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); 396 397 Arg.mAccQual = getAccessQualifier(AccQual); 398 399 // TODO: Emit Arg.mActualAccQual. 400 401 SmallVector<StringRef, 1> SplitTypeQuals; 402 TypeQual.split(SplitTypeQuals, " ", -1, false); 403 for (StringRef Key : SplitTypeQuals) { 404 auto P = StringSwitch<bool*>(Key) 405 .Case("const", &Arg.mIsConst) 406 .Case("restrict", &Arg.mIsRestrict) 407 .Case("volatile", &Arg.mIsVolatile) 408 .Case("pipe", &Arg.mIsPipe) 409 .Default(nullptr); 410 if (P) 411 *P = true; 412 } 413 } 414 415 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { 416 int HiddenArgNumBytes = 417 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 418 419 if (!HiddenArgNumBytes) 420 return; 421 422 auto &DL = Func.getParent()->getDataLayout(); 423 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 424 425 if (HiddenArgNumBytes >= 8) 426 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); 427 if (HiddenArgNumBytes >= 16) 428 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); 429 if (HiddenArgNumBytes >= 24) 430 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); 431 432 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 433 AMDGPUAS::GLOBAL_ADDRESS); 434 435 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 436 // "none" argument. 437 if (HiddenArgNumBytes >= 32) { 438 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 439 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); 440 else 441 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 442 } 443 444 // Emit "default queue" and "completion action" arguments if enqueue kernel is 445 // used, otherwise emit dummy "none" arguments. 446 if (HiddenArgNumBytes >= 48) { 447 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 448 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); 449 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); 450 } else { 451 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 452 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 453 } 454 } 455 } 456 457 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 458 return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); 459 } 460 461 void MetadataStreamerV2::begin(const Module &Mod) { 462 emitVersion(); 463 emitPrintf(Mod); 464 } 465 466 void MetadataStreamerV2::end() { 467 std::string HSAMetadataString; 468 if (toString(HSAMetadata, HSAMetadataString)) 469 return; 470 471 if (DumpHSAMetadata) 472 dump(HSAMetadataString); 473 if (VerifyHSAMetadata) 474 verify(HSAMetadataString); 475 } 476 477 void MetadataStreamerV2::emitKernel(const MachineFunction &MF, 478 const SIProgramInfo &ProgramInfo) { 479 auto &Func = MF.getFunction(); 480 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 481 return; 482 483 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 484 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 485 486 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 487 auto &Kernel = HSAMetadata.mKernels.back(); 488 489 Kernel.mName = Func.getName(); 490 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 491 emitKernelLanguage(Func); 492 emitKernelAttrs(Func); 493 emitKernelArgs(Func); 494 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 495 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 496 } 497 498 //===----------------------------------------------------------------------===// 499 // HSAMetadataStreamerV3 500 //===----------------------------------------------------------------------===// 501 502 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { 503 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 504 } 505 506 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { 507 errs() << "AMDGPU HSA Metadata Parser Test: "; 508 509 std::shared_ptr<msgpack::Node> FromHSAMetadataString = 510 std::make_shared<msgpack::MapNode>(); 511 512 yaml::Input YIn(HSAMetadataString); 513 YIn >> FromHSAMetadataString; 514 if (YIn.error()) { 515 errs() << "FAIL\n"; 516 return; 517 } 518 519 std::string ToHSAMetadataString; 520 raw_string_ostream StrOS(ToHSAMetadataString); 521 yaml::Output YOut(StrOS); 522 YOut << FromHSAMetadataString; 523 524 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 525 if (HSAMetadataString != ToHSAMetadataString) { 526 errs() << "Original input: " << HSAMetadataString << '\n' 527 << "Produced output: " << StrOS.str() << '\n'; 528 } 529 } 530 531 Optional<StringRef> 532 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { 533 return StringSwitch<Optional<StringRef>>(AccQual) 534 .Case("read_only", StringRef("read_only")) 535 .Case("write_only", StringRef("write_only")) 536 .Case("read_write", StringRef("read_write")) 537 .Default(None); 538 } 539 540 Optional<StringRef> 541 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { 542 switch (AddressSpace) { 543 case AMDGPUAS::PRIVATE_ADDRESS: 544 return StringRef("private"); 545 case AMDGPUAS::GLOBAL_ADDRESS: 546 return StringRef("global"); 547 case AMDGPUAS::CONSTANT_ADDRESS: 548 return StringRef("constant"); 549 case AMDGPUAS::LOCAL_ADDRESS: 550 return StringRef("local"); 551 case AMDGPUAS::FLAT_ADDRESS: 552 return StringRef("generic"); 553 case AMDGPUAS::REGION_ADDRESS: 554 return StringRef("region"); 555 default: 556 return None; 557 } 558 } 559 560 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, 561 StringRef BaseTypeName) const { 562 if (TypeQual.find("pipe") != StringRef::npos) 563 return "pipe"; 564 565 return StringSwitch<StringRef>(BaseTypeName) 566 .Case("image1d_t", "image") 567 .Case("image1d_array_t", "image") 568 .Case("image1d_buffer_t", "image") 569 .Case("image2d_t", "image") 570 .Case("image2d_array_t", "image") 571 .Case("image2d_array_depth_t", "image") 572 .Case("image2d_array_msaa_t", "image") 573 .Case("image2d_array_msaa_depth_t", "image") 574 .Case("image2d_depth_t", "image") 575 .Case("image2d_msaa_t", "image") 576 .Case("image2d_msaa_depth_t", "image") 577 .Case("image3d_t", "image") 578 .Case("sampler_t", "sampler") 579 .Case("queue_t", "queue") 580 .Default(isa<PointerType>(Ty) 581 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 582 ? "dynamic_shared_pointer" 583 : "global_buffer") 584 : "by_value"); 585 } 586 587 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { 588 switch (Ty->getTypeID()) { 589 case Type::IntegerTyID: { 590 auto Signed = !TypeName.startswith("u"); 591 switch (Ty->getIntegerBitWidth()) { 592 case 8: 593 return Signed ? "i8" : "u8"; 594 case 16: 595 return Signed ? "i16" : "u16"; 596 case 32: 597 return Signed ? "i32" : "u32"; 598 case 64: 599 return Signed ? "i64" : "u64"; 600 default: 601 return "struct"; 602 } 603 } 604 case Type::HalfTyID: 605 return "f16"; 606 case Type::FloatTyID: 607 return "f32"; 608 case Type::DoubleTyID: 609 return "f64"; 610 case Type::PointerTyID: 611 return getValueType(Ty->getPointerElementType(), TypeName); 612 case Type::VectorTyID: 613 return getValueType(Ty->getVectorElementType(), TypeName); 614 default: 615 return "struct"; 616 } 617 } 618 619 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { 620 switch (Ty->getTypeID()) { 621 case Type::IntegerTyID: { 622 if (!Signed) 623 return (Twine('u') + getTypeName(Ty, true)).str(); 624 625 auto BitWidth = Ty->getIntegerBitWidth(); 626 switch (BitWidth) { 627 case 8: 628 return "char"; 629 case 16: 630 return "short"; 631 case 32: 632 return "int"; 633 case 64: 634 return "long"; 635 default: 636 return (Twine('i') + Twine(BitWidth)).str(); 637 } 638 } 639 case Type::HalfTyID: 640 return "half"; 641 case Type::FloatTyID: 642 return "float"; 643 case Type::DoubleTyID: 644 return "double"; 645 case Type::VectorTyID: { 646 auto VecTy = cast<VectorType>(Ty); 647 auto ElTy = VecTy->getElementType(); 648 auto NumElements = VecTy->getVectorNumElements(); 649 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 650 } 651 default: 652 return "unknown"; 653 } 654 } 655 656 std::shared_ptr<msgpack::ArrayNode> 657 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { 658 auto Dims = std::make_shared<msgpack::ArrayNode>(); 659 if (Node->getNumOperands() != 3) 660 return Dims; 661 662 for (auto &Op : Node->operands()) 663 Dims->push_back(std::make_shared<msgpack::ScalarNode>( 664 mdconst::extract<ConstantInt>(Op)->getZExtValue())); 665 return Dims; 666 } 667 668 void MetadataStreamerV3::emitVersion() { 669 auto Version = std::make_shared<msgpack::ArrayNode>(); 670 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor)); 671 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor)); 672 getRootMetadata("amdhsa.version") = std::move(Version); 673 } 674 675 void MetadataStreamerV3::emitPrintf(const Module &Mod) { 676 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 677 if (!Node) 678 return; 679 680 auto Printf = std::make_shared<msgpack::ArrayNode>(); 681 for (auto Op : Node->operands()) 682 if (Op->getNumOperands()) 683 Printf->push_back(std::make_shared<msgpack::ScalarNode>( 684 cast<MDString>(Op->getOperand(0))->getString())); 685 getRootMetadata("amdhsa.printf") = std::move(Printf); 686 } 687 688 void MetadataStreamerV3::emitKernelLanguage(const Function &Func, 689 msgpack::MapNode &Kern) { 690 // TODO: What about other languages? 691 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 692 if (!Node || !Node->getNumOperands()) 693 return; 694 auto Op0 = Node->getOperand(0); 695 if (Op0->getNumOperands() <= 1) 696 return; 697 698 Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C"); 699 auto LanguageVersion = std::make_shared<msgpack::ArrayNode>(); 700 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( 701 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 702 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( 703 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 704 Kern[".language_version"] = std::move(LanguageVersion); 705 } 706 707 void MetadataStreamerV3::emitKernelAttrs(const Function &Func, 708 msgpack::MapNode &Kern) { 709 710 if (auto Node = Func.getMetadata("reqd_work_group_size")) 711 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 712 if (auto Node = Func.getMetadata("work_group_size_hint")) 713 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 714 if (auto Node = Func.getMetadata("vec_type_hint")) { 715 Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName( 716 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 717 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue())); 718 } 719 if (Func.hasFnAttribute("runtime-handle")) { 720 Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>( 721 Func.getFnAttribute("runtime-handle").getValueAsString().str()); 722 } 723 } 724 725 void MetadataStreamerV3::emitKernelArgs(const Function &Func, 726 msgpack::MapNode &Kern) { 727 unsigned Offset = 0; 728 auto Args = std::make_shared<msgpack::ArrayNode>(); 729 for (auto &Arg : Func.args()) 730 emitKernelArg(Arg, Offset, *Args); 731 732 emitHiddenKernelArgs(Func, Offset, *Args); 733 734 // TODO: What about other languages? 735 if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) { 736 auto &DL = Func.getParent()->getDataLayout(); 737 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 738 739 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args); 740 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args); 741 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args); 742 743 auto Int8PtrTy = 744 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 745 746 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 747 // "none" argument. 748 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 749 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args); 750 else 751 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 752 753 // Emit "default queue" and "completion action" arguments if enqueue kernel 754 // is used, otherwise emit dummy "none" arguments. 755 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 756 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args); 757 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args); 758 } else { 759 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 760 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); 761 } 762 } 763 764 Kern[".args"] = std::move(Args); 765 } 766 767 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, 768 msgpack::ArrayNode &Args) { 769 auto Func = Arg.getParent(); 770 auto ArgNo = Arg.getArgNo(); 771 const MDNode *Node; 772 773 StringRef Name; 774 Node = Func->getMetadata("kernel_arg_name"); 775 if (Node && ArgNo < Node->getNumOperands()) 776 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 777 else if (Arg.hasName()) 778 Name = Arg.getName(); 779 780 StringRef TypeName; 781 Node = Func->getMetadata("kernel_arg_type"); 782 if (Node && ArgNo < Node->getNumOperands()) 783 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 784 785 StringRef BaseTypeName; 786 Node = Func->getMetadata("kernel_arg_base_type"); 787 if (Node && ArgNo < Node->getNumOperands()) 788 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 789 790 StringRef AccQual; 791 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 792 Arg.hasNoAliasAttr()) { 793 AccQual = "read_only"; 794 } else { 795 Node = Func->getMetadata("kernel_arg_access_qual"); 796 if (Node && ArgNo < Node->getNumOperands()) 797 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 798 } 799 800 StringRef TypeQual; 801 Node = Func->getMetadata("kernel_arg_type_qual"); 802 if (Node && ArgNo < Node->getNumOperands()) 803 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 804 805 Type *Ty = Arg.getType(); 806 const DataLayout &DL = Func->getParent()->getDataLayout(); 807 808 unsigned PointeeAlign = 0; 809 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 810 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 811 PointeeAlign = Arg.getParamAlignment(); 812 if (PointeeAlign == 0) 813 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 814 } 815 } 816 817 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), 818 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, 819 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, 820 TypeQual); 821 } 822 823 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, 824 StringRef ValueKind, unsigned &Offset, 825 msgpack::ArrayNode &Args, 826 unsigned PointeeAlign, StringRef Name, 827 StringRef TypeName, 828 StringRef BaseTypeName, 829 StringRef AccQual, StringRef TypeQual) { 830 auto ArgPtr = std::make_shared<msgpack::MapNode>(); 831 auto &Arg = *ArgPtr; 832 833 if (!Name.empty()) 834 Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name); 835 if (!TypeName.empty()) 836 Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName); 837 auto Size = DL.getTypeAllocSize(Ty); 838 auto Align = DL.getABITypeAlignment(Ty); 839 Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size); 840 Offset = alignTo(Offset, Align); 841 Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset); 842 Offset += Size; 843 Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind); 844 Arg[".value_type"] = 845 std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName)); 846 if (PointeeAlign) 847 Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign); 848 849 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 850 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 851 Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier); 852 853 if (auto AQ = getAccessQualifier(AccQual)) 854 Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ); 855 856 // TODO: Emit Arg[".actual_access"]. 857 858 SmallVector<StringRef, 1> SplitTypeQuals; 859 TypeQual.split(SplitTypeQuals, " ", -1, false); 860 for (StringRef Key : SplitTypeQuals) { 861 if (Key == "const") 862 Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true); 863 else if (Key == "restrict") 864 Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true); 865 else if (Key == "volatile") 866 Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true); 867 else if (Key == "pipe") 868 Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true); 869 } 870 871 Args.push_back(std::move(ArgPtr)); 872 } 873 874 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, 875 unsigned &Offset, 876 msgpack::ArrayNode &Args) { 877 int HiddenArgNumBytes = 878 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 879 880 if (!HiddenArgNumBytes) 881 return; 882 883 auto &DL = Func.getParent()->getDataLayout(); 884 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 885 886 if (HiddenArgNumBytes >= 8) 887 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); 888 if (HiddenArgNumBytes >= 16) 889 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); 890 if (HiddenArgNumBytes >= 24) 891 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); 892 893 auto Int8PtrTy = 894 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 895 896 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 897 // "none" argument. 898 if (HiddenArgNumBytes >= 32) { 899 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 900 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); 901 else 902 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 903 } 904 905 // Emit "default queue" and "completion action" arguments if enqueue kernel is 906 // used, otherwise emit dummy "none" arguments. 907 if (HiddenArgNumBytes >= 48) { 908 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 909 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); 910 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); 911 } else { 912 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 913 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); 914 } 915 } 916 } 917 918 std::shared_ptr<msgpack::MapNode> 919 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, 920 const SIProgramInfo &ProgramInfo) const { 921 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 922 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 923 const Function &F = MF.getFunction(); 924 925 auto HSAKernelProps = std::make_shared<msgpack::MapNode>(); 926 auto &Kern = *HSAKernelProps; 927 928 unsigned MaxKernArgAlign; 929 Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>( 930 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 931 Kern[".group_segment_fixed_size"] = 932 std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize); 933 Kern[".private_segment_fixed_size"] = 934 std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize); 935 Kern[".kernarg_segment_align"] = 936 std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign)); 937 Kern[".wavefront_size"] = 938 std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize()); 939 Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR); 940 Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR); 941 Kern[".max_flat_workgroup_size"] = 942 std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize()); 943 Kern[".sgpr_spill_count"] = 944 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs()); 945 Kern[".vgpr_spill_count"] = 946 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs()); 947 948 return HSAKernelProps; 949 } 950 951 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 952 return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true); 953 } 954 955 void MetadataStreamerV3::begin(const Module &Mod) { 956 emitVersion(); 957 emitPrintf(Mod); 958 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode()); 959 } 960 961 void MetadataStreamerV3::end() { 962 std::string HSAMetadataString; 963 raw_string_ostream StrOS(HSAMetadataString); 964 yaml::Output YOut(StrOS); 965 YOut << HSAMetadataRoot; 966 967 if (DumpHSAMetadata) 968 dump(StrOS.str()); 969 if (VerifyHSAMetadata) 970 verify(StrOS.str()); 971 } 972 973 void MetadataStreamerV3::emitKernel(const MachineFunction &MF, 974 const SIProgramInfo &ProgramInfo) { 975 auto &Func = MF.getFunction(); 976 auto KernelProps = getHSAKernelProps(MF, ProgramInfo); 977 978 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || 979 Func.getCallingConv() == CallingConv::SPIR_KERNEL); 980 981 auto &KernelsNode = getRootMetadata("amdhsa.kernels"); 982 auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get()); 983 984 { 985 auto &Kern = *KernelProps; 986 Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName()); 987 Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>( 988 (Twine(Func.getName()) + Twine(".kd")).str()); 989 emitKernelLanguage(Func, Kern); 990 emitKernelAttrs(Func, Kern); 991 emitKernelArgs(Func, Kern); 992 } 993 994 Kernels->push_back(std::move(KernelProps)); 995 } 996 997 } // end namespace HSAMD 998 } // end namespace AMDGPU 999 } // end namespace llvm 1000