//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// AMDGPU HSA Metadata Streamer.
///
//
//===----------------------------------------------------------------------===//
1401ce144dSScott Linder 
1501ce144dSScott Linder #include "AMDGPUHSAMetadataStreamer.h"
1601ce144dSScott Linder #include "AMDGPU.h"
17560d7e04Sdfukalov #include "GCNSubtarget.h"
18f5b36e56SScott Linder #include "MCTargetDesc/AMDGPUTargetStreamer.h"
1901ce144dSScott Linder #include "SIMachineFunctionInfo.h"
2001ce144dSScott Linder #include "SIProgramInfo.h"
2101ce144dSScott Linder #include "llvm/IR/Module.h"
221168119cSMatt Arsenault using namespace llvm;
231168119cSMatt Arsenault 
getArgumentTypeAlign(const Argument & Arg,const DataLayout & DL)241168119cSMatt Arsenault static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
251168119cSMatt Arsenault                                                      const DataLayout &DL) {
261168119cSMatt Arsenault   Type *Ty = Arg.getType();
271168119cSMatt Arsenault   MaybeAlign ArgAlign;
281168119cSMatt Arsenault   if (Arg.hasByRefAttr()) {
291168119cSMatt Arsenault     Ty = Arg.getParamByRefType();
301168119cSMatt Arsenault     ArgAlign = Arg.getParamAlign();
311168119cSMatt Arsenault   }
321168119cSMatt Arsenault 
331168119cSMatt Arsenault   if (!ArgAlign)
341168119cSMatt Arsenault     ArgAlign = DL.getABITypeAlign(Ty);
351168119cSMatt Arsenault 
361168119cSMatt Arsenault   return std::make_pair(Ty, *ArgAlign);
371168119cSMatt Arsenault }
381168119cSMatt Arsenault 
3901ce144dSScott Linder namespace llvm {
4001ce144dSScott Linder 
4101ce144dSScott Linder static cl::opt<bool> DumpHSAMetadata(
4201ce144dSScott Linder     "amdgpu-dump-hsa-metadata",
4301ce144dSScott Linder     cl::desc("Dump AMDGPU HSA Metadata"));
4401ce144dSScott Linder static cl::opt<bool> VerifyHSAMetadata(
4501ce144dSScott Linder     "amdgpu-verify-hsa-metadata",
4601ce144dSScott Linder     cl::desc("Verify AMDGPU HSA Metadata"));
4701ce144dSScott Linder 
4801ce144dSScott Linder namespace AMDGPU {
4901ce144dSScott Linder namespace HSAMD {
5001ce144dSScott Linder 
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV2
//===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const54f5b36e56SScott Linder void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
5501ce144dSScott Linder   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
5601ce144dSScott Linder }
5701ce144dSScott Linder 
verify(StringRef HSAMetadataString) const58f5b36e56SScott Linder void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
5901ce144dSScott Linder   errs() << "AMDGPU HSA Metadata Parser Test: ";
6001ce144dSScott Linder 
6101ce144dSScott Linder   HSAMD::Metadata FromHSAMetadataString;
62f82cff31SSimon Pilgrim   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
6301ce144dSScott Linder     errs() << "FAIL\n";
6401ce144dSScott Linder     return;
6501ce144dSScott Linder   }
6601ce144dSScott Linder 
6701ce144dSScott Linder   std::string ToHSAMetadataString;
6801ce144dSScott Linder   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
6901ce144dSScott Linder     errs() << "FAIL\n";
7001ce144dSScott Linder     return;
7101ce144dSScott Linder   }
7201ce144dSScott Linder 
7301ce144dSScott Linder   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
7401ce144dSScott Linder          << '\n';
7501ce144dSScott Linder   if (HSAMetadataString != ToHSAMetadataString) {
7601ce144dSScott Linder     errs() << "Original input: " << HSAMetadataString << '\n'
7701ce144dSScott Linder            << "Produced output: " << ToHSAMetadataString << '\n';
7801ce144dSScott Linder   }
7901ce144dSScott Linder }
8001ce144dSScott Linder 
81f5b36e56SScott Linder AccessQualifier
getAccessQualifier(StringRef AccQual) const82f5b36e56SScott Linder MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
8301ce144dSScott Linder   if (AccQual.empty())
8401ce144dSScott Linder     return AccessQualifier::Unknown;
8501ce144dSScott Linder 
8601ce144dSScott Linder   return StringSwitch<AccessQualifier>(AccQual)
8701ce144dSScott Linder              .Case("read_only",  AccessQualifier::ReadOnly)
8801ce144dSScott Linder              .Case("write_only", AccessQualifier::WriteOnly)
8901ce144dSScott Linder              .Case("read_write", AccessQualifier::ReadWrite)
9001ce144dSScott Linder              .Default(AccessQualifier::Default);
9101ce144dSScott Linder }
9201ce144dSScott Linder 
93f5b36e56SScott Linder AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const94f5b36e56SScott Linder MetadataStreamerV2::getAddressSpaceQualifier(
9501ce144dSScott Linder     unsigned AddressSpace) const {
96b9986746SMatt Arsenault   switch (AddressSpace) {
97b9986746SMatt Arsenault   case AMDGPUAS::PRIVATE_ADDRESS:
9801ce144dSScott Linder     return AddressSpaceQualifier::Private;
99b9986746SMatt Arsenault   case AMDGPUAS::GLOBAL_ADDRESS:
10001ce144dSScott Linder     return AddressSpaceQualifier::Global;
101b9986746SMatt Arsenault   case AMDGPUAS::CONSTANT_ADDRESS:
10201ce144dSScott Linder     return AddressSpaceQualifier::Constant;
103b9986746SMatt Arsenault   case AMDGPUAS::LOCAL_ADDRESS:
10401ce144dSScott Linder     return AddressSpaceQualifier::Local;
105b9986746SMatt Arsenault   case AMDGPUAS::FLAT_ADDRESS:
10601ce144dSScott Linder     return AddressSpaceQualifier::Generic;
107b9986746SMatt Arsenault   case AMDGPUAS::REGION_ADDRESS:
10801ce144dSScott Linder     return AddressSpaceQualifier::Region;
109b9986746SMatt Arsenault   default:
110b9986746SMatt Arsenault     return AddressSpaceQualifier::Unknown;
111b9986746SMatt Arsenault   }
11201ce144dSScott Linder }
11301ce144dSScott Linder 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const114f5b36e56SScott Linder ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
11501ce144dSScott Linder                                            StringRef BaseTypeName) const {
1166fe949c4SKazu Hirata   if (TypeQual.contains("pipe"))
11701ce144dSScott Linder     return ValueKind::Pipe;
11801ce144dSScott Linder 
11901ce144dSScott Linder   return StringSwitch<ValueKind>(BaseTypeName)
12001ce144dSScott Linder              .Case("image1d_t", ValueKind::Image)
12101ce144dSScott Linder              .Case("image1d_array_t", ValueKind::Image)
12201ce144dSScott Linder              .Case("image1d_buffer_t", ValueKind::Image)
12301ce144dSScott Linder              .Case("image2d_t", ValueKind::Image)
12401ce144dSScott Linder              .Case("image2d_array_t", ValueKind::Image)
12501ce144dSScott Linder              .Case("image2d_array_depth_t", ValueKind::Image)
12601ce144dSScott Linder              .Case("image2d_array_msaa_t", ValueKind::Image)
12701ce144dSScott Linder              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
12801ce144dSScott Linder              .Case("image2d_depth_t", ValueKind::Image)
12901ce144dSScott Linder              .Case("image2d_msaa_t", ValueKind::Image)
13001ce144dSScott Linder              .Case("image2d_msaa_depth_t", ValueKind::Image)
13101ce144dSScott Linder              .Case("image3d_t", ValueKind::Image)
13201ce144dSScott Linder              .Case("sampler_t", ValueKind::Sampler)
13301ce144dSScott Linder              .Case("queue_t", ValueKind::Queue)
13401ce144dSScott Linder              .Default(isa<PointerType>(Ty) ?
13501ce144dSScott Linder                           (Ty->getPointerAddressSpace() ==
1360da6350dSMatt Arsenault                            AMDGPUAS::LOCAL_ADDRESS ?
13701ce144dSScott Linder                            ValueKind::DynamicSharedPointer :
13801ce144dSScott Linder                            ValueKind::GlobalBuffer) :
13901ce144dSScott Linder                       ValueKind::ByValue);
14001ce144dSScott Linder }
14101ce144dSScott Linder 
getTypeName(Type * Ty,bool Signed) const142f5b36e56SScott Linder std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
14301ce144dSScott Linder   switch (Ty->getTypeID()) {
14401ce144dSScott Linder   case Type::IntegerTyID: {
14501ce144dSScott Linder     if (!Signed)
14601ce144dSScott Linder       return (Twine('u') + getTypeName(Ty, true)).str();
14701ce144dSScott Linder 
14801ce144dSScott Linder     auto BitWidth = Ty->getIntegerBitWidth();
14901ce144dSScott Linder     switch (BitWidth) {
15001ce144dSScott Linder     case 8:
15101ce144dSScott Linder       return "char";
15201ce144dSScott Linder     case 16:
15301ce144dSScott Linder       return "short";
15401ce144dSScott Linder     case 32:
15501ce144dSScott Linder       return "int";
15601ce144dSScott Linder     case 64:
15701ce144dSScott Linder       return "long";
15801ce144dSScott Linder     default:
15901ce144dSScott Linder       return (Twine('i') + Twine(BitWidth)).str();
16001ce144dSScott Linder     }
16101ce144dSScott Linder   }
16201ce144dSScott Linder   case Type::HalfTyID:
16301ce144dSScott Linder     return "half";
16401ce144dSScott Linder   case Type::FloatTyID:
16501ce144dSScott Linder     return "float";
16601ce144dSScott Linder   case Type::DoubleTyID:
16701ce144dSScott Linder     return "double";
1682dea3f12SChristopher Tetreault   case Type::FixedVectorTyID: {
1693254a001SChristopher Tetreault     auto VecTy = cast<FixedVectorType>(Ty);
17001ce144dSScott Linder     auto ElTy = VecTy->getElementType();
171e634f482SChristopher Tetreault     auto NumElements = VecTy->getNumElements();
17201ce144dSScott Linder     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
17301ce144dSScott Linder   }
17401ce144dSScott Linder   default:
17501ce144dSScott Linder     return "unknown";
17601ce144dSScott Linder   }
17701ce144dSScott Linder }
17801ce144dSScott Linder 
179f5b36e56SScott Linder std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const180f5b36e56SScott Linder MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
18101ce144dSScott Linder   std::vector<uint32_t> Dims;
18201ce144dSScott Linder   if (Node->getNumOperands() != 3)
18301ce144dSScott Linder     return Dims;
18401ce144dSScott Linder 
18501ce144dSScott Linder   for (auto &Op : Node->operands())
18601ce144dSScott Linder     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
18701ce144dSScott Linder   return Dims;
18801ce144dSScott Linder }
18901ce144dSScott Linder 
190f5b36e56SScott Linder Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const191f5b36e56SScott Linder MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
19201ce144dSScott Linder                                     const SIProgramInfo &ProgramInfo) const {
1935bfbae5cSTom Stellard   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
19401ce144dSScott Linder   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
19501ce144dSScott Linder   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
19601ce144dSScott Linder   const Function &F = MF.getFunction();
19701ce144dSScott Linder 
1984bec7d42SMatt Arsenault   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
1994bec7d42SMatt Arsenault          F.getCallingConv() == CallingConv::SPIR_KERNEL);
20001ce144dSScott Linder 
201b65fa483SGuillaume Chatelet   Align MaxKernArgAlign;
2024bec7d42SMatt Arsenault   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
2034bec7d42SMatt Arsenault                                                                MaxKernArgAlign);
204b65fa483SGuillaume Chatelet   HSACodeProps.mKernargSegmentAlign =
205b65fa483SGuillaume Chatelet     std::max(MaxKernArgAlign, Align(4)).value();
20690ff1487SMatt Arsenault 
20790ff1487SMatt Arsenault   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
20890ff1487SMatt Arsenault   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
20901ce144dSScott Linder   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
21001ce144dSScott Linder   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
21101ce144dSScott Linder   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
21201ce144dSScott Linder   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
21301ce144dSScott Linder   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
21401ce144dSScott Linder   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
21501ce144dSScott Linder   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
21601ce144dSScott Linder   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
21701ce144dSScott Linder 
21801ce144dSScott Linder   return HSACodeProps;
21901ce144dSScott Linder }
22001ce144dSScott Linder 
221f5b36e56SScott Linder Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const222f5b36e56SScott Linder MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
22301ce144dSScott Linder                                      const SIProgramInfo &ProgramInfo) const {
224aa6fb4c4SMatt Arsenault   return HSAMD::Kernel::DebugProps::Metadata();
22501ce144dSScott Linder }
22601ce144dSScott Linder 
emitVersion()227f5b36e56SScott Linder void MetadataStreamerV2::emitVersion() {
22801ce144dSScott Linder   auto &Version = HSAMetadata.mVersion;
22901ce144dSScott Linder 
230f4ace637SKonstantin Zhuravlyov   Version.push_back(VersionMajorV2);
231f4ace637SKonstantin Zhuravlyov   Version.push_back(VersionMinorV2);
23201ce144dSScott Linder }
23301ce144dSScott Linder 
emitPrintf(const Module & Mod)234f5b36e56SScott Linder void MetadataStreamerV2::emitPrintf(const Module &Mod) {
23501ce144dSScott Linder   auto &Printf = HSAMetadata.mPrintf;
23601ce144dSScott Linder 
23701ce144dSScott Linder   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
23801ce144dSScott Linder   if (!Node)
23901ce144dSScott Linder     return;
24001ce144dSScott Linder 
24101ce144dSScott Linder   for (auto Op : Node->operands())
24201ce144dSScott Linder     if (Op->getNumOperands())
243adcd0268SBenjamin Kramer       Printf.push_back(
244adcd0268SBenjamin Kramer           std::string(cast<MDString>(Op->getOperand(0))->getString()));
24501ce144dSScott Linder }
24601ce144dSScott Linder 
emitKernelLanguage(const Function & Func)247f5b36e56SScott Linder void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
24801ce144dSScott Linder   auto &Kernel = HSAMetadata.mKernels.back();
24901ce144dSScott Linder 
25001ce144dSScott Linder   // TODO: What about other languages?
25101ce144dSScott Linder   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
25201ce144dSScott Linder   if (!Node || !Node->getNumOperands())
25301ce144dSScott Linder     return;
25401ce144dSScott Linder   auto Op0 = Node->getOperand(0);
25501ce144dSScott Linder   if (Op0->getNumOperands() <= 1)
25601ce144dSScott Linder     return;
25701ce144dSScott Linder 
25801ce144dSScott Linder   Kernel.mLanguage = "OpenCL C";
25901ce144dSScott Linder   Kernel.mLanguageVersion.push_back(
26001ce144dSScott Linder       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
26101ce144dSScott Linder   Kernel.mLanguageVersion.push_back(
26201ce144dSScott Linder       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
26301ce144dSScott Linder }
26401ce144dSScott Linder 
emitKernelAttrs(const Function & Func)265f5b36e56SScott Linder void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
26601ce144dSScott Linder   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
26701ce144dSScott Linder 
26801ce144dSScott Linder   if (auto Node = Func.getMetadata("reqd_work_group_size"))
26901ce144dSScott Linder     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
27001ce144dSScott Linder   if (auto Node = Func.getMetadata("work_group_size_hint"))
27101ce144dSScott Linder     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
27201ce144dSScott Linder   if (auto Node = Func.getMetadata("vec_type_hint")) {
27301ce144dSScott Linder     Attrs.mVecTypeHint = getTypeName(
27401ce144dSScott Linder         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
27501ce144dSScott Linder         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
27601ce144dSScott Linder   }
27701ce144dSScott Linder   if (Func.hasFnAttribute("runtime-handle")) {
27801ce144dSScott Linder     Attrs.mRuntimeHandle =
27901ce144dSScott Linder         Func.getFnAttribute("runtime-handle").getValueAsString().str();
28001ce144dSScott Linder   }
28101ce144dSScott Linder }
28201ce144dSScott Linder 
emitKernelArgs(const Function & Func,const GCNSubtarget & ST)283ae0ba7deSMatt Arsenault void MetadataStreamerV2::emitKernelArgs(const Function &Func,
284ae0ba7deSMatt Arsenault                                         const GCNSubtarget &ST) {
28501ce144dSScott Linder   for (auto &Arg : Func.args())
28601ce144dSScott Linder     emitKernelArg(Arg);
28701ce144dSScott Linder 
288ae0ba7deSMatt Arsenault   emitHiddenKernelArgs(Func, ST);
28901ce144dSScott Linder }
29001ce144dSScott Linder 
emitKernelArg(const Argument & Arg)291f5b36e56SScott Linder void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
29201ce144dSScott Linder   auto Func = Arg.getParent();
29301ce144dSScott Linder   auto ArgNo = Arg.getArgNo();
29401ce144dSScott Linder   const MDNode *Node;
29501ce144dSScott Linder 
29601ce144dSScott Linder   StringRef Name;
29701ce144dSScott Linder   Node = Func->getMetadata("kernel_arg_name");
29801ce144dSScott Linder   if (Node && ArgNo < Node->getNumOperands())
29901ce144dSScott Linder     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
30001ce144dSScott Linder   else if (Arg.hasName())
30101ce144dSScott Linder     Name = Arg.getName();
30201ce144dSScott Linder 
30301ce144dSScott Linder   StringRef TypeName;
30401ce144dSScott Linder   Node = Func->getMetadata("kernel_arg_type");
30501ce144dSScott Linder   if (Node && ArgNo < Node->getNumOperands())
30601ce144dSScott Linder     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
30701ce144dSScott Linder 
30801ce144dSScott Linder   StringRef BaseTypeName;
30901ce144dSScott Linder   Node = Func->getMetadata("kernel_arg_base_type");
31001ce144dSScott Linder   if (Node && ArgNo < Node->getNumOperands())
31101ce144dSScott Linder     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
31201ce144dSScott Linder 
31301ce144dSScott Linder   StringRef AccQual;
31401ce144dSScott Linder   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
31501ce144dSScott Linder       Arg.hasNoAliasAttr()) {
31601ce144dSScott Linder     AccQual = "read_only";
31701ce144dSScott Linder   } else {
31801ce144dSScott Linder     Node = Func->getMetadata("kernel_arg_access_qual");
31901ce144dSScott Linder     if (Node && ArgNo < Node->getNumOperands())
32001ce144dSScott Linder       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
32101ce144dSScott Linder   }
32201ce144dSScott Linder 
32301ce144dSScott Linder   StringRef TypeQual;
32401ce144dSScott Linder   Node = Func->getMetadata("kernel_arg_type_qual");
32501ce144dSScott Linder   if (Node && ArgNo < Node->getNumOperands())
32601ce144dSScott Linder     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
32701ce144dSScott Linder 
32801ce144dSScott Linder   const DataLayout &DL = Func->getParent()->getDataLayout();
32901ce144dSScott Linder 
330d3085c25SGuillaume Chatelet   MaybeAlign PointeeAlign;
3311168119cSMatt Arsenault   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
3320da6350dSMatt Arsenault     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
3331168119cSMatt Arsenault       // FIXME: Should report this for all address spaces
334a5e324e3SNikita Popov       PointeeAlign = Arg.getParamAlign().valueOrOne();
33501ce144dSScott Linder     }
33601ce144dSScott Linder   }
33701ce144dSScott Linder 
3381168119cSMatt Arsenault   Type *ArgTy;
3391168119cSMatt Arsenault   Align ArgAlign;
3401168119cSMatt Arsenault   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
3411168119cSMatt Arsenault 
3421168119cSMatt Arsenault   emitKernelArg(DL, ArgTy, ArgAlign,
3431168119cSMatt Arsenault                 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
3441168119cSMatt Arsenault                 TypeName, BaseTypeName, AccQual, TypeQual);
34501ce144dSScott Linder }
34601ce144dSScott Linder 
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,ValueKind ValueKind,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)347f5b36e56SScott Linder void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
3481168119cSMatt Arsenault                                        Align Alignment, ValueKind ValueKind,
349d3085c25SGuillaume Chatelet                                        MaybeAlign PointeeAlign, StringRef Name,
350f5b36e56SScott Linder                                        StringRef TypeName,
351f5b36e56SScott Linder                                        StringRef BaseTypeName,
35201ce144dSScott Linder                                        StringRef AccQual, StringRef TypeQual) {
35301ce144dSScott Linder   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
35401ce144dSScott Linder   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
35501ce144dSScott Linder 
356adcd0268SBenjamin Kramer   Arg.mName = std::string(Name);
357adcd0268SBenjamin Kramer   Arg.mTypeName = std::string(TypeName);
35801ce144dSScott Linder   Arg.mSize = DL.getTypeAllocSize(Ty);
3591168119cSMatt Arsenault   Arg.mAlign = Alignment.value();
36001ce144dSScott Linder   Arg.mValueKind = ValueKind;
361d3085c25SGuillaume Chatelet   Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
36201ce144dSScott Linder 
36301ce144dSScott Linder   if (auto PtrTy = dyn_cast<PointerType>(Ty))
364f5b36e56SScott Linder     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
36501ce144dSScott Linder 
36601ce144dSScott Linder   Arg.mAccQual = getAccessQualifier(AccQual);
36701ce144dSScott Linder 
36801ce144dSScott Linder   // TODO: Emit Arg.mActualAccQual.
36901ce144dSScott Linder 
37001ce144dSScott Linder   SmallVector<StringRef, 1> SplitTypeQuals;
37101ce144dSScott Linder   TypeQual.split(SplitTypeQuals, " ", -1, false);
37201ce144dSScott Linder   for (StringRef Key : SplitTypeQuals) {
37301ce144dSScott Linder     auto P = StringSwitch<bool*>(Key)
37401ce144dSScott Linder                  .Case("const",    &Arg.mIsConst)
37501ce144dSScott Linder                  .Case("restrict", &Arg.mIsRestrict)
37601ce144dSScott Linder                  .Case("volatile", &Arg.mIsVolatile)
37701ce144dSScott Linder                  .Case("pipe",     &Arg.mIsPipe)
37801ce144dSScott Linder                  .Default(nullptr);
37901ce144dSScott Linder     if (P)
38001ce144dSScott Linder       *P = true;
38101ce144dSScott Linder   }
38201ce144dSScott Linder }
38301ce144dSScott Linder 
emitHiddenKernelArgs(const Function & Func,const GCNSubtarget & ST)384ae0ba7deSMatt Arsenault void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
385ae0ba7deSMatt Arsenault                                               const GCNSubtarget &ST) {
386ae0ba7deSMatt Arsenault   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
38701ce144dSScott Linder   if (!HiddenArgNumBytes)
38801ce144dSScott Linder     return;
38901ce144dSScott Linder 
39001ce144dSScott Linder   auto &DL = Func.getParent()->getDataLayout();
39101ce144dSScott Linder   auto Int64Ty = Type::getInt64Ty(Func.getContext());
39201ce144dSScott Linder 
39301ce144dSScott Linder   if (HiddenArgNumBytes >= 8)
3941168119cSMatt Arsenault     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
39501ce144dSScott Linder   if (HiddenArgNumBytes >= 16)
3961168119cSMatt Arsenault     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
39701ce144dSScott Linder   if (HiddenArgNumBytes >= 24)
3981168119cSMatt Arsenault     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
39901ce144dSScott Linder 
40001ce144dSScott Linder   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
4010da6350dSMatt Arsenault                                       AMDGPUAS::GLOBAL_ADDRESS);
40201ce144dSScott Linder 
40301ce144dSScott Linder   if (HiddenArgNumBytes >= 32) {
40409f33a43SScott Linder     // We forbid the use of features requiring hostcall when compiling OpenCL
40509f33a43SScott Linder     // before code object V5, which makes the mutual exclusion between the
40609f33a43SScott Linder     // "printf buffer" and "hostcall buffer" here sound.
40701ce144dSScott Linder     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
4081168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
40909f33a43SScott Linder     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
4101168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
41109f33a43SScott Linder     else
4121168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
41301ce144dSScott Linder   }
41401ce144dSScott Linder 
41501ce144dSScott Linder   // Emit "default queue" and "completion action" arguments if enqueue kernel is
41601ce144dSScott Linder   // used, otherwise emit dummy "none" arguments.
41701ce144dSScott Linder   if (HiddenArgNumBytes >= 48) {
41801ce144dSScott Linder     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
4191168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
4201168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
42101ce144dSScott Linder     } else {
4221168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
4231168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
42401ce144dSScott Linder     }
42501ce144dSScott Linder   }
426a6241352SYaxun Liu 
427a6241352SYaxun Liu   // Emit the pointer argument for multi-grid object.
4288edaf259SChangpeng Fang   if (HiddenArgNumBytes >= 56) {
4298edaf259SChangpeng Fang     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
4301168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
4318edaf259SChangpeng Fang     else
4328edaf259SChangpeng Fang       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
4338edaf259SChangpeng Fang   }
43401ce144dSScott Linder }
43501ce144dSScott Linder 
emitTo(AMDGPUTargetStreamer & TargetStreamer)436f5b36e56SScott Linder bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
437f5b36e56SScott Linder   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
438f5b36e56SScott Linder }
439f5b36e56SScott Linder 
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)440f4ace637SKonstantin Zhuravlyov void MetadataStreamerV2::begin(const Module &Mod,
441f4ace637SKonstantin Zhuravlyov                                const IsaInfo::AMDGPUTargetID &TargetID) {
44201ce144dSScott Linder   emitVersion();
44301ce144dSScott Linder   emitPrintf(Mod);
44401ce144dSScott Linder }
44501ce144dSScott Linder 
end()446f5b36e56SScott Linder void MetadataStreamerV2::end() {
44701ce144dSScott Linder   std::string HSAMetadataString;
44801ce144dSScott Linder   if (toString(HSAMetadata, HSAMetadataString))
44901ce144dSScott Linder     return;
45001ce144dSScott Linder 
45101ce144dSScott Linder   if (DumpHSAMetadata)
45201ce144dSScott Linder     dump(HSAMetadataString);
45301ce144dSScott Linder   if (VerifyHSAMetadata)
45401ce144dSScott Linder     verify(HSAMetadataString);
45501ce144dSScott Linder }
45601ce144dSScott Linder 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)457f5b36e56SScott Linder void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
458f5b36e56SScott Linder                                     const SIProgramInfo &ProgramInfo) {
45901ce144dSScott Linder   auto &Func = MF.getFunction();
46001ce144dSScott Linder   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
46101ce144dSScott Linder     return;
46201ce144dSScott Linder 
4634bec7d42SMatt Arsenault   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
4644bec7d42SMatt Arsenault   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
4654bec7d42SMatt Arsenault 
46601ce144dSScott Linder   HSAMetadata.mKernels.push_back(Kernel::Metadata());
46701ce144dSScott Linder   auto &Kernel = HSAMetadata.mKernels.back();
46801ce144dSScott Linder 
469ae0ba7deSMatt Arsenault   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
470adcd0268SBenjamin Kramer   Kernel.mName = std::string(Func.getName());
47101ce144dSScott Linder   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
47201ce144dSScott Linder   emitKernelLanguage(Func);
47301ce144dSScott Linder   emitKernelAttrs(Func);
474ae0ba7deSMatt Arsenault   emitKernelArgs(Func, ST);
47501ce144dSScott Linder   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
47601ce144dSScott Linder   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
47701ce144dSScott Linder }
47801ce144dSScott Linder 
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV3
//===----------------------------------------------------------------------===//
482f5b36e56SScott Linder 
dump(StringRef HSAMetadataString) const483f5b36e56SScott Linder void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
484f5b36e56SScott Linder   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
485f5b36e56SScott Linder }
486f5b36e56SScott Linder 
verify(StringRef HSAMetadataString) const487f5b36e56SScott Linder void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
488f5b36e56SScott Linder   errs() << "AMDGPU HSA Metadata Parser Test: ";
489f5b36e56SScott Linder 
490ed0b9af9STim Renouf   msgpack::Document FromHSAMetadataString;
491f5b36e56SScott Linder 
492ed0b9af9STim Renouf   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
493f5b36e56SScott Linder     errs() << "FAIL\n";
494f5b36e56SScott Linder     return;
495f5b36e56SScott Linder   }
496f5b36e56SScott Linder 
497f5b36e56SScott Linder   std::string ToHSAMetadataString;
498f5b36e56SScott Linder   raw_string_ostream StrOS(ToHSAMetadataString);
499ed0b9af9STim Renouf   FromHSAMetadataString.toYAML(StrOS);
500f5b36e56SScott Linder 
501f5b36e56SScott Linder   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
502f5b36e56SScott Linder   if (HSAMetadataString != ToHSAMetadataString) {
503f5b36e56SScott Linder     errs() << "Original input: " << HSAMetadataString << '\n'
504f5b36e56SScott Linder            << "Produced output: " << StrOS.str() << '\n';
505f5b36e56SScott Linder   }
506f5b36e56SScott Linder }
507f5b36e56SScott Linder 
508f5b36e56SScott Linder Optional<StringRef>
getAccessQualifier(StringRef AccQual) const509f5b36e56SScott Linder MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
510f5b36e56SScott Linder   return StringSwitch<Optional<StringRef>>(AccQual)
511f5b36e56SScott Linder       .Case("read_only", StringRef("read_only"))
512f5b36e56SScott Linder       .Case("write_only", StringRef("write_only"))
513f5b36e56SScott Linder       .Case("read_write", StringRef("read_write"))
514f5b36e56SScott Linder       .Default(None);
515f5b36e56SScott Linder }
516f5b36e56SScott Linder 
517f5b36e56SScott Linder Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const518f5b36e56SScott Linder MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
519f5b36e56SScott Linder   switch (AddressSpace) {
520f5b36e56SScott Linder   case AMDGPUAS::PRIVATE_ADDRESS:
521f5b36e56SScott Linder     return StringRef("private");
522f5b36e56SScott Linder   case AMDGPUAS::GLOBAL_ADDRESS:
523f5b36e56SScott Linder     return StringRef("global");
524f5b36e56SScott Linder   case AMDGPUAS::CONSTANT_ADDRESS:
525f5b36e56SScott Linder     return StringRef("constant");
526f5b36e56SScott Linder   case AMDGPUAS::LOCAL_ADDRESS:
527f5b36e56SScott Linder     return StringRef("local");
528f5b36e56SScott Linder   case AMDGPUAS::FLAT_ADDRESS:
529f5b36e56SScott Linder     return StringRef("generic");
530f5b36e56SScott Linder   case AMDGPUAS::REGION_ADDRESS:
531f5b36e56SScott Linder     return StringRef("region");
532f5b36e56SScott Linder   default:
533f5b36e56SScott Linder     return None;
534f5b36e56SScott Linder   }
535f5b36e56SScott Linder }
536f5b36e56SScott Linder 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const537f5b36e56SScott Linder StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
538f5b36e56SScott Linder                                            StringRef BaseTypeName) const {
5396fe949c4SKazu Hirata   if (TypeQual.contains("pipe"))
540f5b36e56SScott Linder     return "pipe";
541f5b36e56SScott Linder 
542f5b36e56SScott Linder   return StringSwitch<StringRef>(BaseTypeName)
543f5b36e56SScott Linder       .Case("image1d_t", "image")
544f5b36e56SScott Linder       .Case("image1d_array_t", "image")
545f5b36e56SScott Linder       .Case("image1d_buffer_t", "image")
546f5b36e56SScott Linder       .Case("image2d_t", "image")
547f5b36e56SScott Linder       .Case("image2d_array_t", "image")
548f5b36e56SScott Linder       .Case("image2d_array_depth_t", "image")
549f5b36e56SScott Linder       .Case("image2d_array_msaa_t", "image")
550f5b36e56SScott Linder       .Case("image2d_array_msaa_depth_t", "image")
551f5b36e56SScott Linder       .Case("image2d_depth_t", "image")
552f5b36e56SScott Linder       .Case("image2d_msaa_t", "image")
553f5b36e56SScott Linder       .Case("image2d_msaa_depth_t", "image")
554f5b36e56SScott Linder       .Case("image3d_t", "image")
555f5b36e56SScott Linder       .Case("sampler_t", "sampler")
556f5b36e56SScott Linder       .Case("queue_t", "queue")
557f5b36e56SScott Linder       .Default(isa<PointerType>(Ty)
558f5b36e56SScott Linder                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
559f5b36e56SScott Linder                           ? "dynamic_shared_pointer"
560f5b36e56SScott Linder                           : "global_buffer")
561f5b36e56SScott Linder                    : "by_value");
562f5b36e56SScott Linder }
563f5b36e56SScott Linder 
getTypeName(Type * Ty,bool Signed) const564f5b36e56SScott Linder std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
565f5b36e56SScott Linder   switch (Ty->getTypeID()) {
566f5b36e56SScott Linder   case Type::IntegerTyID: {
567f5b36e56SScott Linder     if (!Signed)
568f5b36e56SScott Linder       return (Twine('u') + getTypeName(Ty, true)).str();
569f5b36e56SScott Linder 
570f5b36e56SScott Linder     auto BitWidth = Ty->getIntegerBitWidth();
571f5b36e56SScott Linder     switch (BitWidth) {
572f5b36e56SScott Linder     case 8:
573f5b36e56SScott Linder       return "char";
574f5b36e56SScott Linder     case 16:
575f5b36e56SScott Linder       return "short";
576f5b36e56SScott Linder     case 32:
577f5b36e56SScott Linder       return "int";
578f5b36e56SScott Linder     case 64:
579f5b36e56SScott Linder       return "long";
580f5b36e56SScott Linder     default:
581f5b36e56SScott Linder       return (Twine('i') + Twine(BitWidth)).str();
582f5b36e56SScott Linder     }
583f5b36e56SScott Linder   }
584f5b36e56SScott Linder   case Type::HalfTyID:
585f5b36e56SScott Linder     return "half";
586f5b36e56SScott Linder   case Type::FloatTyID:
587f5b36e56SScott Linder     return "float";
588f5b36e56SScott Linder   case Type::DoubleTyID:
589f5b36e56SScott Linder     return "double";
5902dea3f12SChristopher Tetreault   case Type::FixedVectorTyID: {
5913254a001SChristopher Tetreault     auto VecTy = cast<FixedVectorType>(Ty);
592f5b36e56SScott Linder     auto ElTy = VecTy->getElementType();
593e634f482SChristopher Tetreault     auto NumElements = VecTy->getNumElements();
594f5b36e56SScott Linder     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
595f5b36e56SScott Linder   }
596f5b36e56SScott Linder   default:
597f5b36e56SScott Linder     return "unknown";
598f5b36e56SScott Linder   }
599f5b36e56SScott Linder }
600f5b36e56SScott Linder 
601ed0b9af9STim Renouf msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const602f5b36e56SScott Linder MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
603ed0b9af9STim Renouf   auto Dims = HSAMetadataDoc->getArrayNode();
604f5b36e56SScott Linder   if (Node->getNumOperands() != 3)
605f5b36e56SScott Linder     return Dims;
606f5b36e56SScott Linder 
607f5b36e56SScott Linder   for (auto &Op : Node->operands())
608ed0b9af9STim Renouf     Dims.push_back(Dims.getDocument()->getNode(
609ed0b9af9STim Renouf         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
610f5b36e56SScott Linder   return Dims;
611f5b36e56SScott Linder }
612f5b36e56SScott Linder 
emitVersion()613f5b36e56SScott Linder void MetadataStreamerV3::emitVersion() {
614ed0b9af9STim Renouf   auto Version = HSAMetadataDoc->getArrayNode();
615f4ace637SKonstantin Zhuravlyov   Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
616f4ace637SKonstantin Zhuravlyov   Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
617ed0b9af9STim Renouf   getRootMetadata("amdhsa.version") = Version;
618f5b36e56SScott Linder }
619f5b36e56SScott Linder 
emitPrintf(const Module & Mod)620f5b36e56SScott Linder void MetadataStreamerV3::emitPrintf(const Module &Mod) {
621f5b36e56SScott Linder   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
622f5b36e56SScott Linder   if (!Node)
623f5b36e56SScott Linder     return;
624f5b36e56SScott Linder 
625ed0b9af9STim Renouf   auto Printf = HSAMetadataDoc->getArrayNode();
626f5b36e56SScott Linder   for (auto Op : Node->operands())
627f5b36e56SScott Linder     if (Op->getNumOperands())
628ed0b9af9STim Renouf       Printf.push_back(Printf.getDocument()->getNode(
629ed0b9af9STim Renouf           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
630ed0b9af9STim Renouf   getRootMetadata("amdhsa.printf") = Printf;
631f5b36e56SScott Linder }
632f5b36e56SScott Linder 
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)633f5b36e56SScott Linder void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
634ed0b9af9STim Renouf                                             msgpack::MapDocNode Kern) {
635f5b36e56SScott Linder   // TODO: What about other languages?
636f5b36e56SScott Linder   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
637f5b36e56SScott Linder   if (!Node || !Node->getNumOperands())
638f5b36e56SScott Linder     return;
639f5b36e56SScott Linder   auto Op0 = Node->getOperand(0);
640f5b36e56SScott Linder   if (Op0->getNumOperands() <= 1)
641f5b36e56SScott Linder     return;
642f5b36e56SScott Linder 
643ed0b9af9STim Renouf   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
644ed0b9af9STim Renouf   auto LanguageVersion = Kern.getDocument()->getArrayNode();
645ed0b9af9STim Renouf   LanguageVersion.push_back(Kern.getDocument()->getNode(
646f5b36e56SScott Linder       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
647ed0b9af9STim Renouf   LanguageVersion.push_back(Kern.getDocument()->getNode(
648f5b36e56SScott Linder       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
649ed0b9af9STim Renouf   Kern[".language_version"] = LanguageVersion;
650f5b36e56SScott Linder }
651f5b36e56SScott Linder 
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)652f5b36e56SScott Linder void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
653ed0b9af9STim Renouf                                          msgpack::MapDocNode Kern) {
654f5b36e56SScott Linder 
655f5b36e56SScott Linder   if (auto Node = Func.getMetadata("reqd_work_group_size"))
656f5b36e56SScott Linder     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
657f5b36e56SScott Linder   if (auto Node = Func.getMetadata("work_group_size_hint"))
658f5b36e56SScott Linder     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
659f5b36e56SScott Linder   if (auto Node = Func.getMetadata("vec_type_hint")) {
660ed0b9af9STim Renouf     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
661ed0b9af9STim Renouf         getTypeName(
662f5b36e56SScott Linder             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
663ed0b9af9STim Renouf             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
664ed0b9af9STim Renouf         /*Copy=*/true);
665f5b36e56SScott Linder   }
666f5b36e56SScott Linder   if (Func.hasFnAttribute("runtime-handle")) {
667ed0b9af9STim Renouf     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
668ed0b9af9STim Renouf         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
669ed0b9af9STim Renouf         /*Copy=*/true);
670f5b36e56SScott Linder   }
6715173854fSReshabh Sharma   if (Func.hasFnAttribute("device-init"))
6725173854fSReshabh Sharma     Kern[".kind"] = Kern.getDocument()->getNode("init");
6735173854fSReshabh Sharma   else if (Func.hasFnAttribute("device-fini"))
6745173854fSReshabh Sharma     Kern[".kind"] = Kern.getDocument()->getNode("fini");
675f5b36e56SScott Linder }
676f5b36e56SScott Linder 
emitKernelArgs(const MachineFunction & MF,msgpack::MapDocNode Kern)6771194b9cdSChangpeng Fang void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF,
678ed0b9af9STim Renouf                                         msgpack::MapDocNode Kern) {
6791194b9cdSChangpeng Fang   auto &Func = MF.getFunction();
680f5b36e56SScott Linder   unsigned Offset = 0;
681ed0b9af9STim Renouf   auto Args = HSAMetadataDoc->getArrayNode();
682f5b36e56SScott Linder   for (auto &Arg : Func.args())
683ed0b9af9STim Renouf     emitKernelArg(Arg, Offset, Args);
684f5b36e56SScott Linder 
6851194b9cdSChangpeng Fang   emitHiddenKernelArgs(MF, Offset, Args);
686f5b36e56SScott Linder 
687ed0b9af9STim Renouf   Kern[".args"] = Args;
688f5b36e56SScott Linder }
689f5b36e56SScott Linder 
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)690f5b36e56SScott Linder void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
691ed0b9af9STim Renouf                                        msgpack::ArrayDocNode Args) {
692f5b36e56SScott Linder   auto Func = Arg.getParent();
693f5b36e56SScott Linder   auto ArgNo = Arg.getArgNo();
694f5b36e56SScott Linder   const MDNode *Node;
695f5b36e56SScott Linder 
696f5b36e56SScott Linder   StringRef Name;
697f5b36e56SScott Linder   Node = Func->getMetadata("kernel_arg_name");
698f5b36e56SScott Linder   if (Node && ArgNo < Node->getNumOperands())
699f5b36e56SScott Linder     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
700f5b36e56SScott Linder   else if (Arg.hasName())
701f5b36e56SScott Linder     Name = Arg.getName();
702f5b36e56SScott Linder 
703f5b36e56SScott Linder   StringRef TypeName;
704f5b36e56SScott Linder   Node = Func->getMetadata("kernel_arg_type");
705f5b36e56SScott Linder   if (Node && ArgNo < Node->getNumOperands())
706f5b36e56SScott Linder     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
707f5b36e56SScott Linder 
708f5b36e56SScott Linder   StringRef BaseTypeName;
709f5b36e56SScott Linder   Node = Func->getMetadata("kernel_arg_base_type");
710f5b36e56SScott Linder   if (Node && ArgNo < Node->getNumOperands())
711f5b36e56SScott Linder     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
712f5b36e56SScott Linder 
713f5b36e56SScott Linder   StringRef AccQual;
714f5b36e56SScott Linder   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
715f5b36e56SScott Linder       Arg.hasNoAliasAttr()) {
716f5b36e56SScott Linder     AccQual = "read_only";
717f5b36e56SScott Linder   } else {
718f5b36e56SScott Linder     Node = Func->getMetadata("kernel_arg_access_qual");
719f5b36e56SScott Linder     if (Node && ArgNo < Node->getNumOperands())
720f5b36e56SScott Linder       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
721f5b36e56SScott Linder   }
722f5b36e56SScott Linder 
723f5b36e56SScott Linder   StringRef TypeQual;
724f5b36e56SScott Linder   Node = Func->getMetadata("kernel_arg_type_qual");
725f5b36e56SScott Linder   if (Node && ArgNo < Node->getNumOperands())
726f5b36e56SScott Linder     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
727f5b36e56SScott Linder 
728f5b36e56SScott Linder   const DataLayout &DL = Func->getParent()->getDataLayout();
729f5b36e56SScott Linder 
730d3085c25SGuillaume Chatelet   MaybeAlign PointeeAlign;
7311168119cSMatt Arsenault   Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
7321168119cSMatt Arsenault 
7331168119cSMatt Arsenault   // FIXME: Need to distinguish in memory alignment from pointer alignment.
734f5b36e56SScott Linder   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
735a5e324e3SNikita Popov     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
736a5e324e3SNikita Popov       PointeeAlign = Arg.getParamAlign().valueOrOne();
737f5b36e56SScott Linder   }
738f5b36e56SScott Linder 
7391168119cSMatt Arsenault   // There's no distinction between byval aggregates and raw aggregates.
7401168119cSMatt Arsenault   Type *ArgTy;
7411168119cSMatt Arsenault   Align ArgAlign;
7421168119cSMatt Arsenault   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
7431168119cSMatt Arsenault 
7441168119cSMatt Arsenault   emitKernelArg(DL, ArgTy, ArgAlign,
7451168119cSMatt Arsenault                 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
7461168119cSMatt Arsenault                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
747f5b36e56SScott Linder }
748f5b36e56SScott Linder 
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)7491168119cSMatt Arsenault void MetadataStreamerV3::emitKernelArg(
7501168119cSMatt Arsenault     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
7511168119cSMatt Arsenault     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
7521168119cSMatt Arsenault     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
753f5b36e56SScott Linder     StringRef AccQual, StringRef TypeQual) {
754ed0b9af9STim Renouf   auto Arg = Args.getDocument()->getMapNode();
755f5b36e56SScott Linder 
756f5b36e56SScott Linder   if (!Name.empty())
757ed0b9af9STim Renouf     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
758f5b36e56SScott Linder   if (!TypeName.empty())
759ed0b9af9STim Renouf     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
760f5b36e56SScott Linder   auto Size = DL.getTypeAllocSize(Ty);
761ed0b9af9STim Renouf   Arg[".size"] = Arg.getDocument()->getNode(Size);
762d3085c25SGuillaume Chatelet   Offset = alignTo(Offset, Alignment);
763ed0b9af9STim Renouf   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
764f5b36e56SScott Linder   Offset += Size;
765ed0b9af9STim Renouf   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
766f5b36e56SScott Linder   if (PointeeAlign)
767d3085c25SGuillaume Chatelet     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
768f5b36e56SScott Linder 
769f5b36e56SScott Linder   if (auto PtrTy = dyn_cast<PointerType>(Ty))
770f5b36e56SScott Linder     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
771ed0b9af9STim Renouf       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
772f5b36e56SScott Linder 
773f5b36e56SScott Linder   if (auto AQ = getAccessQualifier(AccQual))
774ed0b9af9STim Renouf     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
775f5b36e56SScott Linder 
776f5b36e56SScott Linder   // TODO: Emit Arg[".actual_access"].
777f5b36e56SScott Linder 
778f5b36e56SScott Linder   SmallVector<StringRef, 1> SplitTypeQuals;
779f5b36e56SScott Linder   TypeQual.split(SplitTypeQuals, " ", -1, false);
780f5b36e56SScott Linder   for (StringRef Key : SplitTypeQuals) {
781f5b36e56SScott Linder     if (Key == "const")
782ed0b9af9STim Renouf       Arg[".is_const"] = Arg.getDocument()->getNode(true);
783f5b36e56SScott Linder     else if (Key == "restrict")
784ed0b9af9STim Renouf       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
785f5b36e56SScott Linder     else if (Key == "volatile")
786ed0b9af9STim Renouf       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
787f5b36e56SScott Linder     else if (Key == "pipe")
788ed0b9af9STim Renouf       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
789f5b36e56SScott Linder   }
790f5b36e56SScott Linder 
791ed0b9af9STim Renouf   Args.push_back(Arg);
792f5b36e56SScott Linder }
793f5b36e56SScott Linder 
emitHiddenKernelArgs(const MachineFunction & MF,unsigned & Offset,msgpack::ArrayDocNode Args)7941194b9cdSChangpeng Fang void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
795f5b36e56SScott Linder                                               unsigned &Offset,
796ed0b9af9STim Renouf                                               msgpack::ArrayDocNode Args) {
7971194b9cdSChangpeng Fang   auto &Func = MF.getFunction();
7981194b9cdSChangpeng Fang   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
7991194b9cdSChangpeng Fang 
800ae0ba7deSMatt Arsenault   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
801f5b36e56SScott Linder   if (!HiddenArgNumBytes)
802f5b36e56SScott Linder     return;
803f5b36e56SScott Linder 
804095c48fdSkpyzhov   const Module *M = Func.getParent();
805095c48fdSkpyzhov   auto &DL = M->getDataLayout();
806f5b36e56SScott Linder   auto Int64Ty = Type::getInt64Ty(Func.getContext());
807f5b36e56SScott Linder 
8087f9868f9SChangpeng Fang   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
8097f9868f9SChangpeng Fang 
810f5b36e56SScott Linder   if (HiddenArgNumBytes >= 8)
8111168119cSMatt Arsenault     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
8121168119cSMatt Arsenault                   Args);
813f5b36e56SScott Linder   if (HiddenArgNumBytes >= 16)
8141168119cSMatt Arsenault     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
8151168119cSMatt Arsenault                   Args);
816f5b36e56SScott Linder   if (HiddenArgNumBytes >= 24)
8171168119cSMatt Arsenault     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
8181168119cSMatt Arsenault                   Args);
819f5b36e56SScott Linder 
820f5b36e56SScott Linder   auto Int8PtrTy =
821f5b36e56SScott Linder       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
822f5b36e56SScott Linder 
823f5b36e56SScott Linder   if (HiddenArgNumBytes >= 32) {
82409f33a43SScott Linder     // We forbid the use of features requiring hostcall when compiling OpenCL
82509f33a43SScott Linder     // before code object V5, which makes the mutual exclusion between the
82609f33a43SScott Linder     // "printf buffer" and "hostcall buffer" here sound.
827095c48fdSkpyzhov     if (M->getNamedMetadata("llvm.printf.fmts"))
8281168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
8291168119cSMatt Arsenault                     Args);
83009f33a43SScott Linder     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
8311168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
8321168119cSMatt Arsenault                     Args);
83309f33a43SScott Linder     else
8341168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
835f5b36e56SScott Linder   }
836f5b36e56SScott Linder 
837f5b36e56SScott Linder   // Emit "default queue" and "completion action" arguments if enqueue kernel is
838f5b36e56SScott Linder   // used, otherwise emit dummy "none" arguments.
839f5b36e56SScott Linder   if (HiddenArgNumBytes >= 48) {
840f5b36e56SScott Linder     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
8411168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
8421168119cSMatt Arsenault                     Args);
8431168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
8441168119cSMatt Arsenault                     Args);
845f5b36e56SScott Linder     } else {
8461168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
8471168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
848f5b36e56SScott Linder     }
849f5b36e56SScott Linder   }
850a6241352SYaxun Liu 
851a6241352SYaxun Liu   // Emit the pointer argument for multi-grid object.
8528edaf259SChangpeng Fang   if (HiddenArgNumBytes >= 56) {
8538edaf259SChangpeng Fang     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
8541168119cSMatt Arsenault       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
8551168119cSMatt Arsenault                     Args);
8568edaf259SChangpeng Fang     } else {
8578edaf259SChangpeng Fang       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
8588edaf259SChangpeng Fang     }
8598edaf259SChangpeng Fang   }
860f5b36e56SScott Linder }
861f5b36e56SScott Linder 
862ed0b9af9STim Renouf msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const863f5b36e56SScott Linder MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
864f5b36e56SScott Linder                                       const SIProgramInfo &ProgramInfo) const {
865f5b36e56SScott Linder   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
866f5b36e56SScott Linder   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
867f5b36e56SScott Linder   const Function &F = MF.getFunction();
868f5b36e56SScott Linder 
869ed0b9af9STim Renouf   auto Kern = HSAMetadataDoc->getMapNode();
870f5b36e56SScott Linder 
871b65fa483SGuillaume Chatelet   Align MaxKernArgAlign;
872ed0b9af9STim Renouf   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
873f5b36e56SScott Linder       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
874f5b36e56SScott Linder   Kern[".group_segment_fixed_size"] =
875ed0b9af9STim Renouf       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
876f5b36e56SScott Linder   Kern[".private_segment_fixed_size"] =
877ed0b9af9STim Renouf       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
878*d96361d7SAbinav Puthan Purayil   Kern[".uses_dynamic_stack"] =
879*d96361d7SAbinav Puthan Purayil       Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
88090ff1487SMatt Arsenault 
88190ff1487SMatt Arsenault   // FIXME: The metadata treats the minimum as 16?
882f5b36e56SScott Linder   Kern[".kernarg_segment_align"] =
883b65fa483SGuillaume Chatelet       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
884f5b36e56SScott Linder   Kern[".wavefront_size"] =
885ed0b9af9STim Renouf       Kern.getDocument()->getNode(STM.getWavefrontSize());
886ed0b9af9STim Renouf   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
887ed0b9af9STim Renouf   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
88874702444SJacob Lambert 
88974702444SJacob Lambert   // Only add AGPR count to metadata for supported devices
89074702444SJacob Lambert   if (STM.hasMAIInsts()) {
89174702444SJacob Lambert     Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
89274702444SJacob Lambert   }
89374702444SJacob Lambert 
894f5b36e56SScott Linder   Kern[".max_flat_workgroup_size"] =
895ed0b9af9STim Renouf       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
896f5b36e56SScott Linder   Kern[".sgpr_spill_count"] =
897ed0b9af9STim Renouf       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
898f5b36e56SScott Linder   Kern[".vgpr_spill_count"] =
899ed0b9af9STim Renouf       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
900f5b36e56SScott Linder 
901ed0b9af9STim Renouf   return Kern;
902f5b36e56SScott Linder }
903f5b36e56SScott Linder 
emitTo(AMDGPUTargetStreamer & TargetStreamer)904f5b36e56SScott Linder bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
905ed0b9af9STim Renouf   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
906f5b36e56SScott Linder }
907f5b36e56SScott Linder 
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)908f4ace637SKonstantin Zhuravlyov void MetadataStreamerV3::begin(const Module &Mod,
909f4ace637SKonstantin Zhuravlyov                                const IsaInfo::AMDGPUTargetID &TargetID) {
910f5b36e56SScott Linder   emitVersion();
911f5b36e56SScott Linder   emitPrintf(Mod);
912ed0b9af9STim Renouf   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
913f5b36e56SScott Linder }
914f5b36e56SScott Linder 
end()915f5b36e56SScott Linder void MetadataStreamerV3::end() {
916f5b36e56SScott Linder   std::string HSAMetadataString;
917f5b36e56SScott Linder   raw_string_ostream StrOS(HSAMetadataString);
918ed0b9af9STim Renouf   HSAMetadataDoc->toYAML(StrOS);
919f5b36e56SScott Linder 
920f5b36e56SScott Linder   if (DumpHSAMetadata)
921f5b36e56SScott Linder     dump(StrOS.str());
922f5b36e56SScott Linder   if (VerifyHSAMetadata)
923f5b36e56SScott Linder     verify(StrOS.str());
924f5b36e56SScott Linder }
925f5b36e56SScott Linder 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)926f5b36e56SScott Linder void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
927f5b36e56SScott Linder                                     const SIProgramInfo &ProgramInfo) {
928f5b36e56SScott Linder   auto &Func = MF.getFunction();
929ed0b9af9STim Renouf   auto Kern = getHSAKernelProps(MF, ProgramInfo);
930f5b36e56SScott Linder 
931f5b36e56SScott Linder   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
932f5b36e56SScott Linder          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
933f5b36e56SScott Linder 
934ed0b9af9STim Renouf   auto Kernels =
935ed0b9af9STim Renouf       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
936f5b36e56SScott Linder 
937f5b36e56SScott Linder   {
938ed0b9af9STim Renouf     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
939ed0b9af9STim Renouf     Kern[".symbol"] = Kern.getDocument()->getNode(
940ed0b9af9STim Renouf         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
941f5b36e56SScott Linder     emitKernelLanguage(Func, Kern);
942f5b36e56SScott Linder     emitKernelAttrs(Func, Kern);
9431194b9cdSChangpeng Fang     emitKernelArgs(MF, Kern);
944f5b36e56SScott Linder   }
945f5b36e56SScott Linder 
946ed0b9af9STim Renouf   Kernels.push_back(Kern);
947f5b36e56SScott Linder }
948f5b36e56SScott Linder 
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV4
//===----------------------------------------------------------------------===//
952f4ace637SKonstantin Zhuravlyov 
emitVersion()953f4ace637SKonstantin Zhuravlyov void MetadataStreamerV4::emitVersion() {
954f4ace637SKonstantin Zhuravlyov   auto Version = HSAMetadataDoc->getArrayNode();
955f4ace637SKonstantin Zhuravlyov   Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
956f4ace637SKonstantin Zhuravlyov   Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
957f4ace637SKonstantin Zhuravlyov   getRootMetadata("amdhsa.version") = Version;
958f4ace637SKonstantin Zhuravlyov }
959f4ace637SKonstantin Zhuravlyov 
emitTargetID(const IsaInfo::AMDGPUTargetID & TargetID)960f4ace637SKonstantin Zhuravlyov void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
961f4ace637SKonstantin Zhuravlyov   getRootMetadata("amdhsa.target") =
962f4ace637SKonstantin Zhuravlyov       HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
963f4ace637SKonstantin Zhuravlyov }
964f4ace637SKonstantin Zhuravlyov 
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)965f4ace637SKonstantin Zhuravlyov void MetadataStreamerV4::begin(const Module &Mod,
966f4ace637SKonstantin Zhuravlyov                                const IsaInfo::AMDGPUTargetID &TargetID) {
967f4ace637SKonstantin Zhuravlyov   emitVersion();
968f4ace637SKonstantin Zhuravlyov   emitTargetID(TargetID);
969f4ace637SKonstantin Zhuravlyov   emitPrintf(Mod);
970f4ace637SKonstantin Zhuravlyov   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
971f4ace637SKonstantin Zhuravlyov }
972f4ace637SKonstantin Zhuravlyov 
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV5
//===----------------------------------------------------------------------===//
9761194b9cdSChangpeng Fang 
emitVersion()9771194b9cdSChangpeng Fang void MetadataStreamerV5::emitVersion() {
9781194b9cdSChangpeng Fang   auto Version = HSAMetadataDoc->getArrayNode();
9791194b9cdSChangpeng Fang   Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
9801194b9cdSChangpeng Fang   Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
9811194b9cdSChangpeng Fang   getRootMetadata("amdhsa.version") = Version;
9821194b9cdSChangpeng Fang }
9831194b9cdSChangpeng Fang 
emitHiddenKernelArgs(const MachineFunction & MF,unsigned & Offset,msgpack::ArrayDocNode Args)9841194b9cdSChangpeng Fang void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
9851194b9cdSChangpeng Fang                                               unsigned &Offset,
9861194b9cdSChangpeng Fang                                               msgpack::ArrayDocNode Args) {
9871194b9cdSChangpeng Fang   auto &Func = MF.getFunction();
9881194b9cdSChangpeng Fang   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
9897f9868f9SChangpeng Fang 
9907f9868f9SChangpeng Fang   // No implicit kernel argument is used.
9917f9868f9SChangpeng Fang   if (ST.getImplicitArgNumBytes(Func) == 0)
9927f9868f9SChangpeng Fang     return;
9937f9868f9SChangpeng Fang 
9941194b9cdSChangpeng Fang   const Module *M = Func.getParent();
9951194b9cdSChangpeng Fang   auto &DL = M->getDataLayout();
996d8f99bb6SSameer Sahasrabuddhe   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
9971194b9cdSChangpeng Fang 
9981194b9cdSChangpeng Fang   auto Int64Ty = Type::getInt64Ty(Func.getContext());
9991194b9cdSChangpeng Fang   auto Int32Ty = Type::getInt32Ty(Func.getContext());
10001194b9cdSChangpeng Fang   auto Int16Ty = Type::getInt16Ty(Func.getContext());
10011194b9cdSChangpeng Fang 
10027f9868f9SChangpeng Fang   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
10031194b9cdSChangpeng Fang   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
10041194b9cdSChangpeng Fang   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
10051194b9cdSChangpeng Fang   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
10061194b9cdSChangpeng Fang 
10071194b9cdSChangpeng Fang   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
10081194b9cdSChangpeng Fang   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
10091194b9cdSChangpeng Fang   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
10101194b9cdSChangpeng Fang 
10111194b9cdSChangpeng Fang   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
10121194b9cdSChangpeng Fang   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
10131194b9cdSChangpeng Fang   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
10141194b9cdSChangpeng Fang 
10151194b9cdSChangpeng Fang   // Reserved for hidden_tool_correlation_id.
10161194b9cdSChangpeng Fang   Offset += 8;
10171194b9cdSChangpeng Fang 
10181194b9cdSChangpeng Fang   Offset += 8; // Reserved.
10191194b9cdSChangpeng Fang 
10201194b9cdSChangpeng Fang   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
10211194b9cdSChangpeng Fang   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
10221194b9cdSChangpeng Fang   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
10231194b9cdSChangpeng Fang 
10241194b9cdSChangpeng Fang   emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
10251194b9cdSChangpeng Fang 
10261194b9cdSChangpeng Fang   Offset += 6; // Reserved.
10271194b9cdSChangpeng Fang   auto Int8PtrTy =
10281194b9cdSChangpeng Fang       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
10291194b9cdSChangpeng Fang 
10301194b9cdSChangpeng Fang   if (M->getNamedMetadata("llvm.printf.fmts")) {
10311194b9cdSChangpeng Fang     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
10321194b9cdSChangpeng Fang                   Args);
10338edaf259SChangpeng Fang   } else {
10341194b9cdSChangpeng Fang     Offset += 8; // Skipped.
10358edaf259SChangpeng Fang   }
10361194b9cdSChangpeng Fang 
10378384ced9SChangpeng Fang   if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
10381194b9cdSChangpeng Fang     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
10391194b9cdSChangpeng Fang                   Args);
10408edaf259SChangpeng Fang   } else {
10411194b9cdSChangpeng Fang     Offset += 8; // Skipped.
10428edaf259SChangpeng Fang   }
10431194b9cdSChangpeng Fang 
10448edaf259SChangpeng Fang   if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
10451194b9cdSChangpeng Fang     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
10461194b9cdSChangpeng Fang                 Args);
10478edaf259SChangpeng Fang   } else {
10488edaf259SChangpeng Fang     Offset += 8; // Skipped.
10498edaf259SChangpeng Fang   }
10501194b9cdSChangpeng Fang 
10518384ced9SChangpeng Fang   if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1052ca62b1dbSChangpeng Fang     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1053ca62b1dbSChangpeng Fang   else
1054ca62b1dbSChangpeng Fang     Offset += 8; // Skipped.
10551194b9cdSChangpeng Fang 
10561194b9cdSChangpeng Fang   if (Func.hasFnAttribute("calls-enqueue-kernel")) {
10571194b9cdSChangpeng Fang     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
10581194b9cdSChangpeng Fang                   Args);
10591194b9cdSChangpeng Fang     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
10601194b9cdSChangpeng Fang                   Args);
10618edaf259SChangpeng Fang   } else {
10621194b9cdSChangpeng Fang     Offset += 16; // Skipped.
10638edaf259SChangpeng Fang   }
10641194b9cdSChangpeng Fang 
10651194b9cdSChangpeng Fang   Offset += 72; // Reserved.
10661194b9cdSChangpeng Fang 
10670f20a35bSChangpeng Fang   // hidden_private_base and hidden_shared_base are only when the subtarget has
10680f20a35bSChangpeng Fang   // ApertureRegs.
10690f20a35bSChangpeng Fang   if (!ST.hasApertureRegs()) {
10701194b9cdSChangpeng Fang     emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
10711194b9cdSChangpeng Fang     emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
10728edaf259SChangpeng Fang   } else {
10731194b9cdSChangpeng Fang     Offset += 8; // Skipped.
10748edaf259SChangpeng Fang   }
10751194b9cdSChangpeng Fang 
10761194b9cdSChangpeng Fang   if (MFI.hasQueuePtr())
10771194b9cdSChangpeng Fang     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
10781194b9cdSChangpeng Fang }
10791194b9cdSChangpeng Fang 
108001ce144dSScott Linder } // end namespace HSAMD
108101ce144dSScott Linder } // end namespace AMDGPU
108201ce144dSScott Linder } // end namespace llvm
1083