1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44 }
45 
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47   errs() << "AMDGPU HSA Metadata Parser Test: ";
48 
49   HSAMD::Metadata FromHSAMetadataString;
50   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
51     errs() << "FAIL\n";
52     return;
53   }
54 
55   std::string ToHSAMetadataString;
56   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57     errs() << "FAIL\n";
58     return;
59   }
60 
61   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62          << '\n';
63   if (HSAMetadataString != ToHSAMetadataString) {
64     errs() << "Original input: " << HSAMetadataString << '\n'
65            << "Produced output: " << ToHSAMetadataString << '\n';
66   }
67 }
68 
69 AccessQualifier
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71   if (AccQual.empty())
72     return AccessQualifier::Unknown;
73 
74   return StringSwitch<AccessQualifier>(AccQual)
75              .Case("read_only",  AccessQualifier::ReadOnly)
76              .Case("write_only", AccessQualifier::WriteOnly)
77              .Case("read_write", AccessQualifier::ReadWrite)
78              .Default(AccessQualifier::Default);
79 }
80 
81 AddressSpaceQualifier
82 MetadataStreamerV2::getAddressSpaceQualifier(
83     unsigned AddressSpace) const {
84   switch (AddressSpace) {
85   case AMDGPUAS::PRIVATE_ADDRESS:
86     return AddressSpaceQualifier::Private;
87   case AMDGPUAS::GLOBAL_ADDRESS:
88     return AddressSpaceQualifier::Global;
89   case AMDGPUAS::CONSTANT_ADDRESS:
90     return AddressSpaceQualifier::Constant;
91   case AMDGPUAS::LOCAL_ADDRESS:
92     return AddressSpaceQualifier::Local;
93   case AMDGPUAS::FLAT_ADDRESS:
94     return AddressSpaceQualifier::Generic;
95   case AMDGPUAS::REGION_ADDRESS:
96     return AddressSpaceQualifier::Region;
97   default:
98     return AddressSpaceQualifier::Unknown;
99   }
100 }
101 
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                            StringRef BaseTypeName) const {
104   if (TypeQual.find("pipe") != StringRef::npos)
105     return ValueKind::Pipe;
106 
107   return StringSwitch<ValueKind>(BaseTypeName)
108              .Case("image1d_t", ValueKind::Image)
109              .Case("image1d_array_t", ValueKind::Image)
110              .Case("image1d_buffer_t", ValueKind::Image)
111              .Case("image2d_t", ValueKind::Image)
112              .Case("image2d_array_t", ValueKind::Image)
113              .Case("image2d_array_depth_t", ValueKind::Image)
114              .Case("image2d_array_msaa_t", ValueKind::Image)
115              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116              .Case("image2d_depth_t", ValueKind::Image)
117              .Case("image2d_msaa_t", ValueKind::Image)
118              .Case("image2d_msaa_depth_t", ValueKind::Image)
119              .Case("image3d_t", ValueKind::Image)
120              .Case("sampler_t", ValueKind::Sampler)
121              .Case("queue_t", ValueKind::Queue)
122              .Default(isa<PointerType>(Ty) ?
123                           (Ty->getPointerAddressSpace() ==
124                            AMDGPUAS::LOCAL_ADDRESS ?
125                            ValueKind::DynamicSharedPointer :
126                            ValueKind::GlobalBuffer) :
127                       ValueKind::ByValue);
128 }
129 
130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131   switch (Ty->getTypeID()) {
132   case Type::IntegerTyID: {
133     auto Signed = !TypeName.startswith("u");
134     switch (Ty->getIntegerBitWidth()) {
135     case 8:
136       return Signed ? ValueType::I8 : ValueType::U8;
137     case 16:
138       return Signed ? ValueType::I16 : ValueType::U16;
139     case 32:
140       return Signed ? ValueType::I32 : ValueType::U32;
141     case 64:
142       return Signed ? ValueType::I64 : ValueType::U64;
143     default:
144       return ValueType::Struct;
145     }
146   }
147   case Type::HalfTyID:
148     return ValueType::F16;
149   case Type::FloatTyID:
150     return ValueType::F32;
151   case Type::DoubleTyID:
152     return ValueType::F64;
153   case Type::PointerTyID:
154     return getValueType(Ty->getPointerElementType(), TypeName);
155   case Type::VectorTyID:
156     return getValueType(Ty->getVectorElementType(), TypeName);
157   default:
158     return ValueType::Struct;
159   }
160 }
161 
162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163   switch (Ty->getTypeID()) {
164   case Type::IntegerTyID: {
165     if (!Signed)
166       return (Twine('u') + getTypeName(Ty, true)).str();
167 
168     auto BitWidth = Ty->getIntegerBitWidth();
169     switch (BitWidth) {
170     case 8:
171       return "char";
172     case 16:
173       return "short";
174     case 32:
175       return "int";
176     case 64:
177       return "long";
178     default:
179       return (Twine('i') + Twine(BitWidth)).str();
180     }
181   }
182   case Type::HalfTyID:
183     return "half";
184   case Type::FloatTyID:
185     return "float";
186   case Type::DoubleTyID:
187     return "double";
188   case Type::VectorTyID: {
189     auto VecTy = cast<VectorType>(Ty);
190     auto ElTy = VecTy->getElementType();
191     auto NumElements = VecTy->getVectorNumElements();
192     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193   }
194   default:
195     return "unknown";
196   }
197 }
198 
199 std::vector<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201   std::vector<uint32_t> Dims;
202   if (Node->getNumOperands() != 3)
203     return Dims;
204 
205   for (auto &Op : Node->operands())
206     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207   return Dims;
208 }
209 
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212                                     const SIProgramInfo &ProgramInfo) const {
213   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216   const Function &F = MF.getFunction();
217 
218   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219          F.getCallingConv() == CallingConv::SPIR_KERNEL);
220 
221   unsigned MaxKernArgAlign;
222   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223                                                                MaxKernArgAlign);
224   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226   HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
227   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
228   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
229   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
230   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
231   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
232   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
233   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
234   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
235 
236   return HSACodeProps;
237 }
238 
239 Kernel::DebugProps::Metadata
240 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
241                                      const SIProgramInfo &ProgramInfo) const {
242   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
243   HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
244 
245   if (!STM.debuggerSupported())
246     return HSADebugProps;
247 
248   HSADebugProps.mDebuggerABIVersion.push_back(1);
249   HSADebugProps.mDebuggerABIVersion.push_back(0);
250 
251   if (STM.debuggerEmitPrologue()) {
252     HSADebugProps.mPrivateSegmentBufferSGPR =
253         ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
254     HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
255         ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
256   }
257 
258   return HSADebugProps;
259 }
260 
261 void MetadataStreamerV2::emitVersion() {
262   auto &Version = HSAMetadata.mVersion;
263 
264   Version.push_back(VersionMajor);
265   Version.push_back(VersionMinor);
266 }
267 
268 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
269   auto &Printf = HSAMetadata.mPrintf;
270 
271   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
272   if (!Node)
273     return;
274 
275   for (auto Op : Node->operands())
276     if (Op->getNumOperands())
277       Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
278 }
279 
280 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
281   auto &Kernel = HSAMetadata.mKernels.back();
282 
283   // TODO: What about other languages?
284   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
285   if (!Node || !Node->getNumOperands())
286     return;
287   auto Op0 = Node->getOperand(0);
288   if (Op0->getNumOperands() <= 1)
289     return;
290 
291   Kernel.mLanguage = "OpenCL C";
292   Kernel.mLanguageVersion.push_back(
293       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
294   Kernel.mLanguageVersion.push_back(
295       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
296 }
297 
298 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
299   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
300 
301   if (auto Node = Func.getMetadata("reqd_work_group_size"))
302     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
303   if (auto Node = Func.getMetadata("work_group_size_hint"))
304     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
305   if (auto Node = Func.getMetadata("vec_type_hint")) {
306     Attrs.mVecTypeHint = getTypeName(
307         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
308         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
309   }
310   if (Func.hasFnAttribute("runtime-handle")) {
311     Attrs.mRuntimeHandle =
312         Func.getFnAttribute("runtime-handle").getValueAsString().str();
313   }
314 }
315 
316 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
317   for (auto &Arg : Func.args())
318     emitKernelArg(Arg);
319 
320   emitHiddenKernelArgs(Func);
321 }
322 
323 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
324   auto Func = Arg.getParent();
325   auto ArgNo = Arg.getArgNo();
326   const MDNode *Node;
327 
328   StringRef Name;
329   Node = Func->getMetadata("kernel_arg_name");
330   if (Node && ArgNo < Node->getNumOperands())
331     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
332   else if (Arg.hasName())
333     Name = Arg.getName();
334 
335   StringRef TypeName;
336   Node = Func->getMetadata("kernel_arg_type");
337   if (Node && ArgNo < Node->getNumOperands())
338     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
339 
340   StringRef BaseTypeName;
341   Node = Func->getMetadata("kernel_arg_base_type");
342   if (Node && ArgNo < Node->getNumOperands())
343     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
344 
345   StringRef AccQual;
346   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
347       Arg.hasNoAliasAttr()) {
348     AccQual = "read_only";
349   } else {
350     Node = Func->getMetadata("kernel_arg_access_qual");
351     if (Node && ArgNo < Node->getNumOperands())
352       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
353   }
354 
355   StringRef TypeQual;
356   Node = Func->getMetadata("kernel_arg_type_qual");
357   if (Node && ArgNo < Node->getNumOperands())
358     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
359 
360   Type *Ty = Arg.getType();
361   const DataLayout &DL = Func->getParent()->getDataLayout();
362 
363   unsigned PointeeAlign = 0;
364   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
365     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
366       PointeeAlign = Arg.getParamAlignment();
367       if (PointeeAlign == 0)
368         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
369     }
370   }
371 
372   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
373                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
374 }
375 
376 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
377                                        ValueKind ValueKind,
378                                        unsigned PointeeAlign, StringRef Name,
379                                        StringRef TypeName,
380                                        StringRef BaseTypeName,
381                                        StringRef AccQual, StringRef TypeQual) {
382   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
383   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
384 
385   Arg.mName = Name;
386   Arg.mTypeName = TypeName;
387   Arg.mSize = DL.getTypeAllocSize(Ty);
388   Arg.mAlign = DL.getABITypeAlignment(Ty);
389   Arg.mValueKind = ValueKind;
390   Arg.mValueType = getValueType(Ty, BaseTypeName);
391   Arg.mPointeeAlign = PointeeAlign;
392 
393   if (auto PtrTy = dyn_cast<PointerType>(Ty))
394     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
395 
396   Arg.mAccQual = getAccessQualifier(AccQual);
397 
398   // TODO: Emit Arg.mActualAccQual.
399 
400   SmallVector<StringRef, 1> SplitTypeQuals;
401   TypeQual.split(SplitTypeQuals, " ", -1, false);
402   for (StringRef Key : SplitTypeQuals) {
403     auto P = StringSwitch<bool*>(Key)
404                  .Case("const",    &Arg.mIsConst)
405                  .Case("restrict", &Arg.mIsRestrict)
406                  .Case("volatile", &Arg.mIsVolatile)
407                  .Case("pipe",     &Arg.mIsPipe)
408                  .Default(nullptr);
409     if (P)
410       *P = true;
411   }
412 }
413 
414 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
415   int HiddenArgNumBytes =
416       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
417 
418   if (!HiddenArgNumBytes)
419     return;
420 
421   auto &DL = Func.getParent()->getDataLayout();
422   auto Int64Ty = Type::getInt64Ty(Func.getContext());
423 
424   if (HiddenArgNumBytes >= 8)
425     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
426   if (HiddenArgNumBytes >= 16)
427     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
428   if (HiddenArgNumBytes >= 24)
429     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
430 
431   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
432                                       AMDGPUAS::GLOBAL_ADDRESS);
433 
434   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
435   // "none" argument.
436   if (HiddenArgNumBytes >= 32) {
437     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
438       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
439     else
440       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
441   }
442 
443   // Emit "default queue" and "completion action" arguments if enqueue kernel is
444   // used, otherwise emit dummy "none" arguments.
445   if (HiddenArgNumBytes >= 48) {
446     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
447       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
448       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
449     } else {
450       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
451       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
452     }
453   }
454 }
455 
456 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
457   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
458 }
459 
460 void MetadataStreamerV2::begin(const Module &Mod) {
461   emitVersion();
462   emitPrintf(Mod);
463 }
464 
465 void MetadataStreamerV2::end() {
466   std::string HSAMetadataString;
467   if (toString(HSAMetadata, HSAMetadataString))
468     return;
469 
470   if (DumpHSAMetadata)
471     dump(HSAMetadataString);
472   if (VerifyHSAMetadata)
473     verify(HSAMetadataString);
474 }
475 
476 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
477                                     const SIProgramInfo &ProgramInfo) {
478   auto &Func = MF.getFunction();
479   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
480     return;
481 
482   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
483   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
484 
485   HSAMetadata.mKernels.push_back(Kernel::Metadata());
486   auto &Kernel = HSAMetadata.mKernels.back();
487 
488   Kernel.mName = Func.getName();
489   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
490   emitKernelLanguage(Func);
491   emitKernelAttrs(Func);
492   emitKernelArgs(Func);
493   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
494   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
495 }
496 
497 //===----------------------------------------------------------------------===//
498 // HSAMetadataStreamerV3
499 //===----------------------------------------------------------------------===//
500 
501 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
502   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
503 }
504 
505 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
506   errs() << "AMDGPU HSA Metadata Parser Test: ";
507 
508   std::shared_ptr<msgpack::Node> FromHSAMetadataString =
509       std::make_shared<msgpack::MapNode>();
510 
511   yaml::Input YIn(HSAMetadataString);
512   YIn >> FromHSAMetadataString;
513   if (YIn.error()) {
514     errs() << "FAIL\n";
515     return;
516   }
517 
518   std::string ToHSAMetadataString;
519   raw_string_ostream StrOS(ToHSAMetadataString);
520   yaml::Output YOut(StrOS);
521   YOut << FromHSAMetadataString;
522 
523   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
524   if (HSAMetadataString != ToHSAMetadataString) {
525     errs() << "Original input: " << HSAMetadataString << '\n'
526            << "Produced output: " << StrOS.str() << '\n';
527   }
528 }
529 
530 Optional<StringRef>
531 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
532   return StringSwitch<Optional<StringRef>>(AccQual)
533       .Case("read_only", StringRef("read_only"))
534       .Case("write_only", StringRef("write_only"))
535       .Case("read_write", StringRef("read_write"))
536       .Default(None);
537 }
538 
539 Optional<StringRef>
540 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
541   switch (AddressSpace) {
542   case AMDGPUAS::PRIVATE_ADDRESS:
543     return StringRef("private");
544   case AMDGPUAS::GLOBAL_ADDRESS:
545     return StringRef("global");
546   case AMDGPUAS::CONSTANT_ADDRESS:
547     return StringRef("constant");
548   case AMDGPUAS::LOCAL_ADDRESS:
549     return StringRef("local");
550   case AMDGPUAS::FLAT_ADDRESS:
551     return StringRef("generic");
552   case AMDGPUAS::REGION_ADDRESS:
553     return StringRef("region");
554   default:
555     return None;
556   }
557 }
558 
559 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
560                                            StringRef BaseTypeName) const {
561   if (TypeQual.find("pipe") != StringRef::npos)
562     return "pipe";
563 
564   return StringSwitch<StringRef>(BaseTypeName)
565       .Case("image1d_t", "image")
566       .Case("image1d_array_t", "image")
567       .Case("image1d_buffer_t", "image")
568       .Case("image2d_t", "image")
569       .Case("image2d_array_t", "image")
570       .Case("image2d_array_depth_t", "image")
571       .Case("image2d_array_msaa_t", "image")
572       .Case("image2d_array_msaa_depth_t", "image")
573       .Case("image2d_depth_t", "image")
574       .Case("image2d_msaa_t", "image")
575       .Case("image2d_msaa_depth_t", "image")
576       .Case("image3d_t", "image")
577       .Case("sampler_t", "sampler")
578       .Case("queue_t", "queue")
579       .Default(isa<PointerType>(Ty)
580                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
581                           ? "dynamic_shared_pointer"
582                           : "global_buffer")
583                    : "by_value");
584 }
585 
586 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
587   switch (Ty->getTypeID()) {
588   case Type::IntegerTyID: {
589     auto Signed = !TypeName.startswith("u");
590     switch (Ty->getIntegerBitWidth()) {
591     case 8:
592       return Signed ? "i8" : "u8";
593     case 16:
594       return Signed ? "i16" : "u16";
595     case 32:
596       return Signed ? "i32" : "u32";
597     case 64:
598       return Signed ? "i64" : "u64";
599     default:
600       return "struct";
601     }
602   }
603   case Type::HalfTyID:
604     return "f16";
605   case Type::FloatTyID:
606     return "f32";
607   case Type::DoubleTyID:
608     return "f64";
609   case Type::PointerTyID:
610     return getValueType(Ty->getPointerElementType(), TypeName);
611   case Type::VectorTyID:
612     return getValueType(Ty->getVectorElementType(), TypeName);
613   default:
614     return "struct";
615   }
616 }
617 
618 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
619   switch (Ty->getTypeID()) {
620   case Type::IntegerTyID: {
621     if (!Signed)
622       return (Twine('u') + getTypeName(Ty, true)).str();
623 
624     auto BitWidth = Ty->getIntegerBitWidth();
625     switch (BitWidth) {
626     case 8:
627       return "char";
628     case 16:
629       return "short";
630     case 32:
631       return "int";
632     case 64:
633       return "long";
634     default:
635       return (Twine('i') + Twine(BitWidth)).str();
636     }
637   }
638   case Type::HalfTyID:
639     return "half";
640   case Type::FloatTyID:
641     return "float";
642   case Type::DoubleTyID:
643     return "double";
644   case Type::VectorTyID: {
645     auto VecTy = cast<VectorType>(Ty);
646     auto ElTy = VecTy->getElementType();
647     auto NumElements = VecTy->getVectorNumElements();
648     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
649   }
650   default:
651     return "unknown";
652   }
653 }
654 
655 std::shared_ptr<msgpack::ArrayNode>
656 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
657   auto Dims = std::make_shared<msgpack::ArrayNode>();
658   if (Node->getNumOperands() != 3)
659     return Dims;
660 
661   for (auto &Op : Node->operands())
662     Dims->push_back(std::make_shared<msgpack::ScalarNode>(
663         mdconst::extract<ConstantInt>(Op)->getZExtValue()));
664   return Dims;
665 }
666 
667 void MetadataStreamerV3::emitVersion() {
668   auto Version = std::make_shared<msgpack::ArrayNode>();
669   Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
670   Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
671   getRootMetadata("amdhsa.version") = std::move(Version);
672 }
673 
674 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
675   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
676   if (!Node)
677     return;
678 
679   auto Printf = std::make_shared<msgpack::ArrayNode>();
680   for (auto Op : Node->operands())
681     if (Op->getNumOperands())
682       Printf->push_back(std::make_shared<msgpack::ScalarNode>(
683           cast<MDString>(Op->getOperand(0))->getString()));
684   getRootMetadata("amdhsa.printf") = std::move(Printf);
685 }
686 
687 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
688                                             msgpack::MapNode &Kern) {
689   // TODO: What about other languages?
690   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
691   if (!Node || !Node->getNumOperands())
692     return;
693   auto Op0 = Node->getOperand(0);
694   if (Op0->getNumOperands() <= 1)
695     return;
696 
697   Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
698   auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
699   LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
700       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
701   LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
702       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
703   Kern[".language_version"] = std::move(LanguageVersion);
704 }
705 
706 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
707                                          msgpack::MapNode &Kern) {
708 
709   if (auto Node = Func.getMetadata("reqd_work_group_size"))
710     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
711   if (auto Node = Func.getMetadata("work_group_size_hint"))
712     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
713   if (auto Node = Func.getMetadata("vec_type_hint")) {
714     Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
715         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
716         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
717   }
718   if (Func.hasFnAttribute("runtime-handle")) {
719     Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
720         Func.getFnAttribute("runtime-handle").getValueAsString().str());
721   }
722 }
723 
724 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
725                                         msgpack::MapNode &Kern) {
726   unsigned Offset = 0;
727   auto Args = std::make_shared<msgpack::ArrayNode>();
728   for (auto &Arg : Func.args())
729     emitKernelArg(Arg, Offset, *Args);
730 
731   emitHiddenKernelArgs(Func, Offset, *Args);
732 
733   // TODO: What about other languages?
734   if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
735     auto &DL = Func.getParent()->getDataLayout();
736     auto Int64Ty = Type::getInt64Ty(Func.getContext());
737 
738     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
739     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
740     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
741 
742     auto Int8PtrTy =
743         Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
744 
745     // Emit "printf buffer" argument if printf is used, otherwise emit dummy
746     // "none" argument.
747     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
748       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
749     else
750       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
751 
752     // Emit "default queue" and "completion action" arguments if enqueue kernel
753     // is used, otherwise emit dummy "none" arguments.
754     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
755       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
756       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
757     } else {
758       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
759       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
760     }
761   }
762 
763   Kern[".args"] = std::move(Args);
764 }
765 
766 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
767                                        msgpack::ArrayNode &Args) {
768   auto Func = Arg.getParent();
769   auto ArgNo = Arg.getArgNo();
770   const MDNode *Node;
771 
772   StringRef Name;
773   Node = Func->getMetadata("kernel_arg_name");
774   if (Node && ArgNo < Node->getNumOperands())
775     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
776   else if (Arg.hasName())
777     Name = Arg.getName();
778 
779   StringRef TypeName;
780   Node = Func->getMetadata("kernel_arg_type");
781   if (Node && ArgNo < Node->getNumOperands())
782     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
783 
784   StringRef BaseTypeName;
785   Node = Func->getMetadata("kernel_arg_base_type");
786   if (Node && ArgNo < Node->getNumOperands())
787     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
788 
789   StringRef AccQual;
790   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
791       Arg.hasNoAliasAttr()) {
792     AccQual = "read_only";
793   } else {
794     Node = Func->getMetadata("kernel_arg_access_qual");
795     if (Node && ArgNo < Node->getNumOperands())
796       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
797   }
798 
799   StringRef TypeQual;
800   Node = Func->getMetadata("kernel_arg_type_qual");
801   if (Node && ArgNo < Node->getNumOperands())
802     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
803 
804   Type *Ty = Arg.getType();
805   const DataLayout &DL = Func->getParent()->getDataLayout();
806 
807   unsigned PointeeAlign = 0;
808   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
809     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
810       PointeeAlign = Arg.getParamAlignment();
811       if (PointeeAlign == 0)
812         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
813     }
814   }
815 
816   emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
817                 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
818                 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
819                 TypeQual);
820 }
821 
822 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
823                                        StringRef ValueKind, unsigned &Offset,
824                                        msgpack::ArrayNode &Args,
825                                        unsigned PointeeAlign, StringRef Name,
826                                        StringRef TypeName,
827                                        StringRef BaseTypeName,
828                                        StringRef AccQual, StringRef TypeQual) {
829   auto ArgPtr = std::make_shared<msgpack::MapNode>();
830   auto &Arg = *ArgPtr;
831 
832   if (!Name.empty())
833     Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
834   if (!TypeName.empty())
835     Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
836   auto Size = DL.getTypeAllocSize(Ty);
837   auto Align = DL.getABITypeAlignment(Ty);
838   Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
839   Offset = alignTo(Offset, Align);
840   Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
841   Offset += Size;
842   Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
843   Arg[".value_type"] =
844       std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
845   if (PointeeAlign)
846     Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
847 
848   if (auto PtrTy = dyn_cast<PointerType>(Ty))
849     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
850       Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
851 
852   if (auto AQ = getAccessQualifier(AccQual))
853     Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
854 
855   // TODO: Emit Arg[".actual_access"].
856 
857   SmallVector<StringRef, 1> SplitTypeQuals;
858   TypeQual.split(SplitTypeQuals, " ", -1, false);
859   for (StringRef Key : SplitTypeQuals) {
860     if (Key == "const")
861       Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
862     else if (Key == "restrict")
863       Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
864     else if (Key == "volatile")
865       Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
866     else if (Key == "pipe")
867       Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
868   }
869 
870   Args.push_back(std::move(ArgPtr));
871 }
872 
873 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
874                                               unsigned &Offset,
875                                               msgpack::ArrayNode &Args) {
876   int HiddenArgNumBytes =
877       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
878 
879   if (!HiddenArgNumBytes)
880     return;
881 
882   auto &DL = Func.getParent()->getDataLayout();
883   auto Int64Ty = Type::getInt64Ty(Func.getContext());
884 
885   if (HiddenArgNumBytes >= 8)
886     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
887   if (HiddenArgNumBytes >= 16)
888     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
889   if (HiddenArgNumBytes >= 24)
890     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
891 
892   auto Int8PtrTy =
893       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
894 
895   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
896   // "none" argument.
897   if (HiddenArgNumBytes >= 32) {
898     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
899       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
900     else
901       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
902   }
903 
904   // Emit "default queue" and "completion action" arguments if enqueue kernel is
905   // used, otherwise emit dummy "none" arguments.
906   if (HiddenArgNumBytes >= 48) {
907     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
908       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
909       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
910     } else {
911       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
912       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
913     }
914   }
915 }
916 
917 std::shared_ptr<msgpack::MapNode>
918 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
919                                       const SIProgramInfo &ProgramInfo) const {
920   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
921   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
922   const Function &F = MF.getFunction();
923 
924   auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
925   auto &Kern = *HSAKernelProps;
926 
927   unsigned MaxKernArgAlign;
928   Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
929       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
930   Kern[".group_segment_fixed_size"] =
931       std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
932   Kern[".private_segment_fixed_size"] =
933       std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
934   Kern[".kernarg_segment_align"] =
935       std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
936   Kern[".wavefront_size"] =
937       std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
938   Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
939   Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
940   Kern[".max_flat_workgroup_size"] =
941       std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
942   Kern[".sgpr_spill_count"] =
943       std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
944   Kern[".vgpr_spill_count"] =
945       std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
946 
947   return HSAKernelProps;
948 }
949 
950 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
951   return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
952 }
953 
954 void MetadataStreamerV3::begin(const Module &Mod) {
955   emitVersion();
956   emitPrintf(Mod);
957   getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
958 }
959 
960 void MetadataStreamerV3::end() {
961   std::string HSAMetadataString;
962   raw_string_ostream StrOS(HSAMetadataString);
963   yaml::Output YOut(StrOS);
964   YOut << HSAMetadataRoot;
965 
966   if (DumpHSAMetadata)
967     dump(StrOS.str());
968   if (VerifyHSAMetadata)
969     verify(StrOS.str());
970 }
971 
972 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
973                                     const SIProgramInfo &ProgramInfo) {
974   auto &Func = MF.getFunction();
975   auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
976 
977   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
978          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
979 
980   auto &KernelsNode = getRootMetadata("amdhsa.kernels");
981   auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
982 
983   {
984     auto &Kern = *KernelProps;
985     Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
986     Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
987         (Twine(Func.getName()) + Twine(".kd")).str());
988     emitKernelLanguage(Func, Kern);
989     emitKernelAttrs(Func, Kern);
990     emitKernelArgs(Func, Kern);
991   }
992 
993   Kernels->push_back(std::move(KernelProps));
994 }
995 
996 } // end namespace HSAMD
997 } // end namespace AMDGPU
998 } // end namespace llvm
999