1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44 }
45 
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47   errs() << "AMDGPU HSA Metadata Parser Test: ";
48 
49   HSAMD::Metadata FromHSAMetadataString;
50   if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
51     errs() << "FAIL\n";
52     return;
53   }
54 
55   std::string ToHSAMetadataString;
56   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57     errs() << "FAIL\n";
58     return;
59   }
60 
61   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62          << '\n';
63   if (HSAMetadataString != ToHSAMetadataString) {
64     errs() << "Original input: " << HSAMetadataString << '\n'
65            << "Produced output: " << ToHSAMetadataString << '\n';
66   }
67 }
68 
69 AccessQualifier
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71   if (AccQual.empty())
72     return AccessQualifier::Unknown;
73 
74   return StringSwitch<AccessQualifier>(AccQual)
75              .Case("read_only",  AccessQualifier::ReadOnly)
76              .Case("write_only", AccessQualifier::WriteOnly)
77              .Case("read_write", AccessQualifier::ReadWrite)
78              .Default(AccessQualifier::Default);
79 }
80 
81 AddressSpaceQualifier
82 MetadataStreamerV2::getAddressSpaceQualifier(
83     unsigned AddressSpace) const {
84   switch (AddressSpace) {
85   case AMDGPUAS::PRIVATE_ADDRESS:
86     return AddressSpaceQualifier::Private;
87   case AMDGPUAS::GLOBAL_ADDRESS:
88     return AddressSpaceQualifier::Global;
89   case AMDGPUAS::CONSTANT_ADDRESS:
90     return AddressSpaceQualifier::Constant;
91   case AMDGPUAS::LOCAL_ADDRESS:
92     return AddressSpaceQualifier::Local;
93   case AMDGPUAS::FLAT_ADDRESS:
94     return AddressSpaceQualifier::Generic;
95   case AMDGPUAS::REGION_ADDRESS:
96     return AddressSpaceQualifier::Region;
97   default:
98     return AddressSpaceQualifier::Unknown;
99   }
100 }
101 
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                            StringRef BaseTypeName) const {
104   if (TypeQual.find("pipe") != StringRef::npos)
105     return ValueKind::Pipe;
106 
107   return StringSwitch<ValueKind>(BaseTypeName)
108              .Case("image1d_t", ValueKind::Image)
109              .Case("image1d_array_t", ValueKind::Image)
110              .Case("image1d_buffer_t", ValueKind::Image)
111              .Case("image2d_t", ValueKind::Image)
112              .Case("image2d_array_t", ValueKind::Image)
113              .Case("image2d_array_depth_t", ValueKind::Image)
114              .Case("image2d_array_msaa_t", ValueKind::Image)
115              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116              .Case("image2d_depth_t", ValueKind::Image)
117              .Case("image2d_msaa_t", ValueKind::Image)
118              .Case("image2d_msaa_depth_t", ValueKind::Image)
119              .Case("image3d_t", ValueKind::Image)
120              .Case("sampler_t", ValueKind::Sampler)
121              .Case("queue_t", ValueKind::Queue)
122              .Default(isa<PointerType>(Ty) ?
123                           (Ty->getPointerAddressSpace() ==
124                            AMDGPUAS::LOCAL_ADDRESS ?
125                            ValueKind::DynamicSharedPointer :
126                            ValueKind::GlobalBuffer) :
127                       ValueKind::ByValue);
128 }
129 
130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131   switch (Ty->getTypeID()) {
132   case Type::IntegerTyID: {
133     auto Signed = !TypeName.startswith("u");
134     switch (Ty->getIntegerBitWidth()) {
135     case 8:
136       return Signed ? ValueType::I8 : ValueType::U8;
137     case 16:
138       return Signed ? ValueType::I16 : ValueType::U16;
139     case 32:
140       return Signed ? ValueType::I32 : ValueType::U32;
141     case 64:
142       return Signed ? ValueType::I64 : ValueType::U64;
143     default:
144       return ValueType::Struct;
145     }
146   }
147   case Type::HalfTyID:
148     return ValueType::F16;
149   case Type::FloatTyID:
150     return ValueType::F32;
151   case Type::DoubleTyID:
152     return ValueType::F64;
153   case Type::PointerTyID:
154     return getValueType(Ty->getPointerElementType(), TypeName);
155   case Type::FixedVectorTyID:
156     return getValueType(cast<VectorType>(Ty)->getElementType(), TypeName);
157   default:
158     return ValueType::Struct;
159   }
160 }
161 
162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163   switch (Ty->getTypeID()) {
164   case Type::IntegerTyID: {
165     if (!Signed)
166       return (Twine('u') + getTypeName(Ty, true)).str();
167 
168     auto BitWidth = Ty->getIntegerBitWidth();
169     switch (BitWidth) {
170     case 8:
171       return "char";
172     case 16:
173       return "short";
174     case 32:
175       return "int";
176     case 64:
177       return "long";
178     default:
179       return (Twine('i') + Twine(BitWidth)).str();
180     }
181   }
182   case Type::HalfTyID:
183     return "half";
184   case Type::FloatTyID:
185     return "float";
186   case Type::DoubleTyID:
187     return "double";
188   case Type::FixedVectorTyID: {
189     auto VecTy = cast<FixedVectorType>(Ty);
190     auto ElTy = VecTy->getElementType();
191     auto NumElements = VecTy->getNumElements();
192     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193   }
194   default:
195     return "unknown";
196   }
197 }
198 
199 std::vector<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201   std::vector<uint32_t> Dims;
202   if (Node->getNumOperands() != 3)
203     return Dims;
204 
205   for (auto &Op : Node->operands())
206     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207   return Dims;
208 }
209 
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212                                     const SIProgramInfo &ProgramInfo) const {
213   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216   const Function &F = MF.getFunction();
217 
218   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219          F.getCallingConv() == CallingConv::SPIR_KERNEL);
220 
221   Align MaxKernArgAlign;
222   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223                                                                MaxKernArgAlign);
224   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226   HSACodeProps.mKernargSegmentAlign =
227       std::max(MaxKernArgAlign, Align(4)).value();
228   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
229   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
230   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
231   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
232   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
233   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
234   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
235   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
236 
237   return HSACodeProps;
238 }
239 
240 Kernel::DebugProps::Metadata
241 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
242                                      const SIProgramInfo &ProgramInfo) const {
243   return HSAMD::Kernel::DebugProps::Metadata();
244 }
245 
246 void MetadataStreamerV2::emitVersion() {
247   auto &Version = HSAMetadata.mVersion;
248 
249   Version.push_back(VersionMajor);
250   Version.push_back(VersionMinor);
251 }
252 
253 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
254   auto &Printf = HSAMetadata.mPrintf;
255 
256   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
257   if (!Node)
258     return;
259 
260   for (auto Op : Node->operands())
261     if (Op->getNumOperands())
262       Printf.push_back(
263           std::string(cast<MDString>(Op->getOperand(0))->getString()));
264 }
265 
266 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
267   auto &Kernel = HSAMetadata.mKernels.back();
268 
269   // TODO: What about other languages?
270   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
271   if (!Node || !Node->getNumOperands())
272     return;
273   auto Op0 = Node->getOperand(0);
274   if (Op0->getNumOperands() <= 1)
275     return;
276 
277   Kernel.mLanguage = "OpenCL C";
278   Kernel.mLanguageVersion.push_back(
279       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
280   Kernel.mLanguageVersion.push_back(
281       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
282 }
283 
284 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
285   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
286 
287   if (auto Node = Func.getMetadata("reqd_work_group_size"))
288     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
289   if (auto Node = Func.getMetadata("work_group_size_hint"))
290     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
291   if (auto Node = Func.getMetadata("vec_type_hint")) {
292     Attrs.mVecTypeHint = getTypeName(
293         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
294         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
295   }
296   if (Func.hasFnAttribute("runtime-handle")) {
297     Attrs.mRuntimeHandle =
298         Func.getFnAttribute("runtime-handle").getValueAsString().str();
299   }
300 }
301 
302 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
303   for (auto &Arg : Func.args())
304     emitKernelArg(Arg);
305 
306   emitHiddenKernelArgs(Func);
307 }
308 
309 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
310   auto Func = Arg.getParent();
311   auto ArgNo = Arg.getArgNo();
312   const MDNode *Node;
313 
314   StringRef Name;
315   Node = Func->getMetadata("kernel_arg_name");
316   if (Node && ArgNo < Node->getNumOperands())
317     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
318   else if (Arg.hasName())
319     Name = Arg.getName();
320 
321   StringRef TypeName;
322   Node = Func->getMetadata("kernel_arg_type");
323   if (Node && ArgNo < Node->getNumOperands())
324     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
325 
326   StringRef BaseTypeName;
327   Node = Func->getMetadata("kernel_arg_base_type");
328   if (Node && ArgNo < Node->getNumOperands())
329     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
330 
331   StringRef AccQual;
332   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
333       Arg.hasNoAliasAttr()) {
334     AccQual = "read_only";
335   } else {
336     Node = Func->getMetadata("kernel_arg_access_qual");
337     if (Node && ArgNo < Node->getNumOperands())
338       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
339   }
340 
341   StringRef TypeQual;
342   Node = Func->getMetadata("kernel_arg_type_qual");
343   if (Node && ArgNo < Node->getNumOperands())
344     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
345 
346   Type *Ty = Arg.getType();
347   const DataLayout &DL = Func->getParent()->getDataLayout();
348 
349   unsigned PointeeAlign = 0;
350   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
351     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
352       PointeeAlign = Arg.getParamAlignment();
353       if (PointeeAlign == 0)
354         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
355     }
356   }
357 
358   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
359                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
360 }
361 
362 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
363                                        ValueKind ValueKind,
364                                        unsigned PointeeAlign, StringRef Name,
365                                        StringRef TypeName,
366                                        StringRef BaseTypeName,
367                                        StringRef AccQual, StringRef TypeQual) {
368   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
369   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
370 
371   Arg.mName = std::string(Name);
372   Arg.mTypeName = std::string(TypeName);
373   Arg.mSize = DL.getTypeAllocSize(Ty);
374   Arg.mAlign = DL.getABITypeAlignment(Ty);
375   Arg.mValueKind = ValueKind;
376   Arg.mValueType = getValueType(Ty, BaseTypeName);
377   Arg.mPointeeAlign = PointeeAlign;
378 
379   if (auto PtrTy = dyn_cast<PointerType>(Ty))
380     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
381 
382   Arg.mAccQual = getAccessQualifier(AccQual);
383 
384   // TODO: Emit Arg.mActualAccQual.
385 
386   SmallVector<StringRef, 1> SplitTypeQuals;
387   TypeQual.split(SplitTypeQuals, " ", -1, false);
388   for (StringRef Key : SplitTypeQuals) {
389     auto P = StringSwitch<bool*>(Key)
390                  .Case("const",    &Arg.mIsConst)
391                  .Case("restrict", &Arg.mIsRestrict)
392                  .Case("volatile", &Arg.mIsVolatile)
393                  .Case("pipe",     &Arg.mIsPipe)
394                  .Default(nullptr);
395     if (P)
396       *P = true;
397   }
398 }
399 
400 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
401   int HiddenArgNumBytes =
402       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
403 
404   if (!HiddenArgNumBytes)
405     return;
406 
407   auto &DL = Func.getParent()->getDataLayout();
408   auto Int64Ty = Type::getInt64Ty(Func.getContext());
409 
410   if (HiddenArgNumBytes >= 8)
411     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
412   if (HiddenArgNumBytes >= 16)
413     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
414   if (HiddenArgNumBytes >= 24)
415     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
416 
417   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
418                                       AMDGPUAS::GLOBAL_ADDRESS);
419 
420   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
421   // "none" argument.
422   if (HiddenArgNumBytes >= 32) {
423     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
424       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
425     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
426       // The printf runtime binding pass should have ensured that hostcall and
427       // printf are not used in the same module.
428       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
429       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
430     } else
431       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
432   }
433 
434   // Emit "default queue" and "completion action" arguments if enqueue kernel is
435   // used, otherwise emit dummy "none" arguments.
436   if (HiddenArgNumBytes >= 48) {
437     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
438       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
439       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
440     } else {
441       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
442       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
443     }
444   }
445 
446   // Emit the pointer argument for multi-grid object.
447   if (HiddenArgNumBytes >= 56)
448     emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
449 }
450 
451 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
452   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
453 }
454 
455 void MetadataStreamerV2::begin(const Module &Mod) {
456   emitVersion();
457   emitPrintf(Mod);
458 }
459 
460 void MetadataStreamerV2::end() {
461   std::string HSAMetadataString;
462   if (toString(HSAMetadata, HSAMetadataString))
463     return;
464 
465   if (DumpHSAMetadata)
466     dump(HSAMetadataString);
467   if (VerifyHSAMetadata)
468     verify(HSAMetadataString);
469 }
470 
471 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
472                                     const SIProgramInfo &ProgramInfo) {
473   auto &Func = MF.getFunction();
474   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
475     return;
476 
477   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
478   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
479 
480   HSAMetadata.mKernels.push_back(Kernel::Metadata());
481   auto &Kernel = HSAMetadata.mKernels.back();
482 
483   Kernel.mName = std::string(Func.getName());
484   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
485   emitKernelLanguage(Func);
486   emitKernelAttrs(Func);
487   emitKernelArgs(Func);
488   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
489   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
490 }
491 
492 //===----------------------------------------------------------------------===//
493 // HSAMetadataStreamerV3
494 //===----------------------------------------------------------------------===//
495 
496 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
497   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
498 }
499 
500 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
501   errs() << "AMDGPU HSA Metadata Parser Test: ";
502 
503   msgpack::Document FromHSAMetadataString;
504 
505   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
506     errs() << "FAIL\n";
507     return;
508   }
509 
510   std::string ToHSAMetadataString;
511   raw_string_ostream StrOS(ToHSAMetadataString);
512   FromHSAMetadataString.toYAML(StrOS);
513 
514   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
515   if (HSAMetadataString != ToHSAMetadataString) {
516     errs() << "Original input: " << HSAMetadataString << '\n'
517            << "Produced output: " << StrOS.str() << '\n';
518   }
519 }
520 
521 Optional<StringRef>
522 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
523   return StringSwitch<Optional<StringRef>>(AccQual)
524       .Case("read_only", StringRef("read_only"))
525       .Case("write_only", StringRef("write_only"))
526       .Case("read_write", StringRef("read_write"))
527       .Default(None);
528 }
529 
530 Optional<StringRef>
531 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
532   switch (AddressSpace) {
533   case AMDGPUAS::PRIVATE_ADDRESS:
534     return StringRef("private");
535   case AMDGPUAS::GLOBAL_ADDRESS:
536     return StringRef("global");
537   case AMDGPUAS::CONSTANT_ADDRESS:
538     return StringRef("constant");
539   case AMDGPUAS::LOCAL_ADDRESS:
540     return StringRef("local");
541   case AMDGPUAS::FLAT_ADDRESS:
542     return StringRef("generic");
543   case AMDGPUAS::REGION_ADDRESS:
544     return StringRef("region");
545   default:
546     return None;
547   }
548 }
549 
550 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
551                                            StringRef BaseTypeName) const {
552   if (TypeQual.find("pipe") != StringRef::npos)
553     return "pipe";
554 
555   return StringSwitch<StringRef>(BaseTypeName)
556       .Case("image1d_t", "image")
557       .Case("image1d_array_t", "image")
558       .Case("image1d_buffer_t", "image")
559       .Case("image2d_t", "image")
560       .Case("image2d_array_t", "image")
561       .Case("image2d_array_depth_t", "image")
562       .Case("image2d_array_msaa_t", "image")
563       .Case("image2d_array_msaa_depth_t", "image")
564       .Case("image2d_depth_t", "image")
565       .Case("image2d_msaa_t", "image")
566       .Case("image2d_msaa_depth_t", "image")
567       .Case("image3d_t", "image")
568       .Case("sampler_t", "sampler")
569       .Case("queue_t", "queue")
570       .Default(isa<PointerType>(Ty)
571                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
572                           ? "dynamic_shared_pointer"
573                           : "global_buffer")
574                    : "by_value");
575 }
576 
577 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
578   switch (Ty->getTypeID()) {
579   case Type::IntegerTyID: {
580     auto Signed = !TypeName.startswith("u");
581     switch (Ty->getIntegerBitWidth()) {
582     case 8:
583       return Signed ? "i8" : "u8";
584     case 16:
585       return Signed ? "i16" : "u16";
586     case 32:
587       return Signed ? "i32" : "u32";
588     case 64:
589       return Signed ? "i64" : "u64";
590     default:
591       return "struct";
592     }
593   }
594   case Type::HalfTyID:
595     return "f16";
596   case Type::FloatTyID:
597     return "f32";
598   case Type::DoubleTyID:
599     return "f64";
600   case Type::PointerTyID:
601     return getValueType(Ty->getPointerElementType(), TypeName);
602   case Type::FixedVectorTyID:
603     return getValueType(cast<VectorType>(Ty)->getElementType(), TypeName);
604   default:
605     return "struct";
606   }
607 }
608 
609 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
610   switch (Ty->getTypeID()) {
611   case Type::IntegerTyID: {
612     if (!Signed)
613       return (Twine('u') + getTypeName(Ty, true)).str();
614 
615     auto BitWidth = Ty->getIntegerBitWidth();
616     switch (BitWidth) {
617     case 8:
618       return "char";
619     case 16:
620       return "short";
621     case 32:
622       return "int";
623     case 64:
624       return "long";
625     default:
626       return (Twine('i') + Twine(BitWidth)).str();
627     }
628   }
629   case Type::HalfTyID:
630     return "half";
631   case Type::FloatTyID:
632     return "float";
633   case Type::DoubleTyID:
634     return "double";
635   case Type::FixedVectorTyID: {
636     auto VecTy = cast<FixedVectorType>(Ty);
637     auto ElTy = VecTy->getElementType();
638     auto NumElements = VecTy->getNumElements();
639     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
640   }
641   default:
642     return "unknown";
643   }
644 }
645 
646 msgpack::ArrayDocNode
647 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
648   auto Dims = HSAMetadataDoc->getArrayNode();
649   if (Node->getNumOperands() != 3)
650     return Dims;
651 
652   for (auto &Op : Node->operands())
653     Dims.push_back(Dims.getDocument()->getNode(
654         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
655   return Dims;
656 }
657 
658 void MetadataStreamerV3::emitVersion() {
659   auto Version = HSAMetadataDoc->getArrayNode();
660   Version.push_back(Version.getDocument()->getNode(VersionMajor));
661   Version.push_back(Version.getDocument()->getNode(VersionMinor));
662   getRootMetadata("amdhsa.version") = Version;
663 }
664 
665 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
666   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
667   if (!Node)
668     return;
669 
670   auto Printf = HSAMetadataDoc->getArrayNode();
671   for (auto Op : Node->operands())
672     if (Op->getNumOperands())
673       Printf.push_back(Printf.getDocument()->getNode(
674           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
675   getRootMetadata("amdhsa.printf") = Printf;
676 }
677 
678 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
679                                             msgpack::MapDocNode Kern) {
680   // TODO: What about other languages?
681   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
682   if (!Node || !Node->getNumOperands())
683     return;
684   auto Op0 = Node->getOperand(0);
685   if (Op0->getNumOperands() <= 1)
686     return;
687 
688   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
689   auto LanguageVersion = Kern.getDocument()->getArrayNode();
690   LanguageVersion.push_back(Kern.getDocument()->getNode(
691       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
692   LanguageVersion.push_back(Kern.getDocument()->getNode(
693       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
694   Kern[".language_version"] = LanguageVersion;
695 }
696 
697 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
698                                          msgpack::MapDocNode Kern) {
699 
700   if (auto Node = Func.getMetadata("reqd_work_group_size"))
701     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
702   if (auto Node = Func.getMetadata("work_group_size_hint"))
703     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
704   if (auto Node = Func.getMetadata("vec_type_hint")) {
705     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
706         getTypeName(
707             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
708             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
709         /*Copy=*/true);
710   }
711   if (Func.hasFnAttribute("runtime-handle")) {
712     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
713         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
714         /*Copy=*/true);
715   }
716 }
717 
718 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
719                                         msgpack::MapDocNode Kern) {
720   unsigned Offset = 0;
721   auto Args = HSAMetadataDoc->getArrayNode();
722   for (auto &Arg : Func.args())
723     emitKernelArg(Arg, Offset, Args);
724 
725   emitHiddenKernelArgs(Func, Offset, Args);
726 
727   Kern[".args"] = Args;
728 }
729 
730 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
731                                        msgpack::ArrayDocNode Args) {
732   auto Func = Arg.getParent();
733   auto ArgNo = Arg.getArgNo();
734   const MDNode *Node;
735 
736   StringRef Name;
737   Node = Func->getMetadata("kernel_arg_name");
738   if (Node && ArgNo < Node->getNumOperands())
739     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
740   else if (Arg.hasName())
741     Name = Arg.getName();
742 
743   StringRef TypeName;
744   Node = Func->getMetadata("kernel_arg_type");
745   if (Node && ArgNo < Node->getNumOperands())
746     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
747 
748   StringRef BaseTypeName;
749   Node = Func->getMetadata("kernel_arg_base_type");
750   if (Node && ArgNo < Node->getNumOperands())
751     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
752 
753   StringRef AccQual;
754   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
755       Arg.hasNoAliasAttr()) {
756     AccQual = "read_only";
757   } else {
758     Node = Func->getMetadata("kernel_arg_access_qual");
759     if (Node && ArgNo < Node->getNumOperands())
760       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
761   }
762 
763   StringRef TypeQual;
764   Node = Func->getMetadata("kernel_arg_type_qual");
765   if (Node && ArgNo < Node->getNumOperands())
766     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
767 
768   Type *Ty = Arg.getType();
769   const DataLayout &DL = Func->getParent()->getDataLayout();
770 
771   unsigned PointeeAlign = 0;
772   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
773     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
774       PointeeAlign = Arg.getParamAlignment();
775       if (PointeeAlign == 0)
776         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
777     }
778   }
779 
780   emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
781                 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
782                 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
783                 TypeQual);
784 }
785 
786 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
787                                        StringRef ValueKind, unsigned &Offset,
788                                        msgpack::ArrayDocNode Args,
789                                        unsigned PointeeAlign, StringRef Name,
790                                        StringRef TypeName,
791                                        StringRef BaseTypeName,
792                                        StringRef AccQual, StringRef TypeQual) {
793   auto Arg = Args.getDocument()->getMapNode();
794 
795   if (!Name.empty())
796     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
797   if (!TypeName.empty())
798     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
799   auto Size = DL.getTypeAllocSize(Ty);
800   auto Align = DL.getABITypeAlignment(Ty);
801   Arg[".size"] = Arg.getDocument()->getNode(Size);
802   Offset = alignTo(Offset, Align);
803   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
804   Offset += Size;
805   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
806   Arg[".value_type"] =
807       Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
808   if (PointeeAlign)
809     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
810 
811   if (auto PtrTy = dyn_cast<PointerType>(Ty))
812     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
813       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
814 
815   if (auto AQ = getAccessQualifier(AccQual))
816     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
817 
818   // TODO: Emit Arg[".actual_access"].
819 
820   SmallVector<StringRef, 1> SplitTypeQuals;
821   TypeQual.split(SplitTypeQuals, " ", -1, false);
822   for (StringRef Key : SplitTypeQuals) {
823     if (Key == "const")
824       Arg[".is_const"] = Arg.getDocument()->getNode(true);
825     else if (Key == "restrict")
826       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
827     else if (Key == "volatile")
828       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
829     else if (Key == "pipe")
830       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
831   }
832 
833   Args.push_back(Arg);
834 }
835 
836 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
837                                               unsigned &Offset,
838                                               msgpack::ArrayDocNode Args) {
839   int HiddenArgNumBytes =
840       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
841 
842   if (!HiddenArgNumBytes)
843     return;
844 
845   auto &DL = Func.getParent()->getDataLayout();
846   auto Int64Ty = Type::getInt64Ty(Func.getContext());
847 
848   if (HiddenArgNumBytes >= 8)
849     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
850   if (HiddenArgNumBytes >= 16)
851     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
852   if (HiddenArgNumBytes >= 24)
853     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
854 
855   auto Int8PtrTy =
856       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
857 
858   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
859   // "none" argument.
860   if (HiddenArgNumBytes >= 32) {
861     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
862       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
863     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
864       // The printf runtime binding pass should have ensured that hostcall and
865       // printf are not used in the same module.
866       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
867       emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
868     } else
869       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
870   }
871 
872   // Emit "default queue" and "completion action" arguments if enqueue kernel is
873   // used, otherwise emit dummy "none" arguments.
874   if (HiddenArgNumBytes >= 48) {
875     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
876       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
877       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
878     } else {
879       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
880       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
881     }
882   }
883 
884   // Emit the pointer argument for multi-grid object.
885   if (HiddenArgNumBytes >= 56)
886     emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
887 }
888 
889 msgpack::MapDocNode
890 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
891                                       const SIProgramInfo &ProgramInfo) const {
892   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
893   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
894   const Function &F = MF.getFunction();
895 
896   auto Kern = HSAMetadataDoc->getMapNode();
897 
898   Align MaxKernArgAlign;
899   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
900       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
901   Kern[".group_segment_fixed_size"] =
902       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
903   Kern[".private_segment_fixed_size"] =
904       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
905   Kern[".kernarg_segment_align"] =
906       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
907   Kern[".wavefront_size"] =
908       Kern.getDocument()->getNode(STM.getWavefrontSize());
909   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
910   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
911   Kern[".max_flat_workgroup_size"] =
912       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
913   Kern[".sgpr_spill_count"] =
914       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
915   Kern[".vgpr_spill_count"] =
916       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
917 
918   return Kern;
919 }
920 
921 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
922   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
923 }
924 
925 void MetadataStreamerV3::begin(const Module &Mod) {
926   emitVersion();
927   emitPrintf(Mod);
928   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
929 }
930 
931 void MetadataStreamerV3::end() {
932   std::string HSAMetadataString;
933   raw_string_ostream StrOS(HSAMetadataString);
934   HSAMetadataDoc->toYAML(StrOS);
935 
936   if (DumpHSAMetadata)
937     dump(StrOS.str());
938   if (VerifyHSAMetadata)
939     verify(StrOS.str());
940 }
941 
942 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
943                                     const SIProgramInfo &ProgramInfo) {
944   auto &Func = MF.getFunction();
945   auto Kern = getHSAKernelProps(MF, ProgramInfo);
946 
947   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
948          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
949 
950   auto Kernels =
951       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
952 
953   {
954     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
955     Kern[".symbol"] = Kern.getDocument()->getNode(
956         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
957     emitKernelLanguage(Func, Kern);
958     emitKernelAttrs(Func, Kern);
959     emitKernelArgs(Func, Kern);
960   }
961 
962   Kernels.push_back(Kern);
963 }
964 
965 } // end namespace HSAMD
966 } // end namespace AMDGPU
967 } // end namespace llvm
968