1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44 }
45 
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47   errs() << "AMDGPU HSA Metadata Parser Test: ";
48 
49   HSAMD::Metadata FromHSAMetadataString;
50   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
51     errs() << "FAIL\n";
52     return;
53   }
54 
55   std::string ToHSAMetadataString;
56   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57     errs() << "FAIL\n";
58     return;
59   }
60 
61   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62          << '\n';
63   if (HSAMetadataString != ToHSAMetadataString) {
64     errs() << "Original input: " << HSAMetadataString << '\n'
65            << "Produced output: " << ToHSAMetadataString << '\n';
66   }
67 }
68 
69 AccessQualifier
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71   if (AccQual.empty())
72     return AccessQualifier::Unknown;
73 
74   return StringSwitch<AccessQualifier>(AccQual)
75              .Case("read_only",  AccessQualifier::ReadOnly)
76              .Case("write_only", AccessQualifier::WriteOnly)
77              .Case("read_write", AccessQualifier::ReadWrite)
78              .Default(AccessQualifier::Default);
79 }
80 
81 AddressSpaceQualifier
82 MetadataStreamerV2::getAddressSpaceQualifier(
83     unsigned AddressSpace) const {
84   switch (AddressSpace) {
85   case AMDGPUAS::PRIVATE_ADDRESS:
86     return AddressSpaceQualifier::Private;
87   case AMDGPUAS::GLOBAL_ADDRESS:
88     return AddressSpaceQualifier::Global;
89   case AMDGPUAS::CONSTANT_ADDRESS:
90     return AddressSpaceQualifier::Constant;
91   case AMDGPUAS::LOCAL_ADDRESS:
92     return AddressSpaceQualifier::Local;
93   case AMDGPUAS::FLAT_ADDRESS:
94     return AddressSpaceQualifier::Generic;
95   case AMDGPUAS::REGION_ADDRESS:
96     return AddressSpaceQualifier::Region;
97   default:
98     return AddressSpaceQualifier::Unknown;
99   }
100 }
101 
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                            StringRef BaseTypeName) const {
104   if (TypeQual.find("pipe") != StringRef::npos)
105     return ValueKind::Pipe;
106 
107   return StringSwitch<ValueKind>(BaseTypeName)
108              .Case("image1d_t", ValueKind::Image)
109              .Case("image1d_array_t", ValueKind::Image)
110              .Case("image1d_buffer_t", ValueKind::Image)
111              .Case("image2d_t", ValueKind::Image)
112              .Case("image2d_array_t", ValueKind::Image)
113              .Case("image2d_array_depth_t", ValueKind::Image)
114              .Case("image2d_array_msaa_t", ValueKind::Image)
115              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116              .Case("image2d_depth_t", ValueKind::Image)
117              .Case("image2d_msaa_t", ValueKind::Image)
118              .Case("image2d_msaa_depth_t", ValueKind::Image)
119              .Case("image3d_t", ValueKind::Image)
120              .Case("sampler_t", ValueKind::Sampler)
121              .Case("queue_t", ValueKind::Queue)
122              .Default(isa<PointerType>(Ty) ?
123                           (Ty->getPointerAddressSpace() ==
124                            AMDGPUAS::LOCAL_ADDRESS ?
125                            ValueKind::DynamicSharedPointer :
126                            ValueKind::GlobalBuffer) :
127                       ValueKind::ByValue);
128 }
129 
130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131   switch (Ty->getTypeID()) {
132   case Type::IntegerTyID: {
133     auto Signed = !TypeName.startswith("u");
134     switch (Ty->getIntegerBitWidth()) {
135     case 8:
136       return Signed ? ValueType::I8 : ValueType::U8;
137     case 16:
138       return Signed ? ValueType::I16 : ValueType::U16;
139     case 32:
140       return Signed ? ValueType::I32 : ValueType::U32;
141     case 64:
142       return Signed ? ValueType::I64 : ValueType::U64;
143     default:
144       return ValueType::Struct;
145     }
146   }
147   case Type::HalfTyID:
148     return ValueType::F16;
149   case Type::FloatTyID:
150     return ValueType::F32;
151   case Type::DoubleTyID:
152     return ValueType::F64;
153   case Type::PointerTyID:
154     return getValueType(Ty->getPointerElementType(), TypeName);
155   case Type::VectorTyID:
156     return getValueType(Ty->getVectorElementType(), TypeName);
157   default:
158     return ValueType::Struct;
159   }
160 }
161 
162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163   switch (Ty->getTypeID()) {
164   case Type::IntegerTyID: {
165     if (!Signed)
166       return (Twine('u') + getTypeName(Ty, true)).str();
167 
168     auto BitWidth = Ty->getIntegerBitWidth();
169     switch (BitWidth) {
170     case 8:
171       return "char";
172     case 16:
173       return "short";
174     case 32:
175       return "int";
176     case 64:
177       return "long";
178     default:
179       return (Twine('i') + Twine(BitWidth)).str();
180     }
181   }
182   case Type::HalfTyID:
183     return "half";
184   case Type::FloatTyID:
185     return "float";
186   case Type::DoubleTyID:
187     return "double";
188   case Type::VectorTyID: {
189     auto VecTy = cast<VectorType>(Ty);
190     auto ElTy = VecTy->getElementType();
191     auto NumElements = VecTy->getVectorNumElements();
192     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193   }
194   default:
195     return "unknown";
196   }
197 }
198 
199 std::vector<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201   std::vector<uint32_t> Dims;
202   if (Node->getNumOperands() != 3)
203     return Dims;
204 
205   for (auto &Op : Node->operands())
206     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207   return Dims;
208 }
209 
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212                                     const SIProgramInfo &ProgramInfo) const {
213   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216   const Function &F = MF.getFunction();
217 
218   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219          F.getCallingConv() == CallingConv::SPIR_KERNEL);
220 
221   unsigned MaxKernArgAlign;
222   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223                                                                MaxKernArgAlign);
224   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226   HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
227   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
228   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
229   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
230   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
231   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
232   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
233   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
234   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
235 
236   return HSACodeProps;
237 }
238 
239 Kernel::DebugProps::Metadata
240 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
241                                      const SIProgramInfo &ProgramInfo) const {
242   return HSAMD::Kernel::DebugProps::Metadata();
243 }
244 
245 void MetadataStreamerV2::emitVersion() {
246   auto &Version = HSAMetadata.mVersion;
247 
248   Version.push_back(VersionMajor);
249   Version.push_back(VersionMinor);
250 }
251 
252 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
253   auto &Printf = HSAMetadata.mPrintf;
254 
255   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
256   if (!Node)
257     return;
258 
259   for (auto Op : Node->operands())
260     if (Op->getNumOperands())
261       Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
262 }
263 
264 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
265   auto &Kernel = HSAMetadata.mKernels.back();
266 
267   // TODO: What about other languages?
268   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
269   if (!Node || !Node->getNumOperands())
270     return;
271   auto Op0 = Node->getOperand(0);
272   if (Op0->getNumOperands() <= 1)
273     return;
274 
275   Kernel.mLanguage = "OpenCL C";
276   Kernel.mLanguageVersion.push_back(
277       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
278   Kernel.mLanguageVersion.push_back(
279       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
280 }
281 
282 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
283   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
284 
285   if (auto Node = Func.getMetadata("reqd_work_group_size"))
286     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
287   if (auto Node = Func.getMetadata("work_group_size_hint"))
288     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
289   if (auto Node = Func.getMetadata("vec_type_hint")) {
290     Attrs.mVecTypeHint = getTypeName(
291         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
292         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
293   }
294   if (Func.hasFnAttribute("runtime-handle")) {
295     Attrs.mRuntimeHandle =
296         Func.getFnAttribute("runtime-handle").getValueAsString().str();
297   }
298 }
299 
300 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
301   for (auto &Arg : Func.args())
302     emitKernelArg(Arg);
303 
304   emitHiddenKernelArgs(Func);
305 }
306 
307 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
308   auto Func = Arg.getParent();
309   auto ArgNo = Arg.getArgNo();
310   const MDNode *Node;
311 
312   StringRef Name;
313   Node = Func->getMetadata("kernel_arg_name");
314   if (Node && ArgNo < Node->getNumOperands())
315     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
316   else if (Arg.hasName())
317     Name = Arg.getName();
318 
319   StringRef TypeName;
320   Node = Func->getMetadata("kernel_arg_type");
321   if (Node && ArgNo < Node->getNumOperands())
322     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
323 
324   StringRef BaseTypeName;
325   Node = Func->getMetadata("kernel_arg_base_type");
326   if (Node && ArgNo < Node->getNumOperands())
327     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
328 
329   StringRef AccQual;
330   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
331       Arg.hasNoAliasAttr()) {
332     AccQual = "read_only";
333   } else {
334     Node = Func->getMetadata("kernel_arg_access_qual");
335     if (Node && ArgNo < Node->getNumOperands())
336       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
337   }
338 
339   StringRef TypeQual;
340   Node = Func->getMetadata("kernel_arg_type_qual");
341   if (Node && ArgNo < Node->getNumOperands())
342     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
343 
344   Type *Ty = Arg.getType();
345   const DataLayout &DL = Func->getParent()->getDataLayout();
346 
347   unsigned PointeeAlign = 0;
348   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
349     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
350       PointeeAlign = Arg.getParamAlignment();
351       if (PointeeAlign == 0)
352         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
353     }
354   }
355 
356   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
357                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
358 }
359 
360 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
361                                        ValueKind ValueKind,
362                                        unsigned PointeeAlign, StringRef Name,
363                                        StringRef TypeName,
364                                        StringRef BaseTypeName,
365                                        StringRef AccQual, StringRef TypeQual) {
366   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
367   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
368 
369   Arg.mName = Name;
370   Arg.mTypeName = TypeName;
371   Arg.mSize = DL.getTypeAllocSize(Ty);
372   Arg.mAlign = DL.getABITypeAlignment(Ty);
373   Arg.mValueKind = ValueKind;
374   Arg.mValueType = getValueType(Ty, BaseTypeName);
375   Arg.mPointeeAlign = PointeeAlign;
376 
377   if (auto PtrTy = dyn_cast<PointerType>(Ty))
378     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
379 
380   Arg.mAccQual = getAccessQualifier(AccQual);
381 
382   // TODO: Emit Arg.mActualAccQual.
383 
384   SmallVector<StringRef, 1> SplitTypeQuals;
385   TypeQual.split(SplitTypeQuals, " ", -1, false);
386   for (StringRef Key : SplitTypeQuals) {
387     auto P = StringSwitch<bool*>(Key)
388                  .Case("const",    &Arg.mIsConst)
389                  .Case("restrict", &Arg.mIsRestrict)
390                  .Case("volatile", &Arg.mIsVolatile)
391                  .Case("pipe",     &Arg.mIsPipe)
392                  .Default(nullptr);
393     if (P)
394       *P = true;
395   }
396 }
397 
398 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
399   int HiddenArgNumBytes =
400       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
401 
402   if (!HiddenArgNumBytes)
403     return;
404 
405   auto &DL = Func.getParent()->getDataLayout();
406   auto Int64Ty = Type::getInt64Ty(Func.getContext());
407 
408   if (HiddenArgNumBytes >= 8)
409     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
410   if (HiddenArgNumBytes >= 16)
411     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
412   if (HiddenArgNumBytes >= 24)
413     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
414 
415   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
416                                       AMDGPUAS::GLOBAL_ADDRESS);
417 
418   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
419   // "none" argument.
420   if (HiddenArgNumBytes >= 32) {
421     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
422       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
423     else
424       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
425   }
426 
427   // Emit "default queue" and "completion action" arguments if enqueue kernel is
428   // used, otherwise emit dummy "none" arguments.
429   if (HiddenArgNumBytes >= 48) {
430     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
431       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
432       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
433     } else {
434       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
435       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
436     }
437   }
438 }
439 
440 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
441   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
442 }
443 
444 void MetadataStreamerV2::begin(const Module &Mod) {
445   emitVersion();
446   emitPrintf(Mod);
447 }
448 
449 void MetadataStreamerV2::end() {
450   std::string HSAMetadataString;
451   if (toString(HSAMetadata, HSAMetadataString))
452     return;
453 
454   if (DumpHSAMetadata)
455     dump(HSAMetadataString);
456   if (VerifyHSAMetadata)
457     verify(HSAMetadataString);
458 }
459 
460 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
461                                     const SIProgramInfo &ProgramInfo) {
462   auto &Func = MF.getFunction();
463   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
464     return;
465 
466   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
467   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
468 
469   HSAMetadata.mKernels.push_back(Kernel::Metadata());
470   auto &Kernel = HSAMetadata.mKernels.back();
471 
472   Kernel.mName = Func.getName();
473   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
474   emitKernelLanguage(Func);
475   emitKernelAttrs(Func);
476   emitKernelArgs(Func);
477   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
478   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
479 }
480 
481 //===----------------------------------------------------------------------===//
482 // HSAMetadataStreamerV3
483 //===----------------------------------------------------------------------===//
484 
485 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
486   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
487 }
488 
489 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
490   errs() << "AMDGPU HSA Metadata Parser Test: ";
491 
492   msgpack::Document FromHSAMetadataString;
493 
494   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
495     errs() << "FAIL\n";
496     return;
497   }
498 
499   std::string ToHSAMetadataString;
500   raw_string_ostream StrOS(ToHSAMetadataString);
501   FromHSAMetadataString.toYAML(StrOS);
502 
503   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
504   if (HSAMetadataString != ToHSAMetadataString) {
505     errs() << "Original input: " << HSAMetadataString << '\n'
506            << "Produced output: " << StrOS.str() << '\n';
507   }
508 }
509 
510 Optional<StringRef>
511 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
512   return StringSwitch<Optional<StringRef>>(AccQual)
513       .Case("read_only", StringRef("read_only"))
514       .Case("write_only", StringRef("write_only"))
515       .Case("read_write", StringRef("read_write"))
516       .Default(None);
517 }
518 
519 Optional<StringRef>
520 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
521   switch (AddressSpace) {
522   case AMDGPUAS::PRIVATE_ADDRESS:
523     return StringRef("private");
524   case AMDGPUAS::GLOBAL_ADDRESS:
525     return StringRef("global");
526   case AMDGPUAS::CONSTANT_ADDRESS:
527     return StringRef("constant");
528   case AMDGPUAS::LOCAL_ADDRESS:
529     return StringRef("local");
530   case AMDGPUAS::FLAT_ADDRESS:
531     return StringRef("generic");
532   case AMDGPUAS::REGION_ADDRESS:
533     return StringRef("region");
534   default:
535     return None;
536   }
537 }
538 
539 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
540                                            StringRef BaseTypeName) const {
541   if (TypeQual.find("pipe") != StringRef::npos)
542     return "pipe";
543 
544   return StringSwitch<StringRef>(BaseTypeName)
545       .Case("image1d_t", "image")
546       .Case("image1d_array_t", "image")
547       .Case("image1d_buffer_t", "image")
548       .Case("image2d_t", "image")
549       .Case("image2d_array_t", "image")
550       .Case("image2d_array_depth_t", "image")
551       .Case("image2d_array_msaa_t", "image")
552       .Case("image2d_array_msaa_depth_t", "image")
553       .Case("image2d_depth_t", "image")
554       .Case("image2d_msaa_t", "image")
555       .Case("image2d_msaa_depth_t", "image")
556       .Case("image3d_t", "image")
557       .Case("sampler_t", "sampler")
558       .Case("queue_t", "queue")
559       .Default(isa<PointerType>(Ty)
560                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
561                           ? "dynamic_shared_pointer"
562                           : "global_buffer")
563                    : "by_value");
564 }
565 
566 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
567   switch (Ty->getTypeID()) {
568   case Type::IntegerTyID: {
569     auto Signed = !TypeName.startswith("u");
570     switch (Ty->getIntegerBitWidth()) {
571     case 8:
572       return Signed ? "i8" : "u8";
573     case 16:
574       return Signed ? "i16" : "u16";
575     case 32:
576       return Signed ? "i32" : "u32";
577     case 64:
578       return Signed ? "i64" : "u64";
579     default:
580       return "struct";
581     }
582   }
583   case Type::HalfTyID:
584     return "f16";
585   case Type::FloatTyID:
586     return "f32";
587   case Type::DoubleTyID:
588     return "f64";
589   case Type::PointerTyID:
590     return getValueType(Ty->getPointerElementType(), TypeName);
591   case Type::VectorTyID:
592     return getValueType(Ty->getVectorElementType(), TypeName);
593   default:
594     return "struct";
595   }
596 }
597 
598 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
599   switch (Ty->getTypeID()) {
600   case Type::IntegerTyID: {
601     if (!Signed)
602       return (Twine('u') + getTypeName(Ty, true)).str();
603 
604     auto BitWidth = Ty->getIntegerBitWidth();
605     switch (BitWidth) {
606     case 8:
607       return "char";
608     case 16:
609       return "short";
610     case 32:
611       return "int";
612     case 64:
613       return "long";
614     default:
615       return (Twine('i') + Twine(BitWidth)).str();
616     }
617   }
618   case Type::HalfTyID:
619     return "half";
620   case Type::FloatTyID:
621     return "float";
622   case Type::DoubleTyID:
623     return "double";
624   case Type::VectorTyID: {
625     auto VecTy = cast<VectorType>(Ty);
626     auto ElTy = VecTy->getElementType();
627     auto NumElements = VecTy->getVectorNumElements();
628     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
629   }
630   default:
631     return "unknown";
632   }
633 }
634 
635 msgpack::ArrayDocNode
636 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
637   auto Dims = HSAMetadataDoc->getArrayNode();
638   if (Node->getNumOperands() != 3)
639     return Dims;
640 
641   for (auto &Op : Node->operands())
642     Dims.push_back(Dims.getDocument()->getNode(
643         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
644   return Dims;
645 }
646 
647 void MetadataStreamerV3::emitVersion() {
648   auto Version = HSAMetadataDoc->getArrayNode();
649   Version.push_back(Version.getDocument()->getNode(VersionMajor));
650   Version.push_back(Version.getDocument()->getNode(VersionMinor));
651   getRootMetadata("amdhsa.version") = Version;
652 }
653 
654 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
655   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
656   if (!Node)
657     return;
658 
659   auto Printf = HSAMetadataDoc->getArrayNode();
660   for (auto Op : Node->operands())
661     if (Op->getNumOperands())
662       Printf.push_back(Printf.getDocument()->getNode(
663           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
664   getRootMetadata("amdhsa.printf") = Printf;
665 }
666 
667 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
668                                             msgpack::MapDocNode Kern) {
669   // TODO: What about other languages?
670   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
671   if (!Node || !Node->getNumOperands())
672     return;
673   auto Op0 = Node->getOperand(0);
674   if (Op0->getNumOperands() <= 1)
675     return;
676 
677   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
678   auto LanguageVersion = Kern.getDocument()->getArrayNode();
679   LanguageVersion.push_back(Kern.getDocument()->getNode(
680       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
681   LanguageVersion.push_back(Kern.getDocument()->getNode(
682       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
683   Kern[".language_version"] = LanguageVersion;
684 }
685 
686 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
687                                          msgpack::MapDocNode Kern) {
688 
689   if (auto Node = Func.getMetadata("reqd_work_group_size"))
690     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
691   if (auto Node = Func.getMetadata("work_group_size_hint"))
692     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
693   if (auto Node = Func.getMetadata("vec_type_hint")) {
694     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
695         getTypeName(
696             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
697             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
698         /*Copy=*/true);
699   }
700   if (Func.hasFnAttribute("runtime-handle")) {
701     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
702         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
703         /*Copy=*/true);
704   }
705 }
706 
707 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
708                                         msgpack::MapDocNode Kern) {
709   unsigned Offset = 0;
710   auto Args = HSAMetadataDoc->getArrayNode();
711   for (auto &Arg : Func.args())
712     emitKernelArg(Arg, Offset, Args);
713 
714   emitHiddenKernelArgs(Func, Offset, Args);
715 
716   // TODO: What about other languages?
717   if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
718     auto &DL = Func.getParent()->getDataLayout();
719     auto Int64Ty = Type::getInt64Ty(Func.getContext());
720 
721     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
722     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
723     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
724 
725     auto Int8PtrTy =
726         Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
727 
728     // Emit "printf buffer" argument if printf is used, otherwise emit dummy
729     // "none" argument.
730     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
731       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
732     else
733       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
734 
735     // Emit "default queue" and "completion action" arguments if enqueue kernel
736     // is used, otherwise emit dummy "none" arguments.
737     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
738       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
739       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
740     } else {
741       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
742       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
743     }
744   }
745 
746   Kern[".args"] = Args;
747 }
748 
749 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
750                                        msgpack::ArrayDocNode Args) {
751   auto Func = Arg.getParent();
752   auto ArgNo = Arg.getArgNo();
753   const MDNode *Node;
754 
755   StringRef Name;
756   Node = Func->getMetadata("kernel_arg_name");
757   if (Node && ArgNo < Node->getNumOperands())
758     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
759   else if (Arg.hasName())
760     Name = Arg.getName();
761 
762   StringRef TypeName;
763   Node = Func->getMetadata("kernel_arg_type");
764   if (Node && ArgNo < Node->getNumOperands())
765     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
766 
767   StringRef BaseTypeName;
768   Node = Func->getMetadata("kernel_arg_base_type");
769   if (Node && ArgNo < Node->getNumOperands())
770     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
771 
772   StringRef AccQual;
773   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
774       Arg.hasNoAliasAttr()) {
775     AccQual = "read_only";
776   } else {
777     Node = Func->getMetadata("kernel_arg_access_qual");
778     if (Node && ArgNo < Node->getNumOperands())
779       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
780   }
781 
782   StringRef TypeQual;
783   Node = Func->getMetadata("kernel_arg_type_qual");
784   if (Node && ArgNo < Node->getNumOperands())
785     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
786 
787   Type *Ty = Arg.getType();
788   const DataLayout &DL = Func->getParent()->getDataLayout();
789 
790   unsigned PointeeAlign = 0;
791   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
792     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
793       PointeeAlign = Arg.getParamAlignment();
794       if (PointeeAlign == 0)
795         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
796     }
797   }
798 
799   emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
800                 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
801                 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
802                 TypeQual);
803 }
804 
805 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
806                                        StringRef ValueKind, unsigned &Offset,
807                                        msgpack::ArrayDocNode Args,
808                                        unsigned PointeeAlign, StringRef Name,
809                                        StringRef TypeName,
810                                        StringRef BaseTypeName,
811                                        StringRef AccQual, StringRef TypeQual) {
812   auto Arg = Args.getDocument()->getMapNode();
813 
814   if (!Name.empty())
815     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
816   if (!TypeName.empty())
817     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
818   auto Size = DL.getTypeAllocSize(Ty);
819   auto Align = DL.getABITypeAlignment(Ty);
820   Arg[".size"] = Arg.getDocument()->getNode(Size);
821   Offset = alignTo(Offset, Align);
822   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
823   Offset += Size;
824   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
825   Arg[".value_type"] =
826       Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
827   if (PointeeAlign)
828     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
829 
830   if (auto PtrTy = dyn_cast<PointerType>(Ty))
831     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
832       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
833 
834   if (auto AQ = getAccessQualifier(AccQual))
835     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
836 
837   // TODO: Emit Arg[".actual_access"].
838 
839   SmallVector<StringRef, 1> SplitTypeQuals;
840   TypeQual.split(SplitTypeQuals, " ", -1, false);
841   for (StringRef Key : SplitTypeQuals) {
842     if (Key == "const")
843       Arg[".is_const"] = Arg.getDocument()->getNode(true);
844     else if (Key == "restrict")
845       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
846     else if (Key == "volatile")
847       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
848     else if (Key == "pipe")
849       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
850   }
851 
852   Args.push_back(Arg);
853 }
854 
855 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
856                                               unsigned &Offset,
857                                               msgpack::ArrayDocNode Args) {
858   int HiddenArgNumBytes =
859       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
860 
861   if (!HiddenArgNumBytes)
862     return;
863 
864   auto &DL = Func.getParent()->getDataLayout();
865   auto Int64Ty = Type::getInt64Ty(Func.getContext());
866 
867   if (HiddenArgNumBytes >= 8)
868     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
869   if (HiddenArgNumBytes >= 16)
870     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
871   if (HiddenArgNumBytes >= 24)
872     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
873 
874   auto Int8PtrTy =
875       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
876 
877   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
878   // "none" argument.
879   if (HiddenArgNumBytes >= 32) {
880     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
881       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
882     else
883       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
884   }
885 
886   // Emit "default queue" and "completion action" arguments if enqueue kernel is
887   // used, otherwise emit dummy "none" arguments.
888   if (HiddenArgNumBytes >= 48) {
889     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
890       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
891       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
892     } else {
893       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
894       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
895     }
896   }
897 }
898 
899 msgpack::MapDocNode
900 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
901                                       const SIProgramInfo &ProgramInfo) const {
902   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
903   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
904   const Function &F = MF.getFunction();
905 
906   auto Kern = HSAMetadataDoc->getMapNode();
907 
908   unsigned MaxKernArgAlign;
909   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
910       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
911   Kern[".group_segment_fixed_size"] =
912       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
913   Kern[".private_segment_fixed_size"] =
914       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
915   Kern[".kernarg_segment_align"] =
916       Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign));
917   Kern[".wavefront_size"] =
918       Kern.getDocument()->getNode(STM.getWavefrontSize());
919   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
920   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
921   Kern[".max_flat_workgroup_size"] =
922       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
923   Kern[".sgpr_spill_count"] =
924       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
925   Kern[".vgpr_spill_count"] =
926       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
927 
928   return Kern;
929 }
930 
931 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
932   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
933 }
934 
935 void MetadataStreamerV3::begin(const Module &Mod) {
936   emitVersion();
937   emitPrintf(Mod);
938   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
939 }
940 
941 void MetadataStreamerV3::end() {
942   std::string HSAMetadataString;
943   raw_string_ostream StrOS(HSAMetadataString);
944   HSAMetadataDoc->toYAML(StrOS);
945 
946   if (DumpHSAMetadata)
947     dump(StrOS.str());
948   if (VerifyHSAMetadata)
949     verify(StrOS.str());
950 }
951 
952 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
953                                     const SIProgramInfo &ProgramInfo) {
954   auto &Func = MF.getFunction();
955   auto Kern = getHSAKernelProps(MF, ProgramInfo);
956 
957   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
958          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
959 
960   auto Kernels =
961       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
962 
963   {
964     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
965     Kern[".symbol"] = Kern.getDocument()->getNode(
966         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
967     emitKernelLanguage(Func, Kern);
968     emitKernelAttrs(Func, Kern);
969     emitKernelArgs(Func, Kern);
970   }
971 
972   Kernels.push_back(Kern);
973 }
974 
975 } // end namespace HSAMD
976 } // end namespace AMDGPU
977 } // end namespace llvm
978