1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU HSA Metadata Streamer.
12 ///
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
39 void MetadataStreamer::dump(StringRef HSAMetadataString) const {
40   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
41 }
42 
43 void MetadataStreamer::verify(StringRef HSAMetadataString) const {
44   errs() << "AMDGPU HSA Metadata Parser Test: ";
45 
46   HSAMD::Metadata FromHSAMetadataString;
47   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
48     errs() << "FAIL\n";
49     return;
50   }
51 
52   std::string ToHSAMetadataString;
53   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
54     errs() << "FAIL\n";
55     return;
56   }
57 
58   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
59          << '\n';
60   if (HSAMetadataString != ToHSAMetadataString) {
61     errs() << "Original input: " << HSAMetadataString << '\n'
62            << "Produced output: " << ToHSAMetadataString << '\n';
63   }
64 }
65 
66 AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
67   if (AccQual.empty())
68     return AccessQualifier::Unknown;
69 
70   return StringSwitch<AccessQualifier>(AccQual)
71              .Case("read_only",  AccessQualifier::ReadOnly)
72              .Case("write_only", AccessQualifier::WriteOnly)
73              .Case("read_write", AccessQualifier::ReadWrite)
74              .Default(AccessQualifier::Default);
75 }
76 
77 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
78     unsigned AddressSpace) const {
79   switch (AddressSpace) {
80   case AMDGPUAS::PRIVATE_ADDRESS:
81     return AddressSpaceQualifier::Private;
82   case AMDGPUAS::GLOBAL_ADDRESS:
83     return AddressSpaceQualifier::Global;
84   case AMDGPUAS::CONSTANT_ADDRESS:
85     return AddressSpaceQualifier::Constant;
86   case AMDGPUAS::LOCAL_ADDRESS:
87     return AddressSpaceQualifier::Local;
88   case AMDGPUAS::FLAT_ADDRESS:
89     return AddressSpaceQualifier::Generic;
90   case AMDGPUAS::REGION_ADDRESS:
91     return AddressSpaceQualifier::Region;
92   default:
93     return AddressSpaceQualifier::Unknown;
94   }
95 }
96 
97 ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
98                                          StringRef BaseTypeName) const {
99   if (TypeQual.find("pipe") != StringRef::npos)
100     return ValueKind::Pipe;
101 
102   return StringSwitch<ValueKind>(BaseTypeName)
103              .Case("image1d_t", ValueKind::Image)
104              .Case("image1d_array_t", ValueKind::Image)
105              .Case("image1d_buffer_t", ValueKind::Image)
106              .Case("image2d_t", ValueKind::Image)
107              .Case("image2d_array_t", ValueKind::Image)
108              .Case("image2d_array_depth_t", ValueKind::Image)
109              .Case("image2d_array_msaa_t", ValueKind::Image)
110              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
111              .Case("image2d_depth_t", ValueKind::Image)
112              .Case("image2d_msaa_t", ValueKind::Image)
113              .Case("image2d_msaa_depth_t", ValueKind::Image)
114              .Case("image3d_t", ValueKind::Image)
115              .Case("sampler_t", ValueKind::Sampler)
116              .Case("queue_t", ValueKind::Queue)
117              .Default(isa<PointerType>(Ty) ?
118                           (Ty->getPointerAddressSpace() ==
119                            AMDGPUAS::LOCAL_ADDRESS ?
120                            ValueKind::DynamicSharedPointer :
121                            ValueKind::GlobalBuffer) :
122                       ValueKind::ByValue);
123 }
124 
125 ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
126   switch (Ty->getTypeID()) {
127   case Type::IntegerTyID: {
128     auto Signed = !TypeName.startswith("u");
129     switch (Ty->getIntegerBitWidth()) {
130     case 8:
131       return Signed ? ValueType::I8 : ValueType::U8;
132     case 16:
133       return Signed ? ValueType::I16 : ValueType::U16;
134     case 32:
135       return Signed ? ValueType::I32 : ValueType::U32;
136     case 64:
137       return Signed ? ValueType::I64 : ValueType::U64;
138     default:
139       return ValueType::Struct;
140     }
141   }
142   case Type::HalfTyID:
143     return ValueType::F16;
144   case Type::FloatTyID:
145     return ValueType::F32;
146   case Type::DoubleTyID:
147     return ValueType::F64;
148   case Type::PointerTyID:
149     return getValueType(Ty->getPointerElementType(), TypeName);
150   case Type::VectorTyID:
151     return getValueType(Ty->getVectorElementType(), TypeName);
152   default:
153     return ValueType::Struct;
154   }
155 }
156 
157 std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
158   switch (Ty->getTypeID()) {
159   case Type::IntegerTyID: {
160     if (!Signed)
161       return (Twine('u') + getTypeName(Ty, true)).str();
162 
163     auto BitWidth = Ty->getIntegerBitWidth();
164     switch (BitWidth) {
165     case 8:
166       return "char";
167     case 16:
168       return "short";
169     case 32:
170       return "int";
171     case 64:
172       return "long";
173     default:
174       return (Twine('i') + Twine(BitWidth)).str();
175     }
176   }
177   case Type::HalfTyID:
178     return "half";
179   case Type::FloatTyID:
180     return "float";
181   case Type::DoubleTyID:
182     return "double";
183   case Type::VectorTyID: {
184     auto VecTy = cast<VectorType>(Ty);
185     auto ElTy = VecTy->getElementType();
186     auto NumElements = VecTy->getVectorNumElements();
187     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
188   }
189   default:
190     return "unknown";
191   }
192 }
193 
194 std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
195     MDNode *Node) const {
196   std::vector<uint32_t> Dims;
197   if (Node->getNumOperands() != 3)
198     return Dims;
199 
200   for (auto &Op : Node->operands())
201     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
202   return Dims;
203 }
204 
205 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
206     const MachineFunction &MF,
207     const SIProgramInfo &ProgramInfo) const {
208   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
209   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
210   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
211   const Function &F = MF.getFunction();
212 
213   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
214          F.getCallingConv() == CallingConv::SPIR_KERNEL);
215 
216   unsigned MaxKernArgAlign;
217   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
218                                                                MaxKernArgAlign);
219   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
220   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
221   HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
222   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
223   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
224   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
225   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
226   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
227   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
228   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
229   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
230 
231   return HSACodeProps;
232 }
233 
234 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
235     const MachineFunction &MF,
236     const SIProgramInfo &ProgramInfo) const {
237   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
238   HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
239 
240   if (!STM.debuggerSupported())
241     return HSADebugProps;
242 
243   HSADebugProps.mDebuggerABIVersion.push_back(1);
244   HSADebugProps.mDebuggerABIVersion.push_back(0);
245 
246   if (STM.debuggerEmitPrologue()) {
247     HSADebugProps.mPrivateSegmentBufferSGPR =
248         ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
249     HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
250         ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
251   }
252 
253   return HSADebugProps;
254 }
255 
256 void MetadataStreamer::emitVersion() {
257   auto &Version = HSAMetadata.mVersion;
258 
259   Version.push_back(VersionMajor);
260   Version.push_back(VersionMinor);
261 }
262 
263 void MetadataStreamer::emitPrintf(const Module &Mod) {
264   auto &Printf = HSAMetadata.mPrintf;
265 
266   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
267   if (!Node)
268     return;
269 
270   for (auto Op : Node->operands())
271     if (Op->getNumOperands())
272       Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
273 }
274 
275 void MetadataStreamer::emitKernelLanguage(const Function &Func) {
276   auto &Kernel = HSAMetadata.mKernels.back();
277 
278   // TODO: What about other languages?
279   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
280   if (!Node || !Node->getNumOperands())
281     return;
282   auto Op0 = Node->getOperand(0);
283   if (Op0->getNumOperands() <= 1)
284     return;
285 
286   Kernel.mLanguage = "OpenCL C";
287   Kernel.mLanguageVersion.push_back(
288       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
289   Kernel.mLanguageVersion.push_back(
290       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
291 }
292 
293 void MetadataStreamer::emitKernelAttrs(const Function &Func) {
294   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
295 
296   if (auto Node = Func.getMetadata("reqd_work_group_size"))
297     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
298   if (auto Node = Func.getMetadata("work_group_size_hint"))
299     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
300   if (auto Node = Func.getMetadata("vec_type_hint")) {
301     Attrs.mVecTypeHint = getTypeName(
302         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
303         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
304   }
305   if (Func.hasFnAttribute("runtime-handle")) {
306     Attrs.mRuntimeHandle =
307         Func.getFnAttribute("runtime-handle").getValueAsString().str();
308   }
309 }
310 
311 void MetadataStreamer::emitKernelArgs(const Function &Func) {
312   for (auto &Arg : Func.args())
313     emitKernelArg(Arg);
314 
315   emitHiddenKernelArgs(Func);
316 }
317 
318 void MetadataStreamer::emitKernelArg(const Argument &Arg) {
319   auto Func = Arg.getParent();
320   auto ArgNo = Arg.getArgNo();
321   const MDNode *Node;
322 
323   StringRef Name;
324   Node = Func->getMetadata("kernel_arg_name");
325   if (Node && ArgNo < Node->getNumOperands())
326     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
327   else if (Arg.hasName())
328     Name = Arg.getName();
329 
330   StringRef TypeName;
331   Node = Func->getMetadata("kernel_arg_type");
332   if (Node && ArgNo < Node->getNumOperands())
333     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
334 
335   StringRef BaseTypeName;
336   Node = Func->getMetadata("kernel_arg_base_type");
337   if (Node && ArgNo < Node->getNumOperands())
338     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
339 
340   StringRef AccQual;
341   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
342       Arg.hasNoAliasAttr()) {
343     AccQual = "read_only";
344   } else {
345     Node = Func->getMetadata("kernel_arg_access_qual");
346     if (Node && ArgNo < Node->getNumOperands())
347       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
348   }
349 
350   StringRef TypeQual;
351   Node = Func->getMetadata("kernel_arg_type_qual");
352   if (Node && ArgNo < Node->getNumOperands())
353     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
354 
355   Type *Ty = Arg.getType();
356   const DataLayout &DL = Func->getParent()->getDataLayout();
357 
358   unsigned PointeeAlign = 0;
359   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
360     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
361       PointeeAlign = Arg.getParamAlignment();
362       if (PointeeAlign == 0)
363         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
364     }
365   }
366 
367   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
368                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
369 }
370 
371 void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
372                                      ValueKind ValueKind,
373                                      unsigned PointeeAlign,
374                                      StringRef Name,
375                                      StringRef TypeName, StringRef BaseTypeName,
376                                      StringRef AccQual, StringRef TypeQual) {
377   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
378   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
379 
380   Arg.mName = Name;
381   Arg.mTypeName = TypeName;
382   Arg.mSize = DL.getTypeAllocSize(Ty);
383   Arg.mAlign = DL.getABITypeAlignment(Ty);
384   Arg.mValueKind = ValueKind;
385   Arg.mValueType = getValueType(Ty, BaseTypeName);
386   Arg.mPointeeAlign = PointeeAlign;
387 
388   if (auto PtrTy = dyn_cast<PointerType>(Ty))
389     Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
390 
391   Arg.mAccQual = getAccessQualifier(AccQual);
392 
393   // TODO: Emit Arg.mActualAccQual.
394 
395   SmallVector<StringRef, 1> SplitTypeQuals;
396   TypeQual.split(SplitTypeQuals, " ", -1, false);
397   for (StringRef Key : SplitTypeQuals) {
398     auto P = StringSwitch<bool*>(Key)
399                  .Case("const",    &Arg.mIsConst)
400                  .Case("restrict", &Arg.mIsRestrict)
401                  .Case("volatile", &Arg.mIsVolatile)
402                  .Case("pipe",     &Arg.mIsPipe)
403                  .Default(nullptr);
404     if (P)
405       *P = true;
406   }
407 }
408 
409 void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
410   int HiddenArgNumBytes =
411       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
412 
413   if (!HiddenArgNumBytes)
414     return;
415 
416   auto &DL = Func.getParent()->getDataLayout();
417   auto Int64Ty = Type::getInt64Ty(Func.getContext());
418 
419   if (HiddenArgNumBytes >= 8)
420     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
421   if (HiddenArgNumBytes >= 16)
422     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
423   if (HiddenArgNumBytes >= 24)
424     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
425 
426   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
427                                       AMDGPUAS::GLOBAL_ADDRESS);
428 
429   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
430   // "none" argument.
431   if (HiddenArgNumBytes >= 32) {
432     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
433       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
434     else
435       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
436   }
437 
438   // Emit "default queue" and "completion action" arguments if enqueue kernel is
439   // used, otherwise emit dummy "none" arguments.
440   if (HiddenArgNumBytes >= 48) {
441     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
442       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
443       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
444     } else {
445       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
446       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
447     }
448   }
449 }
450 
/// Starts metadata emission for \p Mod: records the metadata version and then
/// the module's printf format strings.
void MetadataStreamer::begin(const Module &Mod) {
  emitVersion();
  emitPrintf(Mod);
}
455 
456 void MetadataStreamer::end() {
457   std::string HSAMetadataString;
458   if (toString(HSAMetadata, HSAMetadataString))
459     return;
460 
461   if (DumpHSAMetadata)
462     dump(HSAMetadataString);
463   if (VerifyHSAMetadata)
464     verify(HSAMetadataString);
465 }
466 
467 void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
468   auto &Func = MF.getFunction();
469   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
470     return;
471 
472   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
473   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
474 
475   HSAMetadata.mKernels.push_back(Kernel::Metadata());
476   auto &Kernel = HSAMetadata.mKernels.back();
477 
478   Kernel.mName = Func.getName();
479   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
480   emitKernelLanguage(Func);
481   emitKernelAttrs(Func);
482   emitKernelArgs(Func);
483   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
484   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
485 }
486 
487 } // end namespace HSAMD
488 } // end namespace AMDGPU
489 } // end namespace llvm
490