1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU HSA Metadata Streamer.
12 ///
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUSubtarget.h"
19 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "SIProgramInfo.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 namespace llvm {
29
30 static cl::opt<bool> DumpHSAMetadata(
31 "amdgpu-dump-hsa-metadata",
32 cl::desc("Dump AMDGPU HSA Metadata"));
33 static cl::opt<bool> VerifyHSAMetadata(
34 "amdgpu-verify-hsa-metadata",
35 cl::desc("Verify AMDGPU HSA Metadata"));
36
37 namespace AMDGPU {
38 namespace HSAMD {
39
40 //===----------------------------------------------------------------------===//
41 // HSAMetadataStreamerV2
42 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const43 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
44 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
45 }
46
verify(StringRef HSAMetadataString) const47 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
48 errs() << "AMDGPU HSA Metadata Parser Test: ";
49
50 HSAMD::Metadata FromHSAMetadataString;
51 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
52 errs() << "FAIL\n";
53 return;
54 }
55
56 std::string ToHSAMetadataString;
57 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
58 errs() << "FAIL\n";
59 return;
60 }
61
62 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
63 << '\n';
64 if (HSAMetadataString != ToHSAMetadataString) {
65 errs() << "Original input: " << HSAMetadataString << '\n'
66 << "Produced output: " << ToHSAMetadataString << '\n';
67 }
68 }
69
70 AccessQualifier
getAccessQualifier(StringRef AccQual) const71 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
72 if (AccQual.empty())
73 return AccessQualifier::Unknown;
74
75 return StringSwitch<AccessQualifier>(AccQual)
76 .Case("read_only", AccessQualifier::ReadOnly)
77 .Case("write_only", AccessQualifier::WriteOnly)
78 .Case("read_write", AccessQualifier::ReadWrite)
79 .Default(AccessQualifier::Default);
80 }
81
82 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const83 MetadataStreamerV2::getAddressSpaceQualifier(
84 unsigned AddressSpace) const {
85 switch (AddressSpace) {
86 case AMDGPUAS::PRIVATE_ADDRESS:
87 return AddressSpaceQualifier::Private;
88 case AMDGPUAS::GLOBAL_ADDRESS:
89 return AddressSpaceQualifier::Global;
90 case AMDGPUAS::CONSTANT_ADDRESS:
91 return AddressSpaceQualifier::Constant;
92 case AMDGPUAS::LOCAL_ADDRESS:
93 return AddressSpaceQualifier::Local;
94 case AMDGPUAS::FLAT_ADDRESS:
95 return AddressSpaceQualifier::Generic;
96 case AMDGPUAS::REGION_ADDRESS:
97 return AddressSpaceQualifier::Region;
98 default:
99 return AddressSpaceQualifier::Unknown;
100 }
101 }
102
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const103 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
104 StringRef BaseTypeName) const {
105 if (TypeQual.find("pipe") != StringRef::npos)
106 return ValueKind::Pipe;
107
108 return StringSwitch<ValueKind>(BaseTypeName)
109 .Case("image1d_t", ValueKind::Image)
110 .Case("image1d_array_t", ValueKind::Image)
111 .Case("image1d_buffer_t", ValueKind::Image)
112 .Case("image2d_t", ValueKind::Image)
113 .Case("image2d_array_t", ValueKind::Image)
114 .Case("image2d_array_depth_t", ValueKind::Image)
115 .Case("image2d_array_msaa_t", ValueKind::Image)
116 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
117 .Case("image2d_depth_t", ValueKind::Image)
118 .Case("image2d_msaa_t", ValueKind::Image)
119 .Case("image2d_msaa_depth_t", ValueKind::Image)
120 .Case("image3d_t", ValueKind::Image)
121 .Case("sampler_t", ValueKind::Sampler)
122 .Case("queue_t", ValueKind::Queue)
123 .Default(isa<PointerType>(Ty) ?
124 (Ty->getPointerAddressSpace() ==
125 AMDGPUAS::LOCAL_ADDRESS ?
126 ValueKind::DynamicSharedPointer :
127 ValueKind::GlobalBuffer) :
128 ValueKind::ByValue);
129 }
130
getValueType(Type * Ty,StringRef TypeName) const131 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
132 switch (Ty->getTypeID()) {
133 case Type::IntegerTyID: {
134 auto Signed = !TypeName.startswith("u");
135 switch (Ty->getIntegerBitWidth()) {
136 case 8:
137 return Signed ? ValueType::I8 : ValueType::U8;
138 case 16:
139 return Signed ? ValueType::I16 : ValueType::U16;
140 case 32:
141 return Signed ? ValueType::I32 : ValueType::U32;
142 case 64:
143 return Signed ? ValueType::I64 : ValueType::U64;
144 default:
145 return ValueType::Struct;
146 }
147 }
148 case Type::HalfTyID:
149 return ValueType::F16;
150 case Type::FloatTyID:
151 return ValueType::F32;
152 case Type::DoubleTyID:
153 return ValueType::F64;
154 case Type::PointerTyID:
155 return getValueType(Ty->getPointerElementType(), TypeName);
156 case Type::VectorTyID:
157 return getValueType(Ty->getVectorElementType(), TypeName);
158 default:
159 return ValueType::Struct;
160 }
161 }
162
getTypeName(Type * Ty,bool Signed) const163 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
164 switch (Ty->getTypeID()) {
165 case Type::IntegerTyID: {
166 if (!Signed)
167 return (Twine('u') + getTypeName(Ty, true)).str();
168
169 auto BitWidth = Ty->getIntegerBitWidth();
170 switch (BitWidth) {
171 case 8:
172 return "char";
173 case 16:
174 return "short";
175 case 32:
176 return "int";
177 case 64:
178 return "long";
179 default:
180 return (Twine('i') + Twine(BitWidth)).str();
181 }
182 }
183 case Type::HalfTyID:
184 return "half";
185 case Type::FloatTyID:
186 return "float";
187 case Type::DoubleTyID:
188 return "double";
189 case Type::VectorTyID: {
190 auto VecTy = cast<VectorType>(Ty);
191 auto ElTy = VecTy->getElementType();
192 auto NumElements = VecTy->getVectorNumElements();
193 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
194 }
195 default:
196 return "unknown";
197 }
198 }
199
200 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const201 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
202 std::vector<uint32_t> Dims;
203 if (Node->getNumOperands() != 3)
204 return Dims;
205
206 for (auto &Op : Node->operands())
207 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
208 return Dims;
209 }
210
211 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const212 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
213 const SIProgramInfo &ProgramInfo) const {
214 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
215 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
216 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
217 const Function &F = MF.getFunction();
218
219 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
220 F.getCallingConv() == CallingConv::SPIR_KERNEL);
221
222 unsigned MaxKernArgAlign;
223 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
224 MaxKernArgAlign);
225 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
226 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
227 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
228 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
229 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
230 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
231 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
232 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
233 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
234 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
235 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
236
237 return HSACodeProps;
238 }
239
240 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const241 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
242 const SIProgramInfo &ProgramInfo) const {
243 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
244 HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
245
246 if (!STM.debuggerSupported())
247 return HSADebugProps;
248
249 HSADebugProps.mDebuggerABIVersion.push_back(1);
250 HSADebugProps.mDebuggerABIVersion.push_back(0);
251
252 if (STM.debuggerEmitPrologue()) {
253 HSADebugProps.mPrivateSegmentBufferSGPR =
254 ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
255 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
256 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
257 }
258
259 return HSADebugProps;
260 }
261
emitVersion()262 void MetadataStreamerV2::emitVersion() {
263 auto &Version = HSAMetadata.mVersion;
264
265 Version.push_back(VersionMajor);
266 Version.push_back(VersionMinor);
267 }
268
emitPrintf(const Module & Mod)269 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
270 auto &Printf = HSAMetadata.mPrintf;
271
272 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
273 if (!Node)
274 return;
275
276 for (auto Op : Node->operands())
277 if (Op->getNumOperands())
278 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
279 }
280
emitKernelLanguage(const Function & Func)281 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
282 auto &Kernel = HSAMetadata.mKernels.back();
283
284 // TODO: What about other languages?
285 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
286 if (!Node || !Node->getNumOperands())
287 return;
288 auto Op0 = Node->getOperand(0);
289 if (Op0->getNumOperands() <= 1)
290 return;
291
292 Kernel.mLanguage = "OpenCL C";
293 Kernel.mLanguageVersion.push_back(
294 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
295 Kernel.mLanguageVersion.push_back(
296 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
297 }
298
emitKernelAttrs(const Function & Func)299 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
300 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
301
302 if (auto Node = Func.getMetadata("reqd_work_group_size"))
303 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
304 if (auto Node = Func.getMetadata("work_group_size_hint"))
305 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
306 if (auto Node = Func.getMetadata("vec_type_hint")) {
307 Attrs.mVecTypeHint = getTypeName(
308 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
309 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
310 }
311 if (Func.hasFnAttribute("runtime-handle")) {
312 Attrs.mRuntimeHandle =
313 Func.getFnAttribute("runtime-handle").getValueAsString().str();
314 }
315 }
316
emitKernelArgs(const Function & Func)317 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
318 for (auto &Arg : Func.args())
319 emitKernelArg(Arg);
320
321 emitHiddenKernelArgs(Func);
322 }
323
emitKernelArg(const Argument & Arg)324 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
325 auto Func = Arg.getParent();
326 auto ArgNo = Arg.getArgNo();
327 const MDNode *Node;
328
329 StringRef Name;
330 Node = Func->getMetadata("kernel_arg_name");
331 if (Node && ArgNo < Node->getNumOperands())
332 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
333 else if (Arg.hasName())
334 Name = Arg.getName();
335
336 StringRef TypeName;
337 Node = Func->getMetadata("kernel_arg_type");
338 if (Node && ArgNo < Node->getNumOperands())
339 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
340
341 StringRef BaseTypeName;
342 Node = Func->getMetadata("kernel_arg_base_type");
343 if (Node && ArgNo < Node->getNumOperands())
344 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
345
346 StringRef AccQual;
347 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
348 Arg.hasNoAliasAttr()) {
349 AccQual = "read_only";
350 } else {
351 Node = Func->getMetadata("kernel_arg_access_qual");
352 if (Node && ArgNo < Node->getNumOperands())
353 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
354 }
355
356 StringRef TypeQual;
357 Node = Func->getMetadata("kernel_arg_type_qual");
358 if (Node && ArgNo < Node->getNumOperands())
359 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
360
361 Type *Ty = Arg.getType();
362 const DataLayout &DL = Func->getParent()->getDataLayout();
363
364 unsigned PointeeAlign = 0;
365 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
366 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
367 PointeeAlign = Arg.getParamAlignment();
368 if (PointeeAlign == 0)
369 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
370 }
371 }
372
373 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
374 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
375 }
376
emitKernelArg(const DataLayout & DL,Type * Ty,ValueKind ValueKind,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)377 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
378 ValueKind ValueKind,
379 unsigned PointeeAlign, StringRef Name,
380 StringRef TypeName,
381 StringRef BaseTypeName,
382 StringRef AccQual, StringRef TypeQual) {
383 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
384 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
385
386 Arg.mName = Name;
387 Arg.mTypeName = TypeName;
388 Arg.mSize = DL.getTypeAllocSize(Ty);
389 Arg.mAlign = DL.getABITypeAlignment(Ty);
390 Arg.mValueKind = ValueKind;
391 Arg.mValueType = getValueType(Ty, BaseTypeName);
392 Arg.mPointeeAlign = PointeeAlign;
393
394 if (auto PtrTy = dyn_cast<PointerType>(Ty))
395 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
396
397 Arg.mAccQual = getAccessQualifier(AccQual);
398
399 // TODO: Emit Arg.mActualAccQual.
400
401 SmallVector<StringRef, 1> SplitTypeQuals;
402 TypeQual.split(SplitTypeQuals, " ", -1, false);
403 for (StringRef Key : SplitTypeQuals) {
404 auto P = StringSwitch<bool*>(Key)
405 .Case("const", &Arg.mIsConst)
406 .Case("restrict", &Arg.mIsRestrict)
407 .Case("volatile", &Arg.mIsVolatile)
408 .Case("pipe", &Arg.mIsPipe)
409 .Default(nullptr);
410 if (P)
411 *P = true;
412 }
413 }
414
emitHiddenKernelArgs(const Function & Func)415 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
416 int HiddenArgNumBytes =
417 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
418
419 if (!HiddenArgNumBytes)
420 return;
421
422 auto &DL = Func.getParent()->getDataLayout();
423 auto Int64Ty = Type::getInt64Ty(Func.getContext());
424
425 if (HiddenArgNumBytes >= 8)
426 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
427 if (HiddenArgNumBytes >= 16)
428 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
429 if (HiddenArgNumBytes >= 24)
430 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
431
432 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
433 AMDGPUAS::GLOBAL_ADDRESS);
434
435 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
436 // "none" argument.
437 if (HiddenArgNumBytes >= 32) {
438 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
439 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
440 else
441 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
442 }
443
444 // Emit "default queue" and "completion action" arguments if enqueue kernel is
445 // used, otherwise emit dummy "none" arguments.
446 if (HiddenArgNumBytes >= 48) {
447 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
448 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
449 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
450 } else {
451 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
452 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
453 }
454 }
455 }
456
emitTo(AMDGPUTargetStreamer & TargetStreamer)457 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
458 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
459 }
460
begin(const Module & Mod)461 void MetadataStreamerV2::begin(const Module &Mod) {
462 emitVersion();
463 emitPrintf(Mod);
464 }
465
end()466 void MetadataStreamerV2::end() {
467 std::string HSAMetadataString;
468 if (toString(HSAMetadata, HSAMetadataString))
469 return;
470
471 if (DumpHSAMetadata)
472 dump(HSAMetadataString);
473 if (VerifyHSAMetadata)
474 verify(HSAMetadataString);
475 }
476
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)477 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
478 const SIProgramInfo &ProgramInfo) {
479 auto &Func = MF.getFunction();
480 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
481 return;
482
483 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
484 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
485
486 HSAMetadata.mKernels.push_back(Kernel::Metadata());
487 auto &Kernel = HSAMetadata.mKernels.back();
488
489 Kernel.mName = Func.getName();
490 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
491 emitKernelLanguage(Func);
492 emitKernelAttrs(Func);
493 emitKernelArgs(Func);
494 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
495 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
496 }
497
498 //===----------------------------------------------------------------------===//
499 // HSAMetadataStreamerV3
500 //===----------------------------------------------------------------------===//
501
dump(StringRef HSAMetadataString) const502 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
503 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
504 }
505
verify(StringRef HSAMetadataString) const506 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
507 errs() << "AMDGPU HSA Metadata Parser Test: ";
508
509 std::shared_ptr<msgpack::Node> FromHSAMetadataString =
510 std::make_shared<msgpack::MapNode>();
511
512 yaml::Input YIn(HSAMetadataString);
513 YIn >> FromHSAMetadataString;
514 if (YIn.error()) {
515 errs() << "FAIL\n";
516 return;
517 }
518
519 std::string ToHSAMetadataString;
520 raw_string_ostream StrOS(ToHSAMetadataString);
521 yaml::Output YOut(StrOS);
522 YOut << FromHSAMetadataString;
523
524 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
525 if (HSAMetadataString != ToHSAMetadataString) {
526 errs() << "Original input: " << HSAMetadataString << '\n'
527 << "Produced output: " << StrOS.str() << '\n';
528 }
529 }
530
531 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const532 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
533 return StringSwitch<Optional<StringRef>>(AccQual)
534 .Case("read_only", StringRef("read_only"))
535 .Case("write_only", StringRef("write_only"))
536 .Case("read_write", StringRef("read_write"))
537 .Default(None);
538 }
539
540 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const541 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
542 switch (AddressSpace) {
543 case AMDGPUAS::PRIVATE_ADDRESS:
544 return StringRef("private");
545 case AMDGPUAS::GLOBAL_ADDRESS:
546 return StringRef("global");
547 case AMDGPUAS::CONSTANT_ADDRESS:
548 return StringRef("constant");
549 case AMDGPUAS::LOCAL_ADDRESS:
550 return StringRef("local");
551 case AMDGPUAS::FLAT_ADDRESS:
552 return StringRef("generic");
553 case AMDGPUAS::REGION_ADDRESS:
554 return StringRef("region");
555 default:
556 return None;
557 }
558 }
559
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const560 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
561 StringRef BaseTypeName) const {
562 if (TypeQual.find("pipe") != StringRef::npos)
563 return "pipe";
564
565 return StringSwitch<StringRef>(BaseTypeName)
566 .Case("image1d_t", "image")
567 .Case("image1d_array_t", "image")
568 .Case("image1d_buffer_t", "image")
569 .Case("image2d_t", "image")
570 .Case("image2d_array_t", "image")
571 .Case("image2d_array_depth_t", "image")
572 .Case("image2d_array_msaa_t", "image")
573 .Case("image2d_array_msaa_depth_t", "image")
574 .Case("image2d_depth_t", "image")
575 .Case("image2d_msaa_t", "image")
576 .Case("image2d_msaa_depth_t", "image")
577 .Case("image3d_t", "image")
578 .Case("sampler_t", "sampler")
579 .Case("queue_t", "queue")
580 .Default(isa<PointerType>(Ty)
581 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
582 ? "dynamic_shared_pointer"
583 : "global_buffer")
584 : "by_value");
585 }
586
getValueType(Type * Ty,StringRef TypeName) const587 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
588 switch (Ty->getTypeID()) {
589 case Type::IntegerTyID: {
590 auto Signed = !TypeName.startswith("u");
591 switch (Ty->getIntegerBitWidth()) {
592 case 8:
593 return Signed ? "i8" : "u8";
594 case 16:
595 return Signed ? "i16" : "u16";
596 case 32:
597 return Signed ? "i32" : "u32";
598 case 64:
599 return Signed ? "i64" : "u64";
600 default:
601 return "struct";
602 }
603 }
604 case Type::HalfTyID:
605 return "f16";
606 case Type::FloatTyID:
607 return "f32";
608 case Type::DoubleTyID:
609 return "f64";
610 case Type::PointerTyID:
611 return getValueType(Ty->getPointerElementType(), TypeName);
612 case Type::VectorTyID:
613 return getValueType(Ty->getVectorElementType(), TypeName);
614 default:
615 return "struct";
616 }
617 }
618
getTypeName(Type * Ty,bool Signed) const619 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
620 switch (Ty->getTypeID()) {
621 case Type::IntegerTyID: {
622 if (!Signed)
623 return (Twine('u') + getTypeName(Ty, true)).str();
624
625 auto BitWidth = Ty->getIntegerBitWidth();
626 switch (BitWidth) {
627 case 8:
628 return "char";
629 case 16:
630 return "short";
631 case 32:
632 return "int";
633 case 64:
634 return "long";
635 default:
636 return (Twine('i') + Twine(BitWidth)).str();
637 }
638 }
639 case Type::HalfTyID:
640 return "half";
641 case Type::FloatTyID:
642 return "float";
643 case Type::DoubleTyID:
644 return "double";
645 case Type::VectorTyID: {
646 auto VecTy = cast<VectorType>(Ty);
647 auto ElTy = VecTy->getElementType();
648 auto NumElements = VecTy->getVectorNumElements();
649 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
650 }
651 default:
652 return "unknown";
653 }
654 }
655
656 std::shared_ptr<msgpack::ArrayNode>
getWorkGroupDimensions(MDNode * Node) const657 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
658 auto Dims = std::make_shared<msgpack::ArrayNode>();
659 if (Node->getNumOperands() != 3)
660 return Dims;
661
662 for (auto &Op : Node->operands())
663 Dims->push_back(std::make_shared<msgpack::ScalarNode>(
664 mdconst::extract<ConstantInt>(Op)->getZExtValue()));
665 return Dims;
666 }
667
emitVersion()668 void MetadataStreamerV3::emitVersion() {
669 auto Version = std::make_shared<msgpack::ArrayNode>();
670 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
671 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
672 getRootMetadata("amdhsa.version") = std::move(Version);
673 }
674
emitPrintf(const Module & Mod)675 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
676 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
677 if (!Node)
678 return;
679
680 auto Printf = std::make_shared<msgpack::ArrayNode>();
681 for (auto Op : Node->operands())
682 if (Op->getNumOperands())
683 Printf->push_back(std::make_shared<msgpack::ScalarNode>(
684 cast<MDString>(Op->getOperand(0))->getString()));
685 getRootMetadata("amdhsa.printf") = std::move(Printf);
686 }
687
emitKernelLanguage(const Function & Func,msgpack::MapNode & Kern)688 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
689 msgpack::MapNode &Kern) {
690 // TODO: What about other languages?
691 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
692 if (!Node || !Node->getNumOperands())
693 return;
694 auto Op0 = Node->getOperand(0);
695 if (Op0->getNumOperands() <= 1)
696 return;
697
698 Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
699 auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
700 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
701 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
702 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
703 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
704 Kern[".language_version"] = std::move(LanguageVersion);
705 }
706
emitKernelAttrs(const Function & Func,msgpack::MapNode & Kern)707 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
708 msgpack::MapNode &Kern) {
709
710 if (auto Node = Func.getMetadata("reqd_work_group_size"))
711 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
712 if (auto Node = Func.getMetadata("work_group_size_hint"))
713 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
714 if (auto Node = Func.getMetadata("vec_type_hint")) {
715 Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
716 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
717 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
718 }
719 if (Func.hasFnAttribute("runtime-handle")) {
720 Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
721 Func.getFnAttribute("runtime-handle").getValueAsString().str());
722 }
723 }
724
emitKernelArgs(const Function & Func,msgpack::MapNode & Kern)725 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
726 msgpack::MapNode &Kern) {
727 unsigned Offset = 0;
728 auto Args = std::make_shared<msgpack::ArrayNode>();
729 for (auto &Arg : Func.args())
730 emitKernelArg(Arg, Offset, *Args);
731
732 emitHiddenKernelArgs(Func, Offset, *Args);
733
734 // TODO: What about other languages?
735 if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
736 auto &DL = Func.getParent()->getDataLayout();
737 auto Int64Ty = Type::getInt64Ty(Func.getContext());
738
739 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
740 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
741 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
742
743 auto Int8PtrTy =
744 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
745
746 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
747 // "none" argument.
748 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
749 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
750 else
751 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
752
753 // Emit "default queue" and "completion action" arguments if enqueue kernel
754 // is used, otherwise emit dummy "none" arguments.
755 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
756 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
757 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
758 } else {
759 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
760 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
761 }
762 }
763
764 Kern[".args"] = std::move(Args);
765 }
766
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayNode & Args)767 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
768 msgpack::ArrayNode &Args) {
769 auto Func = Arg.getParent();
770 auto ArgNo = Arg.getArgNo();
771 const MDNode *Node;
772
773 StringRef Name;
774 Node = Func->getMetadata("kernel_arg_name");
775 if (Node && ArgNo < Node->getNumOperands())
776 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
777 else if (Arg.hasName())
778 Name = Arg.getName();
779
780 StringRef TypeName;
781 Node = Func->getMetadata("kernel_arg_type");
782 if (Node && ArgNo < Node->getNumOperands())
783 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
784
785 StringRef BaseTypeName;
786 Node = Func->getMetadata("kernel_arg_base_type");
787 if (Node && ArgNo < Node->getNumOperands())
788 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
789
790 StringRef AccQual;
791 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
792 Arg.hasNoAliasAttr()) {
793 AccQual = "read_only";
794 } else {
795 Node = Func->getMetadata("kernel_arg_access_qual");
796 if (Node && ArgNo < Node->getNumOperands())
797 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
798 }
799
800 StringRef TypeQual;
801 Node = Func->getMetadata("kernel_arg_type_qual");
802 if (Node && ArgNo < Node->getNumOperands())
803 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
804
805 Type *Ty = Arg.getType();
806 const DataLayout &DL = Func->getParent()->getDataLayout();
807
808 unsigned PointeeAlign = 0;
809 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
810 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
811 PointeeAlign = Arg.getParamAlignment();
812 if (PointeeAlign == 0)
813 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
814 }
815 }
816
817 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
818 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
819 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
820 TypeQual);
821 }
822
emitKernelArg(const DataLayout & DL,Type * Ty,StringRef ValueKind,unsigned & Offset,msgpack::ArrayNode & Args,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)823 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
824 StringRef ValueKind, unsigned &Offset,
825 msgpack::ArrayNode &Args,
826 unsigned PointeeAlign, StringRef Name,
827 StringRef TypeName,
828 StringRef BaseTypeName,
829 StringRef AccQual, StringRef TypeQual) {
830 auto ArgPtr = std::make_shared<msgpack::MapNode>();
831 auto &Arg = *ArgPtr;
832
833 if (!Name.empty())
834 Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
835 if (!TypeName.empty())
836 Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
837 auto Size = DL.getTypeAllocSize(Ty);
838 auto Align = DL.getABITypeAlignment(Ty);
839 Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
840 Offset = alignTo(Offset, Align);
841 Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
842 Offset += Size;
843 Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
844 Arg[".value_type"] =
845 std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
846 if (PointeeAlign)
847 Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
848
849 if (auto PtrTy = dyn_cast<PointerType>(Ty))
850 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
851 Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
852
853 if (auto AQ = getAccessQualifier(AccQual))
854 Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
855
856 // TODO: Emit Arg[".actual_access"].
857
858 SmallVector<StringRef, 1> SplitTypeQuals;
859 TypeQual.split(SplitTypeQuals, " ", -1, false);
860 for (StringRef Key : SplitTypeQuals) {
861 if (Key == "const")
862 Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
863 else if (Key == "restrict")
864 Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
865 else if (Key == "volatile")
866 Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
867 else if (Key == "pipe")
868 Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
869 }
870
871 Args.push_back(std::move(ArgPtr));
872 }
873
emitHiddenKernelArgs(const Function & Func,unsigned & Offset,msgpack::ArrayNode & Args)874 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
875 unsigned &Offset,
876 msgpack::ArrayNode &Args) {
877 int HiddenArgNumBytes =
878 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
879
880 if (!HiddenArgNumBytes)
881 return;
882
883 auto &DL = Func.getParent()->getDataLayout();
884 auto Int64Ty = Type::getInt64Ty(Func.getContext());
885
886 if (HiddenArgNumBytes >= 8)
887 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
888 if (HiddenArgNumBytes >= 16)
889 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
890 if (HiddenArgNumBytes >= 24)
891 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
892
893 auto Int8PtrTy =
894 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
895
896 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
897 // "none" argument.
898 if (HiddenArgNumBytes >= 32) {
899 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
900 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
901 else
902 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
903 }
904
905 // Emit "default queue" and "completion action" arguments if enqueue kernel is
906 // used, otherwise emit dummy "none" arguments.
907 if (HiddenArgNumBytes >= 48) {
908 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
909 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
910 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
911 } else {
912 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
913 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
914 }
915 }
916 }
917
918 std::shared_ptr<msgpack::MapNode>
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const919 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
920 const SIProgramInfo &ProgramInfo) const {
921 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
922 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
923 const Function &F = MF.getFunction();
924
925 auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
926 auto &Kern = *HSAKernelProps;
927
928 unsigned MaxKernArgAlign;
929 Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
930 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
931 Kern[".group_segment_fixed_size"] =
932 std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
933 Kern[".private_segment_fixed_size"] =
934 std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
935 Kern[".kernarg_segment_align"] =
936 std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
937 Kern[".wavefront_size"] =
938 std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
939 Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
940 Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
941 Kern[".max_flat_workgroup_size"] =
942 std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
943 Kern[".sgpr_spill_count"] =
944 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
945 Kern[".vgpr_spill_count"] =
946 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
947
948 return HSAKernelProps;
949 }
950
emitTo(AMDGPUTargetStreamer & TargetStreamer)951 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
952 return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
953 }
954
begin(const Module & Mod)955 void MetadataStreamerV3::begin(const Module &Mod) {
956 emitVersion();
957 emitPrintf(Mod);
958 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
959 }
960
end()961 void MetadataStreamerV3::end() {
962 std::string HSAMetadataString;
963 raw_string_ostream StrOS(HSAMetadataString);
964 yaml::Output YOut(StrOS);
965 YOut << HSAMetadataRoot;
966
967 if (DumpHSAMetadata)
968 dump(StrOS.str());
969 if (VerifyHSAMetadata)
970 verify(StrOS.str());
971 }
972
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)973 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
974 const SIProgramInfo &ProgramInfo) {
975 auto &Func = MF.getFunction();
976 auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
977
978 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
979 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
980
981 auto &KernelsNode = getRootMetadata("amdhsa.kernels");
982 auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
983
984 {
985 auto &Kern = *KernelProps;
986 Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
987 Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
988 (Twine(Func.getName()) + Twine(".kd")).str());
989 emitKernelLanguage(Func, Kern);
990 emitKernelAttrs(Func, Kern);
991 emitKernelArgs(Func, Kern);
992 }
993
994 Kernels->push_back(std::move(KernelProps));
995 }
996
997 } // end namespace HSAMD
998 } // end namespace AMDGPU
999 } // end namespace llvm
1000