1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23
getArgumentTypeAlign(const Argument & Arg,const DataLayout & DL)24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25 const DataLayout &DL) {
26 Type *Ty = Arg.getType();
27 MaybeAlign ArgAlign;
28 if (Arg.hasByRefAttr()) {
29 Ty = Arg.getParamByRefType();
30 ArgAlign = Arg.getParamAlign();
31 }
32
33 if (!ArgAlign)
34 ArgAlign = DL.getABITypeAlign(Ty);
35
36 return std::make_pair(Ty, *ArgAlign);
37 }
38
39 namespace llvm {
40
41 static cl::opt<bool> DumpHSAMetadata(
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
47
48 namespace AMDGPU {
49 namespace HSAMD {
50
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57
verify(StringRef HSAMetadataString) const58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59 errs() << "AMDGPU HSA Metadata Parser Test: ";
60
61 HSAMD::Metadata FromHSAMetadataString;
62 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63 errs() << "FAIL\n";
64 return;
65 }
66
67 std::string ToHSAMetadataString;
68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69 errs() << "FAIL\n";
70 return;
71 }
72
73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74 << '\n';
75 if (HSAMetadataString != ToHSAMetadataString) {
76 errs() << "Original input: " << HSAMetadataString << '\n'
77 << "Produced output: " << ToHSAMetadataString << '\n';
78 }
79 }
80
81 AccessQualifier
getAccessQualifier(StringRef AccQual) const82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83 if (AccQual.empty())
84 return AccessQualifier::Unknown;
85
86 return StringSwitch<AccessQualifier>(AccQual)
87 .Case("read_only", AccessQualifier::ReadOnly)
88 .Case("write_only", AccessQualifier::WriteOnly)
89 .Case("read_write", AccessQualifier::ReadWrite)
90 .Default(AccessQualifier::Default);
91 }
92
93 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const94 MetadataStreamerV2::getAddressSpaceQualifier(
95 unsigned AddressSpace) const {
96 switch (AddressSpace) {
97 case AMDGPUAS::PRIVATE_ADDRESS:
98 return AddressSpaceQualifier::Private;
99 case AMDGPUAS::GLOBAL_ADDRESS:
100 return AddressSpaceQualifier::Global;
101 case AMDGPUAS::CONSTANT_ADDRESS:
102 return AddressSpaceQualifier::Constant;
103 case AMDGPUAS::LOCAL_ADDRESS:
104 return AddressSpaceQualifier::Local;
105 case AMDGPUAS::FLAT_ADDRESS:
106 return AddressSpaceQualifier::Generic;
107 case AMDGPUAS::REGION_ADDRESS:
108 return AddressSpaceQualifier::Region;
109 default:
110 return AddressSpaceQualifier::Unknown;
111 }
112 }
113
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115 StringRef BaseTypeName) const {
116 if (TypeQual.contains("pipe"))
117 return ValueKind::Pipe;
118
119 return StringSwitch<ValueKind>(BaseTypeName)
120 .Case("image1d_t", ValueKind::Image)
121 .Case("image1d_array_t", ValueKind::Image)
122 .Case("image1d_buffer_t", ValueKind::Image)
123 .Case("image2d_t", ValueKind::Image)
124 .Case("image2d_array_t", ValueKind::Image)
125 .Case("image2d_array_depth_t", ValueKind::Image)
126 .Case("image2d_array_msaa_t", ValueKind::Image)
127 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128 .Case("image2d_depth_t", ValueKind::Image)
129 .Case("image2d_msaa_t", ValueKind::Image)
130 .Case("image2d_msaa_depth_t", ValueKind::Image)
131 .Case("image3d_t", ValueKind::Image)
132 .Case("sampler_t", ValueKind::Sampler)
133 .Case("queue_t", ValueKind::Queue)
134 .Default(isa<PointerType>(Ty) ?
135 (Ty->getPointerAddressSpace() ==
136 AMDGPUAS::LOCAL_ADDRESS ?
137 ValueKind::DynamicSharedPointer :
138 ValueKind::GlobalBuffer) :
139 ValueKind::ByValue);
140 }
141
getTypeName(Type * Ty,bool Signed) const142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143 switch (Ty->getTypeID()) {
144 case Type::IntegerTyID: {
145 if (!Signed)
146 return (Twine('u') + getTypeName(Ty, true)).str();
147
148 auto BitWidth = Ty->getIntegerBitWidth();
149 switch (BitWidth) {
150 case 8:
151 return "char";
152 case 16:
153 return "short";
154 case 32:
155 return "int";
156 case 64:
157 return "long";
158 default:
159 return (Twine('i') + Twine(BitWidth)).str();
160 }
161 }
162 case Type::HalfTyID:
163 return "half";
164 case Type::FloatTyID:
165 return "float";
166 case Type::DoubleTyID:
167 return "double";
168 case Type::FixedVectorTyID: {
169 auto VecTy = cast<FixedVectorType>(Ty);
170 auto ElTy = VecTy->getElementType();
171 auto NumElements = VecTy->getNumElements();
172 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
173 }
174 default:
175 return "unknown";
176 }
177 }
178
179 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181 std::vector<uint32_t> Dims;
182 if (Node->getNumOperands() != 3)
183 return Dims;
184
185 for (auto &Op : Node->operands())
186 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187 return Dims;
188 }
189
190 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192 const SIProgramInfo &ProgramInfo) const {
193 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
194 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
195 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196 const Function &F = MF.getFunction();
197
198 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199 F.getCallingConv() == CallingConv::SPIR_KERNEL);
200
201 Align MaxKernArgAlign;
202 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203 MaxKernArgAlign);
204 HSACodeProps.mKernargSegmentAlign =
205 std::max(MaxKernArgAlign, Align(4)).value();
206
207 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
208 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
209 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
210 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
211 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
212 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
213 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
214 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
215 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
216 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
217
218 return HSACodeProps;
219 }
220
221 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const222 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
223 const SIProgramInfo &ProgramInfo) const {
224 return HSAMD::Kernel::DebugProps::Metadata();
225 }
226
emitVersion()227 void MetadataStreamerV2::emitVersion() {
228 auto &Version = HSAMetadata.mVersion;
229
230 Version.push_back(VersionMajorV2);
231 Version.push_back(VersionMinorV2);
232 }
233
emitPrintf(const Module & Mod)234 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
235 auto &Printf = HSAMetadata.mPrintf;
236
237 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
238 if (!Node)
239 return;
240
241 for (auto Op : Node->operands())
242 if (Op->getNumOperands())
243 Printf.push_back(
244 std::string(cast<MDString>(Op->getOperand(0))->getString()));
245 }
246
emitKernelLanguage(const Function & Func)247 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
248 auto &Kernel = HSAMetadata.mKernels.back();
249
250 // TODO: What about other languages?
251 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
252 if (!Node || !Node->getNumOperands())
253 return;
254 auto Op0 = Node->getOperand(0);
255 if (Op0->getNumOperands() <= 1)
256 return;
257
258 Kernel.mLanguage = "OpenCL C";
259 Kernel.mLanguageVersion.push_back(
260 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
261 Kernel.mLanguageVersion.push_back(
262 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
263 }
264
emitKernelAttrs(const Function & Func)265 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
266 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
267
268 if (auto Node = Func.getMetadata("reqd_work_group_size"))
269 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
270 if (auto Node = Func.getMetadata("work_group_size_hint"))
271 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
272 if (auto Node = Func.getMetadata("vec_type_hint")) {
273 Attrs.mVecTypeHint = getTypeName(
274 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
275 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
276 }
277 if (Func.hasFnAttribute("runtime-handle")) {
278 Attrs.mRuntimeHandle =
279 Func.getFnAttribute("runtime-handle").getValueAsString().str();
280 }
281 }
282
emitKernelArgs(const Function & Func,const GCNSubtarget & ST)283 void MetadataStreamerV2::emitKernelArgs(const Function &Func,
284 const GCNSubtarget &ST) {
285 for (auto &Arg : Func.args())
286 emitKernelArg(Arg);
287
288 emitHiddenKernelArgs(Func, ST);
289 }
290
emitKernelArg(const Argument & Arg)291 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
292 auto Func = Arg.getParent();
293 auto ArgNo = Arg.getArgNo();
294 const MDNode *Node;
295
296 StringRef Name;
297 Node = Func->getMetadata("kernel_arg_name");
298 if (Node && ArgNo < Node->getNumOperands())
299 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
300 else if (Arg.hasName())
301 Name = Arg.getName();
302
303 StringRef TypeName;
304 Node = Func->getMetadata("kernel_arg_type");
305 if (Node && ArgNo < Node->getNumOperands())
306 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
307
308 StringRef BaseTypeName;
309 Node = Func->getMetadata("kernel_arg_base_type");
310 if (Node && ArgNo < Node->getNumOperands())
311 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
312
313 StringRef AccQual;
314 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
315 Arg.hasNoAliasAttr()) {
316 AccQual = "read_only";
317 } else {
318 Node = Func->getMetadata("kernel_arg_access_qual");
319 if (Node && ArgNo < Node->getNumOperands())
320 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
321 }
322
323 StringRef TypeQual;
324 Node = Func->getMetadata("kernel_arg_type_qual");
325 if (Node && ArgNo < Node->getNumOperands())
326 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
327
328 const DataLayout &DL = Func->getParent()->getDataLayout();
329
330 MaybeAlign PointeeAlign;
331 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
332 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
333 // FIXME: Should report this for all address spaces
334 PointeeAlign = Arg.getParamAlign().valueOrOne();
335 }
336 }
337
338 Type *ArgTy;
339 Align ArgAlign;
340 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
341
342 emitKernelArg(DL, ArgTy, ArgAlign,
343 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
344 TypeName, BaseTypeName, AccQual, TypeQual);
345 }
346
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,ValueKind ValueKind,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)347 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
348 Align Alignment, ValueKind ValueKind,
349 MaybeAlign PointeeAlign, StringRef Name,
350 StringRef TypeName,
351 StringRef BaseTypeName,
352 StringRef AccQual, StringRef TypeQual) {
353 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
354 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
355
356 Arg.mName = std::string(Name);
357 Arg.mTypeName = std::string(TypeName);
358 Arg.mSize = DL.getTypeAllocSize(Ty);
359 Arg.mAlign = Alignment.value();
360 Arg.mValueKind = ValueKind;
361 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
362
363 if (auto PtrTy = dyn_cast<PointerType>(Ty))
364 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
365
366 Arg.mAccQual = getAccessQualifier(AccQual);
367
368 // TODO: Emit Arg.mActualAccQual.
369
370 SmallVector<StringRef, 1> SplitTypeQuals;
371 TypeQual.split(SplitTypeQuals, " ", -1, false);
372 for (StringRef Key : SplitTypeQuals) {
373 auto P = StringSwitch<bool*>(Key)
374 .Case("const", &Arg.mIsConst)
375 .Case("restrict", &Arg.mIsRestrict)
376 .Case("volatile", &Arg.mIsVolatile)
377 .Case("pipe", &Arg.mIsPipe)
378 .Default(nullptr);
379 if (P)
380 *P = true;
381 }
382 }
383
emitHiddenKernelArgs(const Function & Func,const GCNSubtarget & ST)384 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
385 const GCNSubtarget &ST) {
386 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
387 if (!HiddenArgNumBytes)
388 return;
389
390 auto &DL = Func.getParent()->getDataLayout();
391 auto Int64Ty = Type::getInt64Ty(Func.getContext());
392
393 if (HiddenArgNumBytes >= 8)
394 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395 if (HiddenArgNumBytes >= 16)
396 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397 if (HiddenArgNumBytes >= 24)
398 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
399
400 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
401 AMDGPUAS::GLOBAL_ADDRESS);
402
403 if (HiddenArgNumBytes >= 32) {
404 // We forbid the use of features requiring hostcall when compiling OpenCL
405 // before code object V5, which makes the mutual exclusion between the
406 // "printf buffer" and "hostcall buffer" here sound.
407 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
408 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
409 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
410 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
411 else
412 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
413 }
414
415 // Emit "default queue" and "completion action" arguments if enqueue kernel is
416 // used, otherwise emit dummy "none" arguments.
417 if (HiddenArgNumBytes >= 48) {
418 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
419 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
420 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
421 } else {
422 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
423 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
424 }
425 }
426
427 // Emit the pointer argument for multi-grid object.
428 if (HiddenArgNumBytes >= 56) {
429 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
430 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
431 else
432 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
433 }
434 }
435
emitTo(AMDGPUTargetStreamer & TargetStreamer)436 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
437 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
438 }
439
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)440 void MetadataStreamerV2::begin(const Module &Mod,
441 const IsaInfo::AMDGPUTargetID &TargetID) {
442 emitVersion();
443 emitPrintf(Mod);
444 }
445
end()446 void MetadataStreamerV2::end() {
447 std::string HSAMetadataString;
448 if (toString(HSAMetadata, HSAMetadataString))
449 return;
450
451 if (DumpHSAMetadata)
452 dump(HSAMetadataString);
453 if (VerifyHSAMetadata)
454 verify(HSAMetadataString);
455 }
456
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)457 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
458 const SIProgramInfo &ProgramInfo) {
459 auto &Func = MF.getFunction();
460 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
461 return;
462
463 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
464 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
465
466 HSAMetadata.mKernels.push_back(Kernel::Metadata());
467 auto &Kernel = HSAMetadata.mKernels.back();
468
469 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
470 Kernel.mName = std::string(Func.getName());
471 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
472 emitKernelLanguage(Func);
473 emitKernelAttrs(Func);
474 emitKernelArgs(Func, ST);
475 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
476 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
477 }
478
479 //===----------------------------------------------------------------------===//
480 // HSAMetadataStreamerV3
481 //===----------------------------------------------------------------------===//
482
dump(StringRef HSAMetadataString) const483 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
484 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
485 }
486
verify(StringRef HSAMetadataString) const487 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
488 errs() << "AMDGPU HSA Metadata Parser Test: ";
489
490 msgpack::Document FromHSAMetadataString;
491
492 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
493 errs() << "FAIL\n";
494 return;
495 }
496
497 std::string ToHSAMetadataString;
498 raw_string_ostream StrOS(ToHSAMetadataString);
499 FromHSAMetadataString.toYAML(StrOS);
500
501 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
502 if (HSAMetadataString != ToHSAMetadataString) {
503 errs() << "Original input: " << HSAMetadataString << '\n'
504 << "Produced output: " << StrOS.str() << '\n';
505 }
506 }
507
508 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const509 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
510 return StringSwitch<Optional<StringRef>>(AccQual)
511 .Case("read_only", StringRef("read_only"))
512 .Case("write_only", StringRef("write_only"))
513 .Case("read_write", StringRef("read_write"))
514 .Default(None);
515 }
516
517 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const518 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
519 switch (AddressSpace) {
520 case AMDGPUAS::PRIVATE_ADDRESS:
521 return StringRef("private");
522 case AMDGPUAS::GLOBAL_ADDRESS:
523 return StringRef("global");
524 case AMDGPUAS::CONSTANT_ADDRESS:
525 return StringRef("constant");
526 case AMDGPUAS::LOCAL_ADDRESS:
527 return StringRef("local");
528 case AMDGPUAS::FLAT_ADDRESS:
529 return StringRef("generic");
530 case AMDGPUAS::REGION_ADDRESS:
531 return StringRef("region");
532 default:
533 return None;
534 }
535 }
536
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const537 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
538 StringRef BaseTypeName) const {
539 if (TypeQual.contains("pipe"))
540 return "pipe";
541
542 return StringSwitch<StringRef>(BaseTypeName)
543 .Case("image1d_t", "image")
544 .Case("image1d_array_t", "image")
545 .Case("image1d_buffer_t", "image")
546 .Case("image2d_t", "image")
547 .Case("image2d_array_t", "image")
548 .Case("image2d_array_depth_t", "image")
549 .Case("image2d_array_msaa_t", "image")
550 .Case("image2d_array_msaa_depth_t", "image")
551 .Case("image2d_depth_t", "image")
552 .Case("image2d_msaa_t", "image")
553 .Case("image2d_msaa_depth_t", "image")
554 .Case("image3d_t", "image")
555 .Case("sampler_t", "sampler")
556 .Case("queue_t", "queue")
557 .Default(isa<PointerType>(Ty)
558 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
559 ? "dynamic_shared_pointer"
560 : "global_buffer")
561 : "by_value");
562 }
563
getTypeName(Type * Ty,bool Signed) const564 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
565 switch (Ty->getTypeID()) {
566 case Type::IntegerTyID: {
567 if (!Signed)
568 return (Twine('u') + getTypeName(Ty, true)).str();
569
570 auto BitWidth = Ty->getIntegerBitWidth();
571 switch (BitWidth) {
572 case 8:
573 return "char";
574 case 16:
575 return "short";
576 case 32:
577 return "int";
578 case 64:
579 return "long";
580 default:
581 return (Twine('i') + Twine(BitWidth)).str();
582 }
583 }
584 case Type::HalfTyID:
585 return "half";
586 case Type::FloatTyID:
587 return "float";
588 case Type::DoubleTyID:
589 return "double";
590 case Type::FixedVectorTyID: {
591 auto VecTy = cast<FixedVectorType>(Ty);
592 auto ElTy = VecTy->getElementType();
593 auto NumElements = VecTy->getNumElements();
594 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
595 }
596 default:
597 return "unknown";
598 }
599 }
600
601 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const602 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
603 auto Dims = HSAMetadataDoc->getArrayNode();
604 if (Node->getNumOperands() != 3)
605 return Dims;
606
607 for (auto &Op : Node->operands())
608 Dims.push_back(Dims.getDocument()->getNode(
609 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
610 return Dims;
611 }
612
emitVersion()613 void MetadataStreamerV3::emitVersion() {
614 auto Version = HSAMetadataDoc->getArrayNode();
615 Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
616 Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
617 getRootMetadata("amdhsa.version") = Version;
618 }
619
emitPrintf(const Module & Mod)620 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
621 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
622 if (!Node)
623 return;
624
625 auto Printf = HSAMetadataDoc->getArrayNode();
626 for (auto Op : Node->operands())
627 if (Op->getNumOperands())
628 Printf.push_back(Printf.getDocument()->getNode(
629 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
630 getRootMetadata("amdhsa.printf") = Printf;
631 }
632
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)633 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
634 msgpack::MapDocNode Kern) {
635 // TODO: What about other languages?
636 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
637 if (!Node || !Node->getNumOperands())
638 return;
639 auto Op0 = Node->getOperand(0);
640 if (Op0->getNumOperands() <= 1)
641 return;
642
643 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
644 auto LanguageVersion = Kern.getDocument()->getArrayNode();
645 LanguageVersion.push_back(Kern.getDocument()->getNode(
646 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
647 LanguageVersion.push_back(Kern.getDocument()->getNode(
648 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
649 Kern[".language_version"] = LanguageVersion;
650 }
651
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)652 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
653 msgpack::MapDocNode Kern) {
654
655 if (auto Node = Func.getMetadata("reqd_work_group_size"))
656 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
657 if (auto Node = Func.getMetadata("work_group_size_hint"))
658 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
659 if (auto Node = Func.getMetadata("vec_type_hint")) {
660 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
661 getTypeName(
662 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
663 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
664 /*Copy=*/true);
665 }
666 if (Func.hasFnAttribute("runtime-handle")) {
667 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
668 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
669 /*Copy=*/true);
670 }
671 if (Func.hasFnAttribute("device-init"))
672 Kern[".kind"] = Kern.getDocument()->getNode("init");
673 else if (Func.hasFnAttribute("device-fini"))
674 Kern[".kind"] = Kern.getDocument()->getNode("fini");
675 }
676
emitKernelArgs(const MachineFunction & MF,msgpack::MapDocNode Kern)677 void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF,
678 msgpack::MapDocNode Kern) {
679 auto &Func = MF.getFunction();
680 unsigned Offset = 0;
681 auto Args = HSAMetadataDoc->getArrayNode();
682 for (auto &Arg : Func.args())
683 emitKernelArg(Arg, Offset, Args);
684
685 emitHiddenKernelArgs(MF, Offset, Args);
686
687 Kern[".args"] = Args;
688 }
689
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)690 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
691 msgpack::ArrayDocNode Args) {
692 auto Func = Arg.getParent();
693 auto ArgNo = Arg.getArgNo();
694 const MDNode *Node;
695
696 StringRef Name;
697 Node = Func->getMetadata("kernel_arg_name");
698 if (Node && ArgNo < Node->getNumOperands())
699 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
700 else if (Arg.hasName())
701 Name = Arg.getName();
702
703 StringRef TypeName;
704 Node = Func->getMetadata("kernel_arg_type");
705 if (Node && ArgNo < Node->getNumOperands())
706 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
707
708 StringRef BaseTypeName;
709 Node = Func->getMetadata("kernel_arg_base_type");
710 if (Node && ArgNo < Node->getNumOperands())
711 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
712
713 StringRef AccQual;
714 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
715 Arg.hasNoAliasAttr()) {
716 AccQual = "read_only";
717 } else {
718 Node = Func->getMetadata("kernel_arg_access_qual");
719 if (Node && ArgNo < Node->getNumOperands())
720 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
721 }
722
723 StringRef TypeQual;
724 Node = Func->getMetadata("kernel_arg_type_qual");
725 if (Node && ArgNo < Node->getNumOperands())
726 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
727
728 const DataLayout &DL = Func->getParent()->getDataLayout();
729
730 MaybeAlign PointeeAlign;
731 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
732
733 // FIXME: Need to distinguish in memory alignment from pointer alignment.
734 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
735 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
736 PointeeAlign = Arg.getParamAlign().valueOrOne();
737 }
738
739 // There's no distinction between byval aggregates and raw aggregates.
740 Type *ArgTy;
741 Align ArgAlign;
742 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
743
744 emitKernelArg(DL, ArgTy, ArgAlign,
745 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
746 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
747 }
748
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)749 void MetadataStreamerV3::emitKernelArg(
750 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
751 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
752 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
753 StringRef AccQual, StringRef TypeQual) {
754 auto Arg = Args.getDocument()->getMapNode();
755
756 if (!Name.empty())
757 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
758 if (!TypeName.empty())
759 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
760 auto Size = DL.getTypeAllocSize(Ty);
761 Arg[".size"] = Arg.getDocument()->getNode(Size);
762 Offset = alignTo(Offset, Alignment);
763 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
764 Offset += Size;
765 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
766 if (PointeeAlign)
767 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
768
769 if (auto PtrTy = dyn_cast<PointerType>(Ty))
770 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
771 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
772
773 if (auto AQ = getAccessQualifier(AccQual))
774 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
775
776 // TODO: Emit Arg[".actual_access"].
777
778 SmallVector<StringRef, 1> SplitTypeQuals;
779 TypeQual.split(SplitTypeQuals, " ", -1, false);
780 for (StringRef Key : SplitTypeQuals) {
781 if (Key == "const")
782 Arg[".is_const"] = Arg.getDocument()->getNode(true);
783 else if (Key == "restrict")
784 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
785 else if (Key == "volatile")
786 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
787 else if (Key == "pipe")
788 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
789 }
790
791 Args.push_back(Arg);
792 }
793
emitHiddenKernelArgs(const MachineFunction & MF,unsigned & Offset,msgpack::ArrayDocNode Args)794 void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
795 unsigned &Offset,
796 msgpack::ArrayDocNode Args) {
797 auto &Func = MF.getFunction();
798 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
799
800 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
801 if (!HiddenArgNumBytes)
802 return;
803
804 const Module *M = Func.getParent();
805 auto &DL = M->getDataLayout();
806 auto Int64Ty = Type::getInt64Ty(Func.getContext());
807
808 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
809
810 if (HiddenArgNumBytes >= 8)
811 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
812 Args);
813 if (HiddenArgNumBytes >= 16)
814 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
815 Args);
816 if (HiddenArgNumBytes >= 24)
817 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
818 Args);
819
820 auto Int8PtrTy =
821 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
822
823 if (HiddenArgNumBytes >= 32) {
824 // We forbid the use of features requiring hostcall when compiling OpenCL
825 // before code object V5, which makes the mutual exclusion between the
826 // "printf buffer" and "hostcall buffer" here sound.
827 if (M->getNamedMetadata("llvm.printf.fmts"))
828 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
829 Args);
830 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
831 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
832 Args);
833 else
834 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
835 }
836
837 // Emit "default queue" and "completion action" arguments if enqueue kernel is
838 // used, otherwise emit dummy "none" arguments.
839 if (HiddenArgNumBytes >= 48) {
840 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
841 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
842 Args);
843 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
844 Args);
845 } else {
846 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
847 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
848 }
849 }
850
851 // Emit the pointer argument for multi-grid object.
852 if (HiddenArgNumBytes >= 56) {
853 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
854 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
855 Args);
856 } else {
857 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
858 }
859 }
860 }
861
862 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const863 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
864 const SIProgramInfo &ProgramInfo) const {
865 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
866 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
867 const Function &F = MF.getFunction();
868
869 auto Kern = HSAMetadataDoc->getMapNode();
870
871 Align MaxKernArgAlign;
872 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
873 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
874 Kern[".group_segment_fixed_size"] =
875 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
876 Kern[".private_segment_fixed_size"] =
877 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
878 Kern[".uses_dynamic_stack"] =
879 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
880
881 // FIXME: The metadata treats the minimum as 16?
882 Kern[".kernarg_segment_align"] =
883 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
884 Kern[".wavefront_size"] =
885 Kern.getDocument()->getNode(STM.getWavefrontSize());
886 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
887 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
888
889 // Only add AGPR count to metadata for supported devices
890 if (STM.hasMAIInsts()) {
891 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
892 }
893
894 Kern[".max_flat_workgroup_size"] =
895 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
896 Kern[".sgpr_spill_count"] =
897 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
898 Kern[".vgpr_spill_count"] =
899 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
900
901 return Kern;
902 }
903
emitTo(AMDGPUTargetStreamer & TargetStreamer)904 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
905 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
906 }
907
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)908 void MetadataStreamerV3::begin(const Module &Mod,
909 const IsaInfo::AMDGPUTargetID &TargetID) {
910 emitVersion();
911 emitPrintf(Mod);
912 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
913 }
914
end()915 void MetadataStreamerV3::end() {
916 std::string HSAMetadataString;
917 raw_string_ostream StrOS(HSAMetadataString);
918 HSAMetadataDoc->toYAML(StrOS);
919
920 if (DumpHSAMetadata)
921 dump(StrOS.str());
922 if (VerifyHSAMetadata)
923 verify(StrOS.str());
924 }
925
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)926 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
927 const SIProgramInfo &ProgramInfo) {
928 auto &Func = MF.getFunction();
929 auto Kern = getHSAKernelProps(MF, ProgramInfo);
930
931 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
932 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
933
934 auto Kernels =
935 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
936
937 {
938 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
939 Kern[".symbol"] = Kern.getDocument()->getNode(
940 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
941 emitKernelLanguage(Func, Kern);
942 emitKernelAttrs(Func, Kern);
943 emitKernelArgs(MF, Kern);
944 }
945
946 Kernels.push_back(Kern);
947 }
948
949 //===----------------------------------------------------------------------===//
950 // HSAMetadataStreamerV4
951 //===----------------------------------------------------------------------===//
952
emitVersion()953 void MetadataStreamerV4::emitVersion() {
954 auto Version = HSAMetadataDoc->getArrayNode();
955 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
956 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
957 getRootMetadata("amdhsa.version") = Version;
958 }
959
emitTargetID(const IsaInfo::AMDGPUTargetID & TargetID)960 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
961 getRootMetadata("amdhsa.target") =
962 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
963 }
964
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)965 void MetadataStreamerV4::begin(const Module &Mod,
966 const IsaInfo::AMDGPUTargetID &TargetID) {
967 emitVersion();
968 emitTargetID(TargetID);
969 emitPrintf(Mod);
970 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
971 }
972
973 //===----------------------------------------------------------------------===//
974 // HSAMetadataStreamerV5
975 //===----------------------------------------------------------------------===//
976
emitVersion()977 void MetadataStreamerV5::emitVersion() {
978 auto Version = HSAMetadataDoc->getArrayNode();
979 Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
980 Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
981 getRootMetadata("amdhsa.version") = Version;
982 }
983
emitHiddenKernelArgs(const MachineFunction & MF,unsigned & Offset,msgpack::ArrayDocNode Args)984 void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
985 unsigned &Offset,
986 msgpack::ArrayDocNode Args) {
987 auto &Func = MF.getFunction();
988 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
989
990 // No implicit kernel argument is used.
991 if (ST.getImplicitArgNumBytes(Func) == 0)
992 return;
993
994 const Module *M = Func.getParent();
995 auto &DL = M->getDataLayout();
996 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
997
998 auto Int64Ty = Type::getInt64Ty(Func.getContext());
999 auto Int32Ty = Type::getInt32Ty(Func.getContext());
1000 auto Int16Ty = Type::getInt16Ty(Func.getContext());
1001
1002 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
1003 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
1004 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
1005 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
1006
1007 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
1008 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
1009 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
1010
1011 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
1012 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
1013 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
1014
1015 // Reserved for hidden_tool_correlation_id.
1016 Offset += 8;
1017
1018 Offset += 8; // Reserved.
1019
1020 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
1021 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1022 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1023
1024 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1025
1026 Offset += 6; // Reserved.
1027 auto Int8PtrTy =
1028 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1029
1030 if (M->getNamedMetadata("llvm.printf.fmts")) {
1031 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1032 Args);
1033 } else {
1034 Offset += 8; // Skipped.
1035 }
1036
1037 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
1038 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1039 Args);
1040 } else {
1041 Offset += 8; // Skipped.
1042 }
1043
1044 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1045 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1046 Args);
1047 } else {
1048 Offset += 8; // Skipped.
1049 }
1050
1051 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1052 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1053 else
1054 Offset += 8; // Skipped.
1055
1056 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
1057 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1058 Args);
1059 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1060 Args);
1061 } else {
1062 Offset += 16; // Skipped.
1063 }
1064
1065 Offset += 72; // Reserved.
1066
1067 // hidden_private_base and hidden_shared_base are only when the subtarget has
1068 // ApertureRegs.
1069 if (!ST.hasApertureRegs()) {
1070 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1071 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1072 } else {
1073 Offset += 8; // Skipped.
1074 }
1075
1076 if (MFI.hasQueuePtr())
1077 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1078 }
1079
1080 } // end namespace HSAMD
1081 } // end namespace AMDGPU
1082 } // end namespace llvm
1083