//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file provides AMDGPU specific target streamer methods.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUTargetStreamer.h"
16 #include "SIDefines.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/Metadata.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCELFStreamer.h"
26 #include "llvm/MC/MCObjectFileInfo.h"
27 #include "llvm/MC/MCSectionELF.h"
28 #include "llvm/Support/ELF.h"
29 #include "llvm/Support/FormattedStream.h"
30 
31 namespace llvm {
32 #include "AMDGPUPTNote.h"
33 }
34 
35 using namespace llvm;
36 using namespace llvm::AMDGPU;
37 
38 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
39     : MCTargetStreamer(S) {}
40 
41 //===----------------------------------------------------------------------===//
42 // AMDGPUTargetAsmStreamer
43 //===----------------------------------------------------------------------===//
44 
45 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
46                                                  formatted_raw_ostream &OS)
47     : AMDGPUTargetStreamer(S), OS(OS) { }
48 
49 void
50 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
51                                                            uint32_t Minor) {
52   OS << "\t.hsa_code_object_version " <<
53         Twine(Major) << "," << Twine(Minor) << '\n';
54 }
55 
56 void
57 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
58                                                        uint32_t Minor,
59                                                        uint32_t Stepping,
60                                                        StringRef VendorName,
61                                                        StringRef ArchName) {
62   OS << "\t.hsa_code_object_isa " <<
63         Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
64         ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
65 
66 }
67 
68 void
69 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
70   OS << "\t.amd_kernel_code_t\n";
71   dumpAmdKernelCode(&Header, OS, "\t\t");
72   OS << "\t.end_amd_kernel_code_t\n";
73 }
74 
75 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
76                                                    unsigned Type) {
77   switch (Type) {
78     default: llvm_unreachable("Invalid AMDGPU symbol type");
79     case ELF::STT_AMDGPU_HSA_KERNEL:
80       OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
81       break;
82   }
83 }
84 
85 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
86     StringRef GlobalName) {
87   OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
88 }
89 
90 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
91     StringRef GlobalName) {
92   OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
93 }
94 
95 //===----------------------------------------------------------------------===//
96 // AMDGPUTargetELFStreamer
97 //===----------------------------------------------------------------------===//
98 
99 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
100     : AMDGPUTargetStreamer(S), Streamer(S) {}
101 
102 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
103   return static_cast<MCELFStreamer &>(Streamer);
104 }
105 
106 void
107 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
108                                                            uint32_t Minor) {
109   MCStreamer &OS = getStreamer();
110   MCSectionELF *Note =
111       OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE,
112                                     ELF::SHF_ALLOC);
113 
114   auto NameSZ = sizeof(PT_NOTE::NoteName);
115   OS.PushSection();
116   OS.SwitchSection(Note);
117   OS.EmitIntValue(NameSZ, 4);                                     // namesz
118   OS.EmitIntValue(8, 4);                                          // descz
119   OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
120   OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ));             // name
121   OS.EmitValueToAlignment(4);
122   OS.EmitIntValue(Major, 4);                                      // desc
123   OS.EmitIntValue(Minor, 4);
124   OS.EmitValueToAlignment(4);
125   OS.PopSection();
126 }
127 
128 void
129 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
130                                                        uint32_t Minor,
131                                                        uint32_t Stepping,
132                                                        StringRef VendorName,
133                                                        StringRef ArchName) {
134   MCStreamer &OS = getStreamer();
135   MCSectionELF *Note =
136       OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE,
137                                     ELF::SHF_ALLOC);
138 
139   uint16_t VendorNameSize = VendorName.size() + 1;
140   uint16_t ArchNameSize = ArchName.size() + 1;
141   unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
142                     sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
143                     VendorNameSize + ArchNameSize;
144 
145   OS.PushSection();
146   OS.SwitchSection(Note);
147   auto NameSZ = sizeof(PT_NOTE::NoteName);
148   OS.EmitIntValue(NameSZ, 4);                              // namesz
149   OS.EmitIntValue(DescSZ, 4);                              // descsz
150   OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_ISA, 4);          // type
151   OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ));      // name
152   OS.EmitValueToAlignment(4);
153   OS.EmitIntValue(VendorNameSize, 2);                      // desc
154   OS.EmitIntValue(ArchNameSize, 2);
155   OS.EmitIntValue(Major, 4);
156   OS.EmitIntValue(Minor, 4);
157   OS.EmitIntValue(Stepping, 4);
158   OS.EmitBytes(VendorName);
159   OS.EmitIntValue(0, 1); // NULL terminate VendorName
160   OS.EmitBytes(ArchName);
161   OS.EmitIntValue(0, 1); // NULL terminte ArchName
162   OS.EmitValueToAlignment(4);
163   OS.PopSection();
164 }
165 
166 void
167 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
168 
169   MCStreamer &OS = getStreamer();
170   OS.PushSection();
171   OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
172   OS.PopSection();
173 }
174 
175 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
176                                                    unsigned Type) {
177   MCSymbolELF *Symbol = cast<MCSymbolELF>(
178       getStreamer().getContext().getOrCreateSymbol(SymbolName));
179   Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
180 }
181 
182 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
183     StringRef GlobalName) {
184 
185   MCSymbolELF *Symbol = cast<MCSymbolELF>(
186       getStreamer().getContext().getOrCreateSymbol(GlobalName));
187   Symbol->setType(ELF::STT_OBJECT);
188   Symbol->setBinding(ELF::STB_LOCAL);
189 }
190 
191 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
192     StringRef GlobalName) {
193 
194   MCSymbolELF *Symbol = cast<MCSymbolELF>(
195       getStreamer().getContext().getOrCreateSymbol(GlobalName));
196   Symbol->setType(ELF::STT_OBJECT);
197   Symbol->setBinding(ELF::STB_GLOBAL);
198 }
199 
200 void AMDGPUTargetStreamer::emitRuntimeMDIntValue(RuntimeMD::Key K, uint64_t V,
201                                                  unsigned Size) {
202   auto &S = getStreamer();
203   S.EmitIntValue(K, 1);
204   S.EmitIntValue(V, Size);
205 }
206 
207 void AMDGPUTargetStreamer::emitRuntimeMDStringValue(RuntimeMD::Key K,
208                                                     StringRef R) {
209   auto &S = getStreamer();
210   S.EmitIntValue(K, 1);
211   S.EmitIntValue(R.size(), 4);
212   S.EmitBytes(R);
213 }
214 
215 void AMDGPUTargetStreamer::emitRuntimeMDThreeIntValues(RuntimeMD::Key K,
216                                                        MDNode *Node,
217                                                        unsigned Size) {
218   assert(Node->getNumOperands() == 3);
219 
220   auto &S = getStreamer();
221   S.EmitIntValue(K, 1);
222   for (const MDOperand &Op : Node->operands()) {
223     const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
224     S.EmitIntValue(CI->getZExtValue(), Size);
225   }
226 }
227 
228 void AMDGPUTargetStreamer::emitStartOfRuntimeMetadata(const Module &M) {
229   emitRuntimeMDIntValue(RuntimeMD::KeyMDVersion,
230                         RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
231   if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
232     if (MD->getNumOperands() != 0) {
233       auto Node = MD->getOperand(0);
234       if (Node->getNumOperands() > 1) {
235         emitRuntimeMDIntValue(RuntimeMD::KeyLanguage,
236                               RuntimeMD::OpenCL_C, 1);
237         uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
238                          ->getZExtValue();
239         uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
240                          ->getZExtValue();
241         emitRuntimeMDIntValue(RuntimeMD::KeyLanguageVersion,
242                               Major * 100 + Minor * 10, 2);
243       }
244     }
245   }
246 
247   if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
248     for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
249       auto Node = MD->getOperand(I);
250       if (Node->getNumOperands() > 0)
251         emitRuntimeMDStringValue(RuntimeMD::KeyPrintfInfo,
252             cast<MDString>(Node->getOperand(0))->getString());
253     }
254   }
255 }
256 
257 static std::string getOCLTypeName(Type *Ty, bool Signed) {
258   switch (Ty->getTypeID()) {
259   case Type::HalfTyID:
260     return "half";
261   case Type::FloatTyID:
262     return "float";
263   case Type::DoubleTyID:
264     return "double";
265   case Type::IntegerTyID: {
266     if (!Signed)
267       return (Twine('u') + getOCLTypeName(Ty, true)).str();
268     unsigned BW = Ty->getIntegerBitWidth();
269     switch (BW) {
270     case 8:
271       return "char";
272     case 16:
273       return "short";
274     case 32:
275       return "int";
276     case 64:
277       return "long";
278     default:
279       return (Twine('i') + Twine(BW)).str();
280     }
281   }
282   case Type::VectorTyID: {
283     VectorType *VecTy = cast<VectorType>(Ty);
284     Type *EleTy = VecTy->getElementType();
285     unsigned Size = VecTy->getVectorNumElements();
286     return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
287   }
288   default:
289     return "unknown";
290   }
291 }
292 
293 static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
294   Type *Ty, StringRef TypeName) {
295   switch (Ty->getTypeID()) {
296   case Type::HalfTyID:
297     return RuntimeMD::KernelArg::F16;
298   case Type::FloatTyID:
299     return RuntimeMD::KernelArg::F32;
300   case Type::DoubleTyID:
301     return RuntimeMD::KernelArg::F64;
302   case Type::IntegerTyID: {
303     bool Signed = !TypeName.startswith("u");
304     switch (Ty->getIntegerBitWidth()) {
305     case 8:
306       return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
307     case 16:
308       return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
309     case 32:
310       return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
311     case 64:
312       return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
313     default:
314       // Runtime does not recognize other integer types. Report as struct type.
315       return RuntimeMD::KernelArg::Struct;
316     }
317   }
318   case Type::VectorTyID:
319     return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
320   case Type::PointerTyID:
321     return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
322   default:
323     return RuntimeMD::KernelArg::Struct;
324   }
325 }
326 
327 static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
328     AMDGPUAS::AddressSpaces A) {
329   switch (A) {
330   case AMDGPUAS::GLOBAL_ADDRESS:
331     return RuntimeMD::KernelArg::Global;
332   case AMDGPUAS::CONSTANT_ADDRESS:
333     return RuntimeMD::KernelArg::Constant;
334   case AMDGPUAS::LOCAL_ADDRESS:
335     return RuntimeMD::KernelArg::Local;
336   case AMDGPUAS::FLAT_ADDRESS:
337     return RuntimeMD::KernelArg::Generic;
338   case AMDGPUAS::REGION_ADDRESS:
339     return RuntimeMD::KernelArg::Region;
340   default:
341     return RuntimeMD::KernelArg::Private;
342   }
343 }
344 
345 void AMDGPUTargetStreamer::emitRuntimeMetadataForKernelArg(const DataLayout &DL,
346     Type *T, RuntimeMD::KernelArg::Kind Kind,
347     StringRef BaseTypeName, StringRef TypeName,
348     StringRef ArgName, StringRef TypeQual, StringRef AccQual) {
349   auto &S = getStreamer();
350 
351   // Emit KeyArgBegin.
352   S.EmitIntValue(RuntimeMD::KeyArgBegin, 1);
353 
354   // Emit KeyArgSize and KeyArgAlign.
355   emitRuntimeMDIntValue(RuntimeMD::KeyArgSize,
356                         DL.getTypeAllocSize(T), 4);
357   emitRuntimeMDIntValue(RuntimeMD::KeyArgAlign,
358                         DL.getABITypeAlignment(T), 4);
359   if (auto PT = dyn_cast<PointerType>(T)) {
360     auto ET = PT->getElementType();
361     if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
362       emitRuntimeMDIntValue(RuntimeMD::KeyArgPointeeAlign,
363                             DL.getABITypeAlignment(ET), 4);
364   }
365 
366   // Emit KeyArgTypeName.
367   if (!TypeName.empty())
368     emitRuntimeMDStringValue(RuntimeMD::KeyArgTypeName, TypeName);
369 
370   // Emit KeyArgName.
371   if (!ArgName.empty())
372     emitRuntimeMDStringValue(RuntimeMD::KeyArgName, ArgName);
373 
374   // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
375   SmallVector<StringRef, 1> SplitQ;
376   TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
377 
378   for (StringRef KeyName : SplitQ) {
379     auto Key = StringSwitch<RuntimeMD::Key>(KeyName)
380       .Case("volatile", RuntimeMD::KeyArgIsVolatile)
381       .Case("restrict", RuntimeMD::KeyArgIsRestrict)
382       .Case("const",    RuntimeMD::KeyArgIsConst)
383       .Case("pipe",     RuntimeMD::KeyArgIsPipe)
384       .Default(RuntimeMD::KeyNull);
385     S.EmitIntValue(Key, 1);
386   }
387 
388   // Emit KeyArgKind.
389   emitRuntimeMDIntValue(RuntimeMD::KeyArgKind, Kind, 1);
390 
391   // Emit KeyArgValueType.
392   emitRuntimeMDIntValue(RuntimeMD::KeyArgValueType,
393                         getRuntimeMDValueType(T, BaseTypeName), 2);
394 
395   // Emit KeyArgAccQual.
396   if (!AccQual.empty()) {
397     auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
398       .Case("read_only",  RuntimeMD::KernelArg::ReadOnly)
399       .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
400       .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
401       .Default(RuntimeMD::KernelArg::None);
402     emitRuntimeMDIntValue(RuntimeMD::KeyArgAccQual, AQ, 1);
403   }
404 
405   // Emit KeyArgAddrQual.
406   if (auto *PT = dyn_cast<PointerType>(T))
407     emitRuntimeMDIntValue(RuntimeMD::KeyArgAddrQual,
408         getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
409             PT->getAddressSpace())), 1);
410 
411   // Emit KeyArgEnd
412   S.EmitIntValue(RuntimeMD::KeyArgEnd, 1);
413 }
414 
415 void AMDGPUTargetStreamer::emitRuntimeMetadata(const Function &F) {
416   if (!F.getMetadata("kernel_arg_type"))
417     return;
418   auto &S = getStreamer();
419   S.EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
420   emitRuntimeMDStringValue(RuntimeMD::KeyKernelName, F.getName());
421 
422   const DataLayout &DL = F.getParent()->getDataLayout();
423   for (auto &Arg : F.args()) {
424     unsigned I = Arg.getArgNo();
425     Type *T = Arg.getType();
426     auto TypeName = dyn_cast<MDString>(F.getMetadata(
427         "kernel_arg_type")->getOperand(I))->getString();
428     auto BaseTypeName = cast<MDString>(F.getMetadata(
429         "kernel_arg_base_type")->getOperand(I))->getString();
430     StringRef ArgName;
431     if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
432       ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
433     auto TypeQual = cast<MDString>(F.getMetadata(
434         "kernel_arg_type_qual")->getOperand(I))->getString();
435     auto AccQual = cast<MDString>(F.getMetadata(
436         "kernel_arg_access_qual")->getOperand(I))->getString();
437     RuntimeMD::KernelArg::Kind Kind;
438     if (TypeQual.find("pipe") != StringRef::npos)
439       Kind = RuntimeMD::KernelArg::Pipe;
440     else Kind = StringSwitch<RuntimeMD::KernelArg::Kind>(BaseTypeName)
441       .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
442       .Case("queue_t",   RuntimeMD::KernelArg::Queue)
443       .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
444              "image2d_t" , "image2d_array_t",  RuntimeMD::KernelArg::Image)
445       .Cases("image2d_depth_t", "image2d_array_depth_t",
446              "image2d_msaa_t", "image2d_array_msaa_t",
447              "image2d_msaa_depth_t",  RuntimeMD::KernelArg::Image)
448       .Cases("image2d_array_msaa_depth_t", "image3d_t",
449              RuntimeMD::KernelArg::Image)
450       .Default(isa<PointerType>(T) ?
451                    (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
452                    RuntimeMD::KernelArg::DynamicSharedPointer :
453                    RuntimeMD::KernelArg::GlobalBuffer) :
454                    RuntimeMD::KernelArg::ByValue);
455     emitRuntimeMetadataForKernelArg(DL, T,
456         Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual);
457   }
458 
459   // Emit hidden kernel arguments for OpenCL kernels.
460   if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
461     auto Int64T = Type::getInt64Ty(F.getContext());
462     emitRuntimeMetadataForKernelArg(DL, Int64T,
463                                     RuntimeMD::KernelArg::HiddenGlobalOffsetX);
464     emitRuntimeMetadataForKernelArg(DL, Int64T,
465                                     RuntimeMD::KernelArg::HiddenGlobalOffsetY);
466     emitRuntimeMetadataForKernelArg(DL, Int64T,
467                                     RuntimeMD::KernelArg::HiddenGlobalOffsetZ);
468     if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
469       auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
470           RuntimeMD::KernelArg::Global);
471       emitRuntimeMetadataForKernelArg(DL, Int8PtrT,
472                                       RuntimeMD::KernelArg::HiddenPrintfBuffer);
473     }
474   }
475 
476   // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
477   if (auto RWGS = F.getMetadata("reqd_work_group_size")) {
478     emitRuntimeMDThreeIntValues(RuntimeMD::KeyReqdWorkGroupSize,
479                                 RWGS, 4);
480   }
481 
482   if (auto WGSH = F.getMetadata("work_group_size_hint")) {
483     emitRuntimeMDThreeIntValues(RuntimeMD::KeyWorkGroupSizeHint,
484                                 WGSH, 4);
485   }
486 
487   if (auto VTH = F.getMetadata("vec_type_hint")) {
488     auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
489       VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
490       VTH->getOperand(1))->getZExtValue());
491     emitRuntimeMDStringValue(RuntimeMD::KeyVecTypeHint, TypeName);
492   }
493 
494   // Emit KeyKernelEnd
495   S.EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
496 }
497 
498 void AMDGPUTargetStreamer::emitRuntimeMetadataAsNoteElement(Module &M) {
499   auto &S = getStreamer();
500   auto &Context = S.getContext();
501 
502   auto NameSZ = sizeof(PT_NOTE::NoteName); // Size of note name including trailing null.
503 
504   S.PushSection();
505   S.SwitchSection(Context.getELFSection(
506       PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
507 
508   // Create two labels to mark the beginning and end of the desc field
509   // and a MCExpr to calculate the size of the desc field.
510   auto *DescBegin = Context.createTempSymbol();
511   auto *DescEnd = Context.createTempSymbol();
512   auto *DescSZ = MCBinaryExpr::createSub(
513       MCSymbolRefExpr::create(DescEnd, Context),
514       MCSymbolRefExpr::create(DescBegin, Context), Context);
515 
516   // Emit the note element for runtime metadata.
517   // Name and desc should be padded to 4 byte boundary but size of name and
518   // desc should not include padding 0's.
519   S.EmitIntValue(NameSZ, 4);                                  // namesz
520   S.EmitValue(DescSZ, 4);                                     // descz
521   S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type
522   S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ));          // name
523   S.EmitValueToAlignment(4);                                  // padding 0
524   S.EmitLabel(DescBegin);
525   emitRuntimeMetadata(M);                                     // desc
526   S.EmitLabel(DescEnd);
527   S.EmitValueToAlignment(4);                                  // padding 0
528   S.PopSection();
529 }
530 
531 void AMDGPUTargetStreamer::emitRuntimeMetadata(Module &M) {
532   emitStartOfRuntimeMetadata(M);
533   for (auto &F : M.functions())
534     emitRuntimeMetadata(F);
535 }
536 
537