//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file provides AMDGPU specific target streamer methods.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUTargetStreamer.h"
15 #include "AMDGPU.h"
16 #include "SIDefines.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
21 #include "llvm/BinaryFormat/ELF.h"
22 #include "llvm/BinaryFormat/MsgPackTypes.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/Metadata.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCELFStreamer.h"
29 #include "llvm/MC/MCObjectFileInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/Support/FormattedStream.h"
32 #include "llvm/Support/TargetParser.h"
33
34 namespace llvm {
35 #include "AMDGPUPTNote.h"
36 }
37
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::AMDGPU::HSAMD;
41
42 //===----------------------------------------------------------------------===//
43 // AMDGPUTargetStreamer
44 //===----------------------------------------------------------------------===//
45
EmitHSAMetadataV2(StringRef HSAMetadataString)46 bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
47 HSAMD::Metadata HSAMetadata;
48 if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
49 return false;
50
51 return EmitHSAMetadata(HSAMetadata);
52 }
53
EmitHSAMetadataV3(StringRef HSAMetadataString)54 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
55 std::shared_ptr<msgpack::Node> HSAMetadataRoot;
56 yaml::Input YIn(HSAMetadataString);
57 YIn >> HSAMetadataRoot;
58 if (YIn.error())
59 return false;
60 return EmitHSAMetadata(HSAMetadataRoot, false);
61 }
62
getArchNameFromElfMach(unsigned ElfMach)63 StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
64 AMDGPU::GPUKind AK;
65
66 switch (ElfMach) {
67 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
68 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
69 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
70 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
71 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
72 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
73 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
74 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
75 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
76 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
77 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
78 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
79 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
80 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
81 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
82 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
99 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
100 }
101
102 StringRef GPUName = getArchNameAMDGCN(AK);
103 if (GPUName != "")
104 return GPUName;
105 return getArchNameR600(AK);
106 }
107
getElfMach(StringRef GPU)108 unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
109 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
110 if (AK == AMDGPU::GPUKind::GK_NONE)
111 AK = parseArchR600(GPU);
112
113 switch (AK) {
114 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
115 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
116 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
117 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
118 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
119 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
120 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
121 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
122 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
123 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
124 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
125 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
126 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
127 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
128 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
129 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
130 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
131 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
132 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
133 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
134 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
135 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
136 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
137 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
138 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
139 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
140 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
141 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
142 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
143 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
144 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
145 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
146 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
147 }
148
149 llvm_unreachable("unknown GPU");
150 }
151
152 //===----------------------------------------------------------------------===//
153 // AMDGPUTargetAsmStreamer
154 //===----------------------------------------------------------------------===//
155
AMDGPUTargetAsmStreamer(MCStreamer & S,formatted_raw_ostream & OS)156 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
157 formatted_raw_ostream &OS)
158 : AMDGPUTargetStreamer(S), OS(OS) { }
159
EmitDirectiveAMDGCNTarget(StringRef Target)160 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
161 OS << "\t.amdgcn_target \"" << Target << "\"\n";
162 }
163
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)164 void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
165 uint32_t Major, uint32_t Minor) {
166 OS << "\t.hsa_code_object_version " <<
167 Twine(Major) << "," << Twine(Minor) << '\n';
168 }
169
170 void
EmitDirectiveHSACodeObjectISA(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)171 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
172 uint32_t Minor,
173 uint32_t Stepping,
174 StringRef VendorName,
175 StringRef ArchName) {
176 OS << "\t.hsa_code_object_isa " <<
177 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
178 ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
179
180 }
181
182 void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)183 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
184 OS << "\t.amd_kernel_code_t\n";
185 dumpAmdKernelCode(&Header, OS, "\t\t");
186 OS << "\t.end_amd_kernel_code_t\n";
187 }
188
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)189 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
190 unsigned Type) {
191 switch (Type) {
192 default: llvm_unreachable("Invalid AMDGPU symbol type");
193 case ELF::STT_AMDGPU_HSA_KERNEL:
194 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
195 break;
196 }
197 }
198
EmitISAVersion(StringRef IsaVersionString)199 bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
200 OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
201 return true;
202 }
203
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)204 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
205 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
206 std::string HSAMetadataString;
207 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
208 return false;
209
210 OS << '\t' << AssemblerDirectiveBegin << '\n';
211 OS << HSAMetadataString << '\n';
212 OS << '\t' << AssemblerDirectiveEnd << '\n';
213 return true;
214 }
215
EmitHSAMetadata(std::shared_ptr<msgpack::Node> & HSAMetadataRoot,bool Strict)216 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
217 std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
218 V3::MetadataVerifier Verifier(Strict);
219 if (!Verifier.verify(*HSAMetadataRoot))
220 return false;
221
222 std::string HSAMetadataString;
223 raw_string_ostream StrOS(HSAMetadataString);
224 yaml::Output YOut(StrOS);
225 YOut << HSAMetadataRoot;
226
227 OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
228 OS << StrOS.str() << '\n';
229 OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
230 return true;
231 }
232
EmitPALMetadata(const PALMD::Metadata & PALMetadata)233 bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
234 const PALMD::Metadata &PALMetadata) {
235 std::string PALMetadataString;
236 if (PALMD::toString(PALMetadata, PALMetadataString))
237 return false;
238
239 OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
240 return true;
241 }
242
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const amdhsa::kernel_descriptor_t & KD,uint64_t NextVGPR,uint64_t NextSGPR,bool ReserveVCC,bool ReserveFlatScr,bool ReserveXNACK)243 void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
244 const MCSubtargetInfo &STI, StringRef KernelName,
245 const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
246 bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
247 IsaVersion IVersion = getIsaVersion(STI.getCPU());
248
249 OS << "\t.amdhsa_kernel " << KernelName << '\n';
250
251 #define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
252 STREAM << "\t\t" << DIRECTIVE << " " \
253 << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
254
255 OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
256 << '\n';
257 OS << "\t\t.amdhsa_private_segment_fixed_size "
258 << KD.private_segment_fixed_size << '\n';
259
260 PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
261 kernel_code_properties,
262 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
263 PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
264 kernel_code_properties,
265 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
266 PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
267 kernel_code_properties,
268 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
269 PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
270 kernel_code_properties,
271 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
272 PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
273 kernel_code_properties,
274 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
275 PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
276 kernel_code_properties,
277 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
278 PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
279 kernel_code_properties,
280 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
281 PRINT_FIELD(
282 OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
283 compute_pgm_rsrc2,
284 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
285 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
286 compute_pgm_rsrc2,
287 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
288 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
289 compute_pgm_rsrc2,
290 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
291 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
292 compute_pgm_rsrc2,
293 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
294 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
295 compute_pgm_rsrc2,
296 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
297 PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
298 compute_pgm_rsrc2,
299 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
300
301 // These directives are required.
302 OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
303 OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
304
305 if (!ReserveVCC)
306 OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
307 if (IVersion.Major >= 7 && !ReserveFlatScr)
308 OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
309 if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
310 OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
311
312 PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
313 compute_pgm_rsrc1,
314 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
315 PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
316 compute_pgm_rsrc1,
317 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
318 PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
319 compute_pgm_rsrc1,
320 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
321 PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
322 compute_pgm_rsrc1,
323 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
324 PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
325 compute_pgm_rsrc1,
326 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
327 PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
328 compute_pgm_rsrc1,
329 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
330 if (IVersion.Major >= 9)
331 PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
332 compute_pgm_rsrc1,
333 amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
334 PRINT_FIELD(
335 OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
336 compute_pgm_rsrc2,
337 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
338 PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
339 compute_pgm_rsrc2,
340 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
341 PRINT_FIELD(
342 OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
343 compute_pgm_rsrc2,
344 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
345 PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
346 compute_pgm_rsrc2,
347 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
348 PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
349 compute_pgm_rsrc2,
350 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
351 PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
352 compute_pgm_rsrc2,
353 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
354 PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
355 compute_pgm_rsrc2,
356 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
357 #undef PRINT_FIELD
358
359 OS << "\t.end_amdhsa_kernel\n";
360 }
361
362 //===----------------------------------------------------------------------===//
363 // AMDGPUTargetELFStreamer
364 //===----------------------------------------------------------------------===//
365
AMDGPUTargetELFStreamer(MCStreamer & S,const MCSubtargetInfo & STI)366 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
367 MCStreamer &S, const MCSubtargetInfo &STI)
368 : AMDGPUTargetStreamer(S), Streamer(S) {
369 MCAssembler &MCA = getStreamer().getAssembler();
370 unsigned EFlags = MCA.getELFHeaderEFlags();
371
372 EFlags &= ~ELF::EF_AMDGPU_MACH;
373 EFlags |= getElfMach(STI.getCPU());
374
375 EFlags &= ~ELF::EF_AMDGPU_XNACK;
376 if (AMDGPU::hasXNACK(STI))
377 EFlags |= ELF::EF_AMDGPU_XNACK;
378
379 EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
380 if (AMDGPU::hasSRAMECC(STI))
381 EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
382
383 MCA.setELFHeaderEFlags(EFlags);
384 }
385
getStreamer()386 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
387 return static_cast<MCELFStreamer &>(Streamer);
388 }
389
EmitNote(StringRef Name,const MCExpr * DescSZ,unsigned NoteType,function_ref<void (MCELFStreamer &)> EmitDesc)390 void AMDGPUTargetELFStreamer::EmitNote(
391 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
392 function_ref<void(MCELFStreamer &)> EmitDesc) {
393 auto &S = getStreamer();
394 auto &Context = S.getContext();
395
396 auto NameSZ = Name.size() + 1;
397
398 S.PushSection();
399 S.SwitchSection(Context.getELFSection(
400 ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
401 S.EmitIntValue(NameSZ, 4); // namesz
402 S.EmitValue(DescSZ, 4); // descz
403 S.EmitIntValue(NoteType, 4); // type
404 S.EmitBytes(Name); // name
405 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
406 EmitDesc(S); // desc
407 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
408 S.PopSection();
409 }
410
EmitDirectiveAMDGCNTarget(StringRef Target)411 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
412
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)413 void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
414 uint32_t Major, uint32_t Minor) {
415
416 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
417 ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
418 OS.EmitIntValue(Major, 4);
419 OS.EmitIntValue(Minor, 4);
420 });
421 }
422
423 void
EmitDirectiveHSACodeObjectISA(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)424 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
425 uint32_t Minor,
426 uint32_t Stepping,
427 StringRef VendorName,
428 StringRef ArchName) {
429 uint16_t VendorNameSize = VendorName.size() + 1;
430 uint16_t ArchNameSize = ArchName.size() + 1;
431
432 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
433 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
434 VendorNameSize + ArchNameSize;
435
436 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
437 ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
438 OS.EmitIntValue(VendorNameSize, 2);
439 OS.EmitIntValue(ArchNameSize, 2);
440 OS.EmitIntValue(Major, 4);
441 OS.EmitIntValue(Minor, 4);
442 OS.EmitIntValue(Stepping, 4);
443 OS.EmitBytes(VendorName);
444 OS.EmitIntValue(0, 1); // NULL terminate VendorName
445 OS.EmitBytes(ArchName);
446 OS.EmitIntValue(0, 1); // NULL terminte ArchName
447 });
448 }
449
450 void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)451 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
452
453 MCStreamer &OS = getStreamer();
454 OS.PushSection();
455 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
456 OS.PopSection();
457 }
458
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)459 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
460 unsigned Type) {
461 MCSymbolELF *Symbol = cast<MCSymbolELF>(
462 getStreamer().getContext().getOrCreateSymbol(SymbolName));
463 Symbol->setType(Type);
464 }
465
EmitISAVersion(StringRef IsaVersionString)466 bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
467 // Create two labels to mark the beginning and end of the desc field
468 // and a MCExpr to calculate the size of the desc field.
469 auto &Context = getContext();
470 auto *DescBegin = Context.createTempSymbol();
471 auto *DescEnd = Context.createTempSymbol();
472 auto *DescSZ = MCBinaryExpr::createSub(
473 MCSymbolRefExpr::create(DescEnd, Context),
474 MCSymbolRefExpr::create(DescBegin, Context), Context);
475
476 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
477 [&](MCELFStreamer &OS) {
478 OS.EmitLabel(DescBegin);
479 OS.EmitBytes(IsaVersionString);
480 OS.EmitLabel(DescEnd);
481 });
482 return true;
483 }
484
EmitHSAMetadata(std::shared_ptr<msgpack::Node> & HSAMetadataRoot,bool Strict)485 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
486 std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
487 V3::MetadataVerifier Verifier(Strict);
488 if (!Verifier.verify(*HSAMetadataRoot))
489 return false;
490
491 std::string HSAMetadataString;
492 raw_string_ostream StrOS(HSAMetadataString);
493 msgpack::Writer MPWriter(StrOS);
494 HSAMetadataRoot->write(MPWriter);
495
496 // Create two labels to mark the beginning and end of the desc field
497 // and a MCExpr to calculate the size of the desc field.
498 auto &Context = getContext();
499 auto *DescBegin = Context.createTempSymbol();
500 auto *DescEnd = Context.createTempSymbol();
501 auto *DescSZ = MCBinaryExpr::createSub(
502 MCSymbolRefExpr::create(DescEnd, Context),
503 MCSymbolRefExpr::create(DescBegin, Context), Context);
504
505 EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
506 [&](MCELFStreamer &OS) {
507 OS.EmitLabel(DescBegin);
508 OS.EmitBytes(StrOS.str());
509 OS.EmitLabel(DescEnd);
510 });
511 return true;
512 }
513
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)514 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
515 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
516 std::string HSAMetadataString;
517 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
518 return false;
519
520 // Create two labels to mark the beginning and end of the desc field
521 // and a MCExpr to calculate the size of the desc field.
522 auto &Context = getContext();
523 auto *DescBegin = Context.createTempSymbol();
524 auto *DescEnd = Context.createTempSymbol();
525 auto *DescSZ = MCBinaryExpr::createSub(
526 MCSymbolRefExpr::create(DescEnd, Context),
527 MCSymbolRefExpr::create(DescBegin, Context), Context);
528
529 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
530 [&](MCELFStreamer &OS) {
531 OS.EmitLabel(DescBegin);
532 OS.EmitBytes(HSAMetadataString);
533 OS.EmitLabel(DescEnd);
534 });
535 return true;
536 }
537
EmitPALMetadata(const PALMD::Metadata & PALMetadata)538 bool AMDGPUTargetELFStreamer::EmitPALMetadata(
539 const PALMD::Metadata &PALMetadata) {
540 EmitNote(ElfNote::NoteNameV2,
541 MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
542 getContext()),
543 ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
544 for (auto I : PALMetadata)
545 OS.EmitIntValue(I, sizeof(uint32_t));
546 });
547 return true;
548 }
549
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const amdhsa::kernel_descriptor_t & KernelDescriptor,uint64_t NextVGPR,uint64_t NextSGPR,bool ReserveVCC,bool ReserveFlatScr,bool ReserveXNACK)550 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
551 const MCSubtargetInfo &STI, StringRef KernelName,
552 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
553 uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
554 bool ReserveXNACK) {
555 auto &Streamer = getStreamer();
556 auto &Context = Streamer.getContext();
557
558 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
559 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
560 KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
561 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
562 KernelDescriptorSymbol->setSize(
563 MCConstantExpr::create(sizeof(KernelDescriptor), Context));
564
565 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
566 Context.getOrCreateSymbol(Twine(KernelName)));
567 KernelCodeSymbol->setBinding(ELF::STB_LOCAL);
568
569 Streamer.EmitLabel(KernelDescriptorSymbol);
570 Streamer.EmitBytes(StringRef(
571 (const char*)&(KernelDescriptor),
572 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
573 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
574 // expression being created is:
575 // (start of kernel code) - (start of kernel descriptor)
576 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
577 Streamer.EmitValue(MCBinaryExpr::createSub(
578 MCSymbolRefExpr::create(
579 KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
580 MCSymbolRefExpr::create(
581 KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
582 Context),
583 sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
584 Streamer.EmitBytes(StringRef(
585 (const char*)&(KernelDescriptor) +
586 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
587 sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
588 sizeof(KernelDescriptor) -
589 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
590 sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
591 }
592