1 //===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file provides AMDGPU specific target streamer methods. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUTargetStreamer.h" 15 #include "SIDefines.h" 16 #include "llvm/ADT/Twine.h" 17 #include "llvm/MC/MCContext.h" 18 #include "llvm/MC/MCELFStreamer.h" 19 #include "llvm/MC/MCObjectFileInfo.h" 20 #include "llvm/MC/MCSectionELF.h" 21 #include "llvm/Support/ELF.h" 22 #include "llvm/Support/FormattedStream.h" 23 24 using namespace llvm; 25 26 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) 27 : MCTargetStreamer(S) { } 28 29 //===----------------------------------------------------------------------===// 30 // AMDGPUTargetAsmStreamer 31 //===----------------------------------------------------------------------===// 32 33 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, 34 formatted_raw_ostream &OS) 35 : AMDGPUTargetStreamer(S), OS(OS) { } 36 37 void 38 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, 39 uint32_t Minor) { 40 OS << "\t.hsa_code_object_version " << 41 Twine(Major) << "," << Twine(Minor) << '\n'; 42 } 43 44 void 45 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, 46 uint32_t Minor, 47 uint32_t Stepping, 48 StringRef VendorName, 49 StringRef ArchName) { 50 OS << "\t.hsa_code_object_isa " << 51 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) << 52 ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; 53 54 } 55 56 void 57 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { 58 uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32); 59 bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties & 60 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); 61 bool EnableSGPRDispatchPtr = (Header.code_properties & 62 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); 63 bool EnableSGPRQueuePtr = (Header.code_properties & 64 AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); 65 bool EnableSGPRKernargSegmentPtr = (Header.code_properties & 66 AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); 67 bool EnableSGPRDispatchID = (Header.code_properties & 68 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); 69 bool EnableSGPRFlatScratchInit = (Header.code_properties & 70 AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); 71 bool EnableSGPRPrivateSegmentSize = (Header.code_properties & 72 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); 73 bool EnableSGPRGridWorkgroupCountX = (Header.code_properties & 74 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X); 75 bool EnableSGPRGridWorkgroupCountY = (Header.code_properties & 76 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y); 77 bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties & 78 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z); 79 bool EnableOrderedAppendGDS = (Header.code_properties & 80 AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS); 81 uint32_t PrivateElementSize = (Header.code_properties & 82 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >> 83 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT; 84 bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64); 85 bool IsDynamicCallstack = (Header.code_properties & 86 AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK); 87 bool IsDebugEnabled = (Header.code_properties & 88 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED); 89 bool IsXNackEnabled = (Header.code_properties & 90 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED); 91 92 OS << "\t.amd_kernel_code_t\n" << 93 "\t\tkernel_code_version_major = " << 94 Header.amd_kernel_code_version_major << '\n' << 95 "\t\tkernel_code_version_minor = " << 96 Header.amd_kernel_code_version_minor << '\n' << 97 "\t\tmachine_kind = " << 98 Header.amd_machine_kind << '\n' << 99 "\t\tmachine_version_major = " << 100 Header.amd_machine_version_major << '\n' << 101 "\t\tmachine_version_minor = " << 102 Header.amd_machine_version_minor << '\n' << 103 "\t\tmachine_version_stepping = " << 104 Header.amd_machine_version_stepping << '\n' << 105 "\t\tkernel_code_entry_byte_offset = " << 106 Header.kernel_code_entry_byte_offset << '\n' << 107 "\t\tkernel_code_prefetch_byte_size = " << 108 Header.kernel_code_prefetch_byte_size << '\n' << 109 "\t\tmax_scratch_backing_memory_byte_size = " << 110 Header.max_scratch_backing_memory_byte_size << '\n' << 111 "\t\tcompute_pgm_rsrc1_vgprs = " << 112 G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' << 113 "\t\tcompute_pgm_rsrc1_sgprs = " << 114 G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' << 115 "\t\tcompute_pgm_rsrc1_priority = " << 116 G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' << 117 "\t\tcompute_pgm_rsrc1_float_mode = " << 118 G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' << 119 "\t\tcompute_pgm_rsrc1_priv = " << 120 G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' << 121 "\t\tcompute_pgm_rsrc1_dx10_clamp = " << 122 G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' << 123 "\t\tcompute_pgm_rsrc1_debug_mode = " << 124 G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' << 125 "\t\tcompute_pgm_rsrc1_ieee_mode = " << 126 G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' << 127 "\t\tcompute_pgm_rsrc2_scratch_en = " << 128 G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' << 129 "\t\tcompute_pgm_rsrc2_user_sgpr = " << 130 G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' << 131 "\t\tcompute_pgm_rsrc2_tgid_x_en = " << 132 G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' << 133 "\t\tcompute_pgm_rsrc2_tgid_y_en = " << 134 G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' << 135 "\t\tcompute_pgm_rsrc2_tgid_z_en = " << 136 G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' << 137 "\t\tcompute_pgm_rsrc2_tg_size_en = " << 138 G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' << 139 "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " << 140 G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' << 141 "\t\tcompute_pgm_rsrc2_excp_en_msb = " << 142 G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' << 143 "\t\tcompute_pgm_rsrc2_lds_size = " << 144 G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' << 145 "\t\tcompute_pgm_rsrc2_excp_en = " << 146 G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' << 147 148 "\t\tenable_sgpr_private_segment_buffer = " << 149 EnableSGPRPrivateSegmentBuffer << '\n' << 150 "\t\tenable_sgpr_dispatch_ptr = " << 151 EnableSGPRDispatchPtr << '\n' << 152 "\t\tenable_sgpr_queue_ptr = " << 153 EnableSGPRQueuePtr << '\n' << 154 "\t\tenable_sgpr_kernarg_segment_ptr = " << 155 EnableSGPRKernargSegmentPtr << '\n' << 156 "\t\tenable_sgpr_dispatch_id = " << 157 EnableSGPRDispatchID << '\n' << 158 "\t\tenable_sgpr_flat_scratch_init = " << 159 EnableSGPRFlatScratchInit << '\n' << 160 "\t\tenable_sgpr_private_segment_size = " << 161 EnableSGPRPrivateSegmentSize << '\n' << 162 "\t\tenable_sgpr_grid_workgroup_count_x = " << 163 EnableSGPRGridWorkgroupCountX << '\n' << 164 "\t\tenable_sgpr_grid_workgroup_count_y = " << 165 EnableSGPRGridWorkgroupCountY << '\n' << 166 "\t\tenable_sgpr_grid_workgroup_count_z = " << 167 EnableSGPRGridWorkgroupCountZ << '\n' << 168 "\t\tenable_ordered_append_gds = " << 169 EnableOrderedAppendGDS << '\n' << 170 "\t\tprivate_element_size = " << 171 PrivateElementSize << '\n' << 172 "\t\tis_ptr64 = " << 173 IsPtr64 << '\n' << 174 "\t\tis_dynamic_callstack = " << 175 IsDynamicCallstack << '\n' << 176 "\t\tis_debug_enabled = " << 177 IsDebugEnabled << '\n' << 178 "\t\tis_xnack_enabled = " << 179 IsXNackEnabled << '\n' << 180 "\t\tworkitem_private_segment_byte_size = " << 181 Header.workitem_private_segment_byte_size << '\n' << 182 "\t\tworkgroup_group_segment_byte_size = " << 183 Header.workgroup_group_segment_byte_size << '\n' << 184 "\t\tgds_segment_byte_size = " << 185 Header.gds_segment_byte_size << '\n' << 186 "\t\tkernarg_segment_byte_size = " << 187 Header.kernarg_segment_byte_size << '\n' << 188 "\t\tworkgroup_fbarrier_count = " << 189 Header.workgroup_fbarrier_count << '\n' << 190 "\t\twavefront_sgpr_count = " << 191 Header.wavefront_sgpr_count << '\n' << 192 "\t\tworkitem_vgpr_count = " << 193 Header.workitem_vgpr_count << '\n' << 194 "\t\treserved_vgpr_first = " << 195 Header.reserved_vgpr_first << '\n' << 196 "\t\treserved_vgpr_count = " << 197 Header.reserved_vgpr_count << '\n' << 198 "\t\treserved_sgpr_first = " << 199 Header.reserved_sgpr_first << '\n' << 200 "\t\treserved_sgpr_count = " << 201 Header.reserved_sgpr_count << '\n' << 202 "\t\tdebug_wavefront_private_segment_offset_sgpr = " << 203 Header.debug_wavefront_private_segment_offset_sgpr << '\n' << 204 "\t\tdebug_private_segment_buffer_sgpr = " << 205 Header.debug_private_segment_buffer_sgpr << '\n' << 206 "\t\tkernarg_segment_alignment = " << 207 (uint32_t)Header.kernarg_segment_alignment << '\n' << 208 "\t\tgroup_segment_alignment = " << 209 (uint32_t)Header.group_segment_alignment << '\n' << 210 "\t\tprivate_segment_alignment = " << 211 (uint32_t)Header.private_segment_alignment << '\n' << 212 "\t\twavefront_size = " << 213 (uint32_t)Header.wavefront_size << '\n' << 214 "\t\tcall_convention = " << 215 Header.call_convention << '\n' << 216 "\t\truntime_loader_kernel_symbol = " << 217 Header.runtime_loader_kernel_symbol << '\n' << 218 // TODO: control_directives 219 "\t.end_amd_kernel_code_t\n"; 220 221 } 222 223 //===----------------------------------------------------------------------===// 224 // AMDGPUTargetELFStreamer 225 //===----------------------------------------------------------------------===// 226 227 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S) 228 : AMDGPUTargetStreamer(S), Streamer(S) { } 229 230 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { 231 return static_cast<MCELFStreamer &>(Streamer); 232 } 233 234 void 235 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, 236 uint32_t Minor) { 237 MCStreamer &OS = getStreamer(); 238 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0); 239 240 unsigned NameSZ = 4; 241 242 OS.PushSection(); 243 OS.SwitchSection(Note); 244 OS.EmitIntValue(NameSZ, 4); // namesz 245 OS.EmitIntValue(8, 4); // descz 246 OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type 247 OS.EmitBytes(StringRef("AMD", NameSZ)); // name 248 OS.EmitIntValue(Major, 4); // desc 249 OS.EmitIntValue(Minor, 4); 250 OS.EmitValueToAlignment(4); 251 OS.PopSection(); 252 } 253 254 void 255 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, 256 uint32_t Minor, 257 uint32_t Stepping, 258 StringRef VendorName, 259 StringRef ArchName) { 260 MCStreamer &OS = getStreamer(); 261 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0); 262 263 unsigned NameSZ = 4; 264 uint16_t VendorNameSize = VendorName.size() + 1; 265 uint16_t ArchNameSize = ArchName.size() + 1; 266 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + 267 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + 268 VendorNameSize + ArchNameSize; 269 270 OS.PushSection(); 271 OS.SwitchSection(Note); 272 OS.EmitIntValue(NameSZ, 4); // namesz 273 OS.EmitIntValue(DescSZ, 4); // descsz 274 OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type 275 OS.EmitBytes(StringRef("AMD", 4)); // name 276 OS.EmitIntValue(VendorNameSize, 2); // desc 277 OS.EmitIntValue(ArchNameSize, 2); 278 OS.EmitIntValue(Major, 4); 279 OS.EmitIntValue(Minor, 4); 280 OS.EmitIntValue(Stepping, 4); 281 OS.EmitBytes(VendorName); 282 OS.EmitIntValue(0, 1); // NULL terminate VendorName 283 OS.EmitBytes(ArchName); 284 OS.EmitIntValue(0, 1); // NULL terminte ArchName 285 OS.EmitValueToAlignment(4); 286 OS.PopSection(); 287 } 288 289 void 290 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { 291 292 MCStreamer &OS = getStreamer(); 293 OS.PushSection(); 294 OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection()); 295 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header))); 296 OS.PopSection(); 297 } 298