1 //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // RTL for generic 64-bit machine 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include <cassert> 14 #include <cstdio> 15 #include <cstdlib> 16 #include <cstring> 17 #include <dlfcn.h> 18 #include <ffi.h> 19 #include <gelf.h> 20 #include <link.h> 21 #include <list> 22 #include <string> 23 #include <vector> 24 25 #include "Debug.h" 26 #include "omptargetplugin.h" 27 28 #ifndef TARGET_NAME 29 #define TARGET_NAME Generic ELF - 64bit 30 #endif 31 #define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL" 32 33 #ifndef TARGET_ELF_ID 34 #define TARGET_ELF_ID 0 35 #endif 36 37 #include "elf_common.h" 38 39 #define NUMBER_OF_DEVICES 4 40 #define OFFLOADSECTIONNAME "omp_offloading_entries" 41 42 /// Array of Dynamic libraries loaded for this target. 43 struct DynLibTy { 44 std::string FileName; 45 void *Handle; 46 }; 47 48 /// Keep entries table per device. 49 struct FuncOrGblEntryTy { 50 __tgt_target_table Table; 51 }; 52 53 /// Class containing all the device information. 54 class RTLDeviceInfoTy { 55 std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries; 56 57 public: 58 std::list<DynLibTy> DynLibs; 59 60 // Record entry point associated with device. 61 void createOffloadTable(int32_t DeviceId, __tgt_offload_entry *Begin, 62 __tgt_offload_entry *End) { 63 assert(DeviceId < (int32_t)FuncGblEntries.size() && 64 "Unexpected device id!"); 65 FuncGblEntries[DeviceId].emplace_back(); 66 FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); 67 68 E.Table.EntriesBegin = Begin; 69 E.Table.EntriesEnd = End; 70 } 71 72 // Return true if the entry is associated with device. 73 bool findOffloadEntry(int32_t DeviceId, void *Addr) { 74 assert(DeviceId < (int32_t)FuncGblEntries.size() && 75 "Unexpected device id!"); 76 FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); 77 78 for (__tgt_offload_entry *I = E.Table.EntriesBegin, 79 *End = E.Table.EntriesEnd; 80 I < End; ++I) { 81 if (I->addr == Addr) 82 return true; 83 } 84 85 return false; 86 } 87 88 // Return the pointer to the target entries table. 89 __tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) { 90 assert(DeviceId < (int32_t)FuncGblEntries.size() && 91 "Unexpected device id!"); 92 FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); 93 94 return &E.Table; 95 } 96 97 RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); } 98 99 ~RTLDeviceInfoTy() { 100 // Close dynamic libraries 101 for (auto &Lib : DynLibs) { 102 if (Lib.Handle) { 103 dlclose(Lib.Handle); 104 remove(Lib.FileName.c_str()); 105 } 106 } 107 } 108 }; 109 110 static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES); 111 112 #ifdef __cplusplus 113 extern "C" { 114 #endif 115 116 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { 117 // If we don't have a valid ELF ID we can just fail. 118 #if TARGET_ELF_ID < 1 119 return 0; 120 #else 121 return elf_check_machine(Image, TARGET_ELF_ID); 122 #endif 123 } 124 125 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; } 126 127 int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; } 128 129 __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, 130 __tgt_device_image *Image) { 131 132 DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId, 133 DPxPTR(Image->ImageStart)); 134 135 assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id"); 136 137 size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart; 138 size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin); 139 DP("Expecting to have %zd entries defined.\n", NumEntries); 140 141 // Is the library version incompatible with the header file? 142 if (elf_version(EV_CURRENT) == EV_NONE) { 143 DP("Incompatible ELF library!\n"); 144 return NULL; 145 } 146 147 // Obtain elf handler 148 Elf *E = elf_memory((char *)Image->ImageStart, ImageSize); 149 if (!E) { 150 DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1)); 151 return NULL; 152 } 153 154 if (elf_kind(E) != ELF_K_ELF) { 155 DP("Invalid Elf kind!\n"); 156 elf_end(E); 157 return NULL; 158 } 159 160 // Find the entries section offset 161 Elf_Scn *Section = 0; 162 Elf64_Off EntriesOffset = 0; 163 164 size_t Shstrndx; 165 166 if (elf_getshdrstrndx(E, &Shstrndx)) { 167 DP("Unable to get ELF strings index!\n"); 168 elf_end(E); 169 return NULL; 170 } 171 172 while ((Section = elf_nextscn(E, Section))) { 173 GElf_Shdr Hdr; 174 gelf_getshdr(Section, &Hdr); 175 176 if (!strcmp(elf_strptr(E, Shstrndx, Hdr.sh_name), OFFLOADSECTIONNAME)) { 177 EntriesOffset = Hdr.sh_addr; 178 break; 179 } 180 } 181 182 if (!EntriesOffset) { 183 DP("Entries Section Offset Not Found\n"); 184 elf_end(E); 185 return NULL; 186 } 187 188 DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(EntriesOffset)); 189 190 // load dynamic library and get the entry points. We use the dl library 191 // to do the loading of the library, but we could do it directly to avoid the 192 // dump to the temporary file. 193 // 194 // 1) Create tmp file with the library contents. 195 // 2) Use dlopen to load the file and dlsym to retrieve the symbols. 196 char TmpName[] = "/tmp/tmpfile_XXXXXX"; 197 int TmpFd = mkstemp(TmpName); 198 199 if (TmpFd == -1) { 200 elf_end(E); 201 return NULL; 202 } 203 204 FILE *Ftmp = fdopen(TmpFd, "wb"); 205 206 if (!Ftmp) { 207 elf_end(E); 208 return NULL; 209 } 210 211 fwrite(Image->ImageStart, ImageSize, 1, Ftmp); 212 fclose(Ftmp); 213 214 DynLibTy Lib = {TmpName, dlopen(TmpName, RTLD_LAZY)}; 215 216 if (!Lib.Handle) { 217 DP("Target library loading error: %s\n", dlerror()); 218 elf_end(E); 219 return NULL; 220 } 221 222 DeviceInfo.DynLibs.push_back(Lib); 223 224 struct link_map *LibInfo = (struct link_map *)Lib.Handle; 225 226 // The place where the entries info is loaded is the library base address 227 // plus the offset determined from the ELF file. 228 Elf64_Addr EntriesAddr = LibInfo->l_addr + EntriesOffset; 229 230 DP("Pointer to first entry to be loaded is (" DPxMOD ").\n", 231 DPxPTR(EntriesAddr)); 232 233 // Table of pointers to all the entries in the target. 234 __tgt_offload_entry *EntriesTable = (__tgt_offload_entry *)EntriesAddr; 235 236 __tgt_offload_entry *EntriesBegin = &EntriesTable[0]; 237 __tgt_offload_entry *EntriesEnd = EntriesBegin + NumEntries; 238 239 if (!EntriesBegin) { 240 DP("Can't obtain entries begin\n"); 241 elf_end(E); 242 return NULL; 243 } 244 245 DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n", 246 DPxPTR(EntriesBegin), DPxPTR(EntriesEnd)); 247 DeviceInfo.createOffloadTable(DeviceId, EntriesBegin, EntriesEnd); 248 249 elf_end(E); 250 251 return DeviceInfo.getOffloadEntriesTable(DeviceId); 252 } 253 254 void __tgt_rtl_print_device_info(int32_t DeviceId) { 255 printf(" This is a generic-elf-64bit device\n"); 256 } 257 258 // Sample implementation of explicit memory allocator. For this plugin all kinds 259 // are equivalent to each other. 260 void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr, 261 int32_t Kind) { 262 void *Ptr = NULL; 263 264 switch (Kind) { 265 case TARGET_ALLOC_DEVICE: 266 case TARGET_ALLOC_HOST: 267 case TARGET_ALLOC_SHARED: 268 case TARGET_ALLOC_DEFAULT: 269 Ptr = malloc(Size); 270 break; 271 default: 272 REPORT("Invalid target data allocation kind"); 273 } 274 275 return Ptr; 276 } 277 278 int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, 279 int64_t Size) { 280 memcpy(TgtPtr, HstPtr, Size); 281 return OFFLOAD_SUCCESS; 282 } 283 284 int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, 285 int64_t Size) { 286 memcpy(HstPtr, TgtPtr, Size); 287 return OFFLOAD_SUCCESS; 288 } 289 290 int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) { 291 free(TgtPtr); 292 return OFFLOAD_SUCCESS; 293 } 294 295 int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr, 296 void **TgtArgs, ptrdiff_t *TgtOffsets, 297 int32_t ArgNum, int32_t TeamNum, 298 int32_t ThreadLimit, 299 uint64_t LoopTripcount /*not used*/) { 300 // ignore team num and thread limit. 301 302 // Use libffi to launch execution. 303 ffi_cif Cif; 304 305 // All args are references. 306 std::vector<ffi_type *> ArgsTypes(ArgNum, &ffi_type_pointer); 307 std::vector<void *> Args(ArgNum); 308 std::vector<void *> Ptrs(ArgNum); 309 310 for (int32_t I = 0; I < ArgNum; ++I) { 311 Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]); 312 Args[I] = &Ptrs[I]; 313 } 314 315 ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, ArgNum, 316 &ffi_type_void, &ArgsTypes[0]); 317 318 assert(Status == FFI_OK && "Unable to prepare target launch!"); 319 320 if (Status != FFI_OK) 321 return OFFLOAD_FAIL; 322 323 DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr)); 324 325 void (*Entry)(void); 326 *((void **)&Entry) = TgtEntryPtr; 327 ffi_call(&Cif, Entry, NULL, &Args[0]); 328 return OFFLOAD_SUCCESS; 329 } 330 331 int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr, 332 void **TgtArgs, ptrdiff_t *TgtOffsets, 333 int32_t ArgNum) { 334 // use one team and one thread. 335 return __tgt_rtl_run_target_team_region(DeviceId, TgtEntryPtr, TgtArgs, 336 TgtOffsets, ArgNum, 1, 1, 0); 337 } 338 339 #ifdef __cplusplus 340 } 341 #endif 342