1 //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // RTL for generic 64-bit machine
10 //
11 //===----------------------------------------------------------------------===//
12 
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
#include <ffi.h>
#include <gelf.h>
#include <link.h>
#include <list>
#include <string>
#include <unistd.h>
#include <vector>
24 
25 #include "Debug.h"
26 #include "omptargetplugin.h"
27 
28 #ifndef TARGET_NAME
29 #define TARGET_NAME Generic ELF - 64bit
30 #endif
31 #define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL"
32 
33 #ifndef TARGET_ELF_ID
34 #define TARGET_ELF_ID 0
35 #endif
36 
37 #include "elf_common.h"
38 
39 #define NUMBER_OF_DEVICES 4
40 #define OFFLOADSECTIONNAME "omp_offloading_entries"
41 
/// Record describing one dynamic library loaded for this target.
struct DynLibTy {
  /// Path of the file the library was loaded from; the file is deleted when
  /// the owning RTLDeviceInfoTy is destroyed.
  std::string FileName;
  /// Handle returned by the dynamic loader. Defaults to null so that a
  /// default-constructed record is never mistaken for a loaded library.
  void *Handle = nullptr;
};
47 
48 /// Keep entries table per device.
49 struct FuncOrGblEntryTy {
50   __tgt_target_table Table;
51 };
52 
53 /// Class containing all the device information.
54 class RTLDeviceInfoTy {
55   std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
56 
57 public:
58   std::list<DynLibTy> DynLibs;
59 
60   // Record entry point associated with device.
createOffloadTable(int32_t DeviceId,__tgt_offload_entry * Begin,__tgt_offload_entry * End)61   void createOffloadTable(int32_t DeviceId, __tgt_offload_entry *Begin,
62                           __tgt_offload_entry *End) {
63     assert(DeviceId < (int32_t)FuncGblEntries.size() &&
64            "Unexpected device id!");
65     FuncGblEntries[DeviceId].emplace_back();
66     FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
67 
68     E.Table.EntriesBegin = Begin;
69     E.Table.EntriesEnd = End;
70   }
71 
72   // Return true if the entry is associated with device.
findOffloadEntry(int32_t DeviceId,void * Addr)73   bool findOffloadEntry(int32_t DeviceId, void *Addr) {
74     assert(DeviceId < (int32_t)FuncGblEntries.size() &&
75            "Unexpected device id!");
76     FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
77 
78     for (__tgt_offload_entry *I = E.Table.EntriesBegin,
79                              *End = E.Table.EntriesEnd;
80          I < End; ++I) {
81       if (I->addr == Addr)
82         return true;
83     }
84 
85     return false;
86   }
87 
88   // Return the pointer to the target entries table.
getOffloadEntriesTable(int32_t DeviceId)89   __tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) {
90     assert(DeviceId < (int32_t)FuncGblEntries.size() &&
91            "Unexpected device id!");
92     FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
93 
94     return &E.Table;
95   }
96 
RTLDeviceInfoTy(int32_t NumDevices)97   RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); }
98 
~RTLDeviceInfoTy()99   ~RTLDeviceInfoTy() {
100     // Close dynamic libraries
101     for (auto &Lib : DynLibs) {
102       if (Lib.Handle) {
103         dlclose(Lib.Handle);
104         remove(Lib.FileName.c_str());
105       }
106     }
107   }
108 };
109 
110 static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
111 
112 #ifdef __cplusplus
113 extern "C" {
114 #endif
115 
__tgt_rtl_is_valid_binary(__tgt_device_image * Image)116 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
117 // If we don't have a valid ELF ID we can just fail.
118 #if TARGET_ELF_ID < 1
119   return 0;
120 #else
121   return elf_check_machine(Image, TARGET_ELF_ID);
122 #endif
123 }
124 
__tgt_rtl_number_of_devices()125 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
126 
__tgt_rtl_init_device(int32_t DeviceId)127 int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; }
128 
__tgt_rtl_load_binary(int32_t DeviceId,__tgt_device_image * Image)129 __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
130                                           __tgt_device_image *Image) {
131 
132   DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId,
133      DPxPTR(Image->ImageStart));
134 
135   assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id");
136 
137   size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;
138   size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin);
139   DP("Expecting to have %zd entries defined.\n", NumEntries);
140 
141   // Is the library version incompatible with the header file?
142   if (elf_version(EV_CURRENT) == EV_NONE) {
143     DP("Incompatible ELF library!\n");
144     return NULL;
145   }
146 
147   // Obtain elf handler
148   Elf *E = elf_memory((char *)Image->ImageStart, ImageSize);
149   if (!E) {
150     DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
151     return NULL;
152   }
153 
154   if (elf_kind(E) != ELF_K_ELF) {
155     DP("Invalid Elf kind!\n");
156     elf_end(E);
157     return NULL;
158   }
159 
160   // Find the entries section offset
161   Elf_Scn *Section = 0;
162   Elf64_Off EntriesOffset = 0;
163 
164   size_t Shstrndx;
165 
166   if (elf_getshdrstrndx(E, &Shstrndx)) {
167     DP("Unable to get ELF strings index!\n");
168     elf_end(E);
169     return NULL;
170   }
171 
172   while ((Section = elf_nextscn(E, Section))) {
173     GElf_Shdr Hdr;
174     gelf_getshdr(Section, &Hdr);
175 
176     if (!strcmp(elf_strptr(E, Shstrndx, Hdr.sh_name), OFFLOADSECTIONNAME)) {
177       EntriesOffset = Hdr.sh_addr;
178       break;
179     }
180   }
181 
182   if (!EntriesOffset) {
183     DP("Entries Section Offset Not Found\n");
184     elf_end(E);
185     return NULL;
186   }
187 
188   DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(EntriesOffset));
189 
190   // load dynamic library and get the entry points. We use the dl library
191   // to do the loading of the library, but we could do it directly to avoid the
192   // dump to the temporary file.
193   //
194   // 1) Create tmp file with the library contents.
195   // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
196   char TmpName[] = "/tmp/tmpfile_XXXXXX";
197   int TmpFd = mkstemp(TmpName);
198 
199   if (TmpFd == -1) {
200     elf_end(E);
201     return NULL;
202   }
203 
204   FILE *Ftmp = fdopen(TmpFd, "wb");
205 
206   if (!Ftmp) {
207     elf_end(E);
208     return NULL;
209   }
210 
211   fwrite(Image->ImageStart, ImageSize, 1, Ftmp);
212   fclose(Ftmp);
213 
214   DynLibTy Lib = {TmpName, dlopen(TmpName, RTLD_LAZY)};
215 
216   if (!Lib.Handle) {
217     DP("Target library loading error: %s\n", dlerror());
218     elf_end(E);
219     return NULL;
220   }
221 
222   DeviceInfo.DynLibs.push_back(Lib);
223 
224   struct link_map *LibInfo = (struct link_map *)Lib.Handle;
225 
226   // The place where the entries info is loaded is the library base address
227   // plus the offset determined from the ELF file.
228   Elf64_Addr EntriesAddr = LibInfo->l_addr + EntriesOffset;
229 
230   DP("Pointer to first entry to be loaded is (" DPxMOD ").\n",
231      DPxPTR(EntriesAddr));
232 
233   // Table of pointers to all the entries in the target.
234   __tgt_offload_entry *EntriesTable = (__tgt_offload_entry *)EntriesAddr;
235 
236   __tgt_offload_entry *EntriesBegin = &EntriesTable[0];
237   __tgt_offload_entry *EntriesEnd = EntriesBegin + NumEntries;
238 
239   if (!EntriesBegin) {
240     DP("Can't obtain entries begin\n");
241     elf_end(E);
242     return NULL;
243   }
244 
245   DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n",
246      DPxPTR(EntriesBegin), DPxPTR(EntriesEnd));
247   DeviceInfo.createOffloadTable(DeviceId, EntriesBegin, EntriesEnd);
248 
249   elf_end(E);
250 
251   return DeviceInfo.getOffloadEntriesTable(DeviceId);
252 }
253 
/// Print a one-line description of the device to standard output. The text
/// is the same for every \p DeviceId.
void __tgt_rtl_print_device_info(int32_t DeviceId) {
  static const char Description[] = "    This is a generic-elf-64bit device\n";
  fputs(Description, stdout);
}
257 
258 // Sample implementation of explicit memory allocator. For this plugin all kinds
259 // are equivalent to each other.
__tgt_rtl_data_alloc(int32_t DeviceId,int64_t Size,void * HstPtr,int32_t Kind)260 void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
261                            int32_t Kind) {
262   void *Ptr = NULL;
263 
264   switch (Kind) {
265   case TARGET_ALLOC_DEVICE:
266   case TARGET_ALLOC_HOST:
267   case TARGET_ALLOC_SHARED:
268   case TARGET_ALLOC_DEFAULT:
269     Ptr = malloc(Size);
270     break;
271   default:
272     REPORT("Invalid target data allocation kind");
273   }
274 
275   return Ptr;
276 }
277 
__tgt_rtl_data_submit(int32_t DeviceId,void * TgtPtr,void * HstPtr,int64_t Size)278 int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
279                               int64_t Size) {
280   memcpy(TgtPtr, HstPtr, Size);
281   return OFFLOAD_SUCCESS;
282 }
283 
__tgt_rtl_data_retrieve(int32_t DeviceId,void * HstPtr,void * TgtPtr,int64_t Size)284 int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
285                                 int64_t Size) {
286   memcpy(HstPtr, TgtPtr, Size);
287   return OFFLOAD_SUCCESS;
288 }
289 
__tgt_rtl_data_delete(int32_t DeviceId,void * TgtPtr)290 int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
291   free(TgtPtr);
292   return OFFLOAD_SUCCESS;
293 }
294 
__tgt_rtl_run_target_team_region(int32_t DeviceId,void * TgtEntryPtr,void ** TgtArgs,ptrdiff_t * TgtOffsets,int32_t ArgNum,int32_t TeamNum,int32_t ThreadLimit,uint64_t LoopTripcount)295 int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
296                                          void **TgtArgs, ptrdiff_t *TgtOffsets,
297                                          int32_t ArgNum, int32_t TeamNum,
298                                          int32_t ThreadLimit,
299                                          uint64_t LoopTripcount /*not used*/) {
300   // ignore team num and thread limit.
301 
302   // Use libffi to launch execution.
303   ffi_cif Cif;
304 
305   // All args are references.
306   std::vector<ffi_type *> ArgsTypes(ArgNum, &ffi_type_pointer);
307   std::vector<void *> Args(ArgNum);
308   std::vector<void *> Ptrs(ArgNum);
309 
310   for (int32_t I = 0; I < ArgNum; ++I) {
311     Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
312     Args[I] = &Ptrs[I];
313   }
314 
315   ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, ArgNum,
316                                    &ffi_type_void, &ArgsTypes[0]);
317 
318   assert(Status == FFI_OK && "Unable to prepare target launch!");
319 
320   if (Status != FFI_OK)
321     return OFFLOAD_FAIL;
322 
323   DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr));
324 
325   void (*Entry)(void);
326   *((void **)&Entry) = TgtEntryPtr;
327   ffi_call(&Cif, Entry, NULL, &Args[0]);
328   return OFFLOAD_SUCCESS;
329 }
330 
__tgt_rtl_run_target_region(int32_t DeviceId,void * TgtEntryPtr,void ** TgtArgs,ptrdiff_t * TgtOffsets,int32_t ArgNum)331 int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
332                                     void **TgtArgs, ptrdiff_t *TgtOffsets,
333                                     int32_t ArgNum) {
334   // use one team and one thread.
335   return __tgt_rtl_run_target_team_region(DeviceId, TgtEntryPtr, TgtArgs,
336                                           TgtOffsets, ArgNum, 1, 1, 0);
337 }
338 
339 #ifdef __cplusplus
340 }
341 #endif
342