1 //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.txt for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // RTL for generic 64-bit machine
11 //
12 //===----------------------------------------------------------------------===//
13 
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
#include <ffi.h>
#include <gelf.h>
#include <link.h>
#include <list>
#include <string>
#include <unistd.h>
#include <vector>
25 
26 #include "omptargetplugin.h"
27 
// Default name used to tag this plugin's debug output; only applied when
// TARGET_NAME has not been defined already.
#if !defined(TARGET_NAME)
#define TARGET_NAME Generic ELF - 64bit
#endif

// Default ELF machine id; only applied when TARGET_ELF_ID has not been
// defined already. Values below 1 make __tgt_rtl_is_valid_binary() reject
// every image.
#if !defined(TARGET_ELF_ID)
#define TARGET_ELF_ID 0
#endif
35 
#ifdef OMPTARGET_DEBUG
// Debug verbosity; DP() only prints when this is greater than zero.
static int DebugLevel = 0;

// Two-level expansion so that TARGET_NAME is macro-expanded before being
// turned into a string literal.
#define GETNAME2(name) #name
#define GETNAME(name) GETNAME2(name)
// Print a debug message tagged with the target name when DebugLevel > 0.
#define DP(...) \
  do { \
    if (DebugLevel > 0) { \
      DEBUGP("Target " GETNAME(TARGET_NAME) " RTL", __VA_ARGS__); \
    } \
  } while (false)
#else // OMPTARGET_DEBUG
// Debugging disabled: DP() discards its arguments without evaluating them.
// It still expands to a single statement (not a bare `{}`), so that
// `if (c) DP(...); else ...` parses the same way as in debug builds.
#define DP(...) \
  do { \
  } while (false)
#endif // OMPTARGET_DEBUG
50 
51 #include "../../common/elf_common.c"
52 
// Name of the ELF section that is searched for when a target image is loaded.
#define OFFLOADSECTIONNAME ".omp_offloading.entries"
// Number of devices reported by this plugin; also sizes the per-device tables.
#define NUMBER_OF_DEVICES 4
55 
/// Record describing one dynamic library loaded for this target.
struct DynLibTy {
  /// Path of the file the library was loaded from; passed to remove() when
  /// the owning RTLDeviceInfoTy is destroyed.
  char *FileName;
  /// Handle returned by dlopen(); passed to dlclose() on destruction.
  void *Handle;
};
61 
62 /// Keep entries table per device.
63 struct FuncOrGblEntryTy {
64   __tgt_target_table Table;
65 };
66 
67 /// Class containing all the device information.
68 class RTLDeviceInfoTy {
69   std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
70 
71 public:
72   std::list<DynLibTy> DynLibs;
73 
74   // Record entry point associated with device.
75   void createOffloadTable(int32_t device_id, __tgt_offload_entry *begin,
76                           __tgt_offload_entry *end) {
77     assert(device_id < (int32_t)FuncGblEntries.size() &&
78            "Unexpected device id!");
79     FuncGblEntries[device_id].emplace_back();
80     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
81 
82     E.Table.EntriesBegin = begin;
83     E.Table.EntriesEnd = end;
84   }
85 
86   // Return true if the entry is associated with device.
87   bool findOffloadEntry(int32_t device_id, void *addr) {
88     assert(device_id < (int32_t)FuncGblEntries.size() &&
89            "Unexpected device id!");
90     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
91 
92     for (__tgt_offload_entry *i = E.Table.EntriesBegin, *e = E.Table.EntriesEnd;
93          i < e; ++i) {
94       if (i->addr == addr)
95         return true;
96     }
97 
98     return false;
99   }
100 
101   // Return the pointer to the target entries table.
102   __tgt_target_table *getOffloadEntriesTable(int32_t device_id) {
103     assert(device_id < (int32_t)FuncGblEntries.size() &&
104            "Unexpected device id!");
105     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
106 
107     return &E.Table;
108   }
109 
110   RTLDeviceInfoTy(int32_t num_devices) {
111 #ifdef OMPTARGET_DEBUG
112     if (char *envStr = getenv("LIBOMPTARGET_DEBUG")) {
113       DebugLevel = std::stoi(envStr);
114     }
115 #endif // OMPTARGET_DEBUG
116 
117     FuncGblEntries.resize(num_devices);
118   }
119 
120   ~RTLDeviceInfoTy() {
121     // Close dynamic libraries
122     for (auto &lib : DynLibs) {
123       if (lib.Handle) {
124         dlclose(lib.Handle);
125         remove(lib.FileName);
126       }
127     }
128   }
129 };
130 
131 static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
132 
133 #ifdef __cplusplus
134 extern "C" {
135 #endif
136 
137 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
138 // If we don't have a valid ELF ID we can just fail.
139 #if TARGET_ELF_ID < 1
140   return 0;
141 #else
142   return elf_check_machine(image, TARGET_ELF_ID);
143 #endif
144 }
145 
146 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
147 
148 int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; }
149 
150 __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
151                                           __tgt_device_image *image) {
152 
153   DP("Dev %d: load binary from " DPxMOD " image\n", device_id,
154      DPxPTR(image->ImageStart));
155 
156   assert(device_id >= 0 && device_id < NUMBER_OF_DEVICES && "bad dev id");
157 
158   size_t ImageSize = (size_t)image->ImageEnd - (size_t)image->ImageStart;
159   size_t NumEntries = (size_t)(image->EntriesEnd - image->EntriesBegin);
160   DP("Expecting to have %zd entries defined.\n", NumEntries);
161 
162   // Is the library version incompatible with the header file?
163   if (elf_version(EV_CURRENT) == EV_NONE) {
164     DP("Incompatible ELF library!\n");
165     return NULL;
166   }
167 
168   // Obtain elf handler
169   Elf *e = elf_memory((char *)image->ImageStart, ImageSize);
170   if (!e) {
171     DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
172     return NULL;
173   }
174 
175   if (elf_kind(e) != ELF_K_ELF) {
176     DP("Invalid Elf kind!\n");
177     elf_end(e);
178     return NULL;
179   }
180 
181   // Find the entries section offset
182   Elf_Scn *section = 0;
183   Elf64_Off entries_offset = 0;
184 
185   size_t shstrndx;
186 
187   if (elf_getshdrstrndx(e, &shstrndx)) {
188     DP("Unable to get ELF strings index!\n");
189     elf_end(e);
190     return NULL;
191   }
192 
193   while ((section = elf_nextscn(e, section))) {
194     GElf_Shdr hdr;
195     gelf_getshdr(section, &hdr);
196 
197     if (!strcmp(elf_strptr(e, shstrndx, hdr.sh_name), OFFLOADSECTIONNAME)) {
198       entries_offset = hdr.sh_addr;
199       break;
200     }
201   }
202 
203   if (!entries_offset) {
204     DP("Entries Section Offset Not Found\n");
205     elf_end(e);
206     return NULL;
207   }
208 
209   DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(entries_offset));
210 
211   // load dynamic library and get the entry points. We use the dl library
212   // to do the loading of the library, but we could do it directly to avoid the
213   // dump to the temporary file.
214   //
215   // 1) Create tmp file with the library contents.
216   // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
217   char tmp_name[] = "/tmp/tmpfile_XXXXXX";
218   int tmp_fd = mkstemp(tmp_name);
219 
220   if (tmp_fd == -1) {
221     elf_end(e);
222     return NULL;
223   }
224 
225   FILE *ftmp = fdopen(tmp_fd, "wb");
226 
227   if (!ftmp) {
228     elf_end(e);
229     return NULL;
230   }
231 
232   fwrite(image->ImageStart, ImageSize, 1, ftmp);
233   fclose(ftmp);
234 
235   DynLibTy Lib = {tmp_name, dlopen(tmp_name, RTLD_LAZY)};
236 
237   if (!Lib.Handle) {
238     DP("Target library loading error: %s\n", dlerror());
239     elf_end(e);
240     return NULL;
241   }
242 
243   DeviceInfo.DynLibs.push_back(Lib);
244 
245   struct link_map *libInfo = (struct link_map *)Lib.Handle;
246 
247   // The place where the entries info is loaded is the library base address
248   // plus the offset determined from the ELF file.
249   Elf64_Addr entries_addr = libInfo->l_addr + entries_offset;
250 
251   DP("Pointer to first entry to be loaded is (" DPxMOD ").\n",
252       DPxPTR(entries_addr));
253 
254   // Table of pointers to all the entries in the target.
255   __tgt_offload_entry *entries_table = (__tgt_offload_entry *)entries_addr;
256 
257   __tgt_offload_entry *entries_begin = &entries_table[0];
258   __tgt_offload_entry *entries_end = entries_begin + NumEntries;
259 
260   if (!entries_begin) {
261     DP("Can't obtain entries begin\n");
262     elf_end(e);
263     return NULL;
264   }
265 
266   DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n",
267       DPxPTR(entries_begin), DPxPTR(entries_end));
268   DeviceInfo.createOffloadTable(device_id, entries_begin, entries_end);
269 
270   elf_end(e);
271 
272   return DeviceInfo.getOffloadEntriesTable(device_id);
273 }
274 
// Allocate size bytes with malloc() and return the pointer, or NULL when the
// allocation fails or size cannot be represented as a size_t.
// device_id and hst_ptr are not used.
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
  // Reject negative sizes and sizes that would be truncated by the
  // conversion to size_t, rather than allocating a buffer of the wrong size.
  if (size < 0 || (uint64_t)(size_t)size != (uint64_t)size)
    return NULL;

  return malloc((size_t)size);
}
279 
280 int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
281                               int64_t size) {
282   memcpy(tgt_ptr, hst_ptr, size);
283   return OFFLOAD_SUCCESS;
284 }
285 
286 int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
287                                 int64_t size) {
288   memcpy(hst_ptr, tgt_ptr, size);
289   return OFFLOAD_SUCCESS;
290 }
291 
292 int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
293   free(tgt_ptr);
294   return OFFLOAD_SUCCESS;
295 }
296 
297 int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
298     void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
299     int32_t thread_limit, uint64_t loop_tripcount /*not used*/) {
300   // ignore team num and thread limit.
301 
302   // Use libffi to launch execution.
303   ffi_cif cif;
304 
305   // All args are references.
306   std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer);
307   std::vector<void *> args(arg_num);
308   std::vector<void *> ptrs(arg_num);
309 
310   for (int32_t i = 0; i < arg_num; ++i) {
311     ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
312     args[i] = &ptrs[i];
313   }
314 
315   ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num,
316                                    &ffi_type_void, &args_types[0]);
317 
318   assert(status == FFI_OK && "Unable to prepare target launch!");
319 
320   if (status != FFI_OK)
321     return OFFLOAD_FAIL;
322 
323   DP("Running entry point at " DPxMOD "...\n", DPxPTR(tgt_entry_ptr));
324 
325   void (*entry)(void);
326   *((void**) &entry) = tgt_entry_ptr;
327   ffi_call(&cif, entry, NULL, &args[0]);
328   return OFFLOAD_SUCCESS;
329 }
330 
331 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
332     void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num) {
333   // use one team and one thread.
334   return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
335       tgt_offsets, arg_num, 1, 1, 0);
336 }
337 
338 #ifdef __cplusplus
339 }
340 #endif
341