1 //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // RTL for generic 64-bit machine
10 //
11 //===----------------------------------------------------------------------===//
12
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
#include <ffi.h>
#include <gelf.h>
#include <link.h>
#include <list>
#include <string>
#include <unistd.h>
#include <vector>
24
25 #include "Debug.h"
26 #include "omptargetplugin.h"
27
28 #ifndef TARGET_NAME
29 #define TARGET_NAME Generic ELF - 64bit
30 #endif
31 #define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL"
32
33 #ifndef TARGET_ELF_ID
34 #define TARGET_ELF_ID 0
35 #endif
36
37 #include "elf_common.h"
38
39 #define NUMBER_OF_DEVICES 4
40 #define OFFLOADSECTIONNAME "omp_offloading_entries"
41
/// A single dynamic library loaded for this target.
struct DynLibTy {
  /// Path of the file the library was loaded from.
  std::string FileName;
  /// Handle of the loaded library; a null handle means nothing was loaded.
  void *Handle;
};
47
48 /// Keep entries table per device.
49 struct FuncOrGblEntryTy {
50 __tgt_target_table Table;
51 };
52
53 /// Class containing all the device information.
54 class RTLDeviceInfoTy {
55 std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
56
57 public:
58 std::list<DynLibTy> DynLibs;
59
60 // Record entry point associated with device.
createOffloadTable(int32_t DeviceId,__tgt_offload_entry * Begin,__tgt_offload_entry * End)61 void createOffloadTable(int32_t DeviceId, __tgt_offload_entry *Begin,
62 __tgt_offload_entry *End) {
63 assert(DeviceId < (int32_t)FuncGblEntries.size() &&
64 "Unexpected device id!");
65 FuncGblEntries[DeviceId].emplace_back();
66 FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
67
68 E.Table.EntriesBegin = Begin;
69 E.Table.EntriesEnd = End;
70 }
71
72 // Return true if the entry is associated with device.
findOffloadEntry(int32_t DeviceId,void * Addr)73 bool findOffloadEntry(int32_t DeviceId, void *Addr) {
74 assert(DeviceId < (int32_t)FuncGblEntries.size() &&
75 "Unexpected device id!");
76 FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
77
78 for (__tgt_offload_entry *I = E.Table.EntriesBegin,
79 *End = E.Table.EntriesEnd;
80 I < End; ++I) {
81 if (I->addr == Addr)
82 return true;
83 }
84
85 return false;
86 }
87
88 // Return the pointer to the target entries table.
getOffloadEntriesTable(int32_t DeviceId)89 __tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) {
90 assert(DeviceId < (int32_t)FuncGblEntries.size() &&
91 "Unexpected device id!");
92 FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
93
94 return &E.Table;
95 }
96
RTLDeviceInfoTy(int32_t NumDevices)97 RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); }
98
~RTLDeviceInfoTy()99 ~RTLDeviceInfoTy() {
100 // Close dynamic libraries
101 for (auto &Lib : DynLibs) {
102 if (Lib.Handle) {
103 dlclose(Lib.Handle);
104 remove(Lib.FileName.c_str());
105 }
106 }
107 }
108 };
109
110 static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
111
112 #ifdef __cplusplus
113 extern "C" {
114 #endif
115
__tgt_rtl_is_valid_binary(__tgt_device_image * Image)116 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
117 // If we don't have a valid ELF ID we can just fail.
118 #if TARGET_ELF_ID < 1
119 return 0;
120 #else
121 return elf_check_machine(Image, TARGET_ELF_ID);
122 #endif
123 }
124
__tgt_rtl_number_of_devices()125 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
126
__tgt_rtl_init_device(int32_t DeviceId)127 int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; }
128
__tgt_rtl_load_binary(int32_t DeviceId,__tgt_device_image * Image)129 __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
130 __tgt_device_image *Image) {
131
132 DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId,
133 DPxPTR(Image->ImageStart));
134
135 assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id");
136
137 size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;
138 size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin);
139 DP("Expecting to have %zd entries defined.\n", NumEntries);
140
141 // Is the library version incompatible with the header file?
142 if (elf_version(EV_CURRENT) == EV_NONE) {
143 DP("Incompatible ELF library!\n");
144 return NULL;
145 }
146
147 // Obtain elf handler
148 Elf *E = elf_memory((char *)Image->ImageStart, ImageSize);
149 if (!E) {
150 DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
151 return NULL;
152 }
153
154 if (elf_kind(E) != ELF_K_ELF) {
155 DP("Invalid Elf kind!\n");
156 elf_end(E);
157 return NULL;
158 }
159
160 // Find the entries section offset
161 Elf_Scn *Section = 0;
162 Elf64_Off EntriesOffset = 0;
163
164 size_t Shstrndx;
165
166 if (elf_getshdrstrndx(E, &Shstrndx)) {
167 DP("Unable to get ELF strings index!\n");
168 elf_end(E);
169 return NULL;
170 }
171
172 while ((Section = elf_nextscn(E, Section))) {
173 GElf_Shdr Hdr;
174 gelf_getshdr(Section, &Hdr);
175
176 if (!strcmp(elf_strptr(E, Shstrndx, Hdr.sh_name), OFFLOADSECTIONNAME)) {
177 EntriesOffset = Hdr.sh_addr;
178 break;
179 }
180 }
181
182 if (!EntriesOffset) {
183 DP("Entries Section Offset Not Found\n");
184 elf_end(E);
185 return NULL;
186 }
187
188 DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(EntriesOffset));
189
190 // load dynamic library and get the entry points. We use the dl library
191 // to do the loading of the library, but we could do it directly to avoid the
192 // dump to the temporary file.
193 //
194 // 1) Create tmp file with the library contents.
195 // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
196 char TmpName[] = "/tmp/tmpfile_XXXXXX";
197 int TmpFd = mkstemp(TmpName);
198
199 if (TmpFd == -1) {
200 elf_end(E);
201 return NULL;
202 }
203
204 FILE *Ftmp = fdopen(TmpFd, "wb");
205
206 if (!Ftmp) {
207 elf_end(E);
208 return NULL;
209 }
210
211 fwrite(Image->ImageStart, ImageSize, 1, Ftmp);
212 fclose(Ftmp);
213
214 DynLibTy Lib = {TmpName, dlopen(TmpName, RTLD_LAZY)};
215
216 if (!Lib.Handle) {
217 DP("Target library loading error: %s\n", dlerror());
218 elf_end(E);
219 return NULL;
220 }
221
222 DeviceInfo.DynLibs.push_back(Lib);
223
224 struct link_map *LibInfo = (struct link_map *)Lib.Handle;
225
226 // The place where the entries info is loaded is the library base address
227 // plus the offset determined from the ELF file.
228 Elf64_Addr EntriesAddr = LibInfo->l_addr + EntriesOffset;
229
230 DP("Pointer to first entry to be loaded is (" DPxMOD ").\n",
231 DPxPTR(EntriesAddr));
232
233 // Table of pointers to all the entries in the target.
234 __tgt_offload_entry *EntriesTable = (__tgt_offload_entry *)EntriesAddr;
235
236 __tgt_offload_entry *EntriesBegin = &EntriesTable[0];
237 __tgt_offload_entry *EntriesEnd = EntriesBegin + NumEntries;
238
239 if (!EntriesBegin) {
240 DP("Can't obtain entries begin\n");
241 elf_end(E);
242 return NULL;
243 }
244
245 DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n",
246 DPxPTR(EntriesBegin), DPxPTR(EntriesEnd));
247 DeviceInfo.createOffloadTable(DeviceId, EntriesBegin, EntriesEnd);
248
249 elf_end(E);
250
251 return DeviceInfo.getOffloadEntriesTable(DeviceId);
252 }
253
/// Print a one-line description of the device to standard output. The text is
/// the same for every \p DeviceId.
void __tgt_rtl_print_device_info(int32_t DeviceId) {
  static const char Description[] = " This is a generic-elf-64bit device\n";
  fputs(Description, stdout);
}
257
258 // Sample implementation of explicit memory allocator. For this plugin all kinds
259 // are equivalent to each other.
__tgt_rtl_data_alloc(int32_t DeviceId,int64_t Size,void * HstPtr,int32_t Kind)260 void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
261 int32_t Kind) {
262 void *Ptr = NULL;
263
264 switch (Kind) {
265 case TARGET_ALLOC_DEVICE:
266 case TARGET_ALLOC_HOST:
267 case TARGET_ALLOC_SHARED:
268 case TARGET_ALLOC_DEFAULT:
269 Ptr = malloc(Size);
270 break;
271 default:
272 REPORT("Invalid target data allocation kind");
273 }
274
275 return Ptr;
276 }
277
__tgt_rtl_data_submit(int32_t DeviceId,void * TgtPtr,void * HstPtr,int64_t Size)278 int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
279 int64_t Size) {
280 memcpy(TgtPtr, HstPtr, Size);
281 return OFFLOAD_SUCCESS;
282 }
283
__tgt_rtl_data_retrieve(int32_t DeviceId,void * HstPtr,void * TgtPtr,int64_t Size)284 int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
285 int64_t Size) {
286 memcpy(HstPtr, TgtPtr, Size);
287 return OFFLOAD_SUCCESS;
288 }
289
__tgt_rtl_data_delete(int32_t DeviceId,void * TgtPtr)290 int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
291 free(TgtPtr);
292 return OFFLOAD_SUCCESS;
293 }
294
__tgt_rtl_run_target_team_region(int32_t DeviceId,void * TgtEntryPtr,void ** TgtArgs,ptrdiff_t * TgtOffsets,int32_t ArgNum,int32_t TeamNum,int32_t ThreadLimit,uint64_t LoopTripcount)295 int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
296 void **TgtArgs, ptrdiff_t *TgtOffsets,
297 int32_t ArgNum, int32_t TeamNum,
298 int32_t ThreadLimit,
299 uint64_t LoopTripcount /*not used*/) {
300 // ignore team num and thread limit.
301
302 // Use libffi to launch execution.
303 ffi_cif Cif;
304
305 // All args are references.
306 std::vector<ffi_type *> ArgsTypes(ArgNum, &ffi_type_pointer);
307 std::vector<void *> Args(ArgNum);
308 std::vector<void *> Ptrs(ArgNum);
309
310 for (int32_t I = 0; I < ArgNum; ++I) {
311 Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
312 Args[I] = &Ptrs[I];
313 }
314
315 ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, ArgNum,
316 &ffi_type_void, &ArgsTypes[0]);
317
318 assert(Status == FFI_OK && "Unable to prepare target launch!");
319
320 if (Status != FFI_OK)
321 return OFFLOAD_FAIL;
322
323 DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr));
324
325 void (*Entry)(void);
326 *((void **)&Entry) = TgtEntryPtr;
327 ffi_call(&Cif, Entry, NULL, &Args[0]);
328 return OFFLOAD_SUCCESS;
329 }
330
__tgt_rtl_run_target_region(int32_t DeviceId,void * TgtEntryPtr,void ** TgtArgs,ptrdiff_t * TgtOffsets,int32_t ArgNum)331 int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
332 void **TgtArgs, ptrdiff_t *TgtOffsets,
333 int32_t ArgNum) {
334 // use one team and one thread.
335 return __tgt_rtl_run_target_team_region(DeviceId, TgtEntryPtr, TgtArgs,
336 TgtOffsets, ArgNum, 1, 1, 0);
337 }
338
339 #ifdef __cplusplus
340 }
341 #endif
342