1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.txt for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Implementation of the interface to be used by Clang during the codegen of a
11 // target region.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include <omptarget.h>
16 
17 #include "device.h"
18 #include "private.h"
19 #include "rtl.h"
20 
21 #include <cassert>
22 #include <cstdlib>
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds a target shared library to the target execution image
26 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
27   RTLs.RegisterLib(desc);
28 }
29 
30 ////////////////////////////////////////////////////////////////////////////////
31 /// unloads a target shared library
32 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
33   RTLs.UnregisterLib(desc);
34 }
35 
// The following data types and functions (tgt_oldmap_type, combined_entry_t,
// translate_map, cleanup_map) will be removed once the compiler starts using
// the new map types.
39 
// Old map types: one bit per flag, so several flags can be OR-ed together in
// a single argument-type word.
enum tgt_oldmap_type {
  OMP_TGT_OLDMAPTYPE_TO          = 1 << 0, // 0x001: copy data from host to device
  OMP_TGT_OLDMAPTYPE_FROM        = 1 << 1, // 0x002: copy data from device to host
  OMP_TGT_OLDMAPTYPE_ALWAYS      = 1 << 2, // 0x004: copy regardless of the ref. count
  OMP_TGT_OLDMAPTYPE_DELETE      = 1 << 3, // 0x008: force unmapping of data
  OMP_TGT_OLDMAPTYPE_MAP_PTR     = 1 << 4, // 0x010: map pointer as well as pointee
  OMP_TGT_OLDMAPTYPE_FIRST_MAP   = 1 << 5, // 0x020: first occurrence of mapped variable
  OMP_TGT_OLDMAPTYPE_RETURN_PTR  = 1 << 6, // 0x040: return TgtBase addr of mapped data
  OMP_TGT_OLDMAPTYPE_PRIVATE_PTR = 1 << 7, // 0x080: private variable - not mapped
  OMP_TGT_OLDMAPTYPE_PRIVATE_VAL = 1 << 8  // 0x100: copy by value - not mapped
};
52 
// Temporary bookkeeping record for map translation: describes one "combined"
// entry, i.e. the address range spanned by several old map entries that were
// grouped together.
struct combined_entry_t {
  int num_members;  // number of old entries grouped into this combined entry
  void *base_addr;  // base address of the combined entry
  void *begin_addr; // lowest begin address among the members
  void *end_addr;   // highest end address among the members (an address,
                    // not a size)
};
60 
61 static void translate_map(int32_t arg_num, void **args_base, void **args,
62     int64_t *arg_sizes, int64_t *arg_types, int32_t &new_arg_num,
63     void **&new_args_base, void **&new_args, int64_t *&new_arg_sizes,
64     int64_t *&new_arg_types, bool is_target_construct) {
65   if (arg_num <= 0) {
66     DP("Nothing to translate\n");
67     new_arg_num = 0;
68     return;
69   }
70 
71   // array of combined entries
72   combined_entry_t *cmb_entries =
73       (combined_entry_t *) alloca(arg_num * sizeof(combined_entry_t));
74   // number of combined entries
75   long num_combined = 0;
76   // old entry is MAP_PTR?
77   bool *is_ptr_old = (bool *) alloca(arg_num * sizeof(bool));
78   // old entry is member of member_of[old] cmb_entry
79   int *member_of = (int *) alloca(arg_num * sizeof(int));
80   // temporary storage for modifications of the original arg_types
81   int64_t *mod_arg_types = (int64_t *) alloca(arg_num  *sizeof(int64_t));
82 
83   DP("Translating %d map entries\n", arg_num);
84   for (int i = 0; i < arg_num; ++i) {
85     member_of[i] = -1;
86     is_ptr_old[i] = false;
87     mod_arg_types[i] = arg_types[i];
88     // Scan previous entries to see whether this entry shares the same base
89     for (int j = 0; j < i; ++j) {
90       void *new_begin_addr = NULL;
91       void *new_end_addr = NULL;
92 
93       if (mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
94         if (args_base[i] == args[j]) {
95           if (!(mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR)) {
96             DP("Entry %d has the same base as entry %d's begin address\n", i,
97                 j);
98             new_begin_addr = args_base[i];
99             new_end_addr = (char *)args_base[i] + sizeof(void *);
100             assert(arg_sizes[j] == sizeof(void *));
101             is_ptr_old[j] = true;
102           } else {
103             DP("Entry %d has the same base as entry %d's begin address, but "
104                 "%d's base was a MAP_PTR too\n", i, j, j);
105             int32_t to_from_always_delete =
106                 OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM |
107                 OMP_TGT_OLDMAPTYPE_ALWAYS | OMP_TGT_OLDMAPTYPE_DELETE;
108             if (mod_arg_types[j] & to_from_always_delete) {
109               DP("Resetting to/from/always/delete flags for entry %d because "
110                   "it is only a pointer to pointer\n", j);
111               mod_arg_types[j] &= ~to_from_always_delete;
112             }
113           }
114         }
115       } else {
116         if (!(mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_FIRST_MAP) &&
117             args_base[i] == args_base[j]) {
118           DP("Entry %d has the same base address as entry %d\n", i, j);
119           new_begin_addr = args[i];
120           new_end_addr = (char *)args[i] + arg_sizes[i];
121         }
122       }
123 
124       // If we have combined the entry with a previous one
125       if (new_begin_addr) {
126         int id;
127         if(member_of[j] == -1) {
128           // We have a new entry
129           id = num_combined++;
130           DP("Creating new combined entry %d for old entry %d\n", id, j);
131           // Initialize new entry
132           cmb_entries[id].num_members = 1;
133           cmb_entries[id].base_addr = args_base[j];
134           if (mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
135             cmb_entries[id].begin_addr = args_base[j];
136             cmb_entries[id].end_addr = (char *)args_base[j] + arg_sizes[j];
137           } else {
138             cmb_entries[id].begin_addr = args[j];
139             cmb_entries[id].end_addr = (char *)args[j] + arg_sizes[j];
140           }
141           member_of[j] = id;
142         } else {
143           // Reuse existing combined entry
144           DP("Reusing existing combined entry %d\n", member_of[j]);
145           id = member_of[j];
146         }
147 
148         // Update combined entry
149         DP("Adding entry %d to combined entry %d\n", i, id);
150         cmb_entries[id].num_members++;
151         // base_addr stays the same
152         cmb_entries[id].begin_addr =
153             std::min(cmb_entries[id].begin_addr, new_begin_addr);
154         cmb_entries[id].end_addr =
155             std::max(cmb_entries[id].end_addr, new_end_addr);
156         member_of[i] = id;
157         break;
158       }
159     }
160   }
161 
162   DP("New entries: %ld combined + %d original\n", num_combined, arg_num);
163   new_arg_num = arg_num + num_combined;
164   new_args_base = (void **) malloc(new_arg_num * sizeof(void *));
165   new_args = (void **) malloc(new_arg_num * sizeof(void *));
166   new_arg_sizes = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
167   new_arg_types = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
168 
169   const int64_t alignment = 8;
170 
171   int next_id = 0; // next ID
172   int next_cid = 0; // next combined ID
173   int *combined_to_new_id = (int *) alloca(num_combined * sizeof(int));
174   for (int i = 0; i < arg_num; ++i) {
175     // It is member_of
176     if (member_of[i] == next_cid) {
177       int cid = next_cid++; // ID of this combined entry
178       int nid = next_id++; // ID of the new (global) entry
179       combined_to_new_id[cid] = nid;
180       DP("Combined entry %3d will become new entry %3d\n", cid, nid);
181 
182       int64_t padding = (int64_t)cmb_entries[cid].begin_addr % alignment;
183       if (padding) {
184         DP("Using a padding of %" PRId64 " for begin address " DPxMOD "\n",
185             padding, DPxPTR(cmb_entries[cid].begin_addr));
186         cmb_entries[cid].begin_addr =
187             (char *)cmb_entries[cid].begin_addr - padding;
188       }
189 
190       new_args_base[nid] = cmb_entries[cid].base_addr;
191       new_args[nid] = cmb_entries[cid].begin_addr;
192       new_arg_sizes[nid] = (int64_t) ((char *)cmb_entries[cid].end_addr -
193           (char *)cmb_entries[cid].begin_addr);
194       new_arg_types[nid] = OMP_TGT_MAPTYPE_TARGET_PARAM;
195       DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", "
196           "size %" PRId64 ", type 0x%" PRIx64 "\n", nid,
197           DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
198           new_arg_types[nid]);
199     } else if (member_of[i] != -1) {
200       DP("Combined entry %3d has been encountered before, do nothing\n",
201           member_of[i]);
202     }
203 
204     // Now that the combined entry (the one the old entry was a member of) has
205     // been inserted into the new arguments list, proceed with the old entry.
206     int nid = next_id++;
207     DP("Old entry %3d will become new entry %3d\n", i, nid);
208 
209     new_args_base[nid] = args_base[i];
210     new_args[nid] = args[i];
211     new_arg_sizes[nid] = arg_sizes[i];
212     int64_t old_type = mod_arg_types[i];
213 
214     if (is_ptr_old[i]) {
215       // Reset TO and FROM flags
216       old_type &= ~(OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM);
217     }
218 
219     if (member_of[i] == -1) {
220       if (!is_target_construct)
221         old_type &= ~OMP_TGT_MAPTYPE_TARGET_PARAM;
222       new_arg_types[nid] = old_type;
223       DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
224           ", type 0x%" PRIx64 " (old entry %d not MEMBER_OF)\n", nid,
225           DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
226           new_arg_types[nid], i);
227     } else {
228       // Old entry is not FIRST_MAP
229       old_type &= ~OMP_TGT_OLDMAPTYPE_FIRST_MAP;
230       // Add MEMBER_OF
231       int new_member_of = combined_to_new_id[member_of[i]];
232       old_type |= ((int64_t)new_member_of + 1) << 48;
233       new_arg_types[nid] = old_type;
234       DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
235         ", type 0x%" PRIx64 " (old entry %d MEMBER_OF %d)\n", nid,
236         DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
237         new_arg_types[nid], i, new_member_of);
238     }
239   }
240 }
241 
/// Copies base addresses from the translated argument list back into the
/// caller's \p args_base array and releases the four translated arrays.
///
/// Does nothing when \p new_arg_num is not positive (nothing is copied and
/// nothing is freed). Otherwise the last \p arg_num elements of
/// \p new_args_base are copied, in order, into \p args_base.
// NOTE(review): the copy assumes the original entries occupy the tail of the
// translated list - confirm against how translate_map orders its output.
static void cleanup_map(int32_t new_arg_num, void **new_args_base,
    void **new_args, int64_t *new_arg_sizes, int64_t *new_arg_types,
    int32_t arg_num, void **args_base) {
  if (new_arg_num <= 0)
    return;

  // Index distance between an old entry and its slot in the translated list.
  const int shift = new_arg_num - arg_num;
  for (int32_t old_idx = 0; old_idx < arg_num; ++old_idx) {
    const int32_t new_idx = old_idx + shift;
    args_base[old_idx] = new_args_base[new_idx];
  }

  // Release the translation buffers.
  free(new_arg_types);
  free(new_arg_sizes);
  free(new_args);
  free(new_args_base);
}
257 
258 /// creates host-to-target data mapping, stores it in the
259 /// libomptarget.so internal structure (an entry in a stack of data maps)
260 /// and passes the data to the device.
261 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
262     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
263   DP("Entering data begin region for device %ld with %d mappings\n", device_id,
264      arg_num);
265 
266   // No devices available?
267   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
268     device_id = omp_get_default_device();
269     DP("Use default device id %ld\n", device_id);
270   }
271 
272   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
273     DP("Failed to get device %ld ready\n", device_id);
274     return;
275   }
276 
277   DeviceTy& Device = Devices[device_id];
278 
279   // Translate maps
280   int32_t new_arg_num;
281   void **new_args_base;
282   void **new_args;
283   int64_t *new_arg_sizes;
284   int64_t *new_arg_types;
285   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
286       new_args_base, new_args, new_arg_sizes, new_arg_types, false);
287 
288   //target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types);
289   target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
290       new_arg_types);
291 
292   // Cleanup translation memory
293   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
294       new_arg_types, arg_num, args_base);
295 }
296 
297 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
298     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
299     int32_t depNum, void *depList, int32_t noAliasDepNum,
300     void *noAliasDepList) {
301   if (depNum + noAliasDepNum > 0)
302     __kmpc_omp_taskwait(NULL, 0);
303 
304   __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
305                           arg_types);
306 }
307 
308 /// passes data from the target, releases target memory and destroys
309 /// the host-target mapping (top entry from the stack of data maps)
310 /// created by the last __tgt_target_data_begin.
311 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
312     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
313   DP("Entering data end region with %d mappings\n", arg_num);
314 
315   // No devices available?
316   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
317     device_id = omp_get_default_device();
318   }
319 
320   RTLsMtx.lock();
321   size_t Devices_size = Devices.size();
322   RTLsMtx.unlock();
323   if (Devices_size <= (size_t)device_id) {
324     DP("Device ID  %ld does not have a matching RTL.\n", device_id);
325     return;
326   }
327 
328   DeviceTy &Device = Devices[device_id];
329   if (!Device.IsInit) {
330     DP("uninit device: ignore");
331     return;
332   }
333 
334   // Translate maps
335   int32_t new_arg_num;
336   void **new_args_base;
337   void **new_args;
338   int64_t *new_arg_sizes;
339   int64_t *new_arg_types;
340   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
341       new_args_base, new_args, new_arg_sizes, new_arg_types, false);
342 
343   //target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
344   target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
345       new_arg_types);
346 
347   // Cleanup translation memory
348   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
349       new_arg_types, arg_num, args_base);
350 }
351 
352 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
353     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
354     int32_t depNum, void *depList, int32_t noAliasDepNum,
355     void *noAliasDepList) {
356   if (depNum + noAliasDepNum > 0)
357     __kmpc_omp_taskwait(NULL, 0);
358 
359   __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
360                         arg_types);
361 }
362 
363 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
364     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
365   DP("Entering data update with %d mappings\n", arg_num);
366 
367   // No devices available?
368   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
369     device_id = omp_get_default_device();
370   }
371 
372   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
373     DP("Failed to get device %ld ready\n", device_id);
374     return;
375   }
376 
377   DeviceTy& Device = Devices[device_id];
378   target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types);
379 }
380 
381 EXTERN void __tgt_target_data_update_nowait(
382     int64_t device_id, int32_t arg_num, void **args_base, void **args,
383     int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
384     int32_t noAliasDepNum, void *noAliasDepList) {
385   if (depNum + noAliasDepNum > 0)
386     __kmpc_omp_taskwait(NULL, 0);
387 
388   __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes,
389                            arg_types);
390 }
391 
392 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
393     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
394   DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
395      DPxPTR(host_ptr), device_id);
396 
397   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
398     device_id = omp_get_default_device();
399   }
400 
401   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
402     DP("Failed to get device %ld ready\n", device_id);
403     return OFFLOAD_FAIL;
404   }
405 
406   // Translate maps
407   int32_t new_arg_num;
408   void **new_args_base;
409   void **new_args;
410   int64_t *new_arg_sizes;
411   int64_t *new_arg_types;
412   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
413       new_args_base, new_args, new_arg_sizes, new_arg_types, true);
414 
415   //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
416   //    arg_types, 0, 0, false /*team*/, false /*recursive*/);
417   int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
418       new_arg_sizes, new_arg_types, 0, 0, false /*team*/);
419 
420   // Cleanup translation memory
421   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
422       new_arg_types, arg_num, args_base);
423 
424   return rc;
425 }
426 
427 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
428     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
429     int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
430     void *noAliasDepList) {
431   if (depNum + noAliasDepNum > 0)
432     __kmpc_omp_taskwait(NULL, 0);
433 
434   return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
435                       arg_types);
436 }
437 
438 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
439     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
440     int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
441   DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
442      DPxPTR(host_ptr), device_id);
443 
444   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
445     device_id = omp_get_default_device();
446   }
447 
448   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
449     DP("Failed to get device %ld ready\n", device_id);
450     return OFFLOAD_FAIL;
451   }
452 
453   // Translate maps
454   int32_t new_arg_num;
455   void **new_args_base;
456   void **new_args;
457   int64_t *new_arg_sizes;
458   int64_t *new_arg_types;
459   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
460       new_args_base, new_args, new_arg_sizes, new_arg_types, true);
461 
462   //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
463   //              arg_types, team_num, thread_limit, true /*team*/,
464   //              false /*recursive*/);
465   int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
466       new_arg_sizes, new_arg_types, team_num, thread_limit, true /*team*/);
467 
468   // Cleanup translation memory
469   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
470       new_arg_types, arg_num, args_base);
471 
472   return rc;
473 }
474 
475 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
476     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
477     int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
478     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
479   if (depNum + noAliasDepNum > 0)
480     __kmpc_omp_taskwait(NULL, 0);
481 
482   return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args,
483                             arg_sizes, arg_types, team_num, thread_limit);
484 }
485 
486 
487 // The trip count mechanism will be revised - this scheme is not thread-safe.
488 EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
489     uint64_t loop_tripcount) {
490   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
491     device_id = omp_get_default_device();
492   }
493 
494   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
495     DP("Failed to get device %ld ready\n", device_id);
496     return;
497   }
498 
499   DP("__kmpc_push_target_tripcount(%ld, %" PRIu64 ")\n", device_id,
500       loop_tripcount);
501   Devices[device_id].loopTripCnt = loop_tripcount;
502 }
503