1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.txt for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Implementation of the interface to be used by Clang during the codegen of a
11 // target region.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include <omptarget.h>
16 
17 #include "device.h"
18 #include "private.h"
19 #include "rtl.h"
20 
21 #include <cassert>
22 
23 ////////////////////////////////////////////////////////////////////////////////
24 /// adds a target shared library to the target execution image
25 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
26   RTLs.RegisterLib(desc);
27 }
28 
29 ////////////////////////////////////////////////////////////////////////////////
30 /// unloads a target shared library
31 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
32   RTLs.UnregisterLib(desc);
33 }
34 
35 // Following datatypes and functions (tgt_oldmap_type, combined_entry_t,
36 // translate_map, cleanup_map) will be removed once the compiler starts using
37 // the new map types.
38 
// Old map types. Every flag occupies its own bit, so several of them can be
// OR-ed together inside a single arg_types word.
enum tgt_oldmap_type {
  // copy data from host to device
  OMP_TGT_OLDMAPTYPE_TO          = 1 << 0,
  // copy data from device to host
  OMP_TGT_OLDMAPTYPE_FROM        = 1 << 1,
  // copy regardless of the ref. count
  OMP_TGT_OLDMAPTYPE_ALWAYS      = 1 << 2,
  // force unmapping of data
  OMP_TGT_OLDMAPTYPE_DELETE      = 1 << 3,
  // map pointer as well as pointee
  OMP_TGT_OLDMAPTYPE_MAP_PTR     = 1 << 4,
  // first occurrence of mapped variable
  OMP_TGT_OLDMAPTYPE_FIRST_MAP   = 1 << 5,
  // return TgtBase addr of mapped data
  OMP_TGT_OLDMAPTYPE_RETURN_PTR  = 1 << 6,
  // private variable - not mapped
  OMP_TGT_OLDMAPTYPE_PRIVATE_PTR = 1 << 7,
  // copy by value - not mapped
  OMP_TGT_OLDMAPTYPE_PRIVATE_VAL = 1 << 8
};
51 
52 // Temporary functions for map translation and cleanup
// Bookkeeping record for one combined entry assembled by translate_map.
struct combined_entry_t {
  // number of old entries that were folded into this combined entry
  int num_members;
  // base address of the combined entry
  void *base_addr;
  // lowest begin address covered by the combined entry
  void *begin_addr;
  // end address of the combined entry (an address, not a size; the size is
  // derived as end_addr - begin_addr)
  void *end_addr;
};
59 
60 static void translate_map(int32_t arg_num, void **args_base, void **args,
61     int64_t *arg_sizes, int64_t *arg_types, int32_t &new_arg_num,
62     void **&new_args_base, void **&new_args, int64_t *&new_arg_sizes,
63     int64_t *&new_arg_types, bool is_target_construct) {
64   if (arg_num <= 0) {
65     DP("Nothing to translate\n");
66     new_arg_num = 0;
67     return;
68   }
69 
70   // array of combined entries
71   combined_entry_t *cmb_entries =
72       (combined_entry_t *) alloca(arg_num * sizeof(combined_entry_t));
73   // number of combined entries
74   long num_combined = 0;
75   // old entry is MAP_PTR?
76   bool *is_ptr_old = (bool *) alloca(arg_num * sizeof(bool));
77   // old entry is member of member_of[old] cmb_entry
78   int *member_of = (int *) alloca(arg_num * sizeof(int));
79   // temporary storage for modifications of the original arg_types
80   int64_t *mod_arg_types = (int64_t *) alloca(arg_num  *sizeof(int64_t));
81 
82   DP("Translating %d map entries\n", arg_num);
83   for (int i = 0; i < arg_num; ++i) {
84     member_of[i] = -1;
85     is_ptr_old[i] = false;
86     mod_arg_types[i] = arg_types[i];
87     // Scan previous entries to see whether this entry shares the same base
88     for (int j = 0; j < i; ++j) {
89       void *new_begin_addr = NULL;
90       void *new_end_addr = NULL;
91 
92       if (mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
93         if (args_base[i] == args[j]) {
94           if (!(mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR)) {
95             DP("Entry %d has the same base as entry %d's begin address\n", i,
96                 j);
97             new_begin_addr = args_base[i];
98             new_end_addr = (char *)args_base[i] + sizeof(void *);
99             assert(arg_sizes[j] == sizeof(void *));
100             is_ptr_old[j] = true;
101           } else {
102             DP("Entry %d has the same base as entry %d's begin address, but "
103                 "%d's base was a MAP_PTR too\n", i, j, j);
104             int32_t to_from_always_delete =
105                 OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM |
106                 OMP_TGT_OLDMAPTYPE_ALWAYS | OMP_TGT_OLDMAPTYPE_DELETE;
107             if (mod_arg_types[j] & to_from_always_delete) {
108               DP("Resetting to/from/always/delete flags for entry %d because "
109                   "it is only a pointer to pointer\n", j);
110               mod_arg_types[j] &= ~to_from_always_delete;
111             }
112           }
113         }
114       } else {
115         if (!(mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_FIRST_MAP) &&
116             args_base[i] == args_base[j]) {
117           DP("Entry %d has the same base address as entry %d\n", i, j);
118           new_begin_addr = args[i];
119           new_end_addr = (char *)args[i] + arg_sizes[i];
120         }
121       }
122 
123       // If we have combined the entry with a previous one
124       if (new_begin_addr) {
125         int id;
126         if(member_of[j] == -1) {
127           // We have a new entry
128           id = num_combined++;
129           DP("Creating new combined entry %d for old entry %d\n", id, j);
130           // Initialize new entry
131           cmb_entries[id].num_members = 1;
132           cmb_entries[id].base_addr = args_base[j];
133           if (mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
134             cmb_entries[id].begin_addr = args_base[j];
135             cmb_entries[id].end_addr = (char *)args_base[j] + arg_sizes[j];
136           } else {
137             cmb_entries[id].begin_addr = args[j];
138             cmb_entries[id].end_addr = (char *)args[j] + arg_sizes[j];
139           }
140           member_of[j] = id;
141         } else {
142           // Reuse existing combined entry
143           DP("Reusing existing combined entry %d\n", member_of[j]);
144           id = member_of[j];
145         }
146 
147         // Update combined entry
148         DP("Adding entry %d to combined entry %d\n", i, id);
149         cmb_entries[id].num_members++;
150         // base_addr stays the same
151         cmb_entries[id].begin_addr =
152             std::min(cmb_entries[id].begin_addr, new_begin_addr);
153         cmb_entries[id].end_addr =
154             std::max(cmb_entries[id].end_addr, new_end_addr);
155         member_of[i] = id;
156         break;
157       }
158     }
159   }
160 
161   DP("New entries: %ld combined + %d original\n", num_combined, arg_num);
162   new_arg_num = arg_num + num_combined;
163   new_args_base = (void **) malloc(new_arg_num * sizeof(void *));
164   new_args = (void **) malloc(new_arg_num * sizeof(void *));
165   new_arg_sizes = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
166   new_arg_types = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
167 
168   const int64_t alignment = 8;
169 
170   int next_id = 0; // next ID
171   int next_cid = 0; // next combined ID
172   int *combined_to_new_id = (int *) alloca(num_combined * sizeof(int));
173   for (int i = 0; i < arg_num; ++i) {
174     // It is member_of
175     if (member_of[i] == next_cid) {
176       int cid = next_cid++; // ID of this combined entry
177       int nid = next_id++; // ID of the new (global) entry
178       combined_to_new_id[cid] = nid;
179       DP("Combined entry %3d will become new entry %3d\n", cid, nid);
180 
181       int64_t padding = (int64_t)cmb_entries[cid].begin_addr % alignment;
182       if (padding) {
183         DP("Using a padding of %" PRId64 " for begin address " DPxMOD "\n",
184             padding, DPxPTR(cmb_entries[cid].begin_addr));
185         cmb_entries[cid].begin_addr =
186             (char *)cmb_entries[cid].begin_addr - padding;
187       }
188 
189       new_args_base[nid] = cmb_entries[cid].base_addr;
190       new_args[nid] = cmb_entries[cid].begin_addr;
191       new_arg_sizes[nid] = (int64_t) ((char *)cmb_entries[cid].end_addr -
192           (char *)cmb_entries[cid].begin_addr);
193       new_arg_types[nid] = OMP_TGT_MAPTYPE_TARGET_PARAM;
194       DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", "
195           "size %" PRId64 ", type 0x%" PRIx64 "\n", nid,
196           DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
197           new_arg_types[nid]);
198     } else if (member_of[i] != -1) {
199       DP("Combined entry %3d has been encountered before, do nothing\n",
200           member_of[i]);
201     }
202 
203     // Now that the combined entry (the one the old entry was a member of) has
204     // been inserted into the new arguments list, proceed with the old entry.
205     int nid = next_id++;
206     DP("Old entry %3d will become new entry %3d\n", i, nid);
207 
208     new_args_base[nid] = args_base[i];
209     new_args[nid] = args[i];
210     new_arg_sizes[nid] = arg_sizes[i];
211     int64_t old_type = mod_arg_types[i];
212 
213     if (is_ptr_old[i]) {
214       // Reset TO and FROM flags
215       old_type &= ~(OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM);
216     }
217 
218     if (member_of[i] == -1) {
219       if (!is_target_construct)
220         old_type &= ~OMP_TGT_MAPTYPE_TARGET_PARAM;
221       new_arg_types[nid] = old_type;
222       DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
223           ", type 0x%" PRIx64 " (old entry %d not MEMBER_OF)\n", nid,
224           DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
225           new_arg_types[nid], i);
226     } else {
227       // Old entry is not FIRST_MAP
228       old_type &= ~OMP_TGT_OLDMAPTYPE_FIRST_MAP;
229       // Add MEMBER_OF
230       int new_member_of = combined_to_new_id[member_of[i]];
231       old_type |= ((int64_t)new_member_of + 1) << 48;
232       new_arg_types[nid] = old_type;
233       DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
234         ", type 0x%" PRIx64 " (old entry %d MEMBER_OF %d)\n", nid,
235         DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
236         new_arg_types[nid], i, new_member_of);
237     }
238   }
239 }
240 
// Counterpart of translate_map: copies base addresses from the translated
// list back into args_base and releases the four translated arrays.
//
// For each i in [0, arg_num) the value new_args_base[i + (new_arg_num -
// arg_num)] is written to args_base[i]. Nothing is copied or freed when
// new_arg_num is not positive.
static void cleanup_map(int32_t new_arg_num, void **new_args_base,
    void **new_args, int64_t *new_arg_sizes, int64_t *new_arg_types,
    int32_t arg_num, void **args_base) {
  // An empty translation has nothing to restore and nothing to free.
  if (new_arg_num <= 0)
    return;

  // Distance between an old index and the slot it is restored from.
  const int shift = new_arg_num - arg_num;

  // Restore old base address
  for (int32_t old_idx = 0; old_idx < arg_num; ++old_idx)
    args_base[old_idx] = new_args_base[shift + old_idx];

  free(new_args_base);
  free(new_args);
  free(new_arg_sizes);
  free(new_arg_types);
}
256 
257 /// creates host-to-target data mapping, stores it in the
258 /// libomptarget.so internal structure (an entry in a stack of data maps)
259 /// and passes the data to the device.
260 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
261     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
262   DP("Entering data begin region for device %ld with %d mappings\n", device_id,
263      arg_num);
264 
265   // No devices available?
266   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
267     device_id = omp_get_default_device();
268     DP("Use default device id %ld\n", device_id);
269   }
270 
271   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
272     DP("Failed to get device %ld ready\n", device_id);
273     return;
274   }
275 
276   DeviceTy& Device = Devices[device_id];
277 
278   // Translate maps
279   int32_t new_arg_num;
280   void **new_args_base;
281   void **new_args;
282   int64_t *new_arg_sizes;
283   int64_t *new_arg_types;
284   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
285       new_args_base, new_args, new_arg_sizes, new_arg_types, false);
286 
287   //target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types);
288   target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
289       new_arg_types);
290 
291   // Cleanup translation memory
292   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
293       new_arg_types, arg_num, args_base);
294 }
295 
296 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
297     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
298     int32_t depNum, void *depList, int32_t noAliasDepNum,
299     void *noAliasDepList) {
300   if (depNum + noAliasDepNum > 0)
301     __kmpc_omp_taskwait(NULL, 0);
302 
303   __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
304                           arg_types);
305 }
306 
307 /// passes data from the target, releases target memory and destroys
308 /// the host-target mapping (top entry from the stack of data maps)
309 /// created by the last __tgt_target_data_begin.
310 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
311     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
312   DP("Entering data end region with %d mappings\n", arg_num);
313 
314   // No devices available?
315   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
316     device_id = omp_get_default_device();
317   }
318 
319   RTLsMtx.lock();
320   size_t Devices_size = Devices.size();
321   RTLsMtx.unlock();
322   if (Devices_size <= (size_t)device_id) {
323     DP("Device ID  %ld does not have a matching RTL.\n", device_id);
324     return;
325   }
326 
327   DeviceTy &Device = Devices[device_id];
328   if (!Device.IsInit) {
329     DP("uninit device: ignore");
330     return;
331   }
332 
333   // Translate maps
334   int32_t new_arg_num;
335   void **new_args_base;
336   void **new_args;
337   int64_t *new_arg_sizes;
338   int64_t *new_arg_types;
339   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
340       new_args_base, new_args, new_arg_sizes, new_arg_types, false);
341 
342   //target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
343   target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
344       new_arg_types);
345 
346   // Cleanup translation memory
347   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
348       new_arg_types, arg_num, args_base);
349 }
350 
351 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
352     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
353     int32_t depNum, void *depList, int32_t noAliasDepNum,
354     void *noAliasDepList) {
355   if (depNum + noAliasDepNum > 0)
356     __kmpc_omp_taskwait(NULL, 0);
357 
358   __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
359                         arg_types);
360 }
361 
362 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
363     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
364   DP("Entering data update with %d mappings\n", arg_num);
365 
366   // No devices available?
367   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
368     device_id = omp_get_default_device();
369   }
370 
371   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
372     DP("Failed to get device %ld ready\n", device_id);
373     return;
374   }
375 
376   DeviceTy& Device = Devices[device_id];
377   target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types);
378 }
379 
380 EXTERN void __tgt_target_data_update_nowait(
381     int64_t device_id, int32_t arg_num, void **args_base, void **args,
382     int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
383     int32_t noAliasDepNum, void *noAliasDepList) {
384   if (depNum + noAliasDepNum > 0)
385     __kmpc_omp_taskwait(NULL, 0);
386 
387   __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes,
388                            arg_types);
389 }
390 
391 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
392     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
393   DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
394      DPxPTR(host_ptr), device_id);
395 
396   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
397     device_id = omp_get_default_device();
398   }
399 
400   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
401     DP("Failed to get device %ld ready\n", device_id);
402     return OFFLOAD_FAIL;
403   }
404 
405   // Translate maps
406   int32_t new_arg_num;
407   void **new_args_base;
408   void **new_args;
409   int64_t *new_arg_sizes;
410   int64_t *new_arg_types;
411   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
412       new_args_base, new_args, new_arg_sizes, new_arg_types, true);
413 
414   //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
415   //    arg_types, 0, 0, false /*team*/, false /*recursive*/);
416   int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
417       new_arg_sizes, new_arg_types, 0, 0, false /*team*/);
418 
419   // Cleanup translation memory
420   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
421       new_arg_types, arg_num, args_base);
422 
423   return rc;
424 }
425 
426 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
427     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
428     int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
429     void *noAliasDepList) {
430   if (depNum + noAliasDepNum > 0)
431     __kmpc_omp_taskwait(NULL, 0);
432 
433   return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
434                       arg_types);
435 }
436 
437 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
438     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
439     int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
440   DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
441      DPxPTR(host_ptr), device_id);
442 
443   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
444     device_id = omp_get_default_device();
445   }
446 
447   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
448     DP("Failed to get device %ld ready\n", device_id);
449     return OFFLOAD_FAIL;
450   }
451 
452   // Translate maps
453   int32_t new_arg_num;
454   void **new_args_base;
455   void **new_args;
456   int64_t *new_arg_sizes;
457   int64_t *new_arg_types;
458   translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
459       new_args_base, new_args, new_arg_sizes, new_arg_types, true);
460 
461   //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
462   //              arg_types, team_num, thread_limit, true /*team*/,
463   //              false /*recursive*/);
464   int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
465       new_arg_sizes, new_arg_types, team_num, thread_limit, true /*team*/);
466 
467   // Cleanup translation memory
468   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
469       new_arg_types, arg_num, args_base);
470 
471   return rc;
472 }
473 
474 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
475     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
476     int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
477     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
478   if (depNum + noAliasDepNum > 0)
479     __kmpc_omp_taskwait(NULL, 0);
480 
481   return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args,
482                             arg_sizes, arg_types, team_num, thread_limit);
483 }
484 
485 
486 // The trip count mechanism will be revised - this scheme is not thread-safe.
487 EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
488     uint64_t loop_tripcount) {
489   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
490     device_id = omp_get_default_device();
491   }
492 
493   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
494     DP("Failed to get device %ld ready\n", device_id);
495     return;
496   }
497 
498   DP("__kmpc_push_target_tripcount(%ld, %" PRIu64 ")\n", device_id,
499       loop_tripcount);
500   Devices[device_id].loopTripCnt = loop_tripcount;
501 }
502