1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "device.h"
15 #include "omptarget.h"
16 #include "private.h"
17 #include "rtl.h"
18 
19 #include <cassert>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <mutex>
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds requires flags
26 EXTERN void __tgt_register_requires(int64_t flags) {
27   TIMESCOPE();
28   PM->RTLs.RegisterRequires(flags);
29 }
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 /// adds a target shared library to the target execution image
33 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
34   TIMESCOPE();
35   std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
36   for (auto &RTL : PM->RTLs.AllRTLs) {
37     if (RTL.register_lib) {
38       if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) {
39         DP("Could not register library with %s", RTL.RTLName.c_str());
40       }
41     }
42   }
43   PM->RTLs.RegisterLib(desc);
44 }
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 /// unloads a target shared library
48 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
49   TIMESCOPE();
50   PM->RTLs.UnregisterLib(desc);
51   for (auto &RTL : PM->RTLs.UsedRTLs) {
52     if (RTL->unregister_lib) {
53       if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) {
54         DP("Could not register library with %s", RTL->RTLName.c_str());
55       }
56     }
57   }
58 }
59 
60 /// creates host-to-target data mapping, stores it in the
61 /// libomptarget.so internal structure (an entry in a stack of data maps)
62 /// and passes the data to the device.
63 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
64                                     void **args_base, void **args,
65                                     int64_t *arg_sizes, int64_t *arg_types) {
66   TIMESCOPE();
67   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
68                                  arg_sizes, arg_types, nullptr, nullptr);
69 }
70 
71 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
72                                            void **args_base, void **args,
73                                            int64_t *arg_sizes,
74                                            int64_t *arg_types, int32_t depNum,
75                                            void *depList, int32_t noAliasDepNum,
76                                            void *noAliasDepList) {
77   TIMESCOPE();
78   if (depNum + noAliasDepNum > 0)
79     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
80 
81   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
82                                  arg_sizes, arg_types, nullptr, nullptr);
83 }
84 
85 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
86                                            int32_t arg_num, void **args_base,
87                                            void **args, int64_t *arg_sizes,
88                                            int64_t *arg_types,
89                                            map_var_info_t *arg_names,
90                                            void **arg_mappers) {
91   TIMESCOPE_WITH_IDENT(loc);
92   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
93      device_id, arg_num);
94   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
95     DP("Not offloading to device %" PRId64 "\n", device_id);
96     return;
97   }
98 
99   DeviceTy &Device = PM->Devices[device_id];
100 
101   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
102     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
103                          arg_names, "Entering OpenMP data region");
104 #ifdef OMPTARGET_DEBUG
105   for (int i = 0; i < arg_num; ++i) {
106     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
107        ", Type=0x%" PRIx64 ", Name=%s\n",
108        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
109        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
110   }
111 #endif
112 
113   AsyncInfoTy AsyncInfo(Device);
114   int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes,
115                            arg_types, arg_names, arg_mappers, AsyncInfo);
116   if (rc == OFFLOAD_SUCCESS)
117     rc = AsyncInfo.synchronize();
118   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
119 }
120 
121 EXTERN void __tgt_target_data_begin_nowait_mapper(
122     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
123     void **args, int64_t *arg_sizes, int64_t *arg_types,
124     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
125     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
126   TIMESCOPE_WITH_IDENT(loc);
127   if (depNum + noAliasDepNum > 0)
128     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
129 
130   __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args,
131                                  arg_sizes, arg_types, arg_names, arg_mappers);
132 }
133 
134 /// passes data from the target, releases target memory and destroys
135 /// the host-target mapping (top entry from the stack of data maps)
136 /// created by the last __tgt_target_data_begin.
137 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
138                                   void **args_base, void **args,
139                                   int64_t *arg_sizes, int64_t *arg_types) {
140   TIMESCOPE();
141   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
142                                arg_sizes, arg_types, nullptr, nullptr);
143 }
144 
145 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
146                                          void **args_base, void **args,
147                                          int64_t *arg_sizes, int64_t *arg_types,
148                                          int32_t depNum, void *depList,
149                                          int32_t noAliasDepNum,
150                                          void *noAliasDepList) {
151   TIMESCOPE();
152   if (depNum + noAliasDepNum > 0)
153     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
154 
155   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
156                                arg_sizes, arg_types, nullptr, nullptr);
157 }
158 
159 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
160                                          int32_t arg_num, void **args_base,
161                                          void **args, int64_t *arg_sizes,
162                                          int64_t *arg_types,
163                                          map_var_info_t *arg_names,
164                                          void **arg_mappers) {
165   TIMESCOPE_WITH_IDENT(loc);
166   DP("Entering data end region with %d mappings\n", arg_num);
167   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
168     DP("Not offloading to device %" PRId64 "\n", device_id);
169     return;
170   }
171 
172   DeviceTy &Device = PM->Devices[device_id];
173 
174   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
175     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
176                          arg_names, "Exiting OpenMP data region");
177 #ifdef OMPTARGET_DEBUG
178   for (int i = 0; i < arg_num; ++i) {
179     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
180        ", Type=0x%" PRIx64 ", Name=%s\n",
181        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
182        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
183   }
184 #endif
185 
186   AsyncInfoTy AsyncInfo(Device);
187   int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes,
188                          arg_types, arg_names, arg_mappers, AsyncInfo);
189   if (rc == OFFLOAD_SUCCESS)
190     rc = AsyncInfo.synchronize();
191   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
192 }
193 
194 EXTERN void __tgt_target_data_end_nowait_mapper(
195     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
196     void **args, int64_t *arg_sizes, int64_t *arg_types,
197     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
198     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
199   TIMESCOPE_WITH_IDENT(loc);
200   if (depNum + noAliasDepNum > 0)
201     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
202 
203   __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args,
204                                arg_sizes, arg_types, arg_names, arg_mappers);
205 }
206 
207 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
208                                      void **args_base, void **args,
209                                      int64_t *arg_sizes, int64_t *arg_types) {
210   TIMESCOPE();
211   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
212                                   arg_sizes, arg_types, nullptr, nullptr);
213 }
214 
215 EXTERN void __tgt_target_data_update_nowait(
216     int64_t device_id, int32_t arg_num, void **args_base, void **args,
217     int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
218     int32_t noAliasDepNum, void *noAliasDepList) {
219   TIMESCOPE();
220   if (depNum + noAliasDepNum > 0)
221     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
222 
223   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
224                                   arg_sizes, arg_types, nullptr, nullptr);
225 }
226 
227 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
228                                             int32_t arg_num, void **args_base,
229                                             void **args, int64_t *arg_sizes,
230                                             int64_t *arg_types,
231                                             map_var_info_t *arg_names,
232                                             void **arg_mappers) {
233   TIMESCOPE_WITH_IDENT(loc);
234   DP("Entering data update with %d mappings\n", arg_num);
235   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
236     DP("Not offloading to device %" PRId64 "\n", device_id);
237     return;
238   }
239 
240   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
241     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
242                          arg_names, "Updating OpenMP data");
243 
244   DeviceTy &Device = PM->Devices[device_id];
245   AsyncInfoTy AsyncInfo(Device);
246   int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
247                             arg_types, arg_names, arg_mappers, AsyncInfo);
248   if (rc == OFFLOAD_SUCCESS)
249     rc = AsyncInfo.synchronize();
250   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
251 }
252 
253 EXTERN void __tgt_target_data_update_nowait_mapper(
254     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
255     void **args, int64_t *arg_sizes, int64_t *arg_types,
256     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
257     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
258   TIMESCOPE_WITH_IDENT(loc);
259   if (depNum + noAliasDepNum > 0)
260     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
261 
262   __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args,
263                                   arg_sizes, arg_types, arg_names, arg_mappers);
264 }
265 
266 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
267                         void **args_base, void **args, int64_t *arg_sizes,
268                         int64_t *arg_types) {
269   TIMESCOPE();
270   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
271                              args, arg_sizes, arg_types, nullptr, nullptr);
272 }
273 
274 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
275                                int32_t arg_num, void **args_base, void **args,
276                                int64_t *arg_sizes, int64_t *arg_types,
277                                int32_t depNum, void *depList,
278                                int32_t noAliasDepNum, void *noAliasDepList) {
279   TIMESCOPE();
280   if (depNum + noAliasDepNum > 0)
281     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
282 
283   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
284                              args, arg_sizes, arg_types, nullptr, nullptr);
285 }
286 
287 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
288                                int32_t arg_num, void **args_base, void **args,
289                                int64_t *arg_sizes, int64_t *arg_types,
290                                map_var_info_t *arg_names, void **arg_mappers) {
291   TIMESCOPE_WITH_IDENT(loc);
292   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
293      "\n",
294      DPxPTR(host_ptr), device_id);
295   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
296     DP("Not offloading to device %" PRId64 "\n", device_id);
297     return OFFLOAD_FAIL;
298   }
299 
300   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
301     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
302                          arg_names, "Entering OpenMP kernel");
303 #ifdef OMPTARGET_DEBUG
304   for (int i = 0; i < arg_num; ++i) {
305     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
306        ", Type=0x%" PRIx64 ", Name=%s\n",
307        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
308        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
309   }
310 #endif
311 
312   DeviceTy &Device = PM->Devices[device_id];
313   AsyncInfoTy AsyncInfo(Device);
314   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
315                   arg_types, arg_names, arg_mappers, 0, 0, false /*team*/,
316                   AsyncInfo);
317   if (rc == OFFLOAD_SUCCESS)
318     rc = AsyncInfo.synchronize();
319   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
320   return rc;
321 }
322 
323 EXTERN int __tgt_target_nowait_mapper(
324     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
325     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
326     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
327     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
328   TIMESCOPE_WITH_IDENT(loc);
329   if (depNum + noAliasDepNum > 0)
330     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
331 
332   return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args,
333                              arg_sizes, arg_types, arg_names, arg_mappers);
334 }
335 
336 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
337                               int32_t arg_num, void **args_base, void **args,
338                               int64_t *arg_sizes, int64_t *arg_types,
339                               int32_t team_num, int32_t thread_limit) {
340   TIMESCOPE();
341   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
342                                    args_base, args, arg_sizes, arg_types,
343                                    nullptr, nullptr, team_num, thread_limit);
344 }
345 
346 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
347                                      int32_t arg_num, void **args_base,
348                                      void **args, int64_t *arg_sizes,
349                                      int64_t *arg_types, int32_t team_num,
350                                      int32_t thread_limit, int32_t depNum,
351                                      void *depList, int32_t noAliasDepNum,
352                                      void *noAliasDepList) {
353   TIMESCOPE();
354   if (depNum + noAliasDepNum > 0)
355     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
356 
357   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
358                                    args_base, args, arg_sizes, arg_types,
359                                    nullptr, nullptr, team_num, thread_limit);
360 }
361 
362 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
363                                      void *host_ptr, int32_t arg_num,
364                                      void **args_base, void **args,
365                                      int64_t *arg_sizes, int64_t *arg_types,
366                                      map_var_info_t *arg_names,
367                                      void **arg_mappers, int32_t team_num,
368                                      int32_t thread_limit) {
369   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
370      "\n",
371      DPxPTR(host_ptr), device_id);
372   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
373     DP("Not offloading to device %" PRId64 "\n", device_id);
374     return OFFLOAD_FAIL;
375   }
376 
377   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
378     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
379                          arg_names, "Entering OpenMP kernel");
380 #ifdef OMPTARGET_DEBUG
381   for (int i = 0; i < arg_num; ++i) {
382     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
383        ", Type=0x%" PRIx64 ", Name=%s\n",
384        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
385        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
386   }
387 #endif
388 
389   DeviceTy &Device = PM->Devices[device_id];
390   AsyncInfoTy AsyncInfo(Device);
391   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
392                   arg_types, arg_names, arg_mappers, team_num, thread_limit,
393                   true /*team*/, AsyncInfo);
394   if (rc == OFFLOAD_SUCCESS)
395     rc = AsyncInfo.synchronize();
396   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
397   return rc;
398 }
399 
400 EXTERN int __tgt_target_teams_nowait_mapper(
401     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
402     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
403     map_var_info_t *arg_names, void **arg_mappers, int32_t team_num,
404     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
405     void *noAliasDepList) {
406   TIMESCOPE_WITH_IDENT(loc);
407   if (depNum + noAliasDepNum > 0)
408     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
409 
410   return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base,
411                                    args, arg_sizes, arg_types, arg_names,
412                                    arg_mappers, team_num, thread_limit);
413 }
414 
415 // Get the current number of components for a user-defined mapper.
416 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
417   TIMESCOPE();
418   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
419   int64_t size = MapperComponentsPtr->Components.size();
420   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
421      DPxPTR(rt_mapper_handle), size);
422   return size;
423 }
424 
425 // Push back one component for a user-defined mapper.
426 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
427                                         void *begin, int64_t size, int64_t type,
428                                         void *name) {
429   TIMESCOPE();
430   DP("__tgt_push_mapper_component(Handle=" DPxMOD
431      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
432      ", Type=0x%" PRIx64 ", Name=%s).\n",
433      DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type,
434      (name) ? getNameFromMapping(name).c_str() : "unknown");
435   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
436   MapperComponentsPtr->Components.push_back(
437       MapComponentInfoTy(base, begin, size, type, name));
438 }
439 
440 EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
441                                          uint64_t loop_tripcount) {
442   __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
443 }
444 
445 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
446                                                 uint64_t loop_tripcount) {
447   TIMESCOPE_WITH_IDENT(loc);
448   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
449     DP("Not offloading to device %" PRId64 "\n", device_id);
450     return;
451   }
452 
453   DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
454      loop_tripcount);
455   PM->TblMapMtx.lock();
456   PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
457                                              loop_tripcount);
458   PM->TblMapMtx.unlock();
459 }
460