1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "device.h"
15 #include "omptarget.h"
16 #include "private.h"
17 #include "rtl.h"
18 
19 #include <cassert>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <mutex>
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds requires flags
26 EXTERN void __tgt_register_requires(int64_t flags) {
27   TIMESCOPE();
28   PM->RTLs.RegisterRequires(flags);
29 }
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 /// adds a target shared library to the target execution image
33 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
34   TIMESCOPE();
35   std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
36   for (auto &RTL : PM->RTLs.AllRTLs) {
37     if (RTL.register_lib) {
38       if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) {
39         DP("Could not register library with %s", RTL.RTLName.c_str());
40       }
41     }
42   }
43   PM->RTLs.RegisterLib(desc);
44 }
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 /// Initialize all available devices without registering any image
48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
49 
50 ////////////////////////////////////////////////////////////////////////////////
51 /// unloads a target shared library
52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
53   TIMESCOPE();
54   PM->RTLs.UnregisterLib(desc);
55   for (auto &RTL : PM->RTLs.UsedRTLs) {
56     if (RTL->unregister_lib) {
57       if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) {
58         DP("Could not register library with %s", RTL->RTLName.c_str());
59       }
60     }
61   }
62 }
63 
64 /// creates host-to-target data mapping, stores it in the
65 /// libomptarget.so internal structure (an entry in a stack of data maps)
66 /// and passes the data to the device.
67 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
68                                     void **args_base, void **args,
69                                     int64_t *arg_sizes, int64_t *arg_types) {
70   TIMESCOPE();
71   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
72                                  arg_sizes, arg_types, nullptr, nullptr);
73 }
74 
75 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
76                                            void **args_base, void **args,
77                                            int64_t *arg_sizes,
78                                            int64_t *arg_types, int32_t depNum,
79                                            void *depList, int32_t noAliasDepNum,
80                                            void *noAliasDepList) {
81   TIMESCOPE();
82 
83   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
84                                  arg_sizes, arg_types, nullptr, nullptr);
85 }
86 
87 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
88                                            int32_t arg_num, void **args_base,
89                                            void **args, int64_t *arg_sizes,
90                                            int64_t *arg_types,
91                                            map_var_info_t *arg_names,
92                                            void **arg_mappers) {
93   TIMESCOPE_WITH_IDENT(loc);
94   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
95      device_id, arg_num);
96   if (checkDeviceAndCtors(device_id, loc)) {
97     DP("Not offloading to device %" PRId64 "\n", device_id);
98     return;
99   }
100 
101   DeviceTy &Device = *PM->Devices[device_id];
102 
103   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
104     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
105                          arg_names, "Entering OpenMP data region");
106 #ifdef OMPTARGET_DEBUG
107   for (int i = 0; i < arg_num; ++i) {
108     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
109        ", Type=0x%" PRIx64 ", Name=%s\n",
110        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
111        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
112   }
113 #endif
114 
115   AsyncInfoTy AsyncInfo(Device);
116   int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes,
117                            arg_types, arg_names, arg_mappers, AsyncInfo);
118   if (rc == OFFLOAD_SUCCESS)
119     rc = AsyncInfo.synchronize();
120   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
121 }
122 
123 EXTERN void __tgt_target_data_begin_nowait_mapper(
124     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
125     void **args, int64_t *arg_sizes, int64_t *arg_types,
126     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
127     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
128   TIMESCOPE_WITH_IDENT(loc);
129 
130   __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args,
131                                  arg_sizes, arg_types, arg_names, arg_mappers);
132 }
133 
134 /// passes data from the target, releases target memory and destroys
135 /// the host-target mapping (top entry from the stack of data maps)
136 /// created by the last __tgt_target_data_begin.
137 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
138                                   void **args_base, void **args,
139                                   int64_t *arg_sizes, int64_t *arg_types) {
140   TIMESCOPE();
141   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
142                                arg_sizes, arg_types, nullptr, nullptr);
143 }
144 
145 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
146                                          void **args_base, void **args,
147                                          int64_t *arg_sizes, int64_t *arg_types,
148                                          int32_t depNum, void *depList,
149                                          int32_t noAliasDepNum,
150                                          void *noAliasDepList) {
151   TIMESCOPE();
152 
153   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
154                                arg_sizes, arg_types, nullptr, nullptr);
155 }
156 
157 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
158                                          int32_t arg_num, void **args_base,
159                                          void **args, int64_t *arg_sizes,
160                                          int64_t *arg_types,
161                                          map_var_info_t *arg_names,
162                                          void **arg_mappers) {
163   TIMESCOPE_WITH_IDENT(loc);
164   DP("Entering data end region with %d mappings\n", arg_num);
165   if (checkDeviceAndCtors(device_id, loc)) {
166     DP("Not offloading to device %" PRId64 "\n", device_id);
167     return;
168   }
169 
170   DeviceTy &Device = *PM->Devices[device_id];
171 
172   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
173     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
174                          arg_names, "Exiting OpenMP data region");
175 #ifdef OMPTARGET_DEBUG
176   for (int i = 0; i < arg_num; ++i) {
177     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
178        ", Type=0x%" PRIx64 ", Name=%s\n",
179        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
180        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
181   }
182 #endif
183 
184   AsyncInfoTy AsyncInfo(Device);
185   int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes,
186                          arg_types, arg_names, arg_mappers, AsyncInfo);
187   if (rc == OFFLOAD_SUCCESS)
188     rc = AsyncInfo.synchronize();
189   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
190 }
191 
192 EXTERN void __tgt_target_data_end_nowait_mapper(
193     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
194     void **args, int64_t *arg_sizes, int64_t *arg_types,
195     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
196     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
197   TIMESCOPE_WITH_IDENT(loc);
198 
199   __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args,
200                                arg_sizes, arg_types, arg_names, arg_mappers);
201 }
202 
203 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
204                                      void **args_base, void **args,
205                                      int64_t *arg_sizes, int64_t *arg_types) {
206   TIMESCOPE();
207   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
208                                   arg_sizes, arg_types, nullptr, nullptr);
209 }
210 
211 EXTERN void __tgt_target_data_update_nowait(
212     int64_t device_id, int32_t arg_num, void **args_base, void **args,
213     int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
214     int32_t noAliasDepNum, void *noAliasDepList) {
215   TIMESCOPE();
216 
217   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
218                                   arg_sizes, arg_types, nullptr, nullptr);
219 }
220 
221 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
222                                             int32_t arg_num, void **args_base,
223                                             void **args, int64_t *arg_sizes,
224                                             int64_t *arg_types,
225                                             map_var_info_t *arg_names,
226                                             void **arg_mappers) {
227   TIMESCOPE_WITH_IDENT(loc);
228   DP("Entering data update with %d mappings\n", arg_num);
229   if (checkDeviceAndCtors(device_id, loc)) {
230     DP("Not offloading to device %" PRId64 "\n", device_id);
231     return;
232   }
233 
234   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
235     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
236                          arg_names, "Updating OpenMP data");
237 
238   DeviceTy &Device = *PM->Devices[device_id];
239   AsyncInfoTy AsyncInfo(Device);
240   int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
241                             arg_types, arg_names, arg_mappers, AsyncInfo);
242   if (rc == OFFLOAD_SUCCESS)
243     rc = AsyncInfo.synchronize();
244   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
245 }
246 
247 EXTERN void __tgt_target_data_update_nowait_mapper(
248     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
249     void **args, int64_t *arg_sizes, int64_t *arg_types,
250     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
251     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
252   TIMESCOPE_WITH_IDENT(loc);
253 
254   __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args,
255                                   arg_sizes, arg_types, arg_names, arg_mappers);
256 }
257 
258 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
259                         void **args_base, void **args, int64_t *arg_sizes,
260                         int64_t *arg_types) {
261   TIMESCOPE();
262   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
263                              args, arg_sizes, arg_types, nullptr, nullptr);
264 }
265 
266 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
267                                int32_t arg_num, void **args_base, void **args,
268                                int64_t *arg_sizes, int64_t *arg_types,
269                                int32_t depNum, void *depList,
270                                int32_t noAliasDepNum, void *noAliasDepList) {
271   TIMESCOPE();
272 
273   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
274                              args, arg_sizes, arg_types, nullptr, nullptr);
275 }
276 
277 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
278                                int32_t arg_num, void **args_base, void **args,
279                                int64_t *arg_sizes, int64_t *arg_types,
280                                map_var_info_t *arg_names, void **arg_mappers) {
281   TIMESCOPE_WITH_IDENT(loc);
282   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
283      "\n",
284      DPxPTR(host_ptr), device_id);
285   if (checkDeviceAndCtors(device_id, loc)) {
286     DP("Not offloading to device %" PRId64 "\n", device_id);
287     return OMP_TGT_FAIL;
288   }
289 
290   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
291     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
292                          arg_names, "Entering OpenMP kernel");
293 #ifdef OMPTARGET_DEBUG
294   for (int i = 0; i < arg_num; ++i) {
295     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
296        ", Type=0x%" PRIx64 ", Name=%s\n",
297        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
298        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
299   }
300 #endif
301 
302   DeviceTy &Device = *PM->Devices[device_id];
303   AsyncInfoTy AsyncInfo(Device);
304   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
305                   arg_types, arg_names, arg_mappers, 0, 0, false /*team*/,
306                   AsyncInfo);
307   if (rc == OFFLOAD_SUCCESS)
308     rc = AsyncInfo.synchronize();
309   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
310   assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!");
311   return OMP_TGT_SUCCESS;
312 }
313 
314 EXTERN int __tgt_target_nowait_mapper(
315     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
316     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
317     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
318     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
319   TIMESCOPE_WITH_IDENT(loc);
320 
321   return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args,
322                              arg_sizes, arg_types, arg_names, arg_mappers);
323 }
324 
325 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
326                               int32_t arg_num, void **args_base, void **args,
327                               int64_t *arg_sizes, int64_t *arg_types,
328                               int32_t team_num, int32_t thread_limit) {
329   TIMESCOPE();
330   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
331                                    args_base, args, arg_sizes, arg_types,
332                                    nullptr, nullptr, team_num, thread_limit);
333 }
334 
335 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
336                                      int32_t arg_num, void **args_base,
337                                      void **args, int64_t *arg_sizes,
338                                      int64_t *arg_types, int32_t team_num,
339                                      int32_t thread_limit, int32_t depNum,
340                                      void *depList, int32_t noAliasDepNum,
341                                      void *noAliasDepList) {
342   TIMESCOPE();
343 
344   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
345                                    args_base, args, arg_sizes, arg_types,
346                                    nullptr, nullptr, team_num, thread_limit);
347 }
348 
349 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
350                                      void *host_ptr, int32_t arg_num,
351                                      void **args_base, void **args,
352                                      int64_t *arg_sizes, int64_t *arg_types,
353                                      map_var_info_t *arg_names,
354                                      void **arg_mappers, int32_t team_num,
355                                      int32_t thread_limit) {
356   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
357      "\n",
358      DPxPTR(host_ptr), device_id);
359   if (checkDeviceAndCtors(device_id, loc)) {
360     DP("Not offloading to device %" PRId64 "\n", device_id);
361     return OMP_TGT_FAIL;
362   }
363 
364   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
365     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
366                          arg_names, "Entering OpenMP kernel");
367 #ifdef OMPTARGET_DEBUG
368   for (int i = 0; i < arg_num; ++i) {
369     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
370        ", Type=0x%" PRIx64 ", Name=%s\n",
371        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
372        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
373   }
374 #endif
375 
376   DeviceTy &Device = *PM->Devices[device_id];
377   AsyncInfoTy AsyncInfo(Device);
378   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
379                   arg_types, arg_names, arg_mappers, team_num, thread_limit,
380                   true /*team*/, AsyncInfo);
381   if (rc == OFFLOAD_SUCCESS)
382     rc = AsyncInfo.synchronize();
383   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
384   assert(rc == OFFLOAD_SUCCESS &&
385          "__tgt_target_teams_mapper unexpected failure!");
386   return OMP_TGT_SUCCESS;
387 }
388 
389 EXTERN int __tgt_target_teams_nowait_mapper(
390     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
391     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
392     map_var_info_t *arg_names, void **arg_mappers, int32_t team_num,
393     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
394     void *noAliasDepList) {
395   TIMESCOPE_WITH_IDENT(loc);
396 
397   return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base,
398                                    args, arg_sizes, arg_types, arg_names,
399                                    arg_mappers, team_num, thread_limit);
400 }
401 
402 // Get the current number of components for a user-defined mapper.
403 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
404   TIMESCOPE();
405   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
406   int64_t size = MapperComponentsPtr->Components.size();
407   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
408      DPxPTR(rt_mapper_handle), size);
409   return size;
410 }
411 
412 // Push back one component for a user-defined mapper.
413 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
414                                         void *begin, int64_t size, int64_t type,
415                                         void *name) {
416   TIMESCOPE();
417   DP("__tgt_push_mapper_component(Handle=" DPxMOD
418      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
419      ", Type=0x%" PRIx64 ", Name=%s).\n",
420      DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type,
421      (name) ? getNameFromMapping(name).c_str() : "unknown");
422   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
423   MapperComponentsPtr->Components.push_back(
424       MapComponentInfoTy(base, begin, size, type, name));
425 }
426 
427 EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
428                                          uint64_t loop_tripcount) {
429   __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
430 }
431 
432 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
433                                                 uint64_t loop_tripcount) {
434   TIMESCOPE_WITH_IDENT(loc);
435   if (checkDeviceAndCtors(device_id, loc)) {
436     DP("Not offloading to device %" PRId64 "\n", device_id);
437     return;
438   }
439 
440   DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
441      loop_tripcount);
442   PM->TblMapMtx.lock();
443   PM->Devices[device_id]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
444                                               loop_tripcount);
445   PM->TblMapMtx.unlock();
446 }
447 
448 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
449   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
450   InfoLevel.store(NewInfoLevel);
451   for (auto &R : PM->RTLs.AllRTLs) {
452     if (R.set_info_flag)
453       R.set_info_flag(NewInfoLevel);
454   }
455 }
456 
457 EXTERN int __tgt_print_device_info(int64_t device_id) {
458   return PM->Devices[device_id]->printDeviceInfo(
459       PM->Devices[device_id]->RTLDeviceID);
460 }
461