1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "device.h"
15 #include "omptarget.h"
16 #include "private.h"
17 #include "rtl.h"
18 
19 #include <cassert>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <mutex>
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds requires flags
26 EXTERN void __tgt_register_requires(int64_t Flags) {
27   TIMESCOPE();
28   PM->RTLs.registerRequires(Flags);
29 }
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 /// adds a target shared library to the target execution image
33 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
34   TIMESCOPE();
35   std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs);
36   for (auto &RTL : PM->RTLs.AllRTLs) {
37     if (RTL.register_lib) {
38       if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) {
39         DP("Could not register library with %s", RTL.RTLName.c_str());
40       }
41     }
42   }
43   PM->RTLs.registerLib(Desc);
44 }
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 /// Initialize all available devices without registering any image
48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
49 
50 ////////////////////////////////////////////////////////////////////////////////
51 /// unloads a target shared library
52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
53   TIMESCOPE();
54   PM->RTLs.unregisterLib(Desc);
55   for (auto &RTL : PM->RTLs.UsedRTLs) {
56     if (RTL->unregister_lib) {
57       if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) {
58         DP("Could not register library with %s", RTL->RTLName.c_str());
59       }
60     }
61   }
62 }
63 
64 /// creates host-to-target data mapping, stores it in the
65 /// libomptarget.so internal structure (an entry in a stack of data maps)
66 /// and passes the data to the device.
67 EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
68                                     void **ArgsBase, void **Args,
69                                     int64_t *ArgSizes, int64_t *ArgTypes) {
70   TIMESCOPE();
71   __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
72                                  ArgSizes, ArgTypes, nullptr, nullptr);
73 }
74 
75 EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
76                                            void **ArgsBase, void **Args,
77                                            int64_t *ArgSizes, int64_t *ArgTypes,
78                                            int32_t DepNum, void *DepList,
79                                            int32_t NoAliasDepNum,
80                                            void *NoAliasDepList) {
81   TIMESCOPE();
82 
83   __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
84                                  ArgSizes, ArgTypes, nullptr, nullptr);
85 }
86 
87 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
88                                            int32_t ArgNum, void **ArgsBase,
89                                            void **Args, int64_t *ArgSizes,
90                                            int64_t *ArgTypes,
91                                            map_var_info_t *ArgNames,
92                                            void **ArgMappers) {
93   TIMESCOPE_WITH_IDENT(Loc);
94   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
95      DeviceId, ArgNum);
96   if (checkDeviceAndCtors(DeviceId, Loc)) {
97     DP("Not offloading to device %" PRId64 "\n", DeviceId);
98     return;
99   }
100 
101   DeviceTy &Device = *PM->Devices[DeviceId];
102 
103   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
104     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
105                          "Entering OpenMP data region");
106 #ifdef OMPTARGET_DEBUG
107   for (int I = 0; I < ArgNum; ++I) {
108     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
109        ", Type=0x%" PRIx64 ", Name=%s\n",
110        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
111        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
112   }
113 #endif
114 
115   AsyncInfoTy AsyncInfo(Device);
116   int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
117                            ArgTypes, ArgNames, ArgMappers, AsyncInfo);
118   if (Rc == OFFLOAD_SUCCESS)
119     Rc = AsyncInfo.synchronize();
120   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
121 }
122 
123 EXTERN void __tgt_target_data_begin_nowait_mapper(
124     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
125     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
126     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
127     void *NoAliasDepList) {
128   TIMESCOPE_WITH_IDENT(Loc);
129 
130   __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
131                                  ArgSizes, ArgTypes, ArgNames, ArgMappers);
132 }
133 
134 /// passes data from the target, releases target memory and destroys
135 /// the host-target mapping (top entry from the stack of data maps)
136 /// created by the last __tgt_target_data_begin.
137 EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
138                                   void **ArgsBase, void **Args,
139                                   int64_t *ArgSizes, int64_t *ArgTypes) {
140   TIMESCOPE();
141   __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
142                                ArgSizes, ArgTypes, nullptr, nullptr);
143 }
144 
145 EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
146                                          void **ArgsBase, void **Args,
147                                          int64_t *ArgSizes, int64_t *ArgTypes,
148                                          int32_t DepNum, void *DepList,
149                                          int32_t NoAliasDepNum,
150                                          void *NoAliasDepList) {
151   TIMESCOPE();
152 
153   __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
154                                ArgSizes, ArgTypes, nullptr, nullptr);
155 }
156 
157 EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
158                                          int32_t ArgNum, void **ArgsBase,
159                                          void **Args, int64_t *ArgSizes,
160                                          int64_t *ArgTypes,
161                                          map_var_info_t *ArgNames,
162                                          void **ArgMappers) {
163   TIMESCOPE_WITH_IDENT(Loc);
164   DP("Entering data end region with %d mappings\n", ArgNum);
165   if (checkDeviceAndCtors(DeviceId, Loc)) {
166     DP("Not offloading to device %" PRId64 "\n", DeviceId);
167     return;
168   }
169 
170   DeviceTy &Device = *PM->Devices[DeviceId];
171 
172   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
173     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
174                          "Exiting OpenMP data region");
175 #ifdef OMPTARGET_DEBUG
176   for (int I = 0; I < ArgNum; ++I) {
177     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
178        ", Type=0x%" PRIx64 ", Name=%s\n",
179        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
180        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
181   }
182 #endif
183 
184   AsyncInfoTy AsyncInfo(Device);
185   int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
186                          ArgTypes, ArgNames, ArgMappers, AsyncInfo);
187   if (Rc == OFFLOAD_SUCCESS)
188     Rc = AsyncInfo.synchronize();
189   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
190 }
191 
192 EXTERN void __tgt_target_data_end_nowait_mapper(
193     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
194     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
195     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
196     void *NoAliasDepList) {
197   TIMESCOPE_WITH_IDENT(Loc);
198 
199   __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
200                                ArgTypes, ArgNames, ArgMappers);
201 }
202 
203 EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
204                                      void **ArgsBase, void **Args,
205                                      int64_t *ArgSizes, int64_t *ArgTypes) {
206   TIMESCOPE();
207   __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
208                                   ArgSizes, ArgTypes, nullptr, nullptr);
209 }
210 
211 EXTERN void __tgt_target_data_update_nowait(
212     int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args,
213     int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
214     int32_t NoAliasDepNum, void *NoAliasDepList) {
215   TIMESCOPE();
216 
217   __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
218                                   ArgSizes, ArgTypes, nullptr, nullptr);
219 }
220 
221 EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
222                                             int32_t ArgNum, void **ArgsBase,
223                                             void **Args, int64_t *ArgSizes,
224                                             int64_t *ArgTypes,
225                                             map_var_info_t *ArgNames,
226                                             void **ArgMappers) {
227   TIMESCOPE_WITH_IDENT(Loc);
228   DP("Entering data update with %d mappings\n", ArgNum);
229   if (checkDeviceAndCtors(DeviceId, Loc)) {
230     DP("Not offloading to device %" PRId64 "\n", DeviceId);
231     return;
232   }
233 
234   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
235     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
236                          "Updating OpenMP data");
237 
238   DeviceTy &Device = *PM->Devices[DeviceId];
239   AsyncInfoTy AsyncInfo(Device);
240   int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
241                             ArgTypes, ArgNames, ArgMappers, AsyncInfo);
242   if (Rc == OFFLOAD_SUCCESS)
243     Rc = AsyncInfo.synchronize();
244   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
245 }
246 
247 EXTERN void __tgt_target_data_update_nowait_mapper(
248     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
249     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
250     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
251     void *NoAliasDepList) {
252   TIMESCOPE_WITH_IDENT(Loc);
253 
254   __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
255                                   ArgSizes, ArgTypes, ArgNames, ArgMappers);
256 }
257 
258 EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
259                         void **ArgsBase, void **Args, int64_t *ArgSizes,
260                         int64_t *ArgTypes) {
261   TIMESCOPE();
262   return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
263                              ArgSizes, ArgTypes, nullptr, nullptr);
264 }
265 
266 EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
267                                void **ArgsBase, void **Args, int64_t *ArgSizes,
268                                int64_t *ArgTypes, int32_t DepNum, void *DepList,
269                                int32_t NoAliasDepNum, void *NoAliasDepList) {
270   TIMESCOPE();
271 
272   return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
273                              ArgSizes, ArgTypes, nullptr, nullptr);
274 }
275 
276 EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
277                                int32_t ArgNum, void **ArgsBase, void **Args,
278                                int64_t *ArgSizes, int64_t *ArgTypes,
279                                map_var_info_t *ArgNames, void **ArgMappers) {
280   TIMESCOPE_WITH_IDENT(Loc);
281   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
282      "\n",
283      DPxPTR(HostPtr), DeviceId);
284   if (checkDeviceAndCtors(DeviceId, Loc)) {
285     DP("Not offloading to device %" PRId64 "\n", DeviceId);
286     return OMP_TGT_FAIL;
287   }
288 
289   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
290     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
291                          "Entering OpenMP kernel");
292 #ifdef OMPTARGET_DEBUG
293   for (int I = 0; I < ArgNum; ++I) {
294     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
295        ", Type=0x%" PRIx64 ", Name=%s\n",
296        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
297        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
298   }
299 #endif
300 
301   DeviceTy &Device = *PM->Devices[DeviceId];
302   AsyncInfoTy AsyncInfo(Device);
303   int Rc =
304       target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes,
305              ArgNames, ArgMappers, 0, 0, false /*team*/, AsyncInfo);
306   if (Rc == OFFLOAD_SUCCESS)
307     Rc = AsyncInfo.synchronize();
308   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
309   assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!");
310   return OMP_TGT_SUCCESS;
311 }
312 
313 EXTERN int __tgt_target_nowait_mapper(
314     ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
315     void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
316     map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
317     int32_t NoAliasDepNum, void *NoAliasDepList) {
318   TIMESCOPE_WITH_IDENT(Loc);
319 
320   return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
321                              ArgSizes, ArgTypes, ArgNames, ArgMappers);
322 }
323 
324 EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
325                               void **ArgsBase, void **Args, int64_t *ArgSizes,
326                               int64_t *ArgTypes, int32_t TeamNum,
327                               int32_t ThreadLimit) {
328   TIMESCOPE();
329   return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
330                                    Args, ArgSizes, ArgTypes, nullptr, nullptr,
331                                    TeamNum, ThreadLimit);
332 }
333 
334 EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
335                                      int32_t ArgNum, void **ArgsBase,
336                                      void **Args, int64_t *ArgSizes,
337                                      int64_t *ArgTypes, int32_t TeamNum,
338                                      int32_t ThreadLimit, int32_t DepNum,
339                                      void *DepList, int32_t NoAliasDepNum,
340                                      void *NoAliasDepList) {
341   TIMESCOPE();
342 
343   return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
344                                    Args, ArgSizes, ArgTypes, nullptr, nullptr,
345                                    TeamNum, ThreadLimit);
346 }
347 
348 EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
349                                      void *HostPtr, int32_t ArgNum,
350                                      void **ArgsBase, void **Args,
351                                      int64_t *ArgSizes, int64_t *ArgTypes,
352                                      map_var_info_t *ArgNames,
353                                      void **ArgMappers, int32_t TeamNum,
354                                      int32_t ThreadLimit) {
355   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
356      "\n",
357      DPxPTR(HostPtr), DeviceId);
358   if (checkDeviceAndCtors(DeviceId, Loc)) {
359     DP("Not offloading to device %" PRId64 "\n", DeviceId);
360     return OMP_TGT_FAIL;
361   }
362 
363   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
364     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
365                          "Entering OpenMP kernel");
366 #ifdef OMPTARGET_DEBUG
367   for (int I = 0; I < ArgNum; ++I) {
368     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
369        ", Type=0x%" PRIx64 ", Name=%s\n",
370        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
371        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
372   }
373 #endif
374 
375   DeviceTy &Device = *PM->Devices[DeviceId];
376   AsyncInfoTy AsyncInfo(Device);
377   int Rc = target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes,
378                   ArgTypes, ArgNames, ArgMappers, TeamNum, ThreadLimit,
379                   true /*team*/, AsyncInfo);
380   if (Rc == OFFLOAD_SUCCESS)
381     Rc = AsyncInfo.synchronize();
382   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
383   assert(Rc == OFFLOAD_SUCCESS &&
384          "__tgt_target_teams_mapper unexpected failure!");
385   return OMP_TGT_SUCCESS;
386 }
387 
388 EXTERN int __tgt_target_teams_nowait_mapper(
389     ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
390     void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
391     map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
392     int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
393     void *NoAliasDepList) {
394   TIMESCOPE_WITH_IDENT(Loc);
395 
396   return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
397                                    Args, ArgSizes, ArgTypes, ArgNames,
398                                    ArgMappers, TeamNum, ThreadLimit);
399 }
400 
401 // Get the current number of components for a user-defined mapper.
402 EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
403   TIMESCOPE();
404   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
405   int64_t Size = MapperComponentsPtr->Components.size();
406   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
407      DPxPTR(RtMapperHandle), Size);
408   return Size;
409 }
410 
411 // Push back one component for a user-defined mapper.
412 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
413                                         void *Begin, int64_t Size, int64_t Type,
414                                         void *Name) {
415   TIMESCOPE();
416   DP("__tgt_push_mapper_component(Handle=" DPxMOD
417      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
418      ", Type=0x%" PRIx64 ", Name=%s).\n",
419      DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
420      (Name) ? getNameFromMapping(Name).c_str() : "unknown");
421   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
422   MapperComponentsPtr->Components.push_back(
423       MapComponentInfoTy(Base, Begin, Size, Type, Name));
424 }
425 
426 EXTERN void __kmpc_push_target_tripcount(int64_t DeviceId,
427                                          uint64_t LoopTripcount) {
428   __kmpc_push_target_tripcount_mapper(nullptr, DeviceId, LoopTripcount);
429 }
430 
431 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
432                                                 uint64_t LoopTripcount) {
433   TIMESCOPE_WITH_IDENT(Loc);
434   if (checkDeviceAndCtors(DeviceId, Loc)) {
435     DP("Not offloading to device %" PRId64 "\n", DeviceId);
436     return;
437   }
438 
439   DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId,
440      LoopTripcount);
441   PM->TblMapMtx.lock();
442   PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
443                                              LoopTripcount);
444   PM->TblMapMtx.unlock();
445 }
446 
447 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
448   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
449   InfoLevel.store(NewInfoLevel);
450   for (auto &R : PM->RTLs.AllRTLs) {
451     if (R.set_info_flag)
452       R.set_info_flag(NewInfoLevel);
453   }
454 }
455 
456 EXTERN int __tgt_print_device_info(int64_t DeviceId) {
457   return PM->Devices[DeviceId]->printDeviceInfo(
458       PM->Devices[DeviceId]->RTLDeviceID);
459 }
460