1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "device.h"
15 #include "omptarget.h"
16 #include "private.h"
17 #include "rtl.h"
18 
19 #include <cassert>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <mutex>
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds requires flags
26 EXTERN void __tgt_register_requires(int64_t Flags) {
27   TIMESCOPE();
28   PM->RTLs.registerRequires(Flags);
29 }
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 /// adds a target shared library to the target execution image
33 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
34   TIMESCOPE();
35   std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs);
36   for (auto &RTL : PM->RTLs.AllRTLs) {
37     if (RTL.register_lib) {
38       if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) {
39         DP("Could not register library with %s", RTL.RTLName.c_str());
40       }
41     }
42   }
43   PM->RTLs.registerLib(Desc);
44 }
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 /// Initialize all available devices without registering any image
48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
49 
50 ////////////////////////////////////////////////////////////////////////////////
51 /// unloads a target shared library
52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
53   TIMESCOPE();
54   PM->RTLs.unregisterLib(Desc);
55   for (auto &RTL : PM->RTLs.UsedRTLs) {
56     if (RTL->unregister_lib) {
57       if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) {
58         DP("Could not register library with %s", RTL->RTLName.c_str());
59       }
60     }
61   }
62 }
63 
64 /// creates host-to-target data mapping, stores it in the
65 /// libomptarget.so internal structure (an entry in a stack of data maps)
66 /// and passes the data to the device.
67 EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
68                                     void **ArgsBase, void **Args,
69                                     int64_t *ArgSizes, int64_t *ArgTypes) {
70   TIMESCOPE();
71   __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
72                                  ArgSizes, ArgTypes, nullptr, nullptr);
73 }
74 
75 EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
76                                            void **ArgsBase, void **Args,
77                                            int64_t *ArgSizes, int64_t *ArgTypes,
78                                            int32_t DepNum, void *DepList,
79                                            int32_t NoAliasDepNum,
80                                            void *NoAliasDepList) {
81   TIMESCOPE();
82 
83   __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
84                                  ArgSizes, ArgTypes, nullptr, nullptr);
85 }
86 
87 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
88                                            int32_t ArgNum, void **ArgsBase,
89                                            void **Args, int64_t *ArgSizes,
90                                            int64_t *ArgTypes,
91                                            map_var_info_t *ArgNames,
92                                            void **ArgMappers) {
93   TIMESCOPE_WITH_IDENT(Loc);
94   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
95      DeviceId, ArgNum);
96   if (checkDeviceAndCtors(DeviceId, Loc)) {
97     DP("Not offloading to device %" PRId64 "\n", DeviceId);
98     return;
99   }
100 
101   DeviceTy &Device = *PM->Devices[DeviceId];
102 
103   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
104     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
105                          "Entering OpenMP data region");
106 #ifdef OMPTARGET_DEBUG
107   for (int I = 0; I < ArgNum; ++I) {
108     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
109        ", Type=0x%" PRIx64 ", Name=%s\n",
110        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
111        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
112   }
113 #endif
114 
115   AsyncInfoTy AsyncInfo(Device);
116   int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
117                            ArgTypes, ArgNames, ArgMappers, AsyncInfo);
118   if (Rc == OFFLOAD_SUCCESS)
119     Rc = AsyncInfo.synchronize();
120   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
121 }
122 
123 EXTERN void __tgt_target_data_begin_nowait_mapper(
124     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
125     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
126     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
127     void *NoAliasDepList) {
128   TIMESCOPE_WITH_IDENT(Loc);
129 
130   __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
131                                  ArgSizes, ArgTypes, ArgNames, ArgMappers);
132 }
133 
134 /// passes data from the target, releases target memory and destroys
135 /// the host-target mapping (top entry from the stack of data maps)
136 /// created by the last __tgt_target_data_begin.
137 EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
138                                   void **ArgsBase, void **Args,
139                                   int64_t *ArgSizes, int64_t *ArgTypes) {
140   TIMESCOPE();
141   __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
142                                ArgSizes, ArgTypes, nullptr, nullptr);
143 }
144 
145 EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
146                                          void **ArgsBase, void **Args,
147                                          int64_t *ArgSizes, int64_t *ArgTypes,
148                                          int32_t DepNum, void *DepList,
149                                          int32_t NoAliasDepNum,
150                                          void *NoAliasDepList) {
151   TIMESCOPE();
152 
153   __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
154                                ArgSizes, ArgTypes, nullptr, nullptr);
155 }
156 
157 EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
158                                          int32_t ArgNum, void **ArgsBase,
159                                          void **Args, int64_t *ArgSizes,
160                                          int64_t *ArgTypes,
161                                          map_var_info_t *ArgNames,
162                                          void **ArgMappers) {
163   TIMESCOPE_WITH_IDENT(Loc);
164   DP("Entering data end region with %d mappings\n", ArgNum);
165   if (checkDeviceAndCtors(DeviceId, Loc)) {
166     DP("Not offloading to device %" PRId64 "\n", DeviceId);
167     return;
168   }
169 
170   DeviceTy &Device = *PM->Devices[DeviceId];
171 
172   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
173     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
174                          "Exiting OpenMP data region");
175 #ifdef OMPTARGET_DEBUG
176   for (int I = 0; I < ArgNum; ++I) {
177     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
178        ", Type=0x%" PRIx64 ", Name=%s\n",
179        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
180        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
181   }
182 #endif
183 
184   AsyncInfoTy AsyncInfo(Device);
185   int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
186                          ArgTypes, ArgNames, ArgMappers, AsyncInfo);
187   if (Rc == OFFLOAD_SUCCESS)
188     Rc = AsyncInfo.synchronize();
189   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
190 }
191 
192 EXTERN void __tgt_target_data_end_nowait_mapper(
193     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
194     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
195     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
196     void *NoAliasDepList) {
197   TIMESCOPE_WITH_IDENT(Loc);
198 
199   __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
200                                ArgTypes, ArgNames, ArgMappers);
201 }
202 
203 EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
204                                      void **ArgsBase, void **Args,
205                                      int64_t *ArgSizes, int64_t *ArgTypes) {
206   TIMESCOPE();
207   __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
208                                   ArgSizes, ArgTypes, nullptr, nullptr);
209 }
210 
211 EXTERN void __tgt_target_data_update_nowait(
212     int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args,
213     int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
214     int32_t NoAliasDepNum, void *NoAliasDepList) {
215   TIMESCOPE();
216 
217   __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
218                                   ArgSizes, ArgTypes, nullptr, nullptr);
219 }
220 
221 EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
222                                             int32_t ArgNum, void **ArgsBase,
223                                             void **Args, int64_t *ArgSizes,
224                                             int64_t *ArgTypes,
225                                             map_var_info_t *ArgNames,
226                                             void **ArgMappers) {
227   TIMESCOPE_WITH_IDENT(Loc);
228   DP("Entering data update with %d mappings\n", ArgNum);
229   if (checkDeviceAndCtors(DeviceId, Loc)) {
230     DP("Not offloading to device %" PRId64 "\n", DeviceId);
231     return;
232   }
233 
234   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
235     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
236                          "Updating OpenMP data");
237 
238   DeviceTy &Device = *PM->Devices[DeviceId];
239   AsyncInfoTy AsyncInfo(Device);
240   int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
241                             ArgTypes, ArgNames, ArgMappers, AsyncInfo);
242   if (Rc == OFFLOAD_SUCCESS)
243     Rc = AsyncInfo.synchronize();
244   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
245 }
246 
247 EXTERN void __tgt_target_data_update_nowait_mapper(
248     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
249     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
250     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
251     void *NoAliasDepList) {
252   TIMESCOPE_WITH_IDENT(Loc);
253 
254   __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
255                                   ArgSizes, ArgTypes, ArgNames, ArgMappers);
256 }
257 
258 EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
259                         void **ArgsBase, void **Args, int64_t *ArgSizes,
260                         int64_t *ArgTypes) {
261   TIMESCOPE();
262   return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
263                              ArgSizes, ArgTypes, nullptr, nullptr);
264 }
265 
266 EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
267                                void **ArgsBase, void **Args, int64_t *ArgSizes,
268                                int64_t *ArgTypes, int32_t DepNum, void *DepList,
269                                int32_t NoAliasDepNum, void *NoAliasDepList) {
270   TIMESCOPE();
271 
272   return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
273                              ArgSizes, ArgTypes, nullptr, nullptr);
274 }
275 
276 EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
277                                int32_t ArgNum, void **ArgsBase, void **Args,
278                                int64_t *ArgSizes, int64_t *ArgTypes,
279                                map_var_info_t *ArgNames, void **ArgMappers) {
280   TIMESCOPE_WITH_IDENT(Loc);
281   __tgt_kernel_arguments KernelArgs{
282       1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, -1};
283   return __tgt_target_kernel(Loc, DeviceId, -1, 0, HostPtr, &KernelArgs);
284 }
285 
286 EXTERN int __tgt_target_nowait_mapper(
287     ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
288     void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
289     map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
290     int32_t NoAliasDepNum, void *NoAliasDepList) {
291   TIMESCOPE_WITH_IDENT(Loc);
292 
293   return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
294                              ArgSizes, ArgTypes, ArgNames, ArgMappers);
295 }
296 
297 EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
298                               void **ArgsBase, void **Args, int64_t *ArgSizes,
299                               int64_t *ArgTypes, int32_t TeamNum,
300                               int32_t ThreadLimit) {
301   TIMESCOPE();
302   return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
303                                    Args, ArgSizes, ArgTypes, nullptr, nullptr,
304                                    TeamNum, ThreadLimit);
305 }
306 
307 EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
308                                      int32_t ArgNum, void **ArgsBase,
309                                      void **Args, int64_t *ArgSizes,
310                                      int64_t *ArgTypes, int32_t TeamNum,
311                                      int32_t ThreadLimit, int32_t DepNum,
312                                      void *DepList, int32_t NoAliasDepNum,
313                                      void *NoAliasDepList) {
314   TIMESCOPE();
315 
316   return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
317                                    Args, ArgSizes, ArgTypes, nullptr, nullptr,
318                                    TeamNum, ThreadLimit);
319 }
320 
321 EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
322                                      void *HostPtr, int32_t ArgNum,
323                                      void **ArgsBase, void **Args,
324                                      int64_t *ArgSizes, int64_t *ArgTypes,
325                                      map_var_info_t *ArgNames,
326                                      void **ArgMappers, int32_t TeamNum,
327                                      int32_t ThreadLimit) {
328   TIMESCOPE_WITH_IDENT(Loc);
329   __tgt_kernel_arguments KernelArgs{
330       1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, -1};
331   return __tgt_target_kernel(Loc, DeviceId, TeamNum, ThreadLimit, HostPtr,
332                              &KernelArgs);
333 }
334 
335 /// Implements a kernel entry that executes the target region on the specified
336 /// device.
337 ///
338 /// \param Loc Source location associated with this target region.
339 /// \param DeviceId The device to execute this region, -1 indicated the default.
340 /// \param NumTeams Number of teams to launch the region with, -1 indicates a
341 ///                 non-teams region and 0 indicates it was unspecified.
342 /// \param ThreadLimit Limit to the number of threads to use in the kernel
343 ///                    launch, 0 indicates it was unspecified.
344 /// \param HostPtr  The pointer to the host function registered with the kernel.
345 /// \param Args     All arguments to this kernel launch (see struct definition).
346 EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
347                                int32_t ThreadLimit, void *HostPtr,
348                                __tgt_kernel_arguments *Args) {
349   TIMESCOPE_WITH_IDENT(Loc);
350   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
351      "\n",
352      DPxPTR(HostPtr), DeviceId);
353   if (Args->Version != 1) {
354     DP("Unexpected ABI version: %d\n", Args->Version);
355   }
356   if (checkDeviceAndCtors(DeviceId, Loc)) {
357     DP("Not offloading to device %" PRId64 "\n", DeviceId);
358     return OMP_TGT_FAIL;
359   }
360 
361   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
362     printKernelArguments(Loc, DeviceId, Args->NumArgs, Args->ArgSizes,
363                          Args->ArgTypes, Args->ArgNames,
364                          "Entering OpenMP kernel");
365 #ifdef OMPTARGET_DEBUG
366   for (int I = 0; I < Args->NumArgs; ++I) {
367     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
368        ", Type=0x%" PRIx64 ", Name=%s\n",
369        I, DPxPTR(Args->ArgBasePtrs[I]), DPxPTR(Args->ArgPtrs[I]),
370        Args->ArgSizes[I], Args->ArgTypes[I],
371        (Args->ArgNames) ? getNameFromMapping(Args->ArgNames[I]).c_str()
372                         : "unknown");
373   }
374 #endif
375 
376   bool IsTeams = NumTeams != -1;
377   if (!IsTeams)
378     NumTeams = 0;
379 
380   DeviceTy &Device = *PM->Devices[DeviceId];
381   AsyncInfoTy AsyncInfo(Device);
382   int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs,
383                   Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames,
384                   Args->ArgMappers, NumTeams, ThreadLimit, Args->Tripcount,
385                   IsTeams, AsyncInfo);
386   if (Rc == OFFLOAD_SUCCESS)
387     Rc = AsyncInfo.synchronize();
388   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
389   assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
390   return OMP_TGT_SUCCESS;
391 }
392 
393 EXTERN int __tgt_target_kernel_nowait(
394     ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit,
395     void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList,
396     int32_t NoAliasDepNum, void *NoAliasDepList) {
397   TIMESCOPE_WITH_IDENT(Loc);
398 
399   return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
400                              Args);
401 }
402 
403 // Get the current number of components for a user-defined mapper.
404 EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
405   TIMESCOPE();
406   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
407   int64_t Size = MapperComponentsPtr->Components.size();
408   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
409      DPxPTR(RtMapperHandle), Size);
410   return Size;
411 }
412 
413 // Push back one component for a user-defined mapper.
414 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
415                                         void *Begin, int64_t Size, int64_t Type,
416                                         void *Name) {
417   TIMESCOPE();
418   DP("__tgt_push_mapper_component(Handle=" DPxMOD
419      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
420      ", Type=0x%" PRIx64 ", Name=%s).\n",
421      DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
422      (Name) ? getNameFromMapping(Name).c_str() : "unknown");
423   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
424   MapperComponentsPtr->Components.push_back(
425       MapComponentInfoTy(Base, Begin, Size, Type, Name));
426 }
427 
428 EXTERN void __kmpc_push_target_tripcount(int64_t DeviceId,
429                                          uint64_t LoopTripcount) {
430   __kmpc_push_target_tripcount_mapper(nullptr, DeviceId, LoopTripcount);
431 }
432 
433 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
434                                                 uint64_t LoopTripcount) {
435   TIMESCOPE_WITH_IDENT(Loc);
436   if (checkDeviceAndCtors(DeviceId, Loc)) {
437     DP("Not offloading to device %" PRId64 "\n", DeviceId);
438     return;
439   }
440 
441   DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId,
442      LoopTripcount);
443   PM->TblMapMtx.lock();
444   PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
445                                              LoopTripcount);
446   PM->TblMapMtx.unlock();
447 }
448 
449 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
450   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
451   InfoLevel.store(NewInfoLevel);
452   for (auto &R : PM->RTLs.AllRTLs) {
453     if (R.set_info_flag)
454       R.set_info_flag(NewInfoLevel);
455   }
456 }
457 
458 EXTERN int __tgt_print_device_info(int64_t DeviceId) {
459   return PM->Devices[DeviceId]->printDeviceInfo(
460       PM->Devices[DeviceId]->RTLDeviceID);
461 }
462