1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "device.h"
15 #include "omptarget.h"
16 #include "private.h"
17 #include "rtl.h"
18 
19 #include <cassert>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <mutex>
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds requires flags
__tgt_register_requires(int64_t Flags)26 EXTERN void __tgt_register_requires(int64_t Flags) {
27   TIMESCOPE();
28   PM->RTLs.registerRequires(Flags);
29 }
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 /// adds a target shared library to the target execution image
__tgt_register_lib(__tgt_bin_desc * Desc)33 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
34   TIMESCOPE();
35   std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs);
36   for (auto &RTL : PM->RTLs.AllRTLs) {
37     if (RTL.register_lib) {
38       if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) {
39         DP("Could not register library with %s", RTL.RTLName.c_str());
40       }
41     }
42   }
43   PM->RTLs.registerLib(Desc);
44 }
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 /// Initialize all available devices without registering any image
__tgt_init_all_rtls()48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
49 
50 ////////////////////////////////////////////////////////////////////////////////
51 /// unloads a target shared library
__tgt_unregister_lib(__tgt_bin_desc * Desc)52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
53   TIMESCOPE();
54   PM->RTLs.unregisterLib(Desc);
55   for (auto &RTL : PM->RTLs.UsedRTLs) {
56     if (RTL->unregister_lib) {
57       if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) {
58         DP("Could not register library with %s", RTL->RTLName.c_str());
59       }
60     }
61   }
62 }
63 
64 /// creates host-to-target data mapping, stores it in the
65 /// libomptarget.so internal structure (an entry in a stack of data maps)
66 /// and passes the data to the device.
__tgt_target_data_begin_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers)67 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
68                                            int32_t ArgNum, void **ArgsBase,
69                                            void **Args, int64_t *ArgSizes,
70                                            int64_t *ArgTypes,
71                                            map_var_info_t *ArgNames,
72                                            void **ArgMappers) {
73   TIMESCOPE_WITH_IDENT(Loc);
74   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
75      DeviceId, ArgNum);
76   if (checkDeviceAndCtors(DeviceId, Loc)) {
77     DP("Not offloading to device %" PRId64 "\n", DeviceId);
78     return;
79   }
80 
81   DeviceTy &Device = *PM->Devices[DeviceId];
82 
83   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
84     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
85                          "Entering OpenMP data region");
86 #ifdef OMPTARGET_DEBUG
87   for (int I = 0; I < ArgNum; ++I) {
88     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
89        ", Type=0x%" PRIx64 ", Name=%s\n",
90        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
91        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
92   }
93 #endif
94 
95   AsyncInfoTy AsyncInfo(Device);
96   int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
97                            ArgTypes, ArgNames, ArgMappers, AsyncInfo);
98   if (Rc == OFFLOAD_SUCCESS)
99     Rc = AsyncInfo.synchronize();
100   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
101 }
102 
__tgt_target_data_begin_nowait_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)103 EXTERN void __tgt_target_data_begin_nowait_mapper(
104     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
105     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
106     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
107     void *NoAliasDepList) {
108   TIMESCOPE_WITH_IDENT(Loc);
109 
110   __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
111                                  ArgSizes, ArgTypes, ArgNames, ArgMappers);
112 }
113 
114 /// passes data from the target, releases target memory and destroys
115 /// the host-target mapping (top entry from the stack of data maps)
116 /// created by the last __tgt_target_data_begin.
__tgt_target_data_end_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers)117 EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
118                                          int32_t ArgNum, void **ArgsBase,
119                                          void **Args, int64_t *ArgSizes,
120                                          int64_t *ArgTypes,
121                                          map_var_info_t *ArgNames,
122                                          void **ArgMappers) {
123   TIMESCOPE_WITH_IDENT(Loc);
124   DP("Entering data end region with %d mappings\n", ArgNum);
125   if (checkDeviceAndCtors(DeviceId, Loc)) {
126     DP("Not offloading to device %" PRId64 "\n", DeviceId);
127     return;
128   }
129 
130   DeviceTy &Device = *PM->Devices[DeviceId];
131 
132   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
133     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
134                          "Exiting OpenMP data region");
135 #ifdef OMPTARGET_DEBUG
136   for (int I = 0; I < ArgNum; ++I) {
137     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
138        ", Type=0x%" PRIx64 ", Name=%s\n",
139        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
140        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
141   }
142 #endif
143 
144   AsyncInfoTy AsyncInfo(Device);
145   int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
146                          ArgTypes, ArgNames, ArgMappers, AsyncInfo);
147   if (Rc == OFFLOAD_SUCCESS)
148     Rc = AsyncInfo.synchronize();
149   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
150 }
151 
__tgt_target_data_end_nowait_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)152 EXTERN void __tgt_target_data_end_nowait_mapper(
153     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
154     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
155     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
156     void *NoAliasDepList) {
157   TIMESCOPE_WITH_IDENT(Loc);
158 
159   __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
160                                ArgTypes, ArgNames, ArgMappers);
161 }
162 
__tgt_target_data_update_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers)163 EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
164                                             int32_t ArgNum, void **ArgsBase,
165                                             void **Args, int64_t *ArgSizes,
166                                             int64_t *ArgTypes,
167                                             map_var_info_t *ArgNames,
168                                             void **ArgMappers) {
169   TIMESCOPE_WITH_IDENT(Loc);
170   DP("Entering data update with %d mappings\n", ArgNum);
171   if (checkDeviceAndCtors(DeviceId, Loc)) {
172     DP("Not offloading to device %" PRId64 "\n", DeviceId);
173     return;
174   }
175 
176   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
177     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
178                          "Updating OpenMP data");
179 
180   DeviceTy &Device = *PM->Devices[DeviceId];
181   AsyncInfoTy AsyncInfo(Device);
182   int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
183                             ArgTypes, ArgNames, ArgMappers, AsyncInfo);
184   if (Rc == OFFLOAD_SUCCESS)
185     Rc = AsyncInfo.synchronize();
186   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
187 }
188 
__tgt_target_data_update_nowait_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)189 EXTERN void __tgt_target_data_update_nowait_mapper(
190     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
191     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
192     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
193     void *NoAliasDepList) {
194   TIMESCOPE_WITH_IDENT(Loc);
195 
196   __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
197                                   ArgSizes, ArgTypes, ArgNames, ArgMappers);
198 }
199 
200 /// Implements a kernel entry that executes the target region on the specified
201 /// device.
202 ///
203 /// \param Loc Source location associated with this target region.
204 /// \param DeviceId The device to execute this region, -1 indicated the default.
205 /// \param NumTeams Number of teams to launch the region with, -1 indicates a
206 ///                 non-teams region and 0 indicates it was unspecified.
207 /// \param ThreadLimit Limit to the number of threads to use in the kernel
208 ///                    launch, 0 indicates it was unspecified.
209 /// \param HostPtr  The pointer to the host function registered with the kernel.
210 /// \param Args     All arguments to this kernel launch (see struct definition).
__tgt_target_kernel(ident_t * Loc,int64_t DeviceId,int32_t NumTeams,int32_t ThreadLimit,void * HostPtr,__tgt_kernel_arguments * Args)211 EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
212                                int32_t ThreadLimit, void *HostPtr,
213                                __tgt_kernel_arguments *Args) {
214   TIMESCOPE_WITH_IDENT(Loc);
215   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
216      "\n",
217      DPxPTR(HostPtr), DeviceId);
218   if (Args->Version != 1) {
219     DP("Unexpected ABI version: %d\n", Args->Version);
220   }
221   if (checkDeviceAndCtors(DeviceId, Loc)) {
222     DP("Not offloading to device %" PRId64 "\n", DeviceId);
223     return OMP_TGT_FAIL;
224   }
225 
226   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
227     printKernelArguments(Loc, DeviceId, Args->NumArgs, Args->ArgSizes,
228                          Args->ArgTypes, Args->ArgNames,
229                          "Entering OpenMP kernel");
230 #ifdef OMPTARGET_DEBUG
231   for (int I = 0; I < Args->NumArgs; ++I) {
232     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
233        ", Type=0x%" PRIx64 ", Name=%s\n",
234        I, DPxPTR(Args->ArgBasePtrs[I]), DPxPTR(Args->ArgPtrs[I]),
235        Args->ArgSizes[I], Args->ArgTypes[I],
236        (Args->ArgNames) ? getNameFromMapping(Args->ArgNames[I]).c_str()
237                         : "unknown");
238   }
239 #endif
240 
241   bool IsTeams = NumTeams != -1;
242   if (!IsTeams)
243     NumTeams = 0;
244 
245   DeviceTy &Device = *PM->Devices[DeviceId];
246   AsyncInfoTy AsyncInfo(Device);
247   int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs,
248                   Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames,
249                   Args->ArgMappers, NumTeams, ThreadLimit, Args->Tripcount,
250                   IsTeams, AsyncInfo);
251   if (Rc == OFFLOAD_SUCCESS)
252     Rc = AsyncInfo.synchronize();
253   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
254   assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
255   return OMP_TGT_SUCCESS;
256 }
257 
__tgt_target_kernel_nowait(ident_t * Loc,int64_t DeviceId,int32_t NumTeams,int32_t ThreadLimit,void * HostPtr,__tgt_kernel_arguments * Args,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)258 EXTERN int __tgt_target_kernel_nowait(
259     ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit,
260     void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList,
261     int32_t NoAliasDepNum, void *NoAliasDepList) {
262   TIMESCOPE_WITH_IDENT(Loc);
263 
264   return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
265                              Args);
266 }
267 
268 // Get the current number of components for a user-defined mapper.
__tgt_mapper_num_components(void * RtMapperHandle)269 EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
270   TIMESCOPE();
271   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
272   int64_t Size = MapperComponentsPtr->Components.size();
273   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
274      DPxPTR(RtMapperHandle), Size);
275   return Size;
276 }
277 
278 // Push back one component for a user-defined mapper.
__tgt_push_mapper_component(void * RtMapperHandle,void * Base,void * Begin,int64_t Size,int64_t Type,void * Name)279 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
280                                         void *Begin, int64_t Size, int64_t Type,
281                                         void *Name) {
282   TIMESCOPE();
283   DP("__tgt_push_mapper_component(Handle=" DPxMOD
284      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
285      ", Type=0x%" PRIx64 ", Name=%s).\n",
286      DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
287      (Name) ? getNameFromMapping(Name).c_str() : "unknown");
288   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
289   MapperComponentsPtr->Components.push_back(
290       MapComponentInfoTy(Base, Begin, Size, Type, Name));
291 }
292 
__tgt_set_info_flag(uint32_t NewInfoLevel)293 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
294   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
295   InfoLevel.store(NewInfoLevel);
296   for (auto &R : PM->RTLs.AllRTLs) {
297     if (R.set_info_flag)
298       R.set_info_flag(NewInfoLevel);
299   }
300 }
301 
__tgt_print_device_info(int64_t DeviceId)302 EXTERN int __tgt_print_device_info(int64_t DeviceId) {
303   return PM->Devices[DeviceId]->printDeviceInfo(
304       PM->Devices[DeviceId]->RTLDeviceID);
305 }
306