//===-------- interface.cpp - Target independent OpenMP target RTL --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//
13
14 #include "device.h"
15 #include "omptarget.h"
16 #include "private.h"
17 #include "rtl.h"
18
19 #include <cassert>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <mutex>
23
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds requires flags
__tgt_register_requires(int64_t Flags)26 EXTERN void __tgt_register_requires(int64_t Flags) {
27 TIMESCOPE();
28 PM->RTLs.registerRequires(Flags);
29 }
30
31 ////////////////////////////////////////////////////////////////////////////////
32 /// adds a target shared library to the target execution image
__tgt_register_lib(__tgt_bin_desc * Desc)33 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
34 TIMESCOPE();
35 std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs);
36 for (auto &RTL : PM->RTLs.AllRTLs) {
37 if (RTL.register_lib) {
38 if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) {
39 DP("Could not register library with %s", RTL.RTLName.c_str());
40 }
41 }
42 }
43 PM->RTLs.registerLib(Desc);
44 }
45
46 ////////////////////////////////////////////////////////////////////////////////
47 /// Initialize all available devices without registering any image
__tgt_init_all_rtls()48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
49
50 ////////////////////////////////////////////////////////////////////////////////
51 /// unloads a target shared library
__tgt_unregister_lib(__tgt_bin_desc * Desc)52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
53 TIMESCOPE();
54 PM->RTLs.unregisterLib(Desc);
55 for (auto &RTL : PM->RTLs.UsedRTLs) {
56 if (RTL->unregister_lib) {
57 if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) {
58 DP("Could not register library with %s", RTL->RTLName.c_str());
59 }
60 }
61 }
62 }
63
64 /// creates host-to-target data mapping, stores it in the
65 /// libomptarget.so internal structure (an entry in a stack of data maps)
66 /// and passes the data to the device.
__tgt_target_data_begin_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers)67 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
68 int32_t ArgNum, void **ArgsBase,
69 void **Args, int64_t *ArgSizes,
70 int64_t *ArgTypes,
71 map_var_info_t *ArgNames,
72 void **ArgMappers) {
73 TIMESCOPE_WITH_IDENT(Loc);
74 DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
75 DeviceId, ArgNum);
76 if (checkDeviceAndCtors(DeviceId, Loc)) {
77 DP("Not offloading to device %" PRId64 "\n", DeviceId);
78 return;
79 }
80
81 DeviceTy &Device = *PM->Devices[DeviceId];
82
83 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
84 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
85 "Entering OpenMP data region");
86 #ifdef OMPTARGET_DEBUG
87 for (int I = 0; I < ArgNum; ++I) {
88 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
89 ", Type=0x%" PRIx64 ", Name=%s\n",
90 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
91 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
92 }
93 #endif
94
95 AsyncInfoTy AsyncInfo(Device);
96 int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
97 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
98 if (Rc == OFFLOAD_SUCCESS)
99 Rc = AsyncInfo.synchronize();
100 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
101 }
102
__tgt_target_data_begin_nowait_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)103 EXTERN void __tgt_target_data_begin_nowait_mapper(
104 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
105 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
106 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
107 void *NoAliasDepList) {
108 TIMESCOPE_WITH_IDENT(Loc);
109
110 __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
111 ArgSizes, ArgTypes, ArgNames, ArgMappers);
112 }
113
114 /// passes data from the target, releases target memory and destroys
115 /// the host-target mapping (top entry from the stack of data maps)
116 /// created by the last __tgt_target_data_begin.
__tgt_target_data_end_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers)117 EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
118 int32_t ArgNum, void **ArgsBase,
119 void **Args, int64_t *ArgSizes,
120 int64_t *ArgTypes,
121 map_var_info_t *ArgNames,
122 void **ArgMappers) {
123 TIMESCOPE_WITH_IDENT(Loc);
124 DP("Entering data end region with %d mappings\n", ArgNum);
125 if (checkDeviceAndCtors(DeviceId, Loc)) {
126 DP("Not offloading to device %" PRId64 "\n", DeviceId);
127 return;
128 }
129
130 DeviceTy &Device = *PM->Devices[DeviceId];
131
132 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
133 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
134 "Exiting OpenMP data region");
135 #ifdef OMPTARGET_DEBUG
136 for (int I = 0; I < ArgNum; ++I) {
137 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
138 ", Type=0x%" PRIx64 ", Name=%s\n",
139 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
140 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
141 }
142 #endif
143
144 AsyncInfoTy AsyncInfo(Device);
145 int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
146 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
147 if (Rc == OFFLOAD_SUCCESS)
148 Rc = AsyncInfo.synchronize();
149 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
150 }
151
__tgt_target_data_end_nowait_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)152 EXTERN void __tgt_target_data_end_nowait_mapper(
153 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
154 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
155 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
156 void *NoAliasDepList) {
157 TIMESCOPE_WITH_IDENT(Loc);
158
159 __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
160 ArgTypes, ArgNames, ArgMappers);
161 }
162
__tgt_target_data_update_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers)163 EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
164 int32_t ArgNum, void **ArgsBase,
165 void **Args, int64_t *ArgSizes,
166 int64_t *ArgTypes,
167 map_var_info_t *ArgNames,
168 void **ArgMappers) {
169 TIMESCOPE_WITH_IDENT(Loc);
170 DP("Entering data update with %d mappings\n", ArgNum);
171 if (checkDeviceAndCtors(DeviceId, Loc)) {
172 DP("Not offloading to device %" PRId64 "\n", DeviceId);
173 return;
174 }
175
176 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
177 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
178 "Updating OpenMP data");
179
180 DeviceTy &Device = *PM->Devices[DeviceId];
181 AsyncInfoTy AsyncInfo(Device);
182 int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
183 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
184 if (Rc == OFFLOAD_SUCCESS)
185 Rc = AsyncInfo.synchronize();
186 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
187 }
188
__tgt_target_data_update_nowait_mapper(ident_t * Loc,int64_t DeviceId,int32_t ArgNum,void ** ArgsBase,void ** Args,int64_t * ArgSizes,int64_t * ArgTypes,map_var_info_t * ArgNames,void ** ArgMappers,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)189 EXTERN void __tgt_target_data_update_nowait_mapper(
190 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
191 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
192 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
193 void *NoAliasDepList) {
194 TIMESCOPE_WITH_IDENT(Loc);
195
196 __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
197 ArgSizes, ArgTypes, ArgNames, ArgMappers);
198 }
199
200 /// Implements a kernel entry that executes the target region on the specified
201 /// device.
202 ///
203 /// \param Loc Source location associated with this target region.
204 /// \param DeviceId The device to execute this region, -1 indicated the default.
205 /// \param NumTeams Number of teams to launch the region with, -1 indicates a
206 /// non-teams region and 0 indicates it was unspecified.
207 /// \param ThreadLimit Limit to the number of threads to use in the kernel
208 /// launch, 0 indicates it was unspecified.
209 /// \param HostPtr The pointer to the host function registered with the kernel.
210 /// \param Args All arguments to this kernel launch (see struct definition).
__tgt_target_kernel(ident_t * Loc,int64_t DeviceId,int32_t NumTeams,int32_t ThreadLimit,void * HostPtr,__tgt_kernel_arguments * Args)211 EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
212 int32_t ThreadLimit, void *HostPtr,
213 __tgt_kernel_arguments *Args) {
214 TIMESCOPE_WITH_IDENT(Loc);
215 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
216 "\n",
217 DPxPTR(HostPtr), DeviceId);
218 if (Args->Version != 1) {
219 DP("Unexpected ABI version: %d\n", Args->Version);
220 }
221 if (checkDeviceAndCtors(DeviceId, Loc)) {
222 DP("Not offloading to device %" PRId64 "\n", DeviceId);
223 return OMP_TGT_FAIL;
224 }
225
226 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
227 printKernelArguments(Loc, DeviceId, Args->NumArgs, Args->ArgSizes,
228 Args->ArgTypes, Args->ArgNames,
229 "Entering OpenMP kernel");
230 #ifdef OMPTARGET_DEBUG
231 for (int I = 0; I < Args->NumArgs; ++I) {
232 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
233 ", Type=0x%" PRIx64 ", Name=%s\n",
234 I, DPxPTR(Args->ArgBasePtrs[I]), DPxPTR(Args->ArgPtrs[I]),
235 Args->ArgSizes[I], Args->ArgTypes[I],
236 (Args->ArgNames) ? getNameFromMapping(Args->ArgNames[I]).c_str()
237 : "unknown");
238 }
239 #endif
240
241 bool IsTeams = NumTeams != -1;
242 if (!IsTeams)
243 NumTeams = 0;
244
245 DeviceTy &Device = *PM->Devices[DeviceId];
246 AsyncInfoTy AsyncInfo(Device);
247 int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs,
248 Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames,
249 Args->ArgMappers, NumTeams, ThreadLimit, Args->Tripcount,
250 IsTeams, AsyncInfo);
251 if (Rc == OFFLOAD_SUCCESS)
252 Rc = AsyncInfo.synchronize();
253 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
254 assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
255 return OMP_TGT_SUCCESS;
256 }
257
__tgt_target_kernel_nowait(ident_t * Loc,int64_t DeviceId,int32_t NumTeams,int32_t ThreadLimit,void * HostPtr,__tgt_kernel_arguments * Args,int32_t DepNum,void * DepList,int32_t NoAliasDepNum,void * NoAliasDepList)258 EXTERN int __tgt_target_kernel_nowait(
259 ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit,
260 void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList,
261 int32_t NoAliasDepNum, void *NoAliasDepList) {
262 TIMESCOPE_WITH_IDENT(Loc);
263
264 return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
265 Args);
266 }
267
268 // Get the current number of components for a user-defined mapper.
__tgt_mapper_num_components(void * RtMapperHandle)269 EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
270 TIMESCOPE();
271 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
272 int64_t Size = MapperComponentsPtr->Components.size();
273 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
274 DPxPTR(RtMapperHandle), Size);
275 return Size;
276 }
277
278 // Push back one component for a user-defined mapper.
__tgt_push_mapper_component(void * RtMapperHandle,void * Base,void * Begin,int64_t Size,int64_t Type,void * Name)279 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
280 void *Begin, int64_t Size, int64_t Type,
281 void *Name) {
282 TIMESCOPE();
283 DP("__tgt_push_mapper_component(Handle=" DPxMOD
284 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
285 ", Type=0x%" PRIx64 ", Name=%s).\n",
286 DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
287 (Name) ? getNameFromMapping(Name).c_str() : "unknown");
288 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
289 MapperComponentsPtr->Components.push_back(
290 MapComponentInfoTy(Base, Begin, Size, Type, Name));
291 }
292
__tgt_set_info_flag(uint32_t NewInfoLevel)293 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
294 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
295 InfoLevel.store(NewInfoLevel);
296 for (auto &R : PM->RTLs.AllRTLs) {
297 if (R.set_info_flag)
298 R.set_info_flag(NewInfoLevel);
299 }
300 }
301
__tgt_print_device_info(int64_t DeviceId)302 EXTERN int __tgt_print_device_info(int64_t DeviceId) {
303 return PM->Devices[DeviceId]->printDeviceInfo(
304 PM->Devices[DeviceId]->RTLDeviceID);
305 }
306