1 //===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef _OMPTARGET_H_
15 #define _OMPTARGET_H_
16 
17 #include <deque>
18 #include <stddef.h>
19 #include <stdint.h>
20 
21 #include <SourceInfo.h>
22 
/// Status values used by the runtime (e.g. returned by
/// AsyncInfoTy::synchronize()). Success is zero, failure is all bits set.
#define OFFLOAD_SUCCESS (0)
#define OFFLOAD_FAIL (~0)

/// Sentinel device id. NOTE(review): presumably selects the default device;
/// confirm against the callers. The expansion is parenthesized, like the
/// status macros above, so it stays a single operand in any expression.
#define OFFLOAD_DEVICE_DEFAULT (-1)
27 
28 // Don't format out enums and structs.
29 // clang-format off
30 
/// Status reported by the public __tgt_target_XXX entry points.
enum __tgt_target_return_t : int {
  /// The offload was executed successfully on a target device.
  OMP_TGT_SUCCESS = 0,

  /// The offload may not have executed on the requested target device, for
  /// example because the device is not available or is unsupported (see the
  /// Execution Model in the specification). This value may not be used to
  /// report a target device execution failure; such failures should be handled
  /// internally in libomptarget.
  OMP_TGT_FAIL = ~0
};
42 
/// Per-argument mapping flags describing how each data reference used in an
/// OpenMP target region is to be handled. The values are bit flags, except for
/// OMP_TGT_MAPTYPE_MEMBER_OF, which is a 16-bit field in the top bits.
enum tgt_map_type {
  /// No flags.
  OMP_TGT_MAPTYPE_NONE = 0x000,
  /// Copy data from host to device.
  OMP_TGT_MAPTYPE_TO = 0x001,
  /// Copy data from device to host.
  OMP_TGT_MAPTYPE_FROM = 0x002,
  /// Copy regardless of the reference count.
  OMP_TGT_MAPTYPE_ALWAYS = 0x004,
  /// Force unmapping of data.
  OMP_TGT_MAPTYPE_DELETE = 0x008,
  /// Map the pointer as well as the pointee.
  OMP_TGT_MAPTYPE_PTR_AND_OBJ = 0x010,
  /// Pass the device base address to the kernel.
  OMP_TGT_MAPTYPE_TARGET_PARAM = 0x020,
  /// Return the base device address of the mapped data.
  OMP_TGT_MAPTYPE_RETURN_PARAM = 0x040,
  /// Private variable - not mapped.
  OMP_TGT_MAPTYPE_PRIVATE = 0x080,
  /// Copied by value - not mapped.
  OMP_TGT_MAPTYPE_LITERAL = 0x100,
  /// The mapping is implicit.
  OMP_TGT_MAPTYPE_IMPLICIT = 0x200,
  /// NOTE(review): the previous comment here read "copy data to device", which
  /// duplicates OMP_TGT_MAPTYPE_TO. This flag presumably encodes the OpenMP
  /// 'close' map-type modifier (a hint to allocate the memory close to the
  /// target device); confirm against the OpenMP specification.
  OMP_TGT_MAPTYPE_CLOSE = 0x400,
  /// Runtime error if the data is not already allocated.
  OMP_TGT_MAPTYPE_PRESENT = 0x1000,
  /// Use a separate reference counter so that the data cannot be unmapped
  /// within the structured region. This is an OpenMP extension for the sake of
  /// OpenACC support.
  OMP_TGT_MAPTYPE_OMPX_HOLD = 0x2000,
  /// Descriptor for a non-contiguous target-update.
  OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000,
  /// Member of a struct; the member is given by [16 MSBs] - 1.
  OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
};
80 
/// Bit flags attached to a declare-target offload entry.
enum OpenMPOffloadingDeclareTargetFlags {
  OMP_DECLARE_TARGET_LINK = 0x01, ///< The entry has a 'link' attribute.
  OMP_DECLARE_TARGET_CTOR = 0x02, ///< The entry is a global constructor.
  OMP_DECLARE_TARGET_DTOR = 0x04  ///< The entry is a global destructor.
};
89 
/// Bit flags describing the clauses of 'requires' directives.
enum OpenMPOffloadingRequiresDirFlags {
  OMP_REQ_UNDEFINED = 0x000,             ///< Flag undefined.
  OMP_REQ_NONE = 0x001,                  ///< No requires directive present.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,       ///< reverse_offload clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,       ///< unified_address clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, ///< unified_shared_memory clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010     ///< dynamic_allocators clause.
};
104 
/// Kind of target allocation being requested. The numeric values are spelled
/// out so that they stay fixed if enumerators are ever reordered.
enum TargetAllocTy : int32_t {
  TARGET_ALLOC_DEVICE = 0,
  TARGET_ALLOC_HOST = 1,
  TARGET_ALLOC_SHARED = 2,
  TARGET_ALLOC_DEFAULT = 3
};
111 
/// Bundle of every argument passed to a target kernel region launch.
struct __tgt_kernel_arguments {
  /// Version of this struct for ABI compatibility.
  int32_t Version;
  /// Number of arguments in each input pointer.
  int32_t NumArgs;
  /// Base pointer of each argument (e.g. a struct).
  void **ArgBasePtrs;
  /// Pointer to the argument data.
  void **ArgPtrs;
  /// Size of the argument data in bytes.
  int64_t *ArgSizes;
  /// Type of the data (e.g. to / from).
  int64_t *ArgTypes;
  /// Name of the data for debugging, possibly null.
  void **ArgNames;
  /// User-defined mappers, possibly null.
  void **ArgMappers;
  /// Tripcount for the teams / distribute loop, 0 otherwise.
  int64_t Tripcount;
};
// The layout is part of the ABI: only the two sizes below are accepted.
static_assert(sizeof(__tgt_kernel_arguments) == 40 ||
                  sizeof(__tgt_kernel_arguments) == 64,
              "Invalid struct size");
127 
/// Record of a single entry point or global. For a function entry point the
/// size is expected to be zero.
struct __tgt_offload_entry {
  /// Pointer to the offload entry info (function or global).
  void *addr;
  /// Name of the function or global.
  char *name;
  /// Size of the entry info (0 if it is a function).
  size_t size;
  /// Flags associated with the entry, e.g. 'link'.
  int32_t flags;
  /// Reserved, to be used by the runtime library.
  int32_t reserved;
};
137 
138 /// This struct is a record of the device image information
139 struct __tgt_device_image {
140   void *ImageStart;                  // Pointer to the target code start
141   void *ImageEnd;                    // Pointer to the target code end
142   __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries
143   __tgt_offload_entry *EntriesEnd;   // End of table (non inclusive)
144 };
145 
/// Information about a given image.
struct __tgt_image_info {
  /// NOTE(review): presumably the architecture string of the image; confirm
  /// against the code that fills this in.
  const char *Arch;
};
150 
151 /// This struct is a record of all the host code that may be offloaded to a
152 /// target.
153 struct __tgt_bin_desc {
154   int32_t NumDeviceImages;           // Number of device types supported
155   __tgt_device_image *DeviceImages;  // Array of device images (1 per dev. type)
156   __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries
157   __tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)
158 };
159 
160 /// This struct contains the offload entries identified by the target runtime
161 struct __tgt_target_table {
162   __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries
163   __tgt_offload_entry
164       *EntriesEnd; // End of the table with all the entries (non inclusive)
165 };
166 
167 // clang-format on
168 
/// Information exchanged between different asynchronous operations, used for
/// device-dependent optimization and potential synchronization.
struct __tgt_async_info {
  /// Pointer to a queue-like structure on which offloading operations are
  /// issued. This structure is assumed to be what synchronization is done on;
  /// in the CUDA backend it is a CUstream. Null until a queue is set.
  void *Queue = nullptr;
};
177 
178 struct DeviceTy;
179 
180 /// The libomptarget wrapper around a __tgt_async_info object directly
181 /// associated with a libomptarget layer device. RAII semantics to avoid
182 /// mistakes.
183 class AsyncInfoTy {
184   /// Locations we used in (potentially) asynchronous calls which should live
185   /// as long as this AsyncInfoTy object.
186   std::deque<void *> BufferLocations;
187 
188   __tgt_async_info AsyncInfo;
189   DeviceTy &Device;
190 
191 public:
AsyncInfoTy(DeviceTy & Device)192   AsyncInfoTy(DeviceTy &Device) : Device(Device) {}
~AsyncInfoTy()193   ~AsyncInfoTy() { synchronize(); }
194 
195   /// Implicit conversion to the __tgt_async_info which is used in the
196   /// plugin interface.
197   operator __tgt_async_info *() { return &AsyncInfo; }
198 
199   /// Synchronize all pending actions.
200   ///
201   /// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately.
202   int synchronize();
203 
204   /// Return a void* reference with a lifetime that is at least as long as this
205   /// AsyncInfoTy object. The location can be used as intermediate buffer.
206   void *&getVoidPtrLocation();
207 };
208 
/// Record of non-contiguous information. NOTE(review): presumably one record
/// per dimension of a non-contiguous target-update; confirm with the users of
/// OMP_TGT_MAPTYPE_NON_CONTIG.
struct __tgt_target_non_contig {
  uint64_t Offset; ///< Offset value of this record.
  uint64_t Count;  ///< Count value of this record.
  uint64_t Stride; ///< Stride value of this record.
};
215 
/// Pair of opaque handles describing a device. Both handles start out null.
struct __tgt_device_info {
  /// Opaque context handle; null by default.
  void *Context = nullptr;
  /// Opaque device handle; null by default.
  void *Device = nullptr;
};
220 
221 #ifdef __cplusplus
222 extern "C" {
223 #endif
224 
/// OpenMP device query routines. Their semantics are defined by the OpenMP
/// API specification.
int omp_get_num_devices(void);
int omp_get_device_num(void);
int omp_get_initial_device(void);

/// OpenMP device memory routines. Their semantics are defined by the OpenMP
/// API specification.
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
                      size_t DstOffset, size_t SrcOffset,
                      int DstDevice, int SrcDevice);
int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
                           int NumDims, const size_t *Volume,
                           const size_t *DstOffsets,
                           const size_t *SrcOffsets,
                           const size_t *DstDimensions,
                           const size_t *SrcDimensions,
                           int DstDevice, int SrcDevice);
int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
                             size_t Size, size_t DeviceOffset,
                             int DeviceNum);
int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum);
243 
/// Explicit target memory allocators, one per allocation kind (device, host,
/// shared). They carry the llvm_ prefix until they become part of the OpenMP
/// standard.
void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum);
void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);

/// Dummy target so we have a symbol for generating host fallback.
void *llvm_omp_target_dynamic_shared_alloc(void);
252 
253 /// add the clauses of the requires directives in a given file
254 void __tgt_register_requires(int64_t Flags);
255 
256 /// adds a target shared library to the target execution image
257 void __tgt_register_lib(__tgt_bin_desc *Desc);
258 
259 /// Initialize all RTLs at once
260 void __tgt_init_all_rtls();
261 
262 /// removes a target shared library from the target execution image
263 void __tgt_unregister_lib(__tgt_bin_desc *Desc);
264 
265 // creates the host to target data mapping, stores it in the
266 // libomptarget.so internal structure (an entry in a stack of data maps) and
267 // passes the data to the device;
268 void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
269                              void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
270 void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
271                                     void **ArgsBase, void **Args,
272                                     int64_t *ArgSizes, int64_t *ArgTypes,
273                                     int32_t DepNum, void *DepList,
274                                     int32_t NoAliasDepNum,
275                                     void *NoAliasDepList);
276 void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
277                                     int32_t ArgNum, void **ArgsBase,
278                                     void **Args, int64_t *ArgSizes,
279                                     int64_t *ArgTypes, map_var_info_t *ArgNames,
280                                     void **ArgMappers);
281 void __tgt_target_data_begin_nowait_mapper(
282     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
283     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
284     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
285     void *NoAliasDepList);
286 
287 // passes data from the target, release target memory and destroys the
288 // host-target mapping (top entry from the stack of data maps) created by
289 // the last __tgt_target_data_begin
290 void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
291                            void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
292 void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
293                                   void **ArgsBase, void **Args,
294                                   int64_t *ArgSizes, int64_t *ArgTypes,
295                                   int32_t DepNum, void *DepList,
296                                   int32_t NoAliasDepNum, void *NoAliasDepList);
297 void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
298                                   int32_t ArgNum, void **ArgsBase, void **Args,
299                                   int64_t *ArgSizes, int64_t *ArgTypes,
300                                   map_var_info_t *ArgNames, void **ArgMappers);
301 void __tgt_target_data_end_nowait_mapper(
302     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
303     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
304     void **ArgMappers, int32_t depNum, void *depList, int32_t NoAliasDepNum,
305     void *NoAliasDepList);
306 
307 /// passes data to/from the target
308 void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
309                               void **Args, int64_t *ArgSizes,
310                               int64_t *ArgTypes);
311 void __tgt_target_data_update_nowait(int64_t DeviceId, int32_t ArgNum,
312                                      void **ArgsBase, void **Args,
313                                      int64_t *ArgSizes, int64_t *ArgTypes,
314                                      int32_t DepNum, void *DepList,
315                                      int32_t NoAliasDepNum,
316                                      void *NoAliasDepList);
317 void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
318                                      int32_t ArgNum, void **ArgsBase,
319                                      void **Args, int64_t *ArgSizes,
320                                      int64_t *ArgTypes,
321                                      map_var_info_t *ArgNames,
322                                      void **ArgMappers);
323 void __tgt_target_data_update_nowait_mapper(
324     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
325     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
326     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
327     void *NoAliasDepList);
328 
329 // Performs the same actions as data_begin in case ArgNum is non-zero
330 // and initiates run of offloaded region on target platform; if ArgNum
331 // is non-zero after the region execution is done it also performs the
332 // same action as data_end above. The following types are used; this
333 // function returns 0 if it was able to transfer the execution to a
334 // target and an int different from zero otherwise.
335 int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
336                         int32_t ThreadLimit, void *HostPtr,
337                         __tgt_kernel_arguments *Args);
338 int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
339                                int32_t ThreadLimit, void *HostPtr,
340                                __tgt_kernel_arguments *Args, int32_t DepNum,
341                                void *DepList, int32_t NoAliasDepNum,
342                                void *NoAliasDepList);
343 
/// Sets the runtime's info flag. NOTE(review): the meaning of the argument
/// bits is not visible in this header; see the implementation.
void __tgt_set_info_flag(uint32_t);

/// Prints information about the device identified by \p DeviceId.
/// NOTE(review): the meaning of the returned int is not visible in this
/// header; see the implementation.
int __tgt_print_device_info(int64_t DeviceId);
347 #ifdef __cplusplus
348 }
349 #endif
350 
// Linkage specifier macro: expands to `extern "C"` when this header is
// compiled as C++ and to plain `extern` otherwise.
#if defined(__cplusplus)
#  define EXTERN extern "C"
#else
#  define EXTERN extern
#endif
356 
357 #endif // _OMPTARGET_H_
358