1 //===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef _OMPTARGET_H_ 15 #define _OMPTARGET_H_ 16 17 #include <deque> 18 #include <stddef.h> 19 #include <stdint.h> 20 21 #include <SourceInfo.h> 22 23 #define OFFLOAD_SUCCESS (0) 24 #define OFFLOAD_FAIL (~0) 25 26 #define OFFLOAD_DEVICE_DEFAULT -1 27 28 // Don't format out enums and structs. 29 // clang-format off 30 31 /// return flags of __tgt_target_XXX public APIs 32 enum __tgt_target_return_t : int { 33 /// successful offload executed on a target device 34 OMP_TGT_SUCCESS = 0, 35 /// offload may not execute on the requested target device 36 /// this scenario can be caused by the device not available or unsupported 37 /// as described in the Execution Model in the specifcation 38 /// this status may not be used for target device execution failure 39 /// which should be handled internally in libomptarget 40 OMP_TGT_FAIL = ~0 41 }; 42 43 /// Data attributes for each data reference used in an OpenMP target region. 
enum tgt_map_type {
  // No flags
  OMP_TGT_MAPTYPE_NONE            = 0x000,
  // copy data from host to device
  OMP_TGT_MAPTYPE_TO              = 0x001,
  // copy data from device to host
  OMP_TGT_MAPTYPE_FROM            = 0x002,
  // copy regardless of the reference count
  OMP_TGT_MAPTYPE_ALWAYS          = 0x004,
  // force unmapping of data
  OMP_TGT_MAPTYPE_DELETE          = 0x008,
  // map the pointer as well as the pointee
  OMP_TGT_MAPTYPE_PTR_AND_OBJ     = 0x010,
  // pass device base address to kernel
  OMP_TGT_MAPTYPE_TARGET_PARAM    = 0x020,
  // return base device address of mapped data
  OMP_TGT_MAPTYPE_RETURN_PARAM    = 0x040,
  // private variable - not mapped
  OMP_TGT_MAPTYPE_PRIVATE         = 0x080,
  // copy by value - not mapped
  OMP_TGT_MAPTYPE_LITERAL         = 0x100,
  // mapping is implicit
  OMP_TGT_MAPTYPE_IMPLICIT        = 0x200,
  // allocate memory close to the target device ('close' map-type modifier)
  OMP_TGT_MAPTYPE_CLOSE           = 0x400,
  // runtime error if not already allocated
  OMP_TGT_MAPTYPE_PRESENT         = 0x1000,
  // use a separate reference counter so that the data cannot be unmapped within
  // the structured region
  // This is an OpenMP extension for the sake of OpenACC support.
  OMP_TGT_MAPTYPE_OMPX_HOLD       = 0x2000,
  // descriptor for non-contiguous target-update
  OMP_TGT_MAPTYPE_NON_CONTIG      = 0x100000000000,
  // member of struct, member given by [16 MSBs] - 1
  OMP_TGT_MAPTYPE_MEMBER_OF       = 0xffff000000000000
};

/// Bit flags attached to declare-target offload entries.
enum OpenMPOffloadingDeclareTargetFlags {
  /// Mark the entry as having a 'link' attribute.
  OMP_DECLARE_TARGET_LINK = 0x01,
  /// Mark the entry as being a global constructor.
  OMP_DECLARE_TARGET_CTOR = 0x02,
  /// Mark the entry as being a global destructor.
  OMP_DECLARE_TARGET_DTOR = 0x04
};

/// Bit flags describing the clauses of 'requires' directives.
enum OpenMPOffloadingRequiresDirFlags {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires directive present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010
};

/// Kinds of target allocation; enumerators are numbered consecutively from 0.
enum TargetAllocTy : int32_t {
  TARGET_ALLOC_DEVICE = 0,
  TARGET_ALLOC_HOST,
  TARGET_ALLOC_SHARED,
  TARGET_ALLOC_DEFAULT
};

/// This struct contains all of the arguments to a target kernel region launch.
struct __tgt_kernel_arguments {
  int32_t Version;    // Version of this struct for ABI compatibility.
  int32_t NumArgs;    // Number of arguments in each input pointer.
  void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
  void **ArgPtrs;     // Pointer to the argument data.
  int64_t *ArgSizes;  // Size of the argument data in bytes.
  int64_t *ArgTypes;  // Type of the data (e.g. to / from).
  void **ArgNames;    // Name of the data for debugging, possibly null.
  void **ArgMappers;  // User-defined mappers, possibly null.
  int64_t Tripcount;  // Tripcount for the teams / distribute loop, 0 otherwise.
};
// Two int32_t fields, six pointers and one int64_t: 64 bytes with 8-byte
// pointers, 40 bytes with 4-byte pointers.
static_assert(sizeof(__tgt_kernel_arguments) == 64 ||
                  sizeof(__tgt_kernel_arguments) == 40,
              "Invalid struct size");

/// This struct is a record of an entry point or global. For a function
/// entry point the size is expected to be zero
struct __tgt_offload_entry {
  void *addr;   // Pointer to the offload entry info (function or global)
  char *name;   // Name of the function or global
  size_t size;  // Size of the entry info (0 if it is a function)
  int32_t flags; // Flags associated with the entry, e.g. 'link'.
  int32_t reserved; // Reserved, to be used by the runtime library.
};

/// This struct is a record of the device image information
struct __tgt_device_image {
  void *ImageStart;                  // Pointer to the target code start
  void *ImageEnd;                    // Pointer to the target code end
  __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries
  __tgt_offload_entry *EntriesEnd;   // End of table (non inclusive)
};

/// This struct contains information about a given image.
struct __tgt_image_info {
  const char *Arch;
};

/// This struct is a record of all the host code that may be offloaded to a
/// target.
struct __tgt_bin_desc {
  int32_t NumDeviceImages;           // Number of device types supported
  __tgt_device_image *DeviceImages;  // Array of device images (1 per dev. type)
  __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries
  __tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)
};

/// This struct contains the offload entries identified by the target runtime
struct __tgt_target_table {
  __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries
  __tgt_offload_entry
      *EntriesEnd; // End of the table with all the entries (non inclusive)
};

// clang-format on

/// This struct contains information exchanged between different asynchronous
/// operations for device-dependent optimization and potential synchronization
struct __tgt_async_info {
  // A pointer to a queue-like structure where offloading operations are issued.
  // We assume to use this structure to do synchronization. In CUDA backend, it
  // is CUstream.
  void *Queue = nullptr;
};

// Forward declaration; only referenced through a reference in this header.
struct DeviceTy;

/// The libomptarget wrapper around a __tgt_async_info object directly
/// associated with a libomptarget layer device. RAII semantics to avoid
/// mistakes.
183 class AsyncInfoTy { 184 /// Locations we used in (potentially) asynchronous calls which should live 185 /// as long as this AsyncInfoTy object. 186 std::deque<void *> BufferLocations; 187 188 __tgt_async_info AsyncInfo; 189 DeviceTy &Device; 190 191 public: AsyncInfoTy(DeviceTy & Device)192 AsyncInfoTy(DeviceTy &Device) : Device(Device) {} ~AsyncInfoTy()193 ~AsyncInfoTy() { synchronize(); } 194 195 /// Implicit conversion to the __tgt_async_info which is used in the 196 /// plugin interface. 197 operator __tgt_async_info *() { return &AsyncInfo; } 198 199 /// Synchronize all pending actions. 200 /// 201 /// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately. 202 int synchronize(); 203 204 /// Return a void* reference with a lifetime that is at least as long as this 205 /// AsyncInfoTy object. The location can be used as intermediate buffer. 206 void *&getVoidPtrLocation(); 207 }; 208 209 /// This struct is a record of non-contiguous information 210 struct __tgt_target_non_contig { 211 uint64_t Offset; 212 uint64_t Count; 213 uint64_t Stride; 214 }; 215 216 struct __tgt_device_info { 217 void *Context = nullptr; 218 void *Device = nullptr; 219 }; 220 221 #ifdef __cplusplus 222 extern "C" { 223 #endif 224 225 int omp_get_num_devices(void); 226 int omp_get_device_num(void); 227 int omp_get_initial_device(void); 228 void *omp_target_alloc(size_t Size, int DeviceNum); 229 void omp_target_free(void *DevicePtr, int DeviceNum); 230 int omp_target_is_present(const void *Ptr, int DeviceNum); 231 int omp_target_memcpy(void *Dst, const void *Src, size_t Length, 232 size_t DstOffset, size_t SrcOffset, int DstDevice, 233 int SrcDevice); 234 int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, 235 int NumDims, const size_t *Volume, 236 const size_t *DstOffsets, const size_t *SrcOffsets, 237 const size_t *DstDimensions, 238 const size_t *SrcDimensions, int DstDevice, 239 int SrcDevice); 240 int omp_target_associate_ptr(const void *HostPtr, const void 
*DevicePtr, 241 size_t Size, size_t DeviceOffset, int DeviceNum); 242 int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum); 243 244 /// Explicit target memory allocators 245 /// Using the llvm_ prefix until they become part of the OpenMP standard. 246 void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum); 247 void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum); 248 void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum); 249 250 /// Dummy target so we have a symbol for generating host fallback. 251 void *llvm_omp_target_dynamic_shared_alloc(); 252 253 /// add the clauses of the requires directives in a given file 254 void __tgt_register_requires(int64_t Flags); 255 256 /// adds a target shared library to the target execution image 257 void __tgt_register_lib(__tgt_bin_desc *Desc); 258 259 /// Initialize all RTLs at once 260 void __tgt_init_all_rtls(); 261 262 /// removes a target shared library from the target execution image 263 void __tgt_unregister_lib(__tgt_bin_desc *Desc); 264 265 // creates the host to target data mapping, stores it in the 266 // libomptarget.so internal structure (an entry in a stack of data maps) and 267 // passes the data to the device; 268 void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 269 void **Args, int64_t *ArgSizes, int64_t *ArgTypes); 270 void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum, 271 void **ArgsBase, void **Args, 272 int64_t *ArgSizes, int64_t *ArgTypes, 273 int32_t DepNum, void *DepList, 274 int32_t NoAliasDepNum, 275 void *NoAliasDepList); 276 void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, 277 int32_t ArgNum, void **ArgsBase, 278 void **Args, int64_t *ArgSizes, 279 int64_t *ArgTypes, map_var_info_t *ArgNames, 280 void **ArgMappers); 281 void __tgt_target_data_begin_nowait_mapper( 282 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 283 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, 
map_var_info_t *ArgNames, 284 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 285 void *NoAliasDepList); 286 287 // passes data from the target, release target memory and destroys the 288 // host-target mapping (top entry from the stack of data maps) created by 289 // the last __tgt_target_data_begin 290 void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 291 void **Args, int64_t *ArgSizes, int64_t *ArgTypes); 292 void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum, 293 void **ArgsBase, void **Args, 294 int64_t *ArgSizes, int64_t *ArgTypes, 295 int32_t DepNum, void *DepList, 296 int32_t NoAliasDepNum, void *NoAliasDepList); 297 void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, 298 int32_t ArgNum, void **ArgsBase, void **Args, 299 int64_t *ArgSizes, int64_t *ArgTypes, 300 map_var_info_t *ArgNames, void **ArgMappers); 301 void __tgt_target_data_end_nowait_mapper( 302 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 303 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 304 void **ArgMappers, int32_t depNum, void *depList, int32_t NoAliasDepNum, 305 void *NoAliasDepList); 306 307 /// passes data to/from the target 308 void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 309 void **Args, int64_t *ArgSizes, 310 int64_t *ArgTypes); 311 void __tgt_target_data_update_nowait(int64_t DeviceId, int32_t ArgNum, 312 void **ArgsBase, void **Args, 313 int64_t *ArgSizes, int64_t *ArgTypes, 314 int32_t DepNum, void *DepList, 315 int32_t NoAliasDepNum, 316 void *NoAliasDepList); 317 void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, 318 int32_t ArgNum, void **ArgsBase, 319 void **Args, int64_t *ArgSizes, 320 int64_t *ArgTypes, 321 map_var_info_t *ArgNames, 322 void **ArgMappers); 323 void __tgt_target_data_update_nowait_mapper( 324 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 325 void **Args, 
int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 326 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 327 void *NoAliasDepList); 328 329 // Performs the same actions as data_begin in case ArgNum is non-zero 330 // and initiates run of offloaded region on target platform; if ArgNum 331 // is non-zero after the region execution is done it also performs the 332 // same action as data_end above. The following types are used; this 333 // function returns 0 if it was able to transfer the execution to a 334 // target and an int different from zero otherwise. 335 int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, 336 int32_t ThreadLimit, void *HostPtr, 337 __tgt_kernel_arguments *Args); 338 int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, 339 int32_t ThreadLimit, void *HostPtr, 340 __tgt_kernel_arguments *Args, int32_t DepNum, 341 void *DepList, int32_t NoAliasDepNum, 342 void *NoAliasDepList); 343 344 void __tgt_set_info_flag(uint32_t); 345 346 int __tgt_print_device_info(int64_t DeviceId); 347 #ifdef __cplusplus 348 } 349 #endif 350 351 #ifdef __cplusplus 352 #define EXTERN extern "C" 353 #else 354 #define EXTERN extern 355 #endif 356 357 #endif // _OMPTARGET_H_ 358