1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "device.h" 15 #include "omptarget.h" 16 #include "private.h" 17 #include "rtl.h" 18 19 #include <cassert> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <mutex> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds requires flags 26 EXTERN void __tgt_register_requires(int64_t Flags) { 27 TIMESCOPE(); 28 PM->RTLs.registerRequires(Flags); 29 } 30 31 //////////////////////////////////////////////////////////////////////////////// 32 /// adds a target shared library to the target execution image 33 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { 34 TIMESCOPE(); 35 std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs); 36 for (auto &RTL : PM->RTLs.AllRTLs) { 37 if (RTL.register_lib) { 38 if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { 39 DP("Could not register library with %s", RTL.RTLName.c_str()); 40 } 41 } 42 } 43 PM->RTLs.registerLib(Desc); 44 } 45 46 //////////////////////////////////////////////////////////////////////////////// 47 /// Initialize all available devices without registering any image 48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } 49 50 //////////////////////////////////////////////////////////////////////////////// 51 /// unloads a target shared library 52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { 53 TIMESCOPE(); 54 PM->RTLs.unregisterLib(Desc); 55 for (auto &RTL : PM->RTLs.UsedRTLs) { 56 if (RTL->unregister_lib) { 57 if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) { 58 DP("Could not register library with %s", RTL->RTLName.c_str()); 59 } 60 } 61 } 62 } 63 64 /// creates host-to-target data mapping, stores it in the 65 /// libomptarget.so internal structure (an entry in a stack of data maps) 66 /// and passes the data to the device. 67 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, 68 int32_t ArgNum, void **ArgsBase, 69 void **Args, int64_t *ArgSizes, 70 int64_t *ArgTypes, 71 map_var_info_t *ArgNames, 72 void **ArgMappers) { 73 TIMESCOPE_WITH_IDENT(Loc); 74 DP("Entering data begin region for device %" PRId64 " with %d mappings\n", 75 DeviceId, ArgNum); 76 if (checkDeviceAndCtors(DeviceId, Loc)) { 77 DP("Not offloading to device %" PRId64 "\n", DeviceId); 78 return; 79 } 80 81 DeviceTy &Device = *PM->Devices[DeviceId]; 82 83 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 84 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 85 "Entering OpenMP data region"); 86 #ifdef OMPTARGET_DEBUG 87 for (int I = 0; I < ArgNum; ++I) { 88 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 89 ", Type=0x%" PRIx64 ", Name=%s\n", 90 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 91 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 92 } 93 #endif 94 95 AsyncInfoTy AsyncInfo(Device); 96 int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 97 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 98 if (Rc == OFFLOAD_SUCCESS) 99 Rc = AsyncInfo.synchronize(); 100 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 101 } 102 103 EXTERN void __tgt_target_data_begin_nowait_mapper( 104 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 105 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 106 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 107 void *NoAliasDepList) { 108 TIMESCOPE_WITH_IDENT(Loc); 109 110 __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, 111 ArgSizes, ArgTypes, ArgNames, ArgMappers); 112 } 113 114 /// passes data from the target, releases target memory and destroys 115 /// the host-target mapping (top entry from the stack of data maps) 116 /// created by the last __tgt_target_data_begin. 117 EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, 118 int32_t ArgNum, void **ArgsBase, 119 void **Args, int64_t *ArgSizes, 120 int64_t *ArgTypes, 121 map_var_info_t *ArgNames, 122 void **ArgMappers) { 123 TIMESCOPE_WITH_IDENT(Loc); 124 DP("Entering data end region with %d mappings\n", ArgNum); 125 if (checkDeviceAndCtors(DeviceId, Loc)) { 126 DP("Not offloading to device %" PRId64 "\n", DeviceId); 127 return; 128 } 129 130 DeviceTy &Device = *PM->Devices[DeviceId]; 131 132 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 133 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 134 "Exiting OpenMP data region"); 135 #ifdef OMPTARGET_DEBUG 136 for (int I = 0; I < ArgNum; ++I) { 137 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 138 ", Type=0x%" PRIx64 ", Name=%s\n", 139 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 140 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 141 } 142 #endif 143 144 AsyncInfoTy AsyncInfo(Device); 145 int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 146 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 147 if (Rc == OFFLOAD_SUCCESS) 148 Rc = AsyncInfo.synchronize(); 149 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 150 } 151 152 EXTERN void __tgt_target_data_end_nowait_mapper( 153 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 154 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 155 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 156 void *NoAliasDepList) { 157 TIMESCOPE_WITH_IDENT(Loc); 158 159 __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, 160 ArgTypes, ArgNames, ArgMappers); 161 } 162 163 EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, 164 int32_t ArgNum, void **ArgsBase, 165 void **Args, int64_t *ArgSizes, 166 int64_t *ArgTypes, 167 map_var_info_t *ArgNames, 168 void **ArgMappers) { 169 TIMESCOPE_WITH_IDENT(Loc); 170 DP("Entering data update with %d mappings\n", ArgNum); 171 if (checkDeviceAndCtors(DeviceId, Loc)) { 172 DP("Not offloading to device %" PRId64 "\n", DeviceId); 173 return; 174 } 175 176 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 177 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 178 "Updating OpenMP data"); 179 180 DeviceTy &Device = *PM->Devices[DeviceId]; 181 AsyncInfoTy AsyncInfo(Device); 182 int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 183 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 184 if (Rc == OFFLOAD_SUCCESS) 185 Rc = AsyncInfo.synchronize(); 186 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 187 } 188 189 EXTERN void __tgt_target_data_update_nowait_mapper( 190 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 191 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 192 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 193 void *NoAliasDepList) { 194 TIMESCOPE_WITH_IDENT(Loc); 195 196 __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, 197 ArgSizes, ArgTypes, ArgNames, ArgMappers); 198 } 199 200 /// Implements a kernel entry that executes the target region on the specified 201 /// device. 202 /// 203 /// \param Loc Source location associated with this target region. 204 /// \param DeviceId The device to execute this region, -1 indicated the default. 205 /// \param NumTeams Number of teams to launch the region with, -1 indicates a 206 /// non-teams region and 0 indicates it was unspecified. 207 /// \param ThreadLimit Limit to the number of threads to use in the kernel 208 /// launch, 0 indicates it was unspecified. 209 /// \param HostPtr The pointer to the host function registered with the kernel. 210 /// \param Args All arguments to this kernel launch (see struct definition). 211 EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, 212 int32_t ThreadLimit, void *HostPtr, 213 __tgt_kernel_arguments *Args) { 214 TIMESCOPE_WITH_IDENT(Loc); 215 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 216 "\n", 217 DPxPTR(HostPtr), DeviceId); 218 if (Args->Version != 1) { 219 DP("Unexpected ABI version: %d\n", Args->Version); 220 } 221 if (checkDeviceAndCtors(DeviceId, Loc)) { 222 DP("Not offloading to device %" PRId64 "\n", DeviceId); 223 return OMP_TGT_FAIL; 224 } 225 226 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 227 printKernelArguments(Loc, DeviceId, Args->NumArgs, Args->ArgSizes, 228 Args->ArgTypes, Args->ArgNames, 229 "Entering OpenMP kernel"); 230 #ifdef OMPTARGET_DEBUG 231 for (int I = 0; I < Args->NumArgs; ++I) { 232 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 233 ", Type=0x%" PRIx64 ", Name=%s\n", 234 I, DPxPTR(Args->ArgBasePtrs[I]), DPxPTR(Args->ArgPtrs[I]), 235 Args->ArgSizes[I], Args->ArgTypes[I], 236 (Args->ArgNames) ? getNameFromMapping(Args->ArgNames[I]).c_str() 237 : "unknown"); 238 } 239 #endif 240 241 bool IsTeams = NumTeams != -1; 242 if (!IsTeams) 243 NumTeams = 0; 244 245 DeviceTy &Device = *PM->Devices[DeviceId]; 246 AsyncInfoTy AsyncInfo(Device); 247 int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs, 248 Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames, 249 Args->ArgMappers, NumTeams, ThreadLimit, Args->Tripcount, 250 IsTeams, AsyncInfo); 251 if (Rc == OFFLOAD_SUCCESS) 252 Rc = AsyncInfo.synchronize(); 253 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 254 assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); 255 return OMP_TGT_SUCCESS; 256 } 257 258 EXTERN int __tgt_target_kernel_nowait( 259 ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit, 260 void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList, 261 int32_t NoAliasDepNum, void *NoAliasDepList) { 262 TIMESCOPE_WITH_IDENT(Loc); 263 264 return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, 265 Args); 266 } 267 268 // Get the current number of components for a user-defined mapper. 269 EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { 270 TIMESCOPE(); 271 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; 272 int64_t Size = MapperComponentsPtr->Components.size(); 273 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", 274 DPxPTR(RtMapperHandle), Size); 275 return Size; 276 } 277 278 // Push back one component for a user-defined mapper. 279 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, 280 void *Begin, int64_t Size, int64_t Type, 281 void *Name) { 282 TIMESCOPE(); 283 DP("__tgt_push_mapper_component(Handle=" DPxMOD 284 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 285 ", Type=0x%" PRIx64 ", Name=%s).\n", 286 DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type, 287 (Name) ? getNameFromMapping(Name).c_str() : "unknown"); 288 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; 289 MapperComponentsPtr->Components.push_back( 290 MapComponentInfoTy(Base, Begin, Size, Type, Name)); 291 } 292 293 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { 294 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); 295 InfoLevel.store(NewInfoLevel); 296 for (auto &R : PM->RTLs.AllRTLs) { 297 if (R.set_info_flag) 298 R.set_info_flag(NewInfoLevel); 299 } 300 } 301 302 EXTERN int __tgt_print_device_info(int64_t DeviceId) { 303 return PM->Devices[DeviceId]->printDeviceInfo( 304 PM->Devices[DeviceId]->RTLDeviceID); 305 } 306