1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "device.h" 15 #include "omptarget.h" 16 #include "private.h" 17 #include "rtl.h" 18 19 #include <cassert> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <mutex> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds requires flags 26 EXTERN void __tgt_register_requires(int64_t Flags) { 27 TIMESCOPE(); 28 PM->RTLs.registerRequires(Flags); 29 } 30 31 //////////////////////////////////////////////////////////////////////////////// 32 /// adds a target shared library to the target execution image 33 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { 34 TIMESCOPE(); 35 std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs); 36 for (auto &RTL : PM->RTLs.AllRTLs) { 37 if (RTL.register_lib) { 38 if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { 39 DP("Could not register library with %s", RTL.RTLName.c_str()); 40 } 41 } 42 } 43 PM->RTLs.registerLib(Desc); 44 } 45 46 //////////////////////////////////////////////////////////////////////////////// 47 /// Initialize all available devices without registering any image 48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } 49 50 //////////////////////////////////////////////////////////////////////////////// 51 /// unloads a target shared library 52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { 53 TIMESCOPE(); 54 PM->RTLs.unregisterLib(Desc); 55 for (auto &RTL : PM->RTLs.UsedRTLs) { 56 if (RTL->unregister_lib) { 57 if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) { 58 DP("Could not register library with %s", RTL->RTLName.c_str()); 59 } 60 } 61 } 62 } 63 64 /// creates host-to-target data mapping, stores it in the 65 /// libomptarget.so internal structure (an entry in a stack of data maps) 66 /// and passes the data to the device. 67 EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, 68 void **ArgsBase, void **Args, 69 int64_t *ArgSizes, int64_t *ArgTypes) { 70 TIMESCOPE(); 71 __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 72 ArgSizes, ArgTypes, nullptr, nullptr); 73 } 74 75 EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum, 76 void **ArgsBase, void **Args, 77 int64_t *ArgSizes, int64_t *ArgTypes, 78 int32_t DepNum, void *DepList, 79 int32_t NoAliasDepNum, 80 void *NoAliasDepList) { 81 TIMESCOPE(); 82 83 __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 84 ArgSizes, ArgTypes, nullptr, nullptr); 85 } 86 87 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, 88 int32_t ArgNum, void **ArgsBase, 89 void **Args, int64_t *ArgSizes, 90 int64_t *ArgTypes, 91 map_var_info_t *ArgNames, 92 void **ArgMappers) { 93 TIMESCOPE_WITH_IDENT(Loc); 94 DP("Entering data begin region for device %" PRId64 " with %d mappings\n", 95 DeviceId, ArgNum); 96 if (checkDeviceAndCtors(DeviceId, Loc)) { 97 DP("Not offloading to device %" PRId64 "\n", DeviceId); 98 return; 99 } 100 101 DeviceTy &Device = *PM->Devices[DeviceId]; 102 103 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 104 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 105 "Entering OpenMP data region"); 106 #ifdef OMPTARGET_DEBUG 107 for (int I = 0; I < ArgNum; ++I) { 108 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 109 ", Type=0x%" PRIx64 ", Name=%s\n", 110 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 111 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 112 } 113 #endif 114 115 AsyncInfoTy AsyncInfo(Device); 116 int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 117 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 118 if (Rc == OFFLOAD_SUCCESS) 119 Rc = AsyncInfo.synchronize(); 120 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 121 } 122 123 EXTERN void __tgt_target_data_begin_nowait_mapper( 124 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 125 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 126 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 127 void *NoAliasDepList) { 128 TIMESCOPE_WITH_IDENT(Loc); 129 130 __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, 131 ArgSizes, ArgTypes, ArgNames, ArgMappers); 132 } 133 134 /// passes data from the target, releases target memory and destroys 135 /// the host-target mapping (top entry from the stack of data maps) 136 /// created by the last __tgt_target_data_begin. 137 EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, 138 void **ArgsBase, void **Args, 139 int64_t *ArgSizes, int64_t *ArgTypes) { 140 TIMESCOPE(); 141 __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 142 ArgSizes, ArgTypes, nullptr, nullptr); 143 } 144 145 EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum, 146 void **ArgsBase, void **Args, 147 int64_t *ArgSizes, int64_t *ArgTypes, 148 int32_t DepNum, void *DepList, 149 int32_t NoAliasDepNum, 150 void *NoAliasDepList) { 151 TIMESCOPE(); 152 153 __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 154 ArgSizes, ArgTypes, nullptr, nullptr); 155 } 156 157 EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, 158 int32_t ArgNum, void **ArgsBase, 159 void **Args, int64_t *ArgSizes, 160 int64_t *ArgTypes, 161 map_var_info_t *ArgNames, 162 void **ArgMappers) { 163 TIMESCOPE_WITH_IDENT(Loc); 164 DP("Entering data end region with %d mappings\n", ArgNum); 165 if (checkDeviceAndCtors(DeviceId, Loc)) { 166 DP("Not offloading to device %" PRId64 "\n", DeviceId); 167 return; 168 } 169 170 DeviceTy &Device = *PM->Devices[DeviceId]; 171 172 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 173 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 174 "Exiting OpenMP data region"); 175 #ifdef OMPTARGET_DEBUG 176 for (int I = 0; I < ArgNum; ++I) { 177 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 178 ", Type=0x%" PRIx64 ", Name=%s\n", 179 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 180 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 181 } 182 #endif 183 184 AsyncInfoTy AsyncInfo(Device); 185 int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 186 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 187 if (Rc == OFFLOAD_SUCCESS) 188 Rc = AsyncInfo.synchronize(); 189 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 190 } 191 192 EXTERN void __tgt_target_data_end_nowait_mapper( 193 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 194 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 195 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 196 void *NoAliasDepList) { 197 TIMESCOPE_WITH_IDENT(Loc); 198 199 __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, 200 ArgTypes, ArgNames, ArgMappers); 201 } 202 203 EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, 204 void **ArgsBase, void **Args, 205 int64_t *ArgSizes, int64_t *ArgTypes) { 206 TIMESCOPE(); 207 __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 208 ArgSizes, ArgTypes, nullptr, nullptr); 209 } 210 211 EXTERN void __tgt_target_data_update_nowait( 212 int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, 213 int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList, 214 int32_t NoAliasDepNum, void *NoAliasDepList) { 215 TIMESCOPE(); 216 217 __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 218 ArgSizes, ArgTypes, nullptr, nullptr); 219 } 220 221 EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, 222 int32_t ArgNum, void **ArgsBase, 223 void **Args, int64_t *ArgSizes, 224 int64_t *ArgTypes, 225 map_var_info_t *ArgNames, 226 void **ArgMappers) { 227 TIMESCOPE_WITH_IDENT(Loc); 228 DP("Entering data update with %d mappings\n", ArgNum); 229 if (checkDeviceAndCtors(DeviceId, Loc)) { 230 DP("Not offloading to device %" PRId64 "\n", DeviceId); 231 return; 232 } 233 234 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 235 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 236 "Updating OpenMP data"); 237 238 DeviceTy &Device = *PM->Devices[DeviceId]; 239 AsyncInfoTy AsyncInfo(Device); 240 int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 241 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 242 if (Rc == OFFLOAD_SUCCESS) 243 Rc = AsyncInfo.synchronize(); 244 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 245 } 246 247 EXTERN void __tgt_target_data_update_nowait_mapper( 248 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 249 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 250 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 251 void *NoAliasDepList) { 252 TIMESCOPE_WITH_IDENT(Loc); 253 254 __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, 255 ArgSizes, ArgTypes, ArgNames, ArgMappers); 256 } 257 258 EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, 259 void **ArgsBase, void **Args, int64_t *ArgSizes, 260 int64_t *ArgTypes) { 261 TIMESCOPE(); 262 return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, 263 ArgSizes, ArgTypes, nullptr, nullptr); 264 } 265 266 EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum, 267 void **ArgsBase, void **Args, int64_t *ArgSizes, 268 int64_t *ArgTypes, int32_t DepNum, void *DepList, 269 int32_t NoAliasDepNum, void *NoAliasDepList) { 270 TIMESCOPE(); 271 272 return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, 273 ArgSizes, ArgTypes, nullptr, nullptr); 274 } 275 276 EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr, 277 int32_t ArgNum, void **ArgsBase, void **Args, 278 int64_t *ArgSizes, int64_t *ArgTypes, 279 map_var_info_t *ArgNames, void **ArgMappers) { 280 TIMESCOPE_WITH_IDENT(Loc); 281 __tgt_kernel_arguments KernelArgs{ 282 1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, -1}; 283 return __tgt_target_kernel(Loc, DeviceId, -1, 0, HostPtr, &KernelArgs); 284 } 285 286 EXTERN int __tgt_target_nowait_mapper( 287 ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, 288 void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, 289 map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList, 290 int32_t NoAliasDepNum, void *NoAliasDepList) { 291 TIMESCOPE_WITH_IDENT(Loc); 292 293 return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args, 294 ArgSizes, ArgTypes, ArgNames, ArgMappers); 295 } 296 297 EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum, 298 void **ArgsBase, void **Args, int64_t *ArgSizes, 299 int64_t *ArgTypes, int32_t TeamNum, 300 int32_t ThreadLimit) { 301 TIMESCOPE(); 302 return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, 303 Args, ArgSizes, ArgTypes, nullptr, nullptr, 304 TeamNum, ThreadLimit); 305 } 306 307 EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr, 308 int32_t ArgNum, void **ArgsBase, 309 void **Args, int64_t *ArgSizes, 310 int64_t *ArgTypes, int32_t TeamNum, 311 int32_t ThreadLimit, int32_t DepNum, 312 void *DepList, int32_t NoAliasDepNum, 313 void *NoAliasDepList) { 314 TIMESCOPE(); 315 316 return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, 317 Args, ArgSizes, ArgTypes, nullptr, nullptr, 318 TeamNum, ThreadLimit); 319 } 320 321 EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, 322 void *HostPtr, int32_t ArgNum, 323 void **ArgsBase, void **Args, 324 int64_t *ArgSizes, int64_t *ArgTypes, 325 map_var_info_t *ArgNames, 326 void **ArgMappers, int32_t TeamNum, 327 int32_t ThreadLimit) { 328 TIMESCOPE_WITH_IDENT(Loc); 329 __tgt_kernel_arguments KernelArgs{ 330 1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, -1}; 331 return __tgt_target_kernel(Loc, DeviceId, TeamNum, ThreadLimit, HostPtr, 332 &KernelArgs); 333 } 334 335 /// Implements a kernel entry that executes the target region on the specified 336 /// device. 337 /// 338 /// \param Loc Source location associated with this target region. 339 /// \param DeviceId The device to execute this region, -1 indicated the default. 340 /// \param NumTeams Number of teams to launch the region with, -1 indicates a 341 /// non-teams region and 0 indicates it was unspecified. 342 /// \param ThreadLimit Limit to the number of threads to use in the kernel 343 /// launch, 0 indicates it was unspecified. 344 /// \param HostPtr The pointer to the host function registered with the kernel. 345 /// \param Args All arguments to this kernel launch (see struct definition). 346 EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, 347 int32_t ThreadLimit, void *HostPtr, 348 __tgt_kernel_arguments *Args) { 349 TIMESCOPE_WITH_IDENT(Loc); 350 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 351 "\n", 352 DPxPTR(HostPtr), DeviceId); 353 if (Args->Version != 1) { 354 DP("Unexpected ABI version: %d\n", Args->Version); 355 } 356 if (checkDeviceAndCtors(DeviceId, Loc)) { 357 DP("Not offloading to device %" PRId64 "\n", DeviceId); 358 return OMP_TGT_FAIL; 359 } 360 361 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 362 printKernelArguments(Loc, DeviceId, Args->NumArgs, Args->ArgSizes, 363 Args->ArgTypes, Args->ArgNames, 364 "Entering OpenMP kernel"); 365 #ifdef OMPTARGET_DEBUG 366 for (int I = 0; I < Args->NumArgs; ++I) { 367 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 368 ", Type=0x%" PRIx64 ", Name=%s\n", 369 I, DPxPTR(Args->ArgBasePtrs[I]), DPxPTR(Args->ArgPtrs[I]), 370 Args->ArgSizes[I], Args->ArgTypes[I], 371 (Args->ArgNames) ? getNameFromMapping(Args->ArgNames[I]).c_str() 372 : "unknown"); 373 } 374 #endif 375 376 bool IsTeams = NumTeams != -1; 377 if (!IsTeams) 378 NumTeams = 0; 379 380 DeviceTy &Device = *PM->Devices[DeviceId]; 381 AsyncInfoTy AsyncInfo(Device); 382 int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs, 383 Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames, 384 Args->ArgMappers, NumTeams, ThreadLimit, Args->Tripcount, 385 IsTeams, AsyncInfo); 386 if (Rc == OFFLOAD_SUCCESS) 387 Rc = AsyncInfo.synchronize(); 388 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 389 assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); 390 return OMP_TGT_SUCCESS; 391 } 392 393 EXTERN int __tgt_target_kernel_nowait( 394 ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit, 395 void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList, 396 int32_t NoAliasDepNum, void *NoAliasDepList) { 397 TIMESCOPE_WITH_IDENT(Loc); 398 399 return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, 400 Args); 401 } 402 403 // Get the current number of components for a user-defined mapper. 404 EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { 405 TIMESCOPE(); 406 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; 407 int64_t Size = MapperComponentsPtr->Components.size(); 408 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", 409 DPxPTR(RtMapperHandle), Size); 410 return Size; 411 } 412 413 // Push back one component for a user-defined mapper. 414 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, 415 void *Begin, int64_t Size, int64_t Type, 416 void *Name) { 417 TIMESCOPE(); 418 DP("__tgt_push_mapper_component(Handle=" DPxMOD 419 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 420 ", Type=0x%" PRIx64 ", Name=%s).\n", 421 DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type, 422 (Name) ? getNameFromMapping(Name).c_str() : "unknown"); 423 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; 424 MapperComponentsPtr->Components.push_back( 425 MapComponentInfoTy(Base, Begin, Size, Type, Name)); 426 } 427 428 EXTERN void __kmpc_push_target_tripcount(int64_t DeviceId, 429 uint64_t LoopTripcount) { 430 __kmpc_push_target_tripcount_mapper(nullptr, DeviceId, LoopTripcount); 431 } 432 433 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId, 434 uint64_t LoopTripcount) { 435 TIMESCOPE_WITH_IDENT(Loc); 436 if (checkDeviceAndCtors(DeviceId, Loc)) { 437 DP("Not offloading to device %" PRId64 "\n", DeviceId); 438 return; 439 } 440 441 DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId, 442 LoopTripcount); 443 PM->TblMapMtx.lock(); 444 PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), 445 LoopTripcount); 446 PM->TblMapMtx.unlock(); 447 } 448 449 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { 450 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); 451 InfoLevel.store(NewInfoLevel); 452 for (auto &R : PM->RTLs.AllRTLs) { 453 if (R.set_info_flag) 454 R.set_info_flag(NewInfoLevel); 455 } 456 } 457 458 EXTERN int __tgt_print_device_info(int64_t DeviceId) { 459 return PM->Devices[DeviceId]->printDeviceInfo( 460 PM->Devices[DeviceId]->RTLDeviceID); 461 } 462