1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "device.h" 15 #include "omptarget.h" 16 #include "private.h" 17 #include "rtl.h" 18 19 #include <cassert> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <mutex> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds requires flags 26 EXTERN void __tgt_register_requires(int64_t Flags) { 27 TIMESCOPE(); 28 PM->RTLs.registerRequires(Flags); 29 } 30 31 //////////////////////////////////////////////////////////////////////////////// 32 /// adds a target shared library to the target execution image 33 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { 34 TIMESCOPE(); 35 std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs); 36 for (auto &RTL : PM->RTLs.AllRTLs) { 37 if (RTL.register_lib) { 38 if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { 39 DP("Could not register library with %s", RTL.RTLName.c_str()); 40 } 41 } 42 } 43 PM->RTLs.registerLib(Desc); 44 } 45 46 //////////////////////////////////////////////////////////////////////////////// 47 /// Initialize all available devices without registering any image 48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } 49 50 //////////////////////////////////////////////////////////////////////////////// 51 /// unloads a target shared library 52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { 53 TIMESCOPE(); 54 PM->RTLs.unregisterLib(Desc); 55 for (auto &RTL : PM->RTLs.UsedRTLs) { 56 if (RTL->unregister_lib) { 57 if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) { 58 DP("Could not register library with %s", RTL->RTLName.c_str()); 59 } 60 } 61 } 62 } 63 64 /// creates host-to-target data mapping, stores it in the 65 /// libomptarget.so internal structure (an entry in a stack of data maps) 66 /// and passes the data to the device. 67 EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, 68 void **ArgsBase, void **Args, 69 int64_t *ArgSizes, int64_t *ArgTypes) { 70 TIMESCOPE(); 71 __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 72 ArgSizes, ArgTypes, nullptr, nullptr); 73 } 74 75 EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum, 76 void **ArgsBase, void **Args, 77 int64_t *ArgSizes, int64_t *ArgTypes, 78 int32_t DepNum, void *DepList, 79 int32_t NoAliasDepNum, 80 void *NoAliasDepList) { 81 TIMESCOPE(); 82 83 __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 84 ArgSizes, ArgTypes, nullptr, nullptr); 85 } 86 87 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, 88 int32_t ArgNum, void **ArgsBase, 89 void **Args, int64_t *ArgSizes, 90 int64_t *ArgTypes, 91 map_var_info_t *ArgNames, 92 void **ArgMappers) { 93 TIMESCOPE_WITH_IDENT(Loc); 94 DP("Entering data begin region for device %" PRId64 " with %d mappings\n", 95 DeviceId, ArgNum); 96 if (checkDeviceAndCtors(DeviceId, Loc)) { 97 DP("Not offloading to device %" PRId64 "\n", DeviceId); 98 return; 99 } 100 101 DeviceTy &Device = *PM->Devices[DeviceId]; 102 103 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 104 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 105 "Entering OpenMP data region"); 106 #ifdef OMPTARGET_DEBUG 107 for (int I = 0; I < ArgNum; ++I) { 108 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 109 ", Type=0x%" PRIx64 ", Name=%s\n", 110 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 111 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 112 } 113 #endif 114 115 AsyncInfoTy AsyncInfo(Device); 116 int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 117 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 118 if (Rc == OFFLOAD_SUCCESS) 119 Rc = AsyncInfo.synchronize(); 120 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 121 } 122 123 EXTERN void __tgt_target_data_begin_nowait_mapper( 124 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 125 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 126 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 127 void *NoAliasDepList) { 128 TIMESCOPE_WITH_IDENT(Loc); 129 130 __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, 131 ArgSizes, ArgTypes, ArgNames, ArgMappers); 132 } 133 134 /// passes data from the target, releases target memory and destroys 135 /// the host-target mapping (top entry from the stack of data maps) 136 /// created by the last __tgt_target_data_begin. 137 EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, 138 void **ArgsBase, void **Args, 139 int64_t *ArgSizes, int64_t *ArgTypes) { 140 TIMESCOPE(); 141 __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 142 ArgSizes, ArgTypes, nullptr, nullptr); 143 } 144 145 EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum, 146 void **ArgsBase, void **Args, 147 int64_t *ArgSizes, int64_t *ArgTypes, 148 int32_t DepNum, void *DepList, 149 int32_t NoAliasDepNum, 150 void *NoAliasDepList) { 151 TIMESCOPE(); 152 153 __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 154 ArgSizes, ArgTypes, nullptr, nullptr); 155 } 156 157 EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, 158 int32_t ArgNum, void **ArgsBase, 159 void **Args, int64_t *ArgSizes, 160 int64_t *ArgTypes, 161 map_var_info_t *ArgNames, 162 void **ArgMappers) { 163 TIMESCOPE_WITH_IDENT(Loc); 164 DP("Entering data end region with %d mappings\n", ArgNum); 165 if (checkDeviceAndCtors(DeviceId, Loc)) { 166 DP("Not offloading to device %" PRId64 "\n", DeviceId); 167 return; 168 } 169 170 DeviceTy &Device = *PM->Devices[DeviceId]; 171 172 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 173 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 174 "Exiting OpenMP data region"); 175 #ifdef OMPTARGET_DEBUG 176 for (int I = 0; I < ArgNum; ++I) { 177 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 178 ", Type=0x%" PRIx64 ", Name=%s\n", 179 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 180 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 181 } 182 #endif 183 184 AsyncInfoTy AsyncInfo(Device); 185 int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 186 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 187 if (Rc == OFFLOAD_SUCCESS) 188 Rc = AsyncInfo.synchronize(); 189 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 190 } 191 192 EXTERN void __tgt_target_data_end_nowait_mapper( 193 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 194 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 195 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 196 void *NoAliasDepList) { 197 TIMESCOPE_WITH_IDENT(Loc); 198 199 __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, 200 ArgTypes, ArgNames, ArgMappers); 201 } 202 203 EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, 204 void **ArgsBase, void **Args, 205 int64_t *ArgSizes, int64_t *ArgTypes) { 206 TIMESCOPE(); 207 __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 208 ArgSizes, ArgTypes, nullptr, nullptr); 209 } 210 211 EXTERN void __tgt_target_data_update_nowait( 212 int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, 213 int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList, 214 int32_t NoAliasDepNum, void *NoAliasDepList) { 215 TIMESCOPE(); 216 217 __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, 218 ArgSizes, ArgTypes, nullptr, nullptr); 219 } 220 221 EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, 222 int32_t ArgNum, void **ArgsBase, 223 void **Args, int64_t *ArgSizes, 224 int64_t *ArgTypes, 225 map_var_info_t *ArgNames, 226 void **ArgMappers) { 227 TIMESCOPE_WITH_IDENT(Loc); 228 DP("Entering data update with %d mappings\n", ArgNum); 229 if (checkDeviceAndCtors(DeviceId, Loc)) { 230 DP("Not offloading to device %" PRId64 "\n", DeviceId); 231 return; 232 } 233 234 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 235 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 236 "Updating OpenMP data"); 237 238 DeviceTy &Device = *PM->Devices[DeviceId]; 239 AsyncInfoTy AsyncInfo(Device); 240 int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, 241 ArgTypes, ArgNames, ArgMappers, AsyncInfo); 242 if (Rc == OFFLOAD_SUCCESS) 243 Rc = AsyncInfo.synchronize(); 244 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 245 } 246 247 EXTERN void __tgt_target_data_update_nowait_mapper( 248 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 249 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 250 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 251 void *NoAliasDepList) { 252 TIMESCOPE_WITH_IDENT(Loc); 253 254 __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, 255 ArgSizes, ArgTypes, ArgNames, ArgMappers); 256 } 257 258 EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, 259 void **ArgsBase, void **Args, int64_t *ArgSizes, 260 int64_t *ArgTypes) { 261 TIMESCOPE(); 262 return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, 263 ArgSizes, ArgTypes, nullptr, nullptr); 264 } 265 266 EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum, 267 void **ArgsBase, void **Args, int64_t *ArgSizes, 268 int64_t *ArgTypes, int32_t DepNum, void *DepList, 269 int32_t NoAliasDepNum, void *NoAliasDepList) { 270 TIMESCOPE(); 271 272 return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, 273 ArgSizes, ArgTypes, nullptr, nullptr); 274 } 275 276 EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr, 277 int32_t ArgNum, void **ArgsBase, void **Args, 278 int64_t *ArgSizes, int64_t *ArgTypes, 279 map_var_info_t *ArgNames, void **ArgMappers) { 280 TIMESCOPE_WITH_IDENT(Loc); 281 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 282 "\n", 283 DPxPTR(HostPtr), DeviceId); 284 if (checkDeviceAndCtors(DeviceId, Loc)) { 285 DP("Not offloading to device %" PRId64 "\n", DeviceId); 286 return OMP_TGT_FAIL; 287 } 288 289 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 290 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 291 "Entering OpenMP kernel"); 292 #ifdef OMPTARGET_DEBUG 293 for (int I = 0; I < ArgNum; ++I) { 294 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 295 ", Type=0x%" PRIx64 ", Name=%s\n", 296 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 297 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 298 } 299 #endif 300 301 DeviceTy &Device = *PM->Devices[DeviceId]; 302 AsyncInfoTy AsyncInfo(Device); 303 int Rc = 304 target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, 305 ArgNames, ArgMappers, 0, 0, false /*team*/, AsyncInfo); 306 if (Rc == OFFLOAD_SUCCESS) 307 Rc = AsyncInfo.synchronize(); 308 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 309 assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); 310 return OMP_TGT_SUCCESS; 311 } 312 313 EXTERN int __tgt_target_nowait_mapper( 314 ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, 315 void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, 316 map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList, 317 int32_t NoAliasDepNum, void *NoAliasDepList) { 318 TIMESCOPE_WITH_IDENT(Loc); 319 320 return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args, 321 ArgSizes, ArgTypes, ArgNames, ArgMappers); 322 } 323 324 EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum, 325 void **ArgsBase, void **Args, int64_t *ArgSizes, 326 int64_t *ArgTypes, int32_t TeamNum, 327 int32_t ThreadLimit) { 328 TIMESCOPE(); 329 return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, 330 Args, ArgSizes, ArgTypes, nullptr, nullptr, 331 TeamNum, ThreadLimit); 332 } 333 334 EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr, 335 int32_t ArgNum, void **ArgsBase, 336 void **Args, int64_t *ArgSizes, 337 int64_t *ArgTypes, int32_t TeamNum, 338 int32_t ThreadLimit, int32_t DepNum, 339 void *DepList, int32_t NoAliasDepNum, 340 void *NoAliasDepList) { 341 TIMESCOPE(); 342 343 return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, 344 Args, ArgSizes, ArgTypes, nullptr, nullptr, 345 TeamNum, ThreadLimit); 346 } 347 348 EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, 349 void *HostPtr, int32_t ArgNum, 350 void **ArgsBase, void **Args, 351 int64_t *ArgSizes, int64_t *ArgTypes, 352 map_var_info_t *ArgNames, 353 void **ArgMappers, int32_t TeamNum, 354 int32_t ThreadLimit) { 355 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 356 "\n", 357 DPxPTR(HostPtr), DeviceId); 358 if (checkDeviceAndCtors(DeviceId, Loc)) { 359 DP("Not offloading to device %" PRId64 "\n", DeviceId); 360 return OMP_TGT_FAIL; 361 } 362 363 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 364 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, 365 "Entering OpenMP kernel"); 366 #ifdef OMPTARGET_DEBUG 367 for (int I = 0; I < ArgNum; ++I) { 368 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 369 ", Type=0x%" PRIx64 ", Name=%s\n", 370 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], 371 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); 372 } 373 #endif 374 375 DeviceTy &Device = *PM->Devices[DeviceId]; 376 AsyncInfoTy AsyncInfo(Device); 377 int Rc = target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, 378 ArgTypes, ArgNames, ArgMappers, TeamNum, ThreadLimit, 379 true /*team*/, AsyncInfo); 380 if (Rc == OFFLOAD_SUCCESS) 381 Rc = AsyncInfo.synchronize(); 382 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); 383 assert(Rc == OFFLOAD_SUCCESS && 384 "__tgt_target_teams_mapper unexpected failure!"); 385 return OMP_TGT_SUCCESS; 386 } 387 388 EXTERN int __tgt_target_teams_nowait_mapper( 389 ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, 390 void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, 391 map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum, 392 int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 393 void *NoAliasDepList) { 394 TIMESCOPE_WITH_IDENT(Loc); 395 396 return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, 397 Args, ArgSizes, ArgTypes, ArgNames, 398 ArgMappers, TeamNum, ThreadLimit); 399 } 400 401 // Get the current number of components for a user-defined mapper. 402 EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { 403 TIMESCOPE(); 404 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; 405 int64_t Size = MapperComponentsPtr->Components.size(); 406 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", 407 DPxPTR(RtMapperHandle), Size); 408 return Size; 409 } 410 411 // Push back one component for a user-defined mapper. 412 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, 413 void *Begin, int64_t Size, int64_t Type, 414 void *Name) { 415 TIMESCOPE(); 416 DP("__tgt_push_mapper_component(Handle=" DPxMOD 417 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 418 ", Type=0x%" PRIx64 ", Name=%s).\n", 419 DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type, 420 (Name) ? getNameFromMapping(Name).c_str() : "unknown"); 421 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; 422 MapperComponentsPtr->Components.push_back( 423 MapComponentInfoTy(Base, Begin, Size, Type, Name)); 424 } 425 426 EXTERN void __kmpc_push_target_tripcount(int64_t DeviceId, 427 uint64_t LoopTripcount) { 428 __kmpc_push_target_tripcount_mapper(nullptr, DeviceId, LoopTripcount); 429 } 430 431 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId, 432 uint64_t LoopTripcount) { 433 TIMESCOPE_WITH_IDENT(Loc); 434 if (checkDeviceAndCtors(DeviceId, Loc)) { 435 DP("Not offloading to device %" PRId64 "\n", DeviceId); 436 return; 437 } 438 439 DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId, 440 LoopTripcount); 441 PM->TblMapMtx.lock(); 442 PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), 443 LoopTripcount); 444 PM->TblMapMtx.unlock(); 445 } 446 447 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { 448 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); 449 InfoLevel.store(NewInfoLevel); 450 for (auto &R : PM->RTLs.AllRTLs) { 451 if (R.set_info_flag) 452 R.set_info_flag(NewInfoLevel); 453 } 454 } 455 456 EXTERN int __tgt_print_device_info(int64_t DeviceId) { 457 return PM->Devices[DeviceId]->printDeviceInfo( 458 PM->Devices[DeviceId]->RTLDeviceID); 459 } 460