1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "device.h" 15 #include "omptarget.h" 16 #include "private.h" 17 #include "rtl.h" 18 19 #include <cassert> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <mutex> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds requires flags 26 EXTERN void __tgt_register_requires(int64_t flags) { 27 TIMESCOPE(); 28 PM->RTLs.RegisterRequires(flags); 29 } 30 31 //////////////////////////////////////////////////////////////////////////////// 32 /// adds a target shared library to the target execution image 33 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { 34 TIMESCOPE(); 35 std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); 36 for (auto &RTL : PM->RTLs.AllRTLs) { 37 if (RTL.register_lib) { 38 if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) { 39 DP("Could not register library with %s", RTL.RTLName.c_str()); 40 } 41 } 42 } 43 PM->RTLs.RegisterLib(desc); 44 } 45 46 //////////////////////////////////////////////////////////////////////////////// 47 /// Initialize all available devices without registering any image 48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } 49 50 //////////////////////////////////////////////////////////////////////////////// 51 /// unloads a target shared library 52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { 53 TIMESCOPE(); 54 PM->RTLs.UnregisterLib(desc); 55 for (auto &RTL : PM->RTLs.UsedRTLs) { 56 if (RTL->unregister_lib) { 57 if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) { 58 DP("Could not register library with %s", RTL->RTLName.c_str()); 59 } 60 } 61 } 62 } 63 64 /// creates host-to-target data mapping, stores it in the 65 /// libomptarget.so internal structure (an entry in a stack of data maps) 66 /// and passes the data to the device. 67 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 68 void **args_base, void **args, 69 int64_t *arg_sizes, int64_t *arg_types) { 70 TIMESCOPE(); 71 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 72 arg_sizes, arg_types, nullptr, nullptr); 73 } 74 75 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, 76 void **args_base, void **args, 77 int64_t *arg_sizes, 78 int64_t *arg_types, int32_t depNum, 79 void *depList, int32_t noAliasDepNum, 80 void *noAliasDepList) { 81 TIMESCOPE(); 82 if (depNum + noAliasDepNum > 0) 83 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 84 85 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 86 arg_sizes, arg_types, nullptr, nullptr); 87 } 88 89 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id, 90 int32_t arg_num, void **args_base, 91 void **args, int64_t *arg_sizes, 92 int64_t *arg_types, 93 map_var_info_t *arg_names, 94 void **arg_mappers) { 95 TIMESCOPE_WITH_IDENT(loc); 96 DP("Entering data begin region for device %" PRId64 " with %d mappings\n", 97 device_id, arg_num); 98 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 99 DP("Not offloading to device %" PRId64 "\n", device_id); 100 return; 101 } 102 103 DeviceTy &Device = PM->Devices[device_id]; 104 105 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 106 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 107 arg_names, "Entering OpenMP data region"); 108 #ifdef OMPTARGET_DEBUG 109 for (int i = 0; i < arg_num; ++i) { 110 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 111 ", Type=0x%" PRIx64 ", Name=%s\n", 112 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 113 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 114 } 115 #endif 116 117 AsyncInfoTy AsyncInfo(Device); 118 int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes, 119 arg_types, arg_names, arg_mappers, AsyncInfo); 120 if (rc == OFFLOAD_SUCCESS) 121 rc = AsyncInfo.synchronize(); 122 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 123 } 124 125 EXTERN void __tgt_target_data_begin_nowait_mapper( 126 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 127 void **args, int64_t *arg_sizes, int64_t *arg_types, 128 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 129 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 130 TIMESCOPE_WITH_IDENT(loc); 131 if (depNum + noAliasDepNum > 0) 132 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 133 134 __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args, 135 arg_sizes, arg_types, arg_names, arg_mappers); 136 } 137 138 /// passes data from the target, releases target memory and destroys 139 /// the host-target mapping (top entry from the stack of data maps) 140 /// created by the last __tgt_target_data_begin. 141 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 142 void **args_base, void **args, 143 int64_t *arg_sizes, int64_t *arg_types) { 144 TIMESCOPE(); 145 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 146 arg_sizes, arg_types, nullptr, nullptr); 147 } 148 149 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, 150 void **args_base, void **args, 151 int64_t *arg_sizes, int64_t *arg_types, 152 int32_t depNum, void *depList, 153 int32_t noAliasDepNum, 154 void *noAliasDepList) { 155 TIMESCOPE(); 156 if (depNum + noAliasDepNum > 0) 157 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 158 159 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 160 arg_sizes, arg_types, nullptr, nullptr); 161 } 162 163 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id, 164 int32_t arg_num, void **args_base, 165 void **args, int64_t *arg_sizes, 166 int64_t *arg_types, 167 map_var_info_t *arg_names, 168 void **arg_mappers) { 169 TIMESCOPE_WITH_IDENT(loc); 170 DP("Entering data end region with %d mappings\n", arg_num); 171 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 172 DP("Not offloading to device %" PRId64 "\n", device_id); 173 return; 174 } 175 176 DeviceTy &Device = PM->Devices[device_id]; 177 178 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 179 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 180 arg_names, "Exiting OpenMP data region"); 181 #ifdef OMPTARGET_DEBUG 182 for (int i = 0; i < arg_num; ++i) { 183 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 184 ", Type=0x%" PRIx64 ", Name=%s\n", 185 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 186 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 187 } 188 #endif 189 190 AsyncInfoTy AsyncInfo(Device); 191 int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, 192 arg_types, arg_names, arg_mappers, AsyncInfo); 193 if (rc == OFFLOAD_SUCCESS) 194 rc = AsyncInfo.synchronize(); 195 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 196 } 197 198 EXTERN void __tgt_target_data_end_nowait_mapper( 199 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 200 void **args, int64_t *arg_sizes, int64_t *arg_types, 201 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 202 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 203 TIMESCOPE_WITH_IDENT(loc); 204 if (depNum + noAliasDepNum > 0) 205 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 206 207 __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args, 208 arg_sizes, arg_types, arg_names, arg_mappers); 209 } 210 211 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 212 void **args_base, void **args, 213 int64_t *arg_sizes, int64_t *arg_types) { 214 TIMESCOPE(); 215 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 216 arg_sizes, arg_types, nullptr, nullptr); 217 } 218 219 EXTERN void __tgt_target_data_update_nowait( 220 int64_t device_id, int32_t arg_num, void **args_base, void **args, 221 int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, 222 int32_t noAliasDepNum, void *noAliasDepList) { 223 TIMESCOPE(); 224 if (depNum + noAliasDepNum > 0) 225 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 226 227 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 228 arg_sizes, arg_types, nullptr, nullptr); 229 } 230 231 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id, 232 int32_t arg_num, void **args_base, 233 void **args, int64_t *arg_sizes, 234 int64_t *arg_types, 235 map_var_info_t *arg_names, 236 void **arg_mappers) { 237 TIMESCOPE_WITH_IDENT(loc); 238 DP("Entering data update with %d mappings\n", arg_num); 239 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 240 DP("Not offloading to device %" PRId64 "\n", device_id); 241 return; 242 } 243 244 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 245 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 246 arg_names, "Updating OpenMP data"); 247 248 DeviceTy &Device = PM->Devices[device_id]; 249 AsyncInfoTy AsyncInfo(Device); 250 int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes, 251 arg_types, arg_names, arg_mappers, AsyncInfo); 252 if (rc == OFFLOAD_SUCCESS) 253 rc = AsyncInfo.synchronize(); 254 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 255 } 256 257 EXTERN void __tgt_target_data_update_nowait_mapper( 258 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 259 void **args, int64_t *arg_sizes, int64_t *arg_types, 260 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 261 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 262 TIMESCOPE_WITH_IDENT(loc); 263 if (depNum + noAliasDepNum > 0) 264 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 265 266 __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args, 267 arg_sizes, arg_types, arg_names, arg_mappers); 268 } 269 270 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, 271 void **args_base, void **args, int64_t *arg_sizes, 272 int64_t *arg_types) { 273 TIMESCOPE(); 274 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 275 args, arg_sizes, arg_types, nullptr, nullptr); 276 } 277 278 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, 279 int32_t arg_num, void **args_base, void **args, 280 int64_t *arg_sizes, int64_t *arg_types, 281 int32_t depNum, void *depList, 282 int32_t noAliasDepNum, void *noAliasDepList) { 283 TIMESCOPE(); 284 if (depNum + noAliasDepNum > 0) 285 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 286 287 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 288 args, arg_sizes, arg_types, nullptr, nullptr); 289 } 290 291 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr, 292 int32_t arg_num, void **args_base, void **args, 293 int64_t *arg_sizes, int64_t *arg_types, 294 map_var_info_t *arg_names, void **arg_mappers) { 295 TIMESCOPE_WITH_IDENT(loc); 296 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 297 "\n", 298 DPxPTR(host_ptr), device_id); 299 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 300 DP("Not offloading to device %" PRId64 "\n", device_id); 301 return OFFLOAD_FAIL; 302 } 303 304 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 305 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 306 arg_names, "Entering OpenMP kernel"); 307 #ifdef OMPTARGET_DEBUG 308 for (int i = 0; i < arg_num; ++i) { 309 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 310 ", Type=0x%" PRIx64 ", Name=%s\n", 311 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 312 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 313 } 314 #endif 315 316 DeviceTy &Device = PM->Devices[device_id]; 317 AsyncInfoTy AsyncInfo(Device); 318 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 319 arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, 320 AsyncInfo); 321 if (rc == OFFLOAD_SUCCESS) 322 rc = AsyncInfo.synchronize(); 323 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 324 return rc; 325 } 326 327 EXTERN int __tgt_target_nowait_mapper( 328 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 329 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 330 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 331 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 332 TIMESCOPE_WITH_IDENT(loc); 333 if (depNum + noAliasDepNum > 0) 334 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 335 336 return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args, 337 arg_sizes, arg_types, arg_names, arg_mappers); 338 } 339 340 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, 341 int32_t arg_num, void **args_base, void **args, 342 int64_t *arg_sizes, int64_t *arg_types, 343 int32_t team_num, int32_t thread_limit) { 344 TIMESCOPE(); 345 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 346 args_base, args, arg_sizes, arg_types, 347 nullptr, nullptr, team_num, thread_limit); 348 } 349 350 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, 351 int32_t arg_num, void **args_base, 352 void **args, int64_t *arg_sizes, 353 int64_t *arg_types, int32_t team_num, 354 int32_t thread_limit, int32_t depNum, 355 void *depList, int32_t noAliasDepNum, 356 void *noAliasDepList) { 357 TIMESCOPE(); 358 if (depNum + noAliasDepNum > 0) 359 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 360 361 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 362 args_base, args, arg_sizes, arg_types, 363 nullptr, nullptr, team_num, thread_limit); 364 } 365 366 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, 367 void *host_ptr, int32_t arg_num, 368 void **args_base, void **args, 369 int64_t *arg_sizes, int64_t *arg_types, 370 map_var_info_t *arg_names, 371 void **arg_mappers, int32_t team_num, 372 int32_t thread_limit) { 373 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 374 "\n", 375 DPxPTR(host_ptr), device_id); 376 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 377 DP("Not offloading to device %" PRId64 "\n", device_id); 378 return OFFLOAD_FAIL; 379 } 380 381 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 382 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 383 arg_names, "Entering OpenMP kernel"); 384 #ifdef OMPTARGET_DEBUG 385 for (int i = 0; i < arg_num; ++i) { 386 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 387 ", Type=0x%" PRIx64 ", Name=%s\n", 388 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 389 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 390 } 391 #endif 392 393 DeviceTy &Device = PM->Devices[device_id]; 394 AsyncInfoTy AsyncInfo(Device); 395 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 396 arg_types, arg_names, arg_mappers, team_num, thread_limit, 397 true /*team*/, AsyncInfo); 398 if (rc == OFFLOAD_SUCCESS) 399 rc = AsyncInfo.synchronize(); 400 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 401 return rc; 402 } 403 404 EXTERN int __tgt_target_teams_nowait_mapper( 405 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 406 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 407 map_var_info_t *arg_names, void **arg_mappers, int32_t team_num, 408 int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, 409 void *noAliasDepList) { 410 TIMESCOPE_WITH_IDENT(loc); 411 if (depNum + noAliasDepNum > 0) 412 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 413 414 return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base, 415 args, arg_sizes, arg_types, arg_names, 416 arg_mappers, team_num, thread_limit); 417 } 418 419 // Get the current number of components for a user-defined mapper. 420 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) { 421 TIMESCOPE(); 422 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 423 int64_t size = MapperComponentsPtr->Components.size(); 424 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", 425 DPxPTR(rt_mapper_handle), size); 426 return size; 427 } 428 429 // Push back one component for a user-defined mapper. 430 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base, 431 void *begin, int64_t size, int64_t type, 432 void *name) { 433 TIMESCOPE(); 434 DP("__tgt_push_mapper_component(Handle=" DPxMOD 435 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 436 ", Type=0x%" PRIx64 ", Name=%s).\n", 437 DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type, 438 (name) ? getNameFromMapping(name).c_str() : "unknown"); 439 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 440 MapperComponentsPtr->Components.push_back( 441 MapComponentInfoTy(base, begin, size, type, name)); 442 } 443 444 EXTERN void __kmpc_push_target_tripcount(int64_t device_id, 445 uint64_t loop_tripcount) { 446 __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); 447 } 448 449 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, 450 uint64_t loop_tripcount) { 451 TIMESCOPE_WITH_IDENT(loc); 452 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 453 DP("Not offloading to device %" PRId64 "\n", device_id); 454 return; 455 } 456 457 DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, 458 loop_tripcount); 459 PM->TblMapMtx.lock(); 460 PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), 461 loop_tripcount); 462 PM->TblMapMtx.unlock(); 463 } 464 465 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { 466 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); 467 InfoLevel.store(NewInfoLevel); 468 for (auto &R : PM->RTLs.AllRTLs) { 469 if (R.set_info_flag) 470 R.set_info_flag(NewInfoLevel); 471 } 472 } 473 474 EXTERN int __tgt_print_device_info(int64_t device_id) { 475 return PM->Devices[device_id].printDeviceInfo( 476 PM->Devices[device_id].RTLDeviceID); 477 } 478