1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "device.h" 15 #include "omptarget.h" 16 #include "private.h" 17 #include "rtl.h" 18 19 #include <cassert> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <mutex> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds requires flags 26 EXTERN void __tgt_register_requires(int64_t flags) { 27 TIMESCOPE(); 28 PM->RTLs.RegisterRequires(flags); 29 } 30 31 //////////////////////////////////////////////////////////////////////////////// 32 /// adds a target shared library to the target execution image 33 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { 34 TIMESCOPE(); 35 std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); 36 for (auto &RTL : PM->RTLs.AllRTLs) { 37 if (RTL.register_lib) { 38 if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) { 39 DP("Could not register library with %s", RTL.RTLName.c_str()); 40 } 41 } 42 } 43 PM->RTLs.RegisterLib(desc); 44 } 45 46 //////////////////////////////////////////////////////////////////////////////// 47 /// Initialize all available devices without registering any image 48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } 49 50 //////////////////////////////////////////////////////////////////////////////// 51 /// unloads a target shared library 52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { 53 TIMESCOPE(); 54 PM->RTLs.UnregisterLib(desc); 55 for (auto &RTL : PM->RTLs.UsedRTLs) { 56 if (RTL->unregister_lib) { 57 if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) { 58 DP("Could not register library with %s", RTL->RTLName.c_str()); 59 } 60 } 61 } 62 } 63 64 /// creates host-to-target data mapping, stores it in the 65 /// libomptarget.so internal structure (an entry in a stack of data maps) 66 /// and passes the data to the device. 67 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 68 void **args_base, void **args, 69 int64_t *arg_sizes, int64_t *arg_types) { 70 TIMESCOPE(); 71 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 72 arg_sizes, arg_types, nullptr, nullptr); 73 } 74 75 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, 76 void **args_base, void **args, 77 int64_t *arg_sizes, 78 int64_t *arg_types, int32_t depNum, 79 void *depList, int32_t noAliasDepNum, 80 void *noAliasDepList) { 81 TIMESCOPE(); 82 83 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 84 arg_sizes, arg_types, nullptr, nullptr); 85 } 86 87 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id, 88 int32_t arg_num, void **args_base, 89 void **args, int64_t *arg_sizes, 90 int64_t *arg_types, 91 map_var_info_t *arg_names, 92 void **arg_mappers) { 93 TIMESCOPE_WITH_IDENT(loc); 94 DP("Entering data begin region for device %" PRId64 " with %d mappings\n", 95 device_id, arg_num); 96 if (checkDeviceAndCtors(device_id, loc)) { 97 DP("Not offloading to device %" PRId64 "\n", device_id); 98 return; 99 } 100 101 DeviceTy &Device = *PM->Devices[device_id]; 102 103 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 104 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 105 arg_names, "Entering OpenMP data region"); 106 #ifdef OMPTARGET_DEBUG 107 for (int i = 0; i < arg_num; ++i) { 108 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 109 ", Type=0x%" PRIx64 ", Name=%s\n", 110 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 111 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 112 } 113 #endif 114 115 AsyncInfoTy AsyncInfo(Device); 116 int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes, 117 arg_types, arg_names, arg_mappers, AsyncInfo); 118 if (rc == OFFLOAD_SUCCESS) 119 rc = AsyncInfo.synchronize(); 120 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 121 } 122 123 EXTERN void __tgt_target_data_begin_nowait_mapper( 124 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 125 void **args, int64_t *arg_sizes, int64_t *arg_types, 126 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 127 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 128 TIMESCOPE_WITH_IDENT(loc); 129 130 __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args, 131 arg_sizes, arg_types, arg_names, arg_mappers); 132 } 133 134 /// passes data from the target, releases target memory and destroys 135 /// the host-target mapping (top entry from the stack of data maps) 136 /// created by the last __tgt_target_data_begin. 137 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 138 void **args_base, void **args, 139 int64_t *arg_sizes, int64_t *arg_types) { 140 TIMESCOPE(); 141 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 142 arg_sizes, arg_types, nullptr, nullptr); 143 } 144 145 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, 146 void **args_base, void **args, 147 int64_t *arg_sizes, int64_t *arg_types, 148 int32_t depNum, void *depList, 149 int32_t noAliasDepNum, 150 void *noAliasDepList) { 151 TIMESCOPE(); 152 153 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 154 arg_sizes, arg_types, nullptr, nullptr); 155 } 156 157 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id, 158 int32_t arg_num, void **args_base, 159 void **args, int64_t *arg_sizes, 160 int64_t *arg_types, 161 map_var_info_t *arg_names, 162 void **arg_mappers) { 163 TIMESCOPE_WITH_IDENT(loc); 164 DP("Entering data end region with %d mappings\n", arg_num); 165 if (checkDeviceAndCtors(device_id, loc)) { 166 DP("Not offloading to device %" PRId64 "\n", device_id); 167 return; 168 } 169 170 DeviceTy &Device = *PM->Devices[device_id]; 171 172 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 173 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 174 arg_names, "Exiting OpenMP data region"); 175 #ifdef OMPTARGET_DEBUG 176 for (int i = 0; i < arg_num; ++i) { 177 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 178 ", Type=0x%" PRIx64 ", Name=%s\n", 179 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 180 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 181 } 182 #endif 183 184 AsyncInfoTy AsyncInfo(Device); 185 int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, 186 arg_types, arg_names, arg_mappers, AsyncInfo); 187 if (rc == OFFLOAD_SUCCESS) 188 rc = AsyncInfo.synchronize(); 189 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 190 } 191 192 EXTERN void __tgt_target_data_end_nowait_mapper( 193 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 194 void **args, int64_t *arg_sizes, int64_t *arg_types, 195 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 196 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 197 TIMESCOPE_WITH_IDENT(loc); 198 199 __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args, 200 arg_sizes, arg_types, arg_names, arg_mappers); 201 } 202 203 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 204 void **args_base, void **args, 205 int64_t *arg_sizes, int64_t *arg_types) { 206 TIMESCOPE(); 207 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 208 arg_sizes, arg_types, nullptr, nullptr); 209 } 210 211 EXTERN void __tgt_target_data_update_nowait( 212 int64_t device_id, int32_t arg_num, void **args_base, void **args, 213 int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, 214 int32_t noAliasDepNum, void *noAliasDepList) { 215 TIMESCOPE(); 216 217 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 218 arg_sizes, arg_types, nullptr, nullptr); 219 } 220 221 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id, 222 int32_t arg_num, void **args_base, 223 void **args, int64_t *arg_sizes, 224 int64_t *arg_types, 225 map_var_info_t *arg_names, 226 void **arg_mappers) { 227 TIMESCOPE_WITH_IDENT(loc); 228 DP("Entering data update with %d mappings\n", arg_num); 229 if (checkDeviceAndCtors(device_id, loc)) { 230 DP("Not offloading to device %" PRId64 "\n", device_id); 231 return; 232 } 233 234 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 235 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 236 arg_names, "Updating OpenMP data"); 237 238 DeviceTy &Device = *PM->Devices[device_id]; 239 AsyncInfoTy AsyncInfo(Device); 240 int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes, 241 arg_types, arg_names, arg_mappers, AsyncInfo); 242 if (rc == OFFLOAD_SUCCESS) 243 rc = AsyncInfo.synchronize(); 244 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 245 } 246 247 EXTERN void __tgt_target_data_update_nowait_mapper( 248 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 249 void **args, int64_t *arg_sizes, int64_t *arg_types, 250 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 251 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 252 TIMESCOPE_WITH_IDENT(loc); 253 254 __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args, 255 arg_sizes, arg_types, arg_names, arg_mappers); 256 } 257 258 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, 259 void **args_base, void **args, int64_t *arg_sizes, 260 int64_t *arg_types) { 261 TIMESCOPE(); 262 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 263 args, arg_sizes, arg_types, nullptr, nullptr); 264 } 265 266 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, 267 int32_t arg_num, void **args_base, void **args, 268 int64_t *arg_sizes, int64_t *arg_types, 269 int32_t depNum, void *depList, 270 int32_t noAliasDepNum, void *noAliasDepList) { 271 TIMESCOPE(); 272 273 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 274 args, arg_sizes, arg_types, nullptr, nullptr); 275 } 276 277 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr, 278 int32_t arg_num, void **args_base, void **args, 279 int64_t *arg_sizes, int64_t *arg_types, 280 map_var_info_t *arg_names, void **arg_mappers) { 281 TIMESCOPE_WITH_IDENT(loc); 282 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 283 "\n", 284 DPxPTR(host_ptr), device_id); 285 if (checkDeviceAndCtors(device_id, loc)) { 286 DP("Not offloading to device %" PRId64 "\n", device_id); 287 return OMP_TGT_FAIL; 288 } 289 290 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 291 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 292 arg_names, "Entering OpenMP kernel"); 293 #ifdef OMPTARGET_DEBUG 294 for (int i = 0; i < arg_num; ++i) { 295 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 296 ", Type=0x%" PRIx64 ", Name=%s\n", 297 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 298 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 299 } 300 #endif 301 302 DeviceTy &Device = *PM->Devices[device_id]; 303 AsyncInfoTy AsyncInfo(Device); 304 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 305 arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, 306 AsyncInfo); 307 if (rc == OFFLOAD_SUCCESS) 308 rc = AsyncInfo.synchronize(); 309 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 310 assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); 311 return OMP_TGT_SUCCESS; 312 } 313 314 EXTERN int __tgt_target_nowait_mapper( 315 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 316 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 317 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 318 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 319 TIMESCOPE_WITH_IDENT(loc); 320 321 return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args, 322 arg_sizes, arg_types, arg_names, arg_mappers); 323 } 324 325 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, 326 int32_t arg_num, void **args_base, void **args, 327 int64_t *arg_sizes, int64_t *arg_types, 328 int32_t team_num, int32_t thread_limit) { 329 TIMESCOPE(); 330 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 331 args_base, args, arg_sizes, arg_types, 332 nullptr, nullptr, team_num, thread_limit); 333 } 334 335 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, 336 int32_t arg_num, void **args_base, 337 void **args, int64_t *arg_sizes, 338 int64_t *arg_types, int32_t team_num, 339 int32_t thread_limit, int32_t depNum, 340 void *depList, int32_t noAliasDepNum, 341 void *noAliasDepList) { 342 TIMESCOPE(); 343 344 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 345 args_base, args, arg_sizes, arg_types, 346 nullptr, nullptr, team_num, thread_limit); 347 } 348 349 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, 350 void *host_ptr, int32_t arg_num, 351 void **args_base, void **args, 352 int64_t *arg_sizes, int64_t *arg_types, 353 map_var_info_t *arg_names, 354 void **arg_mappers, int32_t team_num, 355 int32_t thread_limit) { 356 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 357 "\n", 358 DPxPTR(host_ptr), device_id); 359 if (checkDeviceAndCtors(device_id, loc)) { 360 DP("Not offloading to device %" PRId64 "\n", device_id); 361 return OMP_TGT_FAIL; 362 } 363 364 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 365 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 366 arg_names, "Entering OpenMP kernel"); 367 #ifdef OMPTARGET_DEBUG 368 for (int i = 0; i < arg_num; ++i) { 369 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 370 ", Type=0x%" PRIx64 ", Name=%s\n", 371 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 372 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 373 } 374 #endif 375 376 DeviceTy &Device = *PM->Devices[device_id]; 377 AsyncInfoTy AsyncInfo(Device); 378 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 379 arg_types, arg_names, arg_mappers, team_num, thread_limit, 380 true /*team*/, AsyncInfo); 381 if (rc == OFFLOAD_SUCCESS) 382 rc = AsyncInfo.synchronize(); 383 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 384 assert(rc == OFFLOAD_SUCCESS && 385 "__tgt_target_teams_mapper unexpected failure!"); 386 return OMP_TGT_SUCCESS; 387 } 388 389 EXTERN int __tgt_target_teams_nowait_mapper( 390 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 391 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 392 map_var_info_t *arg_names, void **arg_mappers, int32_t team_num, 393 int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, 394 void *noAliasDepList) { 395 TIMESCOPE_WITH_IDENT(loc); 396 397 return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base, 398 args, arg_sizes, arg_types, arg_names, 399 arg_mappers, team_num, thread_limit); 400 } 401 402 // Get the current number of components for a user-defined mapper. 403 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) { 404 TIMESCOPE(); 405 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 406 int64_t size = MapperComponentsPtr->Components.size(); 407 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", 408 DPxPTR(rt_mapper_handle), size); 409 return size; 410 } 411 412 // Push back one component for a user-defined mapper. 413 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base, 414 void *begin, int64_t size, int64_t type, 415 void *name) { 416 TIMESCOPE(); 417 DP("__tgt_push_mapper_component(Handle=" DPxMOD 418 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 419 ", Type=0x%" PRIx64 ", Name=%s).\n", 420 DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type, 421 (name) ? getNameFromMapping(name).c_str() : "unknown"); 422 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 423 MapperComponentsPtr->Components.push_back( 424 MapComponentInfoTy(base, begin, size, type, name)); 425 } 426 427 EXTERN void __kmpc_push_target_tripcount(int64_t device_id, 428 uint64_t loop_tripcount) { 429 __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); 430 } 431 432 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, 433 uint64_t loop_tripcount) { 434 TIMESCOPE_WITH_IDENT(loc); 435 if (checkDeviceAndCtors(device_id, loc)) { 436 DP("Not offloading to device %" PRId64 "\n", device_id); 437 return; 438 } 439 440 DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, 441 loop_tripcount); 442 PM->TblMapMtx.lock(); 443 PM->Devices[device_id]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), 444 loop_tripcount); 445 PM->TblMapMtx.unlock(); 446 } 447 448 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { 449 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); 450 InfoLevel.store(NewInfoLevel); 451 for (auto &R : PM->RTLs.AllRTLs) { 452 if (R.set_info_flag) 453 R.set_info_flag(NewInfoLevel); 454 } 455 } 456 457 EXTERN int __tgt_print_device_info(int64_t device_id) { 458 return PM->Devices[device_id]->printDeviceInfo( 459 PM->Devices[device_id]->RTLDeviceID); 460 } 461