1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "device.h" 15 #include "omptarget.h" 16 #include "private.h" 17 #include "rtl.h" 18 19 #include <cassert> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <mutex> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds requires flags 26 EXTERN void __tgt_register_requires(int64_t flags) { 27 TIMESCOPE(); 28 PM->RTLs.RegisterRequires(flags); 29 } 30 31 //////////////////////////////////////////////////////////////////////////////// 32 /// adds a target shared library to the target execution image 33 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { 34 TIMESCOPE(); 35 std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); 36 for (auto &RTL : PM->RTLs.AllRTLs) { 37 if (RTL.register_lib) { 38 if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) { 39 DP("Could not register library with %s", RTL.RTLName.c_str()); 40 } 41 } 42 } 43 PM->RTLs.RegisterLib(desc); 44 } 45 46 //////////////////////////////////////////////////////////////////////////////// 47 /// Initialize all available devices without registering any image 48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } 49 50 //////////////////////////////////////////////////////////////////////////////// 51 /// unloads a target shared library 52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { 53 TIMESCOPE(); 54 PM->RTLs.UnregisterLib(desc); 55 for (auto &RTL : PM->RTLs.UsedRTLs) { 56 if (RTL->unregister_lib) { 57 if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) { 58 DP("Could not register library with %s", RTL->RTLName.c_str()); 59 } 60 } 61 } 62 } 63 64 /// creates host-to-target data mapping, stores it in the 65 /// libomptarget.so internal structure (an entry in a stack of data maps) 66 /// and passes the data to the device. 67 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 68 void **args_base, void **args, 69 int64_t *arg_sizes, int64_t *arg_types) { 70 TIMESCOPE(); 71 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 72 arg_sizes, arg_types, nullptr, nullptr); 73 } 74 75 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, 76 void **args_base, void **args, 77 int64_t *arg_sizes, 78 int64_t *arg_types, int32_t depNum, 79 void *depList, int32_t noAliasDepNum, 80 void *noAliasDepList) { 81 TIMESCOPE(); 82 83 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 84 arg_sizes, arg_types, nullptr, nullptr); 85 } 86 87 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id, 88 int32_t arg_num, void **args_base, 89 void **args, int64_t *arg_sizes, 90 int64_t *arg_types, 91 map_var_info_t *arg_names, 92 void **arg_mappers) { 93 TIMESCOPE_WITH_IDENT(loc); 94 DP("Entering data begin region for device %" PRId64 " with %d mappings\n", 95 device_id, arg_num); 96 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 97 DP("Not offloading to device %" PRId64 "\n", device_id); 98 return; 99 } 100 101 DeviceTy &Device = PM->Devices[device_id]; 102 103 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 104 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 105 arg_names, "Entering OpenMP data region"); 106 #ifdef OMPTARGET_DEBUG 107 for (int i = 0; i < arg_num; ++i) { 108 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 109 ", Type=0x%" PRIx64 ", Name=%s\n", 110 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 111 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 112 } 113 #endif 114 115 AsyncInfoTy AsyncInfo(Device); 116 int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes, 117 arg_types, arg_names, arg_mappers, AsyncInfo); 118 if (rc == OFFLOAD_SUCCESS) 119 rc = AsyncInfo.synchronize(); 120 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 121 } 122 123 EXTERN void __tgt_target_data_begin_nowait_mapper( 124 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 125 void **args, int64_t *arg_sizes, int64_t *arg_types, 126 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 127 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 128 TIMESCOPE_WITH_IDENT(loc); 129 130 __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args, 131 arg_sizes, arg_types, arg_names, arg_mappers); 132 } 133 134 /// passes data from the target, releases target memory and destroys 135 /// the host-target mapping (top entry from the stack of data maps) 136 /// created by the last __tgt_target_data_begin. 137 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 138 void **args_base, void **args, 139 int64_t *arg_sizes, int64_t *arg_types) { 140 TIMESCOPE(); 141 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 142 arg_sizes, arg_types, nullptr, nullptr); 143 } 144 145 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, 146 void **args_base, void **args, 147 int64_t *arg_sizes, int64_t *arg_types, 148 int32_t depNum, void *depList, 149 int32_t noAliasDepNum, 150 void *noAliasDepList) { 151 TIMESCOPE(); 152 153 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 154 arg_sizes, arg_types, nullptr, nullptr); 155 } 156 157 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id, 158 int32_t arg_num, void **args_base, 159 void **args, int64_t *arg_sizes, 160 int64_t *arg_types, 161 map_var_info_t *arg_names, 162 void **arg_mappers) { 163 TIMESCOPE_WITH_IDENT(loc); 164 DP("Entering data end region with %d mappings\n", arg_num); 165 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 166 DP("Not offloading to device %" PRId64 "\n", device_id); 167 return; 168 } 169 170 DeviceTy &Device = PM->Devices[device_id]; 171 172 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 173 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 174 arg_names, "Exiting OpenMP data region"); 175 #ifdef OMPTARGET_DEBUG 176 for (int i = 0; i < arg_num; ++i) { 177 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 178 ", Type=0x%" PRIx64 ", Name=%s\n", 179 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 180 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 181 } 182 #endif 183 184 AsyncInfoTy AsyncInfo(Device); 185 int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, 186 arg_types, arg_names, arg_mappers, AsyncInfo); 187 if (rc == OFFLOAD_SUCCESS) 188 rc = AsyncInfo.synchronize(); 189 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 190 } 191 192 EXTERN void __tgt_target_data_end_nowait_mapper( 193 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 194 void **args, int64_t *arg_sizes, int64_t *arg_types, 195 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 196 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 197 TIMESCOPE_WITH_IDENT(loc); 198 199 __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args, 200 arg_sizes, arg_types, arg_names, arg_mappers); 201 } 202 203 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 204 void **args_base, void **args, 205 int64_t *arg_sizes, int64_t *arg_types) { 206 TIMESCOPE(); 207 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 208 arg_sizes, arg_types, nullptr, nullptr); 209 } 210 211 EXTERN void __tgt_target_data_update_nowait( 212 int64_t device_id, int32_t arg_num, void **args_base, void **args, 213 int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, 214 int32_t noAliasDepNum, void *noAliasDepList) { 215 TIMESCOPE(); 216 217 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 218 arg_sizes, arg_types, nullptr, nullptr); 219 } 220 221 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id, 222 int32_t arg_num, void **args_base, 223 void **args, int64_t *arg_sizes, 224 int64_t *arg_types, 225 map_var_info_t *arg_names, 226 void **arg_mappers) { 227 TIMESCOPE_WITH_IDENT(loc); 228 DP("Entering data update with %d mappings\n", arg_num); 229 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 230 DP("Not offloading to device %" PRId64 "\n", device_id); 231 return; 232 } 233 234 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 235 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 236 arg_names, "Updating OpenMP data"); 237 238 DeviceTy &Device = PM->Devices[device_id]; 239 AsyncInfoTy AsyncInfo(Device); 240 int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes, 241 arg_types, arg_names, arg_mappers, AsyncInfo); 242 if (rc == OFFLOAD_SUCCESS) 243 rc = AsyncInfo.synchronize(); 244 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 245 } 246 247 EXTERN void __tgt_target_data_update_nowait_mapper( 248 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 249 void **args, int64_t *arg_sizes, int64_t *arg_types, 250 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 251 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 252 TIMESCOPE_WITH_IDENT(loc); 253 254 __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args, 255 arg_sizes, arg_types, arg_names, arg_mappers); 256 } 257 258 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, 259 void **args_base, void **args, int64_t *arg_sizes, 260 int64_t *arg_types) { 261 TIMESCOPE(); 262 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 263 args, arg_sizes, arg_types, nullptr, nullptr); 264 } 265 266 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, 267 int32_t arg_num, void **args_base, void **args, 268 int64_t *arg_sizes, int64_t *arg_types, 269 int32_t depNum, void *depList, 270 int32_t noAliasDepNum, void *noAliasDepList) { 271 TIMESCOPE(); 272 273 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 274 args, arg_sizes, arg_types, nullptr, nullptr); 275 } 276 277 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr, 278 int32_t arg_num, void **args_base, void **args, 279 int64_t *arg_sizes, int64_t *arg_types, 280 map_var_info_t *arg_names, void **arg_mappers) { 281 TIMESCOPE_WITH_IDENT(loc); 282 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 283 "\n", 284 DPxPTR(host_ptr), device_id); 285 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 286 DP("Not offloading to device %" PRId64 "\n", device_id); 287 return OFFLOAD_FAIL; 288 } 289 290 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 291 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 292 arg_names, "Entering OpenMP kernel"); 293 #ifdef OMPTARGET_DEBUG 294 for (int i = 0; i < arg_num; ++i) { 295 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 296 ", Type=0x%" PRIx64 ", Name=%s\n", 297 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 298 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 299 } 300 #endif 301 302 DeviceTy &Device = PM->Devices[device_id]; 303 AsyncInfoTy AsyncInfo(Device); 304 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 305 arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, 306 AsyncInfo); 307 if (rc == OFFLOAD_SUCCESS) 308 rc = AsyncInfo.synchronize(); 309 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 310 return rc; 311 } 312 313 EXTERN int __tgt_target_nowait_mapper( 314 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 315 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 316 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 317 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 318 TIMESCOPE_WITH_IDENT(loc); 319 320 return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args, 321 arg_sizes, arg_types, arg_names, arg_mappers); 322 } 323 324 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, 325 int32_t arg_num, void **args_base, void **args, 326 int64_t *arg_sizes, int64_t *arg_types, 327 int32_t team_num, int32_t thread_limit) { 328 TIMESCOPE(); 329 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 330 args_base, args, arg_sizes, arg_types, 331 nullptr, nullptr, team_num, thread_limit); 332 } 333 334 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, 335 int32_t arg_num, void **args_base, 336 void **args, int64_t *arg_sizes, 337 int64_t *arg_types, int32_t team_num, 338 int32_t thread_limit, int32_t depNum, 339 void *depList, int32_t noAliasDepNum, 340 void *noAliasDepList) { 341 TIMESCOPE(); 342 343 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 344 args_base, args, arg_sizes, arg_types, 345 nullptr, nullptr, team_num, thread_limit); 346 } 347 348 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, 349 void *host_ptr, int32_t arg_num, 350 void **args_base, void **args, 351 int64_t *arg_sizes, int64_t *arg_types, 352 map_var_info_t *arg_names, 353 void **arg_mappers, int32_t team_num, 354 int32_t thread_limit) { 355 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 356 "\n", 357 DPxPTR(host_ptr), device_id); 358 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 359 DP("Not offloading to device %" PRId64 "\n", device_id); 360 return OFFLOAD_FAIL; 361 } 362 363 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 364 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 365 arg_names, "Entering OpenMP kernel"); 366 #ifdef OMPTARGET_DEBUG 367 for (int i = 0; i < arg_num; ++i) { 368 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 369 ", Type=0x%" PRIx64 ", Name=%s\n", 370 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 371 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 372 } 373 #endif 374 375 DeviceTy &Device = PM->Devices[device_id]; 376 AsyncInfoTy AsyncInfo(Device); 377 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 378 arg_types, arg_names, arg_mappers, team_num, thread_limit, 379 true /*team*/, AsyncInfo); 380 if (rc == OFFLOAD_SUCCESS) 381 rc = AsyncInfo.synchronize(); 382 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 383 return rc; 384 } 385 386 EXTERN int __tgt_target_teams_nowait_mapper( 387 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 388 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 389 map_var_info_t *arg_names, void **arg_mappers, int32_t team_num, 390 int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, 391 void *noAliasDepList) { 392 TIMESCOPE_WITH_IDENT(loc); 393 394 return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base, 395 args, arg_sizes, arg_types, arg_names, 396 arg_mappers, team_num, thread_limit); 397 } 398 399 // Get the current number of components for a user-defined mapper. 400 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) { 401 TIMESCOPE(); 402 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 403 int64_t size = MapperComponentsPtr->Components.size(); 404 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", 405 DPxPTR(rt_mapper_handle), size); 406 return size; 407 } 408 409 // Push back one component for a user-defined mapper. 410 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base, 411 void *begin, int64_t size, int64_t type, 412 void *name) { 413 TIMESCOPE(); 414 DP("__tgt_push_mapper_component(Handle=" DPxMOD 415 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 416 ", Type=0x%" PRIx64 ", Name=%s).\n", 417 DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type, 418 (name) ? getNameFromMapping(name).c_str() : "unknown"); 419 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 420 MapperComponentsPtr->Components.push_back( 421 MapComponentInfoTy(base, begin, size, type, name)); 422 } 423 424 EXTERN void __kmpc_push_target_tripcount(int64_t device_id, 425 uint64_t loop_tripcount) { 426 __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); 427 } 428 429 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, 430 uint64_t loop_tripcount) { 431 TIMESCOPE_WITH_IDENT(loc); 432 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 433 DP("Not offloading to device %" PRId64 "\n", device_id); 434 return; 435 } 436 437 DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, 438 loop_tripcount); 439 PM->TblMapMtx.lock(); 440 PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), 441 loop_tripcount); 442 PM->TblMapMtx.unlock(); 443 } 444 445 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { 446 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); 447 InfoLevel.store(NewInfoLevel); 448 for (auto &R : PM->RTLs.AllRTLs) { 449 if (R.set_info_flag) 450 R.set_info_flag(NewInfoLevel); 451 } 452 } 453 454 EXTERN int __tgt_print_device_info(int64_t device_id) { 455 return PM->Devices[device_id].printDeviceInfo( 456 PM->Devices[device_id].RTLDeviceID); 457 } 458