1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "device.h" 15 #include "omptarget.h" 16 #include "private.h" 17 #include "rtl.h" 18 19 #include <cassert> 20 #include <cstdio> 21 #include <cstdlib> 22 #include <mutex> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds requires flags 26 EXTERN void __tgt_register_requires(int64_t flags) { 27 TIMESCOPE(); 28 PM->RTLs.RegisterRequires(flags); 29 } 30 31 //////////////////////////////////////////////////////////////////////////////// 32 /// adds a target shared library to the target execution image 33 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { 34 TIMESCOPE(); 35 std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); 36 for (auto &RTL : PM->RTLs.AllRTLs) { 37 if (RTL.register_lib) { 38 if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) { 39 DP("Could not register library with %s", RTL.RTLName.c_str()); 40 } 41 } 42 } 43 PM->RTLs.RegisterLib(desc); 44 } 45 46 //////////////////////////////////////////////////////////////////////////////// 47 /// unloads a target shared library 48 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { 49 TIMESCOPE(); 50 PM->RTLs.UnregisterLib(desc); 51 for (auto &RTL : PM->RTLs.UsedRTLs) { 52 if (RTL->unregister_lib) { 53 if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) { 54 DP("Could not register library with %s", RTL->RTLName.c_str()); 55 } 56 } 57 } 58 } 59 60 /// creates host-to-target data mapping, stores it in the 61 /// libomptarget.so internal structure (an entry in a stack of data maps) 62 /// and passes the data to the device. 63 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 64 void **args_base, void **args, 65 int64_t *arg_sizes, int64_t *arg_types) { 66 TIMESCOPE(); 67 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 68 arg_sizes, arg_types, nullptr, nullptr); 69 } 70 71 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, 72 void **args_base, void **args, 73 int64_t *arg_sizes, 74 int64_t *arg_types, int32_t depNum, 75 void *depList, int32_t noAliasDepNum, 76 void *noAliasDepList) { 77 TIMESCOPE(); 78 if (depNum + noAliasDepNum > 0) 79 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 80 81 __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, 82 arg_sizes, arg_types, nullptr, nullptr); 83 } 84 85 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id, 86 int32_t arg_num, void **args_base, 87 void **args, int64_t *arg_sizes, 88 int64_t *arg_types, 89 map_var_info_t *arg_names, 90 void **arg_mappers) { 91 TIMESCOPE_WITH_IDENT(loc); 92 DP("Entering data begin region for device %" PRId64 " with %d mappings\n", 93 device_id, arg_num); 94 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 95 DP("Not offloading to device %" PRId64 "\n", device_id); 96 return; 97 } 98 99 DeviceTy &Device = PM->Devices[device_id]; 100 101 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 102 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 103 arg_names, "Entering OpenMP data region"); 104 #ifdef OMPTARGET_DEBUG 105 for (int i = 0; i < arg_num; ++i) { 106 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 107 ", Type=0x%" PRIx64 ", Name=%s\n", 108 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 109 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 110 } 111 #endif 112 113 AsyncInfoTy AsyncInfo(Device); 114 int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes, 115 arg_types, arg_names, arg_mappers, AsyncInfo); 116 if (rc == OFFLOAD_SUCCESS) 117 rc = AsyncInfo.synchronize(); 118 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 119 } 120 121 EXTERN void __tgt_target_data_begin_nowait_mapper( 122 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 123 void **args, int64_t *arg_sizes, int64_t *arg_types, 124 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 125 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 126 TIMESCOPE_WITH_IDENT(loc); 127 if (depNum + noAliasDepNum > 0) 128 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 129 130 __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args, 131 arg_sizes, arg_types, arg_names, arg_mappers); 132 } 133 134 /// passes data from the target, releases target memory and destroys 135 /// the host-target mapping (top entry from the stack of data maps) 136 /// created by the last __tgt_target_data_begin. 137 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 138 void **args_base, void **args, 139 int64_t *arg_sizes, int64_t *arg_types) { 140 TIMESCOPE(); 141 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 142 arg_sizes, arg_types, nullptr, nullptr); 143 } 144 145 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, 146 void **args_base, void **args, 147 int64_t *arg_sizes, int64_t *arg_types, 148 int32_t depNum, void *depList, 149 int32_t noAliasDepNum, 150 void *noAliasDepList) { 151 TIMESCOPE(); 152 if (depNum + noAliasDepNum > 0) 153 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 154 155 __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, 156 arg_sizes, arg_types, nullptr, nullptr); 157 } 158 159 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id, 160 int32_t arg_num, void **args_base, 161 void **args, int64_t *arg_sizes, 162 int64_t *arg_types, 163 map_var_info_t *arg_names, 164 void **arg_mappers) { 165 TIMESCOPE_WITH_IDENT(loc); 166 DP("Entering data end region with %d mappings\n", arg_num); 167 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 168 DP("Not offloading to device %" PRId64 "\n", device_id); 169 return; 170 } 171 172 DeviceTy &Device = PM->Devices[device_id]; 173 174 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 175 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 176 arg_names, "Exiting OpenMP data region"); 177 #ifdef OMPTARGET_DEBUG 178 for (int i = 0; i < arg_num; ++i) { 179 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 180 ", Type=0x%" PRIx64 ", Name=%s\n", 181 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 182 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 183 } 184 #endif 185 186 AsyncInfoTy AsyncInfo(Device); 187 int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, 188 arg_types, arg_names, arg_mappers, AsyncInfo); 189 if (rc == OFFLOAD_SUCCESS) 190 rc = AsyncInfo.synchronize(); 191 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 192 } 193 194 EXTERN void __tgt_target_data_end_nowait_mapper( 195 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 196 void **args, int64_t *arg_sizes, int64_t *arg_types, 197 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 198 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 199 TIMESCOPE_WITH_IDENT(loc); 200 if (depNum + noAliasDepNum > 0) 201 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 202 203 __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args, 204 arg_sizes, arg_types, arg_names, arg_mappers); 205 } 206 207 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 208 void **args_base, void **args, 209 int64_t *arg_sizes, int64_t *arg_types) { 210 TIMESCOPE(); 211 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 212 arg_sizes, arg_types, nullptr, nullptr); 213 } 214 215 EXTERN void __tgt_target_data_update_nowait( 216 int64_t device_id, int32_t arg_num, void **args_base, void **args, 217 int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, 218 int32_t noAliasDepNum, void *noAliasDepList) { 219 TIMESCOPE(); 220 if (depNum + noAliasDepNum > 0) 221 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 222 223 __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, 224 arg_sizes, arg_types, nullptr, nullptr); 225 } 226 227 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id, 228 int32_t arg_num, void **args_base, 229 void **args, int64_t *arg_sizes, 230 int64_t *arg_types, 231 map_var_info_t *arg_names, 232 void **arg_mappers) { 233 TIMESCOPE_WITH_IDENT(loc); 234 DP("Entering data update with %d mappings\n", arg_num); 235 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 236 DP("Not offloading to device %" PRId64 "\n", device_id); 237 return; 238 } 239 240 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 241 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 242 arg_names, "Updating OpenMP data"); 243 244 DeviceTy &Device = PM->Devices[device_id]; 245 AsyncInfoTy AsyncInfo(Device); 246 int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes, 247 arg_types, arg_names, arg_mappers, AsyncInfo); 248 if (rc == OFFLOAD_SUCCESS) 249 rc = AsyncInfo.synchronize(); 250 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 251 } 252 253 EXTERN void __tgt_target_data_update_nowait_mapper( 254 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 255 void **args, int64_t *arg_sizes, int64_t *arg_types, 256 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 257 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 258 TIMESCOPE_WITH_IDENT(loc); 259 if (depNum + noAliasDepNum > 0) 260 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 261 262 __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args, 263 arg_sizes, arg_types, arg_names, arg_mappers); 264 } 265 266 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, 267 void **args_base, void **args, int64_t *arg_sizes, 268 int64_t *arg_types) { 269 TIMESCOPE(); 270 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 271 args, arg_sizes, arg_types, nullptr, nullptr); 272 } 273 274 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, 275 int32_t arg_num, void **args_base, void **args, 276 int64_t *arg_sizes, int64_t *arg_types, 277 int32_t depNum, void *depList, 278 int32_t noAliasDepNum, void *noAliasDepList) { 279 TIMESCOPE(); 280 if (depNum + noAliasDepNum > 0) 281 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 282 283 return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, 284 args, arg_sizes, arg_types, nullptr, nullptr); 285 } 286 287 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr, 288 int32_t arg_num, void **args_base, void **args, 289 int64_t *arg_sizes, int64_t *arg_types, 290 map_var_info_t *arg_names, void **arg_mappers) { 291 TIMESCOPE_WITH_IDENT(loc); 292 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 293 "\n", 294 DPxPTR(host_ptr), device_id); 295 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 296 DP("Not offloading to device %" PRId64 "\n", device_id); 297 return OFFLOAD_FAIL; 298 } 299 300 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 301 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 302 arg_names, "Entering OpenMP kernel"); 303 #ifdef OMPTARGET_DEBUG 304 for (int i = 0; i < arg_num; ++i) { 305 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 306 ", Type=0x%" PRIx64 ", Name=%s\n", 307 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 308 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 309 } 310 #endif 311 312 DeviceTy &Device = PM->Devices[device_id]; 313 AsyncInfoTy AsyncInfo(Device); 314 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 315 arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, 316 AsyncInfo); 317 if (rc == OFFLOAD_SUCCESS) 318 rc = AsyncInfo.synchronize(); 319 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 320 return rc; 321 } 322 323 EXTERN int __tgt_target_nowait_mapper( 324 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 325 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 326 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 327 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 328 TIMESCOPE_WITH_IDENT(loc); 329 if (depNum + noAliasDepNum > 0) 330 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 331 332 return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args, 333 arg_sizes, arg_types, arg_names, arg_mappers); 334 } 335 336 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, 337 int32_t arg_num, void **args_base, void **args, 338 int64_t *arg_sizes, int64_t *arg_types, 339 int32_t team_num, int32_t thread_limit) { 340 TIMESCOPE(); 341 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 342 args_base, args, arg_sizes, arg_types, 343 nullptr, nullptr, team_num, thread_limit); 344 } 345 346 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, 347 int32_t arg_num, void **args_base, 348 void **args, int64_t *arg_sizes, 349 int64_t *arg_types, int32_t team_num, 350 int32_t thread_limit, int32_t depNum, 351 void *depList, int32_t noAliasDepNum, 352 void *noAliasDepList) { 353 TIMESCOPE(); 354 if (depNum + noAliasDepNum > 0) 355 __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); 356 357 return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, 358 args_base, args, arg_sizes, arg_types, 359 nullptr, nullptr, team_num, thread_limit); 360 } 361 362 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, 363 void *host_ptr, int32_t arg_num, 364 void **args_base, void **args, 365 int64_t *arg_sizes, int64_t *arg_types, 366 map_var_info_t *arg_names, 367 void **arg_mappers, int32_t team_num, 368 int32_t thread_limit) { 369 DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 370 "\n", 371 DPxPTR(host_ptr), device_id); 372 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 373 DP("Not offloading to device %" PRId64 "\n", device_id); 374 return OFFLOAD_FAIL; 375 } 376 377 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) 378 printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, 379 arg_names, "Entering OpenMP kernel"); 380 #ifdef OMPTARGET_DEBUG 381 for (int i = 0; i < arg_num; ++i) { 382 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 383 ", Type=0x%" PRIx64 ", Name=%s\n", 384 i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], 385 (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); 386 } 387 #endif 388 389 DeviceTy &Device = PM->Devices[device_id]; 390 AsyncInfoTy AsyncInfo(Device); 391 int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, 392 arg_types, arg_names, arg_mappers, team_num, thread_limit, 393 true /*team*/, AsyncInfo); 394 if (rc == OFFLOAD_SUCCESS) 395 rc = AsyncInfo.synchronize(); 396 handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); 397 return rc; 398 } 399 400 EXTERN int __tgt_target_teams_nowait_mapper( 401 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 402 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 403 map_var_info_t *arg_names, void **arg_mappers, int32_t team_num, 404 int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, 405 void *noAliasDepList) { 406 TIMESCOPE_WITH_IDENT(loc); 407 if (depNum + noAliasDepNum > 0) 408 __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc)); 409 410 return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base, 411 args, arg_sizes, arg_types, arg_names, 412 arg_mappers, team_num, thread_limit); 413 } 414 415 // Get the current number of components for a user-defined mapper. 416 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) { 417 TIMESCOPE(); 418 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 419 int64_t size = MapperComponentsPtr->Components.size(); 420 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", 421 DPxPTR(rt_mapper_handle), size); 422 return size; 423 } 424 425 // Push back one component for a user-defined mapper. 426 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base, 427 void *begin, int64_t size, int64_t type, 428 void *name) { 429 TIMESCOPE(); 430 DP("__tgt_push_mapper_component(Handle=" DPxMOD 431 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 432 ", Type=0x%" PRIx64 ", Name=%s).\n", 433 DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type, 434 (name) ? getNameFromMapping(name).c_str() : "unknown"); 435 auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle; 436 MapperComponentsPtr->Components.push_back( 437 MapComponentInfoTy(base, begin, size, type, name)); 438 } 439 440 EXTERN void __kmpc_push_target_tripcount(int64_t device_id, 441 uint64_t loop_tripcount) { 442 __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); 443 } 444 445 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, 446 uint64_t loop_tripcount) { 447 TIMESCOPE_WITH_IDENT(loc); 448 if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) { 449 DP("Not offloading to device %" PRId64 "\n", device_id); 450 return; 451 } 452 453 DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, 454 loop_tripcount); 455 PM->TblMapMtx.lock(); 456 PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), 457 loop_tripcount); 458 PM->TblMapMtx.unlock(); 459 } 460