1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is dual licensed under the MIT and the University of Illinois Open 6 // Source Licenses. See LICENSE.txt for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Implementation of the interface to be used by Clang during the codegen of a 11 // target region. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include <omptarget.h> 16 17 #include "device.h" 18 #include "private.h" 19 #include "rtl.h" 20 21 #include <cassert> 22 23 //////////////////////////////////////////////////////////////////////////////// 24 /// adds a target shared library to the target execution image 25 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { 26 RTLs.RegisterLib(desc); 27 } 28 29 //////////////////////////////////////////////////////////////////////////////// 30 /// unloads a target shared library 31 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { 32 RTLs.UnregisterLib(desc); 33 } 34 35 // Following datatypes and functions (tgt_oldmap_type, combined_entry_t, 36 // translate_map, cleanup_map) will be removed once the compiler starts using 37 // the new map types. 38 39 // Old map types 40 enum tgt_oldmap_type { 41 OMP_TGT_OLDMAPTYPE_TO = 0x001, // copy data from host to device 42 OMP_TGT_OLDMAPTYPE_FROM = 0x002, // copy data from device to host 43 OMP_TGT_OLDMAPTYPE_ALWAYS = 0x004, // copy regardless of the ref. count 44 OMP_TGT_OLDMAPTYPE_DELETE = 0x008, // force unmapping of data 45 OMP_TGT_OLDMAPTYPE_MAP_PTR = 0x010, // map pointer as well as pointee 46 OMP_TGT_OLDMAPTYPE_FIRST_MAP = 0x020, // first occurrence of mapped variable 47 OMP_TGT_OLDMAPTYPE_RETURN_PTR = 0x040, // return TgtBase addr of mapped data 48 OMP_TGT_OLDMAPTYPE_PRIVATE_PTR = 0x080, // private variable - not mapped 49 OMP_TGT_OLDMAPTYPE_PRIVATE_VAL = 0x100 // copy by value - not mapped 50 }; 51 52 // Temporary functions for map translation and cleanup 53 struct combined_entry_t { 54 int num_members; // number of members in combined entry 55 void *base_addr; // base address of combined entry 56 void *begin_addr; // begin address of combined entry 57 void *end_addr; // size of combined entry 58 }; 59 60 static void translate_map(int32_t arg_num, void **args_base, void **args, 61 int64_t *arg_sizes, int64_t *arg_types, int32_t &new_arg_num, 62 void **&new_args_base, void **&new_args, int64_t *&new_arg_sizes, 63 int64_t *&new_arg_types, bool is_target_construct) { 64 if (arg_num <= 0) { 65 DP("Nothing to translate\n"); 66 new_arg_num = 0; 67 return; 68 } 69 70 // array of combined entries 71 combined_entry_t *cmb_entries = 72 (combined_entry_t *) alloca(arg_num * sizeof(combined_entry_t)); 73 // number of combined entries 74 long num_combined = 0; 75 // old entry is MAP_PTR? 76 bool *is_ptr_old = (bool *) alloca(arg_num * sizeof(bool)); 77 // old entry is member of member_of[old] cmb_entry 78 int *member_of = (int *) alloca(arg_num * sizeof(int)); 79 // temporary storage for modifications of the original arg_types 80 int64_t *mod_arg_types = (int64_t *) alloca(arg_num *sizeof(int64_t)); 81 82 DP("Translating %d map entries\n", arg_num); 83 for (int i = 0; i < arg_num; ++i) { 84 member_of[i] = -1; 85 is_ptr_old[i] = false; 86 mod_arg_types[i] = arg_types[i]; 87 // Scan previous entries to see whether this entry shares the same base 88 for (int j = 0; j < i; ++j) { 89 void *new_begin_addr = NULL; 90 void *new_end_addr = NULL; 91 92 if (mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_MAP_PTR) { 93 if (args_base[i] == args[j]) { 94 if (!(mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR)) { 95 DP("Entry %d has the same base as entry %d's begin address\n", i, 96 j); 97 new_begin_addr = args_base[i]; 98 new_end_addr = (char *)args_base[i] + sizeof(void *); 99 assert(arg_sizes[j] == sizeof(void *)); 100 is_ptr_old[j] = true; 101 } else { 102 DP("Entry %d has the same base as entry %d's begin address, but " 103 "%d's base was a MAP_PTR too\n", i, j, j); 104 int32_t to_from_always_delete = 105 OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM | 106 OMP_TGT_OLDMAPTYPE_ALWAYS | OMP_TGT_OLDMAPTYPE_DELETE; 107 if (mod_arg_types[j] & to_from_always_delete) { 108 DP("Resetting to/from/always/delete flags for entry %d because " 109 "it is only a pointer to pointer\n", j); 110 mod_arg_types[j] &= ~to_from_always_delete; 111 } 112 } 113 } 114 } else { 115 if (!(mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_FIRST_MAP) && 116 args_base[i] == args_base[j]) { 117 DP("Entry %d has the same base address as entry %d\n", i, j); 118 new_begin_addr = args[i]; 119 new_end_addr = (char *)args[i] + arg_sizes[i]; 120 } 121 } 122 123 // If we have combined the entry with a previous one 124 if (new_begin_addr) { 125 int id; 126 if(member_of[j] == -1) { 127 // We have a new entry 128 id = num_combined++; 129 DP("Creating new combined entry %d for old entry %d\n", id, j); 130 // Initialize new entry 131 cmb_entries[id].num_members = 1; 132 cmb_entries[id].base_addr = args_base[j]; 133 if (mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR) { 134 cmb_entries[id].begin_addr = args_base[j]; 135 cmb_entries[id].end_addr = (char *)args_base[j] + arg_sizes[j]; 136 } else { 137 cmb_entries[id].begin_addr = args[j]; 138 cmb_entries[id].end_addr = (char *)args[j] + arg_sizes[j]; 139 } 140 member_of[j] = id; 141 } else { 142 // Reuse existing combined entry 143 DP("Reusing existing combined entry %d\n", member_of[j]); 144 id = member_of[j]; 145 } 146 147 // Update combined entry 148 DP("Adding entry %d to combined entry %d\n", i, id); 149 cmb_entries[id].num_members++; 150 // base_addr stays the same 151 cmb_entries[id].begin_addr = 152 std::min(cmb_entries[id].begin_addr, new_begin_addr); 153 cmb_entries[id].end_addr = 154 std::max(cmb_entries[id].end_addr, new_end_addr); 155 member_of[i] = id; 156 break; 157 } 158 } 159 } 160 161 DP("New entries: %ld combined + %d original\n", num_combined, arg_num); 162 new_arg_num = arg_num + num_combined; 163 new_args_base = (void **) malloc(new_arg_num * sizeof(void *)); 164 new_args = (void **) malloc(new_arg_num * sizeof(void *)); 165 new_arg_sizes = (int64_t *) malloc(new_arg_num * sizeof(int64_t)); 166 new_arg_types = (int64_t *) malloc(new_arg_num * sizeof(int64_t)); 167 168 const int64_t alignment = 8; 169 170 int next_id = 0; // next ID 171 int next_cid = 0; // next combined ID 172 int *combined_to_new_id = (int *) alloca(num_combined * sizeof(int)); 173 for (int i = 0; i < arg_num; ++i) { 174 // It is member_of 175 if (member_of[i] == next_cid) { 176 int cid = next_cid++; // ID of this combined entry 177 int nid = next_id++; // ID of the new (global) entry 178 combined_to_new_id[cid] = nid; 179 DP("Combined entry %3d will become new entry %3d\n", cid, nid); 180 181 int64_t padding = (int64_t)cmb_entries[cid].begin_addr % alignment; 182 if (padding) { 183 DP("Using a padding of %" PRId64 " for begin address " DPxMOD "\n", 184 padding, DPxPTR(cmb_entries[cid].begin_addr)); 185 cmb_entries[cid].begin_addr = 186 (char *)cmb_entries[cid].begin_addr - padding; 187 } 188 189 new_args_base[nid] = cmb_entries[cid].base_addr; 190 new_args[nid] = cmb_entries[cid].begin_addr; 191 new_arg_sizes[nid] = (int64_t) ((char *)cmb_entries[cid].end_addr - 192 (char *)cmb_entries[cid].begin_addr); 193 new_arg_types[nid] = OMP_TGT_MAPTYPE_TARGET_PARAM; 194 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", " 195 "size %" PRId64 ", type 0x%" PRIx64 "\n", nid, 196 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], 197 new_arg_types[nid]); 198 } else if (member_of[i] != -1) { 199 DP("Combined entry %3d has been encountered before, do nothing\n", 200 member_of[i]); 201 } 202 203 // Now that the combined entry (the one the old entry was a member of) has 204 // been inserted into the new arguments list, proceed with the old entry. 205 int nid = next_id++; 206 DP("Old entry %3d will become new entry %3d\n", i, nid); 207 208 new_args_base[nid] = args_base[i]; 209 new_args[nid] = args[i]; 210 new_arg_sizes[nid] = arg_sizes[i]; 211 int64_t old_type = mod_arg_types[i]; 212 213 if (is_ptr_old[i]) { 214 // Reset TO and FROM flags 215 old_type &= ~(OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM); 216 } 217 218 if (member_of[i] == -1) { 219 if (!is_target_construct) 220 old_type &= ~OMP_TGT_MAPTYPE_TARGET_PARAM; 221 new_arg_types[nid] = old_type; 222 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64 223 ", type 0x%" PRIx64 " (old entry %d not MEMBER_OF)\n", nid, 224 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], 225 new_arg_types[nid], i); 226 } else { 227 // Old entry is not FIRST_MAP 228 old_type &= ~OMP_TGT_OLDMAPTYPE_FIRST_MAP; 229 // Add MEMBER_OF 230 int new_member_of = combined_to_new_id[member_of[i]]; 231 old_type |= ((int64_t)new_member_of + 1) << 48; 232 new_arg_types[nid] = old_type; 233 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64 234 ", type 0x%" PRIx64 " (old entry %d MEMBER_OF %d)\n", nid, 235 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], 236 new_arg_types[nid], i, new_member_of); 237 } 238 } 239 } 240 241 static void cleanup_map(int32_t new_arg_num, void **new_args_base, 242 void **new_args, int64_t *new_arg_sizes, int64_t *new_arg_types, 243 int32_t arg_num, void **args_base) { 244 if (new_arg_num > 0) { 245 int offset = new_arg_num - arg_num; 246 for (int32_t i = 0; i < arg_num; ++i) { 247 // Restore old base address 248 args_base[i] = new_args_base[i+offset]; 249 } 250 free(new_args_base); 251 free(new_args); 252 free(new_arg_sizes); 253 free(new_arg_types); 254 } 255 } 256 257 /// creates host-to-target data mapping, stores it in the 258 /// libomptarget.so internal structure (an entry in a stack of data maps) 259 /// and passes the data to the device. 260 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 261 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 262 DP("Entering data begin region for device %ld with %d mappings\n", device_id, 263 arg_num); 264 265 // No devices available? 266 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 267 device_id = omp_get_default_device(); 268 DP("Use default device id %ld\n", device_id); 269 } 270 271 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 272 DP("Failed to get device %ld ready\n", device_id); 273 return; 274 } 275 276 DeviceTy& Device = Devices[device_id]; 277 278 // Translate maps 279 int32_t new_arg_num; 280 void **new_args_base; 281 void **new_args; 282 int64_t *new_arg_sizes; 283 int64_t *new_arg_types; 284 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 285 new_args_base, new_args, new_arg_sizes, new_arg_types, false); 286 287 //target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); 288 target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes, 289 new_arg_types); 290 291 // Cleanup translation memory 292 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 293 new_arg_types, arg_num, args_base); 294 } 295 296 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, 297 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 298 int32_t depNum, void *depList, int32_t noAliasDepNum, 299 void *noAliasDepList) { 300 if (depNum + noAliasDepNum > 0) 301 __kmpc_omp_taskwait(NULL, 0); 302 303 __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, 304 arg_types); 305 } 306 307 /// passes data from the target, releases target memory and destroys 308 /// the host-target mapping (top entry from the stack of data maps) 309 /// created by the last __tgt_target_data_begin. 310 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 311 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 312 DP("Entering data end region with %d mappings\n", arg_num); 313 314 // No devices available? 315 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 316 device_id = omp_get_default_device(); 317 } 318 319 RTLsMtx.lock(); 320 size_t Devices_size = Devices.size(); 321 RTLsMtx.unlock(); 322 if (Devices_size <= (size_t)device_id) { 323 DP("Device ID %ld does not have a matching RTL.\n", device_id); 324 return; 325 } 326 327 DeviceTy &Device = Devices[device_id]; 328 if (!Device.IsInit) { 329 DP("uninit device: ignore"); 330 return; 331 } 332 333 // Translate maps 334 int32_t new_arg_num; 335 void **new_args_base; 336 void **new_args; 337 int64_t *new_arg_sizes; 338 int64_t *new_arg_types; 339 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 340 new_args_base, new_args, new_arg_sizes, new_arg_types, false); 341 342 //target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); 343 target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes, 344 new_arg_types); 345 346 // Cleanup translation memory 347 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 348 new_arg_types, arg_num, args_base); 349 } 350 351 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, 352 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 353 int32_t depNum, void *depList, int32_t noAliasDepNum, 354 void *noAliasDepList) { 355 if (depNum + noAliasDepNum > 0) 356 __kmpc_omp_taskwait(NULL, 0); 357 358 __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, 359 arg_types); 360 } 361 362 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 363 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 364 DP("Entering data update with %d mappings\n", arg_num); 365 366 // No devices available? 367 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 368 device_id = omp_get_default_device(); 369 } 370 371 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 372 DP("Failed to get device %ld ready\n", device_id); 373 return; 374 } 375 376 DeviceTy& Device = Devices[device_id]; 377 target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types); 378 } 379 380 EXTERN void __tgt_target_data_update_nowait( 381 int64_t device_id, int32_t arg_num, void **args_base, void **args, 382 int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, 383 int32_t noAliasDepNum, void *noAliasDepList) { 384 if (depNum + noAliasDepNum > 0) 385 __kmpc_omp_taskwait(NULL, 0); 386 387 __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, 388 arg_types); 389 } 390 391 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, 392 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 393 DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", 394 DPxPTR(host_ptr), device_id); 395 396 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 397 device_id = omp_get_default_device(); 398 } 399 400 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 401 DP("Failed to get device %ld ready\n", device_id); 402 return OFFLOAD_FAIL; 403 } 404 405 // Translate maps 406 int32_t new_arg_num; 407 void **new_args_base; 408 void **new_args; 409 int64_t *new_arg_sizes; 410 int64_t *new_arg_types; 411 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 412 new_args_base, new_args, new_arg_sizes, new_arg_types, true); 413 414 //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, 415 // arg_types, 0, 0, false /*team*/, false /*recursive*/); 416 int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args, 417 new_arg_sizes, new_arg_types, 0, 0, false /*team*/); 418 419 // Cleanup translation memory 420 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 421 new_arg_types, arg_num, args_base); 422 423 return rc; 424 } 425 426 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, 427 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, 428 int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, 429 void *noAliasDepList) { 430 if (depNum + noAliasDepNum > 0) 431 __kmpc_omp_taskwait(NULL, 0); 432 433 return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, 434 arg_types); 435 } 436 437 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, 438 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, 439 int64_t *arg_types, int32_t team_num, int32_t thread_limit) { 440 DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", 441 DPxPTR(host_ptr), device_id); 442 443 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 444 device_id = omp_get_default_device(); 445 } 446 447 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 448 DP("Failed to get device %ld ready\n", device_id); 449 return OFFLOAD_FAIL; 450 } 451 452 // Translate maps 453 int32_t new_arg_num; 454 void **new_args_base; 455 void **new_args; 456 int64_t *new_arg_sizes; 457 int64_t *new_arg_types; 458 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 459 new_args_base, new_args, new_arg_sizes, new_arg_types, true); 460 461 //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, 462 // arg_types, team_num, thread_limit, true /*team*/, 463 // false /*recursive*/); 464 int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args, 465 new_arg_sizes, new_arg_types, team_num, thread_limit, true /*team*/); 466 467 // Cleanup translation memory 468 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 469 new_arg_types, arg_num, args_base); 470 471 return rc; 472 } 473 474 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, 475 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, 476 int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, 477 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 478 if (depNum + noAliasDepNum > 0) 479 __kmpc_omp_taskwait(NULL, 0); 480 481 return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, 482 arg_sizes, arg_types, team_num, thread_limit); 483 } 484 485 486 // The trip count mechanism will be revised - this scheme is not thread-safe. 487 EXTERN void __kmpc_push_target_tripcount(int64_t device_id, 488 uint64_t loop_tripcount) { 489 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 490 device_id = omp_get_default_device(); 491 } 492 493 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 494 DP("Failed to get device %ld ready\n", device_id); 495 return; 496 } 497 498 DP("__kmpc_push_target_tripcount(%ld, %" PRIu64 ")\n", device_id, 499 loop_tripcount); 500 Devices[device_id].loopTripCnt = loop_tripcount; 501 } 502