1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is dual licensed under the MIT and the University of Illinois Open 6 // Source Licenses. See LICENSE.txt for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Implementation of the interface to be used by Clang during the codegen of a 11 // target region. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include <omptarget.h> 16 17 #include "device.h" 18 #include "private.h" 19 #include "rtl.h" 20 21 #include <cassert> 22 #include <cstdlib> 23 24 //////////////////////////////////////////////////////////////////////////////// 25 /// adds a target shared library to the target execution image 26 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { 27 RTLs.RegisterLib(desc); 28 } 29 30 //////////////////////////////////////////////////////////////////////////////// 31 /// unloads a target shared library 32 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { 33 RTLs.UnregisterLib(desc); 34 } 35 36 // Following datatypes and functions (tgt_oldmap_type, combined_entry_t, 37 // translate_map, cleanup_map) will be removed once the compiler starts using 38 // the new map types. 39 40 // Old map types 41 enum tgt_oldmap_type { 42 OMP_TGT_OLDMAPTYPE_TO = 0x001, // copy data from host to device 43 OMP_TGT_OLDMAPTYPE_FROM = 0x002, // copy data from device to host 44 OMP_TGT_OLDMAPTYPE_ALWAYS = 0x004, // copy regardless of the ref. count 45 OMP_TGT_OLDMAPTYPE_DELETE = 0x008, // force unmapping of data 46 OMP_TGT_OLDMAPTYPE_MAP_PTR = 0x010, // map pointer as well as pointee 47 OMP_TGT_OLDMAPTYPE_FIRST_MAP = 0x020, // first occurrence of mapped variable 48 OMP_TGT_OLDMAPTYPE_RETURN_PTR = 0x040, // return TgtBase addr of mapped data 49 OMP_TGT_OLDMAPTYPE_PRIVATE_PTR = 0x080, // private variable - not mapped 50 OMP_TGT_OLDMAPTYPE_PRIVATE_VAL = 0x100 // copy by value - not mapped 51 }; 52 53 // Temporary functions for map translation and cleanup 54 struct combined_entry_t { 55 int num_members; // number of members in combined entry 56 void *base_addr; // base address of combined entry 57 void *begin_addr; // begin address of combined entry 58 void *end_addr; // size of combined entry 59 }; 60 61 static void translate_map(int32_t arg_num, void **args_base, void **args, 62 int64_t *arg_sizes, int64_t *arg_types, int32_t &new_arg_num, 63 void **&new_args_base, void **&new_args, int64_t *&new_arg_sizes, 64 int64_t *&new_arg_types, bool is_target_construct) { 65 if (arg_num <= 0) { 66 DP("Nothing to translate\n"); 67 new_arg_num = 0; 68 return; 69 } 70 71 // array of combined entries 72 combined_entry_t *cmb_entries = 73 (combined_entry_t *) alloca(arg_num * sizeof(combined_entry_t)); 74 // number of combined entries 75 long num_combined = 0; 76 // old entry is MAP_PTR? 77 bool *is_ptr_old = (bool *) alloca(arg_num * sizeof(bool)); 78 // old entry is member of member_of[old] cmb_entry 79 int *member_of = (int *) alloca(arg_num * sizeof(int)); 80 // temporary storage for modifications of the original arg_types 81 int64_t *mod_arg_types = (int64_t *) alloca(arg_num *sizeof(int64_t)); 82 83 DP("Translating %d map entries\n", arg_num); 84 for (int i = 0; i < arg_num; ++i) { 85 member_of[i] = -1; 86 is_ptr_old[i] = false; 87 mod_arg_types[i] = arg_types[i]; 88 // Scan previous entries to see whether this entry shares the same base 89 for (int j = 0; j < i; ++j) { 90 void *new_begin_addr = NULL; 91 void *new_end_addr = NULL; 92 93 if (mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_MAP_PTR) { 94 if (args_base[i] == args[j]) { 95 if (!(mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR)) { 96 DP("Entry %d has the same base as entry %d's begin address\n", i, 97 j); 98 new_begin_addr = args_base[i]; 99 new_end_addr = (char *)args_base[i] + sizeof(void *); 100 assert(arg_sizes[j] == sizeof(void *)); 101 is_ptr_old[j] = true; 102 } else { 103 DP("Entry %d has the same base as entry %d's begin address, but " 104 "%d's base was a MAP_PTR too\n", i, j, j); 105 int32_t to_from_always_delete = 106 OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM | 107 OMP_TGT_OLDMAPTYPE_ALWAYS | OMP_TGT_OLDMAPTYPE_DELETE; 108 if (mod_arg_types[j] & to_from_always_delete) { 109 DP("Resetting to/from/always/delete flags for entry %d because " 110 "it is only a pointer to pointer\n", j); 111 mod_arg_types[j] &= ~to_from_always_delete; 112 } 113 } 114 } 115 } else { 116 if (!(mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_FIRST_MAP) && 117 args_base[i] == args_base[j]) { 118 DP("Entry %d has the same base address as entry %d\n", i, j); 119 new_begin_addr = args[i]; 120 new_end_addr = (char *)args[i] + arg_sizes[i]; 121 } 122 } 123 124 // If we have combined the entry with a previous one 125 if (new_begin_addr) { 126 int id; 127 if(member_of[j] == -1) { 128 // We have a new entry 129 id = num_combined++; 130 DP("Creating new combined entry %d for old entry %d\n", id, j); 131 // Initialize new entry 132 cmb_entries[id].num_members = 1; 133 cmb_entries[id].base_addr = args_base[j]; 134 if (mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR) { 135 cmb_entries[id].begin_addr = args_base[j]; 136 cmb_entries[id].end_addr = (char *)args_base[j] + arg_sizes[j]; 137 } else { 138 cmb_entries[id].begin_addr = args[j]; 139 cmb_entries[id].end_addr = (char *)args[j] + arg_sizes[j]; 140 } 141 member_of[j] = id; 142 } else { 143 // Reuse existing combined entry 144 DP("Reusing existing combined entry %d\n", member_of[j]); 145 id = member_of[j]; 146 } 147 148 // Update combined entry 149 DP("Adding entry %d to combined entry %d\n", i, id); 150 cmb_entries[id].num_members++; 151 // base_addr stays the same 152 cmb_entries[id].begin_addr = 153 std::min(cmb_entries[id].begin_addr, new_begin_addr); 154 cmb_entries[id].end_addr = 155 std::max(cmb_entries[id].end_addr, new_end_addr); 156 member_of[i] = id; 157 break; 158 } 159 } 160 } 161 162 DP("New entries: %ld combined + %d original\n", num_combined, arg_num); 163 new_arg_num = arg_num + num_combined; 164 new_args_base = (void **) malloc(new_arg_num * sizeof(void *)); 165 new_args = (void **) malloc(new_arg_num * sizeof(void *)); 166 new_arg_sizes = (int64_t *) malloc(new_arg_num * sizeof(int64_t)); 167 new_arg_types = (int64_t *) malloc(new_arg_num * sizeof(int64_t)); 168 169 const int64_t alignment = 8; 170 171 int next_id = 0; // next ID 172 int next_cid = 0; // next combined ID 173 int *combined_to_new_id = (int *) alloca(num_combined * sizeof(int)); 174 for (int i = 0; i < arg_num; ++i) { 175 // It is member_of 176 if (member_of[i] == next_cid) { 177 int cid = next_cid++; // ID of this combined entry 178 int nid = next_id++; // ID of the new (global) entry 179 combined_to_new_id[cid] = nid; 180 DP("Combined entry %3d will become new entry %3d\n", cid, nid); 181 182 int64_t padding = (int64_t)cmb_entries[cid].begin_addr % alignment; 183 if (padding) { 184 DP("Using a padding of %" PRId64 " for begin address " DPxMOD "\n", 185 padding, DPxPTR(cmb_entries[cid].begin_addr)); 186 cmb_entries[cid].begin_addr = 187 (char *)cmb_entries[cid].begin_addr - padding; 188 } 189 190 new_args_base[nid] = cmb_entries[cid].base_addr; 191 new_args[nid] = cmb_entries[cid].begin_addr; 192 new_arg_sizes[nid] = (int64_t) ((char *)cmb_entries[cid].end_addr - 193 (char *)cmb_entries[cid].begin_addr); 194 new_arg_types[nid] = OMP_TGT_MAPTYPE_TARGET_PARAM; 195 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", " 196 "size %" PRId64 ", type 0x%" PRIx64 "\n", nid, 197 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], 198 new_arg_types[nid]); 199 } else if (member_of[i] != -1) { 200 DP("Combined entry %3d has been encountered before, do nothing\n", 201 member_of[i]); 202 } 203 204 // Now that the combined entry (the one the old entry was a member of) has 205 // been inserted into the new arguments list, proceed with the old entry. 206 int nid = next_id++; 207 DP("Old entry %3d will become new entry %3d\n", i, nid); 208 209 new_args_base[nid] = args_base[i]; 210 new_args[nid] = args[i]; 211 new_arg_sizes[nid] = arg_sizes[i]; 212 int64_t old_type = mod_arg_types[i]; 213 214 if (is_ptr_old[i]) { 215 // Reset TO and FROM flags 216 old_type &= ~(OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM); 217 } 218 219 if (member_of[i] == -1) { 220 if (!is_target_construct) 221 old_type &= ~OMP_TGT_MAPTYPE_TARGET_PARAM; 222 new_arg_types[nid] = old_type; 223 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64 224 ", type 0x%" PRIx64 " (old entry %d not MEMBER_OF)\n", nid, 225 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], 226 new_arg_types[nid], i); 227 } else { 228 // Old entry is not FIRST_MAP 229 old_type &= ~OMP_TGT_OLDMAPTYPE_FIRST_MAP; 230 // Add MEMBER_OF 231 int new_member_of = combined_to_new_id[member_of[i]]; 232 old_type |= ((int64_t)new_member_of + 1) << 48; 233 new_arg_types[nid] = old_type; 234 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64 235 ", type 0x%" PRIx64 " (old entry %d MEMBER_OF %d)\n", nid, 236 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], 237 new_arg_types[nid], i, new_member_of); 238 } 239 } 240 } 241 242 static void cleanup_map(int32_t new_arg_num, void **new_args_base, 243 void **new_args, int64_t *new_arg_sizes, int64_t *new_arg_types, 244 int32_t arg_num, void **args_base) { 245 if (new_arg_num > 0) { 246 int offset = new_arg_num - arg_num; 247 for (int32_t i = 0; i < arg_num; ++i) { 248 // Restore old base address 249 args_base[i] = new_args_base[i+offset]; 250 } 251 free(new_args_base); 252 free(new_args); 253 free(new_arg_sizes); 254 free(new_arg_types); 255 } 256 } 257 258 /// creates host-to-target data mapping, stores it in the 259 /// libomptarget.so internal structure (an entry in a stack of data maps) 260 /// and passes the data to the device. 261 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 262 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 263 DP("Entering data begin region for device %ld with %d mappings\n", device_id, 264 arg_num); 265 266 // No devices available? 267 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 268 device_id = omp_get_default_device(); 269 DP("Use default device id %ld\n", device_id); 270 } 271 272 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 273 DP("Failed to get device %ld ready\n", device_id); 274 return; 275 } 276 277 DeviceTy& Device = Devices[device_id]; 278 279 // Translate maps 280 int32_t new_arg_num; 281 void **new_args_base; 282 void **new_args; 283 int64_t *new_arg_sizes; 284 int64_t *new_arg_types; 285 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 286 new_args_base, new_args, new_arg_sizes, new_arg_types, false); 287 288 //target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); 289 target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes, 290 new_arg_types); 291 292 // Cleanup translation memory 293 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 294 new_arg_types, arg_num, args_base); 295 } 296 297 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, 298 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 299 int32_t depNum, void *depList, int32_t noAliasDepNum, 300 void *noAliasDepList) { 301 if (depNum + noAliasDepNum > 0) 302 __kmpc_omp_taskwait(NULL, 0); 303 304 __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, 305 arg_types); 306 } 307 308 /// passes data from the target, releases target memory and destroys 309 /// the host-target mapping (top entry from the stack of data maps) 310 /// created by the last __tgt_target_data_begin. 311 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 312 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 313 DP("Entering data end region with %d mappings\n", arg_num); 314 315 // No devices available? 316 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 317 device_id = omp_get_default_device(); 318 } 319 320 RTLsMtx.lock(); 321 size_t Devices_size = Devices.size(); 322 RTLsMtx.unlock(); 323 if (Devices_size <= (size_t)device_id) { 324 DP("Device ID %ld does not have a matching RTL.\n", device_id); 325 return; 326 } 327 328 DeviceTy &Device = Devices[device_id]; 329 if (!Device.IsInit) { 330 DP("uninit device: ignore"); 331 return; 332 } 333 334 // Translate maps 335 int32_t new_arg_num; 336 void **new_args_base; 337 void **new_args; 338 int64_t *new_arg_sizes; 339 int64_t *new_arg_types; 340 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 341 new_args_base, new_args, new_arg_sizes, new_arg_types, false); 342 343 //target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); 344 target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes, 345 new_arg_types); 346 347 // Cleanup translation memory 348 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 349 new_arg_types, arg_num, args_base); 350 } 351 352 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, 353 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 354 int32_t depNum, void *depList, int32_t noAliasDepNum, 355 void *noAliasDepList) { 356 if (depNum + noAliasDepNum > 0) 357 __kmpc_omp_taskwait(NULL, 0); 358 359 __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, 360 arg_types); 361 } 362 363 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 364 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 365 DP("Entering data update with %d mappings\n", arg_num); 366 367 // No devices available? 368 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 369 device_id = omp_get_default_device(); 370 } 371 372 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 373 DP("Failed to get device %ld ready\n", device_id); 374 return; 375 } 376 377 DeviceTy& Device = Devices[device_id]; 378 target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types); 379 } 380 381 EXTERN void __tgt_target_data_update_nowait( 382 int64_t device_id, int32_t arg_num, void **args_base, void **args, 383 int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, 384 int32_t noAliasDepNum, void *noAliasDepList) { 385 if (depNum + noAliasDepNum > 0) 386 __kmpc_omp_taskwait(NULL, 0); 387 388 __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, 389 arg_types); 390 } 391 392 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, 393 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { 394 DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", 395 DPxPTR(host_ptr), device_id); 396 397 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 398 device_id = omp_get_default_device(); 399 } 400 401 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 402 DP("Failed to get device %ld ready\n", device_id); 403 return OFFLOAD_FAIL; 404 } 405 406 // Translate maps 407 int32_t new_arg_num; 408 void **new_args_base; 409 void **new_args; 410 int64_t *new_arg_sizes; 411 int64_t *new_arg_types; 412 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 413 new_args_base, new_args, new_arg_sizes, new_arg_types, true); 414 415 //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, 416 // arg_types, 0, 0, false /*team*/, false /*recursive*/); 417 int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args, 418 new_arg_sizes, new_arg_types, 0, 0, false /*team*/); 419 420 // Cleanup translation memory 421 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 422 new_arg_types, arg_num, args_base); 423 424 return rc; 425 } 426 427 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, 428 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, 429 int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, 430 void *noAliasDepList) { 431 if (depNum + noAliasDepNum > 0) 432 __kmpc_omp_taskwait(NULL, 0); 433 434 return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, 435 arg_types); 436 } 437 438 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, 439 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, 440 int64_t *arg_types, int32_t team_num, int32_t thread_limit) { 441 DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", 442 DPxPTR(host_ptr), device_id); 443 444 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 445 device_id = omp_get_default_device(); 446 } 447 448 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 449 DP("Failed to get device %ld ready\n", device_id); 450 return OFFLOAD_FAIL; 451 } 452 453 // Translate maps 454 int32_t new_arg_num; 455 void **new_args_base; 456 void **new_args; 457 int64_t *new_arg_sizes; 458 int64_t *new_arg_types; 459 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, 460 new_args_base, new_args, new_arg_sizes, new_arg_types, true); 461 462 //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, 463 // arg_types, team_num, thread_limit, true /*team*/, 464 // false /*recursive*/); 465 int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args, 466 new_arg_sizes, new_arg_types, team_num, thread_limit, true /*team*/); 467 468 // Cleanup translation memory 469 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, 470 new_arg_types, arg_num, args_base); 471 472 return rc; 473 } 474 475 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, 476 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, 477 int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, 478 void *depList, int32_t noAliasDepNum, void *noAliasDepList) { 479 if (depNum + noAliasDepNum > 0) 480 __kmpc_omp_taskwait(NULL, 0); 481 482 return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, 483 arg_sizes, arg_types, team_num, thread_limit); 484 } 485 486 487 // The trip count mechanism will be revised - this scheme is not thread-safe. 488 EXTERN void __kmpc_push_target_tripcount(int64_t device_id, 489 uint64_t loop_tripcount) { 490 if (device_id == OFFLOAD_DEVICE_DEFAULT) { 491 device_id = omp_get_default_device(); 492 } 493 494 if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { 495 DP("Failed to get device %ld ready\n", device_id); 496 return; 497 } 498 499 DP("__kmpc_push_target_tripcount(%ld, %" PRIu64 ")\n", device_id, 500 loop_tripcount); 501 Devices[device_id].loopTripCnt = loop_tripcount; 502 } 503