//===--------- device.cpp - Target independent OpenMP target RTL ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Functionality for managing devices that are handled by RTL plugins.
//
//===----------------------------------------------------------------------===//

#include "device.h"
#include "omptarget.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <climits>
#include <cstdio>
#include <string>

/// Record (and lazily create) a plugin event on this mapping entry so later
/// operations on the same entry can be ordered after the data transfer
/// currently queued on \p AsyncInfo. Returns OFFLOAD_SUCCESS/OFFLOAD_FAIL.
int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
                                            AsyncInfoTy &AsyncInfo) const {
  // First, check if the user disabled atomic map transfer/malloc/dealloc.
  if (!PM->UseEventsForAtomicTransfers)
    return OFFLOAD_SUCCESS;

  void *Event = getEvent();
  bool NeedNewEvent = Event == nullptr;
  if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
    REPORT("Failed to create event\n");
    return OFFLOAD_FAIL;
  }

  // We cannot assume the event should not be nullptr because we don't
  // know if the target support event. But if a target doesn't,
  // recordEvent should always return success.
  if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
    REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
    return OFFLOAD_FAIL;
  }

  // Only publish the event on the entry if this call created it.
  if (NeedNewEvent)
    setEvent(Event);

  return OFFLOAD_SUCCESS;
}

DeviceTy::DeviceTy(RTLInfoTy *RTL)
    : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
      HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(),
      ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx() {}

DeviceTy::~DeviceTy() {
  // Dump remaining host<->device mappings, but only for devices that were
  // actually assigned an ID and only if the user requested the table.
  if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
    return;

  ident_t loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
  dumpTargetPointerMappings(&loc, *this);
}

/// Register an externally-allocated device pointer for a host range
/// (omp_target_associate_ptr). The entry is created with an infinite dynamic
/// reference count so ordinary map/unmap pairs never delete it. Fails if the
/// host pointer is already associated with a different device pointer or size.
int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
  std::lock_guard<decltype(DataMapMtx)> LG(DataMapMtx);

  // Check if entry exists
  auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
  if (search != HostDataToTargetMap.end()) {
    // Mapping already exists
    bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
                   search->TgtPtrBegin == (uintptr_t)TgtPtrBegin;
    if (isValid) {
      DP("Attempt to re-associate the same device ptr+offset with the same "
         "host ptr, nothing to do\n");
      return OFFLOAD_SUCCESS;
    } else {
      REPORT("Not allowed to re-associate a different device ptr+offset with "
             "the same host ptr\n");
      return OFFLOAD_FAIL;
    }
  }

  // Mapping does not exist, allocate it with refCount=INF
  const HostDataToTargetTy &newEntry =
      *HostDataToTargetMap
           .emplace(
               /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
               /*HstPtrBegin=*/(uintptr_t)HstPtrBegin,
               /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size,
               /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin,
               /*UseHoldRefCount=*/false, /*Name=*/nullptr,
               /*IsRefCountINF=*/true)
           .first;
  DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD
     ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, "
     "HoldRefCount=%s\n",
     DPxPTR(newEntry.HstPtrBase), DPxPTR(newEntry.HstPtrBegin),
     DPxPTR(newEntry.HstPtrEnd), DPxPTR(newEntry.TgtPtrBegin),
     newEntry.dynRefCountToStr().c_str(), newEntry.holdRefCountToStr().c_str());
  (void)newEntry; // Silence unused-variable warning when DP compiles away.

  return OFFLOAD_SUCCESS;
}

/// Remove an association created by associatePtr
/// (omp_target_disassociate_ptr). Only entries with an infinite dynamic
/// reference count and a zero hold reference count may be removed; any
/// attached event is destroyed with the entry.
int DeviceTy::disassociatePtr(void *HstPtrBegin) {
  std::lock_guard<decltype(DataMapMtx)> LG(DataMapMtx);

  auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
  if (search != HostDataToTargetMap.end()) {
    // Mapping exists
    if (search->getHoldRefCount()) {
      // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657:
      // "It is an error to call acc_unmap_data if the structured reference
      // count for the pointer is not zero."
      REPORT("Trying to disassociate a pointer with a non-zero hold reference "
             "count\n");
    } else if (search->isDynRefCountInf()) {
      DP("Association found, removing it\n");
      // Destroy the event attached to the entry, if any, before erasing it.
      void *Event = search->getEvent();
      if (Event)
        destroyEvent(Event);
      HostDataToTargetMap.erase(search);
      return OFFLOAD_SUCCESS;
    } else {
      REPORT("Trying to disassociate a pointer which was not mapped via "
             "omp_target_associate_ptr\n");
    }
  } else {
    REPORT("Association not found\n");
  }

  // Mapping not found
  return OFFLOAD_FAIL;
}

/// Find the map-table entry (if any) overlapping [HstPtrBegin, HstPtrBegin +
/// Size). The result flags classify the overlap: fully contained, extending
/// before the entry's range, and/or extending past its end.
/// NOTE(review): this routine does not take DataMapMtx itself; callers appear
/// responsible for holding it (the image-load path deliberately calls it
/// without locking) — confirm against the callers.
LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
  uintptr_t hp = (uintptr_t)HstPtrBegin;
  LookupResult lr;

  DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n",
     DPxPTR(hp), Size);

  if (HostDataToTargetMap.empty())
    return lr;

  // The map is keyed by HstPtrBegin, so the only candidates are the entry
  // just below hp and the entry just above it.
  auto upper = HostDataToTargetMap.upper_bound(hp);
  // check the left bin
  if (upper != HostDataToTargetMap.begin()) {
    lr.Entry = std::prev(upper);
    auto &HT = *lr.Entry;
    // Is it contained?
    lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
                           (hp + Size) <= HT.HstPtrEnd;
    // Does it extend beyond the mapped region?
    lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
  }

  // check the right bin
  if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
      upper != HostDataToTargetMap.end()) {
    lr.Entry = upper;
    auto &HT = *lr.Entry;
    // Does it extend into an already mapped region?
    lr.Flags.ExtendsBefore =
        hp < HT.HstPtrBegin && (hp + Size) > HT.HstPtrBegin;
    // Does it extend beyond the mapped region?
    lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
  }

  if (lr.Flags.ExtendsBefore) {
    DP("WARNING: Pointer is not mapped but section extends into already "
       "mapped data\n");
  }
  if (lr.Flags.ExtendsAfter) {
    DP("WARNING: Pointer is already mapped but section extends beyond mapped "
       "region\n");
  }

  return lr;
}

/// Map a host range to the device (targetDataBegin path). Either reuses an
/// existing entry (adjusting dyn/hold reference counts), returns the host
/// pointer directly under unified shared memory, reports a 'present' failure,
/// or allocates device memory and creates a new entry. Issues the
/// host-to-device copy when required and uses per-entry locks/events so
/// concurrent threads observe a consistent transfer. Returns {IsNew,
/// IsHostPtr}, the map-table entry, and the target pointer (nullptr on
/// failure).
TargetPointerResultTy DeviceTy::getTargetPointer(
    void *HstPtrBegin, void *HstPtrBase, int64_t Size,
    map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways,
    bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
    bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) {
  void *TargetPointer = nullptr;
  bool IsHostPtr = false;
  bool IsNew = false;

  // Manual lock/unlock: the mutex is released on different paths below, in
  // one case only after the entry's own lock has been taken.
  DataMapMtx.lock();

  LookupResult LR = lookupMapping(HstPtrBegin, Size);
  auto Entry = LR.Entry;

  // Check if the pointer is contained.
  // If a variable is mapped to the device manually by the user - which would
  // lead to the IsContained flag to be true - then we must ensure that the
  // device address is returned even under unified memory conditions.
  if (LR.Flags.IsContained ||
      ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) {
    auto &HT = *LR.Entry;
    const char *RefCountAction;
    assert(HT.getTotalRefCount() > 0 && "expected existing RefCount > 0");
    if (UpdateRefCount) {
      // After this, RefCount > 1.
      HT.incRefCount(HasHoldModifier);
      RefCountAction = " (incremented)";
    } else {
      // It might have been allocated with the parent, but it's still new.
      IsNew = HT.getTotalRefCount() == 1;
      RefCountAction = " (update suppressed)";
    }
    const char *DynRefCountAction = HasHoldModifier ? "" : RefCountAction;
    const char *HoldRefCountAction = HasHoldModifier ? RefCountAction : "";
    uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
    INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
         "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
         ", Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n",
         (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr),
         Size, HT.dynRefCountToStr().c_str(), DynRefCountAction,
         HT.holdRefCountToStr().c_str(), HoldRefCountAction,
         (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
    TargetPointer = (void *)Ptr;
  } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) {
    // Explicit extension of mapped data - not allowed.
    MESSAGE("explicit extension not allowed: host address specified is " DPxMOD
            " (%" PRId64
            " bytes), but device allocation maps to host at " DPxMOD
            " (%" PRId64 " bytes)",
            DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin),
            Entry->HstPtrEnd - Entry->HstPtrBegin);
    if (HasPresentModifier)
      MESSAGE("device mapping required by 'present' map type modifier does not "
              "exist for host address " DPxMOD " (%" PRId64 " bytes)",
              DPxPTR(HstPtrBegin), Size);
  } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
             !HasCloseModifier) {
    // If unified shared memory is active, implicitly mapped variables that are
    // not privatized use host address. Any explicitly mapped variables also use
    // host address where correctness is not impeded. In all other cases maps
    // are respected.
    // In addition to the mapping rules above, the close map modifier forces the
    // mapping of the variable to the device.
    if (Size) {
      DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
         "memory\n",
         DPxPTR((uintptr_t)HstPtrBegin), Size);
      IsHostPtr = true;
      TargetPointer = HstPtrBegin;
    }
  } else if (HasPresentModifier) {
    DP("Mapping required by 'present' map type modifier does not exist for "
       "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n",
       DPxPTR(HstPtrBegin), Size);
    MESSAGE("device mapping required by 'present' map type modifier does not "
            "exist for host address " DPxMOD " (%" PRId64 " bytes)",
            DPxPTR(HstPtrBegin), Size);
  } else if (Size) {
    // If it is not contained and Size > 0, we should create a new entry for it.
    IsNew = true;
    uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
    Entry = HostDataToTargetMap
                .emplace((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
                         (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
                         HstPtrName)
                .first;
    INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
         "Creating new map entry with "
         "HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, "
         "DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
         DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size,
         Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(),
         (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
    TargetPointer = (void *)Ptr;
  }

  // If the target pointer is valid, and we need to transfer data, issue the
  // data transfer.
  if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways)) {
    // Lock the entry before releasing the mapping table lock such that another
    // thread that could issue data movement will get the right result.
    std::lock_guard<decltype(*Entry)> LG(*Entry);
    // Release the mapping table lock right after the entry is locked.
    DataMapMtx.unlock();

    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
       DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));

    int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
    if (Ret != OFFLOAD_SUCCESS) {
      REPORT("Copying data to device failed.\n");
      // We will also return nullptr if the data movement fails because that
      // pointer points to a corrupted memory region so it doesn't make any
      // sense to continue to use it.
      TargetPointer = nullptr;
    } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != OFFLOAD_SUCCESS)
      return {{false /* IsNewEntry */, false /* IsHostPointer */},
              {} /* MapTableEntry */,
              nullptr /* TargetPointer */};
  } else {
    // Release the mapping table lock directly.
    DataMapMtx.unlock();
    // If not a host pointer and no present modifier, we need to wait for the
    // event if it exists.
    // Note: Entry might be nullptr because of zero length array section.
    if (Entry != HostDataToTargetListTy::iterator() && !IsHostPtr &&
        !HasPresentModifier) {
      std::lock_guard<decltype(*Entry)> LG(*Entry);
      void *Event = Entry->getEvent();
      if (Event) {
        int Ret = waitEvent(Event, AsyncInfo);
        if (Ret != OFFLOAD_SUCCESS) {
          // If it fails to wait for the event, we need to return nullptr in
          // case of any data race.
          REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
          return {{false /* IsNewEntry */, false /* IsHostPointer */},
                  {} /* MapTableEntry */,
                  nullptr /* TargetPointer */};
        }
      }
    }
  }

  return {{IsNew, IsHostPtr}, Entry, TargetPointer};
}

// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
// Return the target pointer begin (where the data will be moved).
// Decrement the reference counter if called from targetDataEnd.
// When the count would reach zero, the decrement is *deferred*: IsLast is set
// and deallocTgtPtr performs the final decrement atomically with removal.
TargetPointerResultTy
DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
                         bool UpdateRefCount, bool UseHoldRefCount,
                         bool &IsHostPtr, bool MustContain, bool ForceDelete) {
  void *TargetPointer = NULL;
  bool IsNew = false;
  IsHostPtr = false;
  IsLast = false;
  std::lock_guard<decltype(DataMapMtx)> LG(DataMapMtx);
  LookupResult lr = lookupMapping(HstPtrBegin, Size);

  if (lr.Flags.IsContained ||
      (!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter))) {
    auto &HT = *lr.Entry;
    // We do not zero the total reference count here. deallocTgtPtr does that
    // atomically with removing the mapping. Otherwise, before this thread
    // removed the mapping in deallocTgtPtr, another thread could retrieve the
    // mapping, increment and decrement back to zero, and then both threads
    // would try to remove the mapping, resulting in a double free.
    IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);
    const char *RefCountAction;
    if (!UpdateRefCount) {
      RefCountAction = " (update suppressed)";
    } else if (ForceDelete) {
      // 'delete' map-type modifier: drop the count to the edge, leaving the
      // final decrement (and removal) to deallocTgtPtr.
      HT.resetRefCount(UseHoldRefCount);
      assert(IsLast == HT.decShouldRemove(UseHoldRefCount) &&
             "expected correct IsLast prediction for reset");
      if (IsLast)
        RefCountAction = " (reset, deferred final decrement)";
      else {
        HT.decRefCount(UseHoldRefCount);
        RefCountAction = " (reset)";
      }
    } else if (IsLast) {
      RefCountAction = " (deferred final decrement)";
    } else {
      HT.decRefCount(UseHoldRefCount);
      RefCountAction = " (decremented)";
    }
    const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
    const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";
    uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
    INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
         "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
         "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
         DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(),
         DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
    TargetPointer = (void *)tp;
  } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
    // If the value isn't found in the mapping and unified shared memory
    // is on then it means we have stumbled upon a value which we need to
    // use directly from the host.
    DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
       "memory\n",
       DPxPTR((uintptr_t)HstPtrBegin), Size);
    IsHostPtr = true;
    TargetPointer = HstPtrBegin;
  }

  return {{IsNew, IsHostPtr}, lr.Entry, TargetPointer};
}

// Return the target pointer begin (where the data will be moved).
// Lock-free version called when loading global symbols from the fat binary.
void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
  uintptr_t hp = (uintptr_t)HstPtrBegin;
  LookupResult lr = lookupMapping(HstPtrBegin, Size);
  if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
    auto &HT = *lr.Entry;
    uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
    return (void *)tp;
  }

  return NULL;
}

/// Decrement the reference count of the mapping covering \p HstPtrBegin and,
/// when it reaches zero, free the device allocation, destroy any attached
/// event, and erase the map entry — all under DataMapMtx (see the comment in
/// getTgtPtrBegin about deferred final decrements).
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size,
                            bool HasHoldModifier) {
  std::lock_guard<decltype(DataMapMtx)> LG(DataMapMtx);

  // Check if the pointer is contained in any sub-nodes.
  int Ret = OFFLOAD_SUCCESS;
  LookupResult lr = lookupMapping(HstPtrBegin, Size);
  if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
    auto &HT = *lr.Entry;
    if (HT.decRefCount(HasHoldModifier) == 0) {
      DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
         DPxPTR(HT.TgtPtrBegin), Size);
      deleteData((void *)HT.TgtPtrBegin);
      INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
           "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
           ", Size=%" PRId64 ", Name=%s\n",
           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size,
           (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str()
                           : "unknown");
      void *Event = lr.Entry->getEvent();
      HostDataToTargetMap.erase(lr.Entry);
      if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
        REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event));
        Ret = OFFLOAD_FAIL;
      }
    }
  } else {
    REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the"
           " allocated memory\n",
           DPxPTR(HstPtrBegin));
    Ret = OFFLOAD_FAIL;
  }

  return Ret;
}

/// Init device, should not be called directly.
void DeviceTy::init() {
  // Make call to init_requires if it exists for this plugin.
  if (RTL->init_requires)
    RTL->init_requires(PM->RTLs.RequiresFlags);
  int32_t Ret = RTL->init_device(RTLDeviceID);
  if (Ret != OFFLOAD_SUCCESS)
    return;

  // IsInit stays false on failure; initOnce reports OFFLOAD_FAIL from it.
  IsInit = true;
}

/// Thread-safe method to initialize the device only once.
int32_t DeviceTy::initOnce() {
  std::call_once(InitFlag, &DeviceTy::init, this);

  // At this point, if IsInit is true, then either this thread or some other
  // thread in the past successfully initialized the device, so we can return
  // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
  // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
  // that some other thread already attempted to execute init() and if IsInit
  // is still false, return OFFLOAD_FAIL.
  if (IsInit)
    return OFFLOAD_SUCCESS;
  else
    return OFFLOAD_FAIL;
}

void DeviceTy::deinit() {
  if (RTL->deinit_device)
    RTL->deinit_device(RTLDeviceID);
}

// Load binary to device.
477 __tgt_target_table *DeviceTy::load_binary(void *Img) { 478 std::lock_guard<decltype(RTL->Mtx)> LG(RTL->Mtx); 479 __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img); 480 return rc; 481 } 482 483 void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { 484 return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); 485 } 486 487 int32_t DeviceTy::deleteData(void *TgtPtrBegin) { 488 return RTL->data_delete(RTLDeviceID, TgtPtrBegin); 489 } 490 491 // Submit data to device 492 int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, 493 AsyncInfoTy &AsyncInfo) { 494 if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { 495 LookupResult LR = lookupMapping(HstPtrBegin, Size); 496 auto *HT = &*LR.Entry; 497 498 INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, 499 "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD 500 ", Size=%" PRId64 ", Name=%s\n", 501 DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size, 502 (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str() 503 : "unknown"); 504 } 505 506 if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) 507 return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); 508 else 509 return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, 510 AsyncInfo); 511 } 512 513 // Retrieve data from device 514 int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, 515 int64_t Size, AsyncInfoTy &AsyncInfo) { 516 if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { 517 LookupResult LR = lookupMapping(HstPtrBegin, Size); 518 auto *HT = &*LR.Entry; 519 INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, 520 "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD 521 ", Size=%" PRId64 ", Name=%s\n", 522 DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size, 523 (HT && HT->HstPtrName) ? 
getNameFromMapping(HT->HstPtrName).c_str() 524 : "unknown"); 525 } 526 527 if (!RTL->data_retrieve_async || !RTL->synchronize) 528 return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); 529 else 530 return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, 531 AsyncInfo); 532 } 533 534 // Copy data from current device to destination device directly 535 int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, 536 int64_t Size, AsyncInfoTy &AsyncInfo) { 537 if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) { 538 assert(RTL->data_exchange && "RTL->data_exchange is nullptr"); 539 return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, 540 Size); 541 } else 542 return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, 543 DstPtr, Size, AsyncInfo); 544 } 545 546 // Run region on device 547 int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr, 548 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, 549 AsyncInfoTy &AsyncInfo) { 550 if (!RTL->run_region || !RTL->synchronize) 551 return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, 552 TgtVarsSize); 553 else 554 return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, 555 TgtOffsets, TgtVarsSize, AsyncInfo); 556 } 557 558 // Run region on device 559 bool DeviceTy::printDeviceInfo(int32_t RTLDevId) { 560 if (!RTL->print_device_info) 561 return false; 562 RTL->print_device_info(RTLDevId); 563 return true; 564 } 565 566 // Run team region on device. 
567 int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr, 568 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, 569 int32_t NumTeams, int32_t ThreadLimit, 570 uint64_t LoopTripCount, 571 AsyncInfoTy &AsyncInfo) { 572 if (!RTL->run_team_region_async || !RTL->synchronize) 573 return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, 574 TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit, 575 LoopTripCount); 576 else 577 return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, 578 TgtOffsets, TgtVarsSize, NumTeams, 579 ThreadLimit, LoopTripCount, AsyncInfo); 580 } 581 582 // Whether data can be copied to DstDevice directly 583 bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) { 584 if (RTL != DstDevice.RTL || !RTL->is_data_exchangable) 585 return false; 586 587 if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID)) 588 return (RTL->data_exchange != nullptr) || 589 (RTL->data_exchange_async != nullptr); 590 591 return false; 592 } 593 594 int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) { 595 if (RTL->synchronize) 596 return RTL->synchronize(RTLDeviceID, AsyncInfo); 597 return OFFLOAD_SUCCESS; 598 } 599 600 int32_t DeviceTy::createEvent(void **Event) { 601 if (RTL->create_event) 602 return RTL->create_event(RTLDeviceID, Event); 603 604 return OFFLOAD_SUCCESS; 605 } 606 607 int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) { 608 if (RTL->record_event) 609 return RTL->record_event(RTLDeviceID, Event, AsyncInfo); 610 611 return OFFLOAD_SUCCESS; 612 } 613 614 int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { 615 if (RTL->wait_event) 616 return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); 617 618 return OFFLOAD_SUCCESS; 619 } 620 621 int32_t DeviceTy::syncEvent(void *Event) { 622 if (RTL->sync_event) 623 return RTL->sync_event(RTLDeviceID, Event); 624 625 return OFFLOAD_SUCCESS; 626 } 627 628 int32_t DeviceTy::destroyEvent(void *Event) { 629 if (RTL->create_event) 630 return 
RTL->destroy_event(RTLDeviceID, Event); 631 632 return OFFLOAD_SUCCESS; 633 } 634 635 /// Check whether a device has an associated RTL and initialize it if it's not 636 /// already initialized. 637 bool device_is_ready(int device_num) { 638 DP("Checking whether device %d is ready.\n", device_num); 639 // Devices.size() can only change while registering a new 640 // library, so try to acquire the lock of RTLs' mutex. 641 size_t DevicesSize; 642 { 643 std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx); 644 DevicesSize = PM->Devices.size(); 645 } 646 if (DevicesSize <= (size_t)device_num) { 647 DP("Device ID %d does not have a matching RTL\n", device_num); 648 return false; 649 } 650 651 // Get device info 652 DeviceTy &Device = *PM->Devices[device_num]; 653 654 DP("Is the device %d (local ID %d) initialized? %d\n", device_num, 655 Device.RTLDeviceID, Device.IsInit); 656 657 // Init the device if not done before 658 if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) { 659 DP("Failed to init device %d\n", device_num); 660 return false; 661 } 662 663 DP("Device %d is ready to use.\n", device_num); 664 665 return true; 666 } 667