//===--------- device.cpp - Target independent OpenMP target RTL ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Functionality for managing devices that are handled by RTL plugins.
//
//===----------------------------------------------------------------------===//

#include "device.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <climits>
#include <cstdio>
#include <string>

int HostDataToTargetTy::addEventIfNecessary(
    DeviceTy &Device, AsyncInfoTy &AsyncInfo) const {
  // First, check if the user disabled atomic map transfer/malloc/dealloc.
  if (!PM->UseEventsForAtomicTransfers)
    return OFFLOAD_SUCCESS;

  void *Event = getEvent();
  bool NeedNewEvent = Event == nullptr;
  if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
    REPORT("Failed to create event\n");
    return OFFLOAD_FAIL;
  }

  // We cannot assume the event is non-null here because we do not know whether
  // the target supports events. If a target does not, recordEvent should
  // always return success.
  if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
    REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
    return OFFLOAD_FAIL;
  }

  if (NeedNewEvent)
    setEvent(Event);

  return OFFLOAD_SUCCESS;
}

DeviceTy::DeviceTy(RTLInfoTy *RTL)
    : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
      HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(),
      ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx() {}

DeviceTy::~DeviceTy() {
  if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
    return;

  ident_t loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
  dumpTargetPointerMappings(&loc, *this);
}

int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
  DataMapMtx.lock();

  // Check if entry exists.
  auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
  if (search != HostDataToTargetMap.end()) {
    // Mapping already exists.
    bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
                   search->TgtPtrBegin == (uintptr_t)TgtPtrBegin;
    DataMapMtx.unlock();
    if (isValid) {
      DP("Attempt to re-associate the same device ptr+offset with the same "
         "host ptr, nothing to do\n");
      return OFFLOAD_SUCCESS;
    } else {
      REPORT("Not allowed to re-associate a different device ptr+offset with "
             "the same host ptr\n");
      return OFFLOAD_FAIL;
    }
  }

  // Mapping does not exist, allocate it with refCount=INF.
  const HostDataToTargetTy &newEntry =
      *HostDataToTargetMap
           .emplace(
               /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
               /*HstPtrBegin=*/(uintptr_t)HstPtrBegin,
               /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size,
               /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin,
               /*UseHoldRefCount=*/false, /*Name=*/nullptr,
               /*IsRefCountINF=*/true)
           .first;
  DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD
     ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, "
     "HoldRefCount=%s\n",
     DPxPTR(newEntry.HstPtrBase), DPxPTR(newEntry.HstPtrBegin),
     DPxPTR(newEntry.HstPtrEnd), DPxPTR(newEntry.TgtPtrBegin),
     newEntry.dynRefCountToStr().c_str(), newEntry.holdRefCountToStr().c_str());
  (void)newEntry;

  DataMapMtx.unlock();

  return OFFLOAD_SUCCESS;
}

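// Illustrative sketch (comments only, not compiled): how associatePtr() above
// is typically reached. The buffer, size, and device number are hypothetical;
// the entry points are the libomptarget user API.
//
//   void *HstBuf = malloc(Size);
//   void *TgtBuf = omp_target_alloc(Size, DeviceNum);
//   // Creates an entry for [HstBuf, HstBuf+Size) with an infinite dynamic
//   // reference count, so later maps of the same range never deallocate it.
//   omp_target_associate_ptr(HstBuf, TgtBuf, Size, /*device_offset=*/0,
//                            DeviceNum);
//   ...
//   omp_target_disassociate_ptr(HstBuf, DeviceNum);
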
int DeviceTy::disassociatePtr(void *HstPtrBegin) {
  DataMapMtx.lock();

  auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
  if (search != HostDataToTargetMap.end()) {
    // Mapping exists.
    if (search->getHoldRefCount()) {
      // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657:
      // "It is an error to call acc_unmap_data if the structured reference
      // count for the pointer is not zero."
      REPORT("Trying to disassociate a pointer with a non-zero hold reference "
             "count\n");
    } else if (search->isDynRefCountInf()) {
      DP("Association found, removing it\n");
      void *Event = search->getEvent();
      if (Event)
        destroyEvent(Event);
      HostDataToTargetMap.erase(search);
      DataMapMtx.unlock();
      return OFFLOAD_SUCCESS;
    } else {
      REPORT("Trying to disassociate a pointer which was not mapped via "
             "omp_target_associate_ptr\n");
    }
  } else {
    REPORT("Association not found\n");
  }

  // Mapping not found.
  DataMapMtx.unlock();
  return OFFLOAD_FAIL;
}

LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
  uintptr_t hp = (uintptr_t)HstPtrBegin;
  LookupResult lr;

  DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n",
     DPxPTR(hp), Size);

  if (HostDataToTargetMap.empty())
    return lr;

  auto upper = HostDataToTargetMap.upper_bound(hp);
  // Check the left bin.
  if (upper != HostDataToTargetMap.begin()) {
    lr.Entry = std::prev(upper);
    auto &HT = *lr.Entry;
    // Is it contained?
    lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
                           (hp + Size) <= HT.HstPtrEnd;
    // Does it extend beyond the mapped region?
    lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
  }

  // Check the right bin.
  if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
      upper != HostDataToTargetMap.end()) {
    lr.Entry = upper;
    auto &HT = *lr.Entry;
    // Does it extend into an already mapped region?
    lr.Flags.ExtendsBefore =
        hp < HT.HstPtrBegin && (hp + Size) > HT.HstPtrBegin;
    // Does it extend beyond the mapped region?
    lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
  }

  if (lr.Flags.ExtendsBefore) {
    DP("WARNING: Pointer is not mapped but section extends into already "
       "mapped data\n");
  }
  if (lr.Flags.ExtendsAfter) {
    DP("WARNING: Pointer is already mapped but section extends beyond mapped "
       "region\n");
  }

  return lr;
}

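// Summary added for clarity: for a host section [hp, hp+Size) checked against
// a mapped entry [HstPtrBegin, HstPtrEnd), lookupMapping() above reports
//
//   IsContained   : HstPtrBegin <= hp && hp < HstPtrEnd && hp+Size <= HstPtrEnd
//   ExtendsBefore : hp < HstPtrBegin && hp+Size > HstPtrBegin
//   ExtendsAfter  : hp < HstPtrEnd   && hp+Size > HstPtrEnd
//
// Because entries in HostDataToTargetMap never overlap, only the entries
// adjacent to upper_bound(hp) need to be inspected.
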
TargetPointerResultTy
DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
                           map_var_info_t HstPtrName, bool HasFlagTo,
                           bool HasFlagAlways, bool IsImplicit,
                           bool UpdateRefCount, bool HasCloseModifier,
                           bool HasPresentModifier, bool HasHoldModifier,
                           AsyncInfoTy &AsyncInfo) {
  void *TargetPointer = nullptr;
  bool IsHostPtr = false;
  bool IsNew = false;

  DataMapMtx.lock();

  LookupResult LR = lookupMapping(HstPtrBegin, Size);
  auto Entry = LR.Entry;

  // Check if the pointer is contained.
  // If a variable is mapped to the device manually by the user - which would
  // lead to the IsContained flag being true - then we must ensure that the
  // device address is returned even under unified memory conditions.
  if (LR.Flags.IsContained ||
      ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) {
    auto &HT = *LR.Entry;
    const char *RefCountAction;
    assert(HT.getTotalRefCount() > 0 && "expected existing RefCount > 0");
    if (UpdateRefCount) {
      // After this, RefCount > 1.
      HT.incRefCount(HasHoldModifier);
      RefCountAction = " (incremented)";
    } else {
      // It might have been allocated with the parent, but it's still new.
      IsNew = HT.getTotalRefCount() == 1;
      RefCountAction = " (update suppressed)";
    }
    const char *DynRefCountAction = HasHoldModifier ? "" : RefCountAction;
    const char *HoldRefCountAction = HasHoldModifier ? RefCountAction : "";
    uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
    INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
         "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
         ", Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n",
         (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr),
         Size, HT.dynRefCountToStr().c_str(), DynRefCountAction,
         HT.holdRefCountToStr().c_str(), HoldRefCountAction,
         (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
    TargetPointer = (void *)Ptr;
  } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) {
    // Explicit extension of mapped data - not allowed.
    MESSAGE("explicit extension not allowed: host address specified is " DPxMOD
            " (%" PRId64
            " bytes), but device allocation maps to host at " DPxMOD
            " (%" PRId64 " bytes)",
            DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin),
            Entry->HstPtrEnd - Entry->HstPtrBegin);
    if (HasPresentModifier)
      MESSAGE("device mapping required by 'present' map type modifier does "
              "not exist for host address " DPxMOD " (%" PRId64 " bytes)",
              DPxPTR(HstPtrBegin), Size);
  } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
             !HasCloseModifier) {
    // If unified shared memory is active, implicitly mapped variables that are
    // not privatized use the host address. Explicitly mapped variables also
    // use the host address where correctness is not impeded. In all other
    // cases maps are respected.
    // In addition to the mapping rules above, the close map modifier forces
    // the mapping of the variable to the device.
    if (Size) {
      DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
         "memory\n",
         DPxPTR((uintptr_t)HstPtrBegin), Size);
      IsHostPtr = true;
      TargetPointer = HstPtrBegin;
    }
  } else if (HasPresentModifier) {
    DP("Mapping required by 'present' map type modifier does not exist for "
       "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n",
       DPxPTR(HstPtrBegin), Size);
    MESSAGE("device mapping required by 'present' map type modifier does not "
            "exist for host address " DPxMOD " (%" PRId64 " bytes)",
            DPxPTR(HstPtrBegin), Size);
  } else if (Size) {
    // If it is not contained and Size > 0, we should create a new entry for
    // it.
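    // Note: the new entry starts with a single reference on the counter
    // selected by HasHoldModifier (the 'ompx_hold' reference count when the
    // modifier is present, the dynamic reference count otherwise); IsNew tells
    // the caller below to issue the initial host-to-device copy when the 'to'
    // flag is set.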
    IsNew = true;
    uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
    Entry = HostDataToTargetMap
                .emplace((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
                         (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
                         HstPtrName)
                .first;
    INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
         "Creating new map entry with "
         "HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", "
         "DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
         DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size,
         Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(),
         (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
    TargetPointer = (void *)Ptr;
  }

  // If the target pointer is valid, and we need to transfer data, issue the
  // data transfer.
  if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways)) {
    // Lock the entry before releasing the mapping table lock so that another
    // thread that could issue data movement will get the right result.
    HostDataToTargetTy::LockGuard LG(*Entry);
    // Release the mapping table lock right after the entry is locked.
    DataMapMtx.unlock();

    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
       DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));

    int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
    if (Ret != OFFLOAD_SUCCESS) {
      REPORT("Copying data to device failed.\n");
      // We also return nullptr if the data movement fails: the pointer then
      // refers to a corrupted memory region, so it makes no sense to keep
      // using it.
      TargetPointer = nullptr;
    } else if (Entry->addEventIfNecessary(*this, AsyncInfo) !=
               OFFLOAD_SUCCESS)
      return {{false /* IsNewEntry */, false /* IsHostPointer */},
              {} /* MapTableEntry */,
              nullptr /* TargetPointer */};
  } else {
    // Release the mapping table lock directly.
    DataMapMtx.unlock();
    // If this is not a host pointer and there is no present modifier, we need
    // to wait for the entry's event if it exists.
    // Note: Entry might be invalid because of a zero-length array section.
    if (Entry != HostDataToTargetListTy::iterator() && !IsHostPtr &&
        !HasPresentModifier) {
      HostDataToTargetTy::LockGuard LG(*Entry);
      void *Event = Entry->getEvent();
      if (Event) {
        int Ret = waitEvent(Event, AsyncInfo);
        if (Ret != OFFLOAD_SUCCESS) {
          // If waiting for the event fails, we return nullptr to avoid any
          // data race.
          REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
          return {{false /* IsNewEntry */, false /* IsHostPointer */},
                  {} /* MapTableEntry */,
                  nullptr /* TargetPointer */};
        }
      }
    }
  }

  return {{IsNew, IsHostPtr}, Entry, TargetPointer};
}

// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
// Return the target pointer begin (where the data will be moved).
// Decrement the reference counter if called from targetDataEnd.
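//
// Illustrative example (hypothetical counts): for a pointer currently mapped
// with DynRefCount == 2 and HoldRefCount == 0, a targetDataEnd call with
// UpdateRefCount == true first decrements DynRefCount to 1 and reports
// IsLast == false; the next such call reports IsLast == true and defers the
// final decrement to deallocTgtPtr(), which removes the entry.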
TargetPointerResultTy
DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
                         bool UpdateRefCount, bool UseHoldRefCount,
                         bool &IsHostPtr, bool MustContain, bool ForceDelete) {
  void *TargetPointer = nullptr;
  bool IsNew = false;
  IsHostPtr = false;
  IsLast = false;
  DataMapMtx.lock();
  LookupResult lr = lookupMapping(HstPtrBegin, Size);

  if (lr.Flags.IsContained ||
      (!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter))) {
    auto &HT = *lr.Entry;
    // We do not zero the total reference count here. deallocTgtPtr does that
    // atomically with removing the mapping. Otherwise, before this thread
    // removed the mapping in deallocTgtPtr, another thread could retrieve the
    // mapping, increment and decrement back to zero, and then both threads
    // would try to remove the mapping, resulting in a double free.
    IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);
    const char *RefCountAction;
    if (!UpdateRefCount) {
      RefCountAction = " (update suppressed)";
    } else if (ForceDelete) {
      HT.resetRefCount(UseHoldRefCount);
      assert(IsLast == HT.decShouldRemove(UseHoldRefCount) &&
             "expected correct IsLast prediction for reset");
      if (IsLast)
        RefCountAction = " (reset, deferred final decrement)";
      else {
        HT.decRefCount(UseHoldRefCount);
        RefCountAction = " (reset)";
      }
    } else if (IsLast) {
      RefCountAction = " (deferred final decrement)";
    } else {
      HT.decRefCount(UseHoldRefCount);
      RefCountAction = " (decremented)";
    }
    const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
    const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";
    uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
    INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
         "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
         "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
         DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(),
         DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
    TargetPointer = (void *)tp;
  } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
    // If the value isn't found in the mapping and unified shared memory is on,
    // then we have stumbled upon a value which we need to use directly from
    // the host.
    DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
       "memory\n",
       DPxPTR((uintptr_t)HstPtrBegin), Size);
    IsHostPtr = true;
    TargetPointer = HstPtrBegin;
  }

  DataMapMtx.unlock();
  return {{IsNew, IsHostPtr}, lr.Entry, TargetPointer};
}

// Return the target pointer begin (where the data will be moved).
// Lock-free version called when loading global symbols from the fat binary.
void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
  uintptr_t hp = (uintptr_t)HstPtrBegin;
  LookupResult lr = lookupMapping(HstPtrBegin, Size);
  if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
    auto &HT = *lr.Entry;
    uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
    return (void *)tp;
  }

  return nullptr;
}

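// Illustrative call sequence (simplified; parameter values are hypothetical)
// showing how the reference-counted getTgtPtrBegin() above pairs with
// deallocTgtPtr() below during targetDataEnd:
//
//   bool IsLast, IsHostPtr;
//   TargetPointerResultTy TPR = Device.getTgtPtrBegin(
//       HstPtr, Size, IsLast, /*UpdateRefCount=*/true,
//       /*UseHoldRefCount=*/false, IsHostPtr);
//   if (IsLast) {
//     if (HasFromFlag)
//       Device.retrieveData(HstPtr, TPR.TargetPointer, Size, AsyncInfo);
//     Device.deallocTgtPtr(HstPtr, Size, /*HasHoldModifier=*/false);
//   }
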
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size,
                            bool HasHoldModifier) {
  // Check if the pointer is contained in any sub-nodes.
  int Ret = OFFLOAD_SUCCESS;
  DataMapMtx.lock();
  LookupResult lr = lookupMapping(HstPtrBegin, Size);
  if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
    auto &HT = *lr.Entry;
    if (HT.decRefCount(HasHoldModifier) == 0) {
      DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
         DPxPTR(HT.TgtPtrBegin), Size);
      deleteData((void *)HT.TgtPtrBegin);
      INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
           "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
           ", Size=%" PRId64 ", Name=%s\n",
           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size,
           (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str()
                           : "unknown");
      void *Event = lr.Entry->getEvent();
      HostDataToTargetMap.erase(lr.Entry);
      if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
        REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event));
        Ret = OFFLOAD_FAIL;
      }
    }
  } else {
    REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the"
           " allocated memory\n",
           DPxPTR(HstPtrBegin));
    Ret = OFFLOAD_FAIL;
  }

  DataMapMtx.unlock();
  return Ret;
}

/// Init device, should not be called directly.
void DeviceTy::init() {
  // Make call to init_requires if it exists for this plugin.
  if (RTL->init_requires)
    RTL->init_requires(PM->RTLs.RequiresFlags);
  int32_t Ret = RTL->init_device(RTLDeviceID);
  if (Ret != OFFLOAD_SUCCESS)
    return;

  IsInit = true;
}

/// Thread-safe method to initialize the device only once.
int32_t DeviceTy::initOnce() {
  std::call_once(InitFlag, &DeviceTy::init, this);

  // At this point, if IsInit is true, then either this thread or some other
  // thread in the past successfully initialized the device, so we can return
  // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
  // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
  // that some other thread already attempted to execute init() and, if IsInit
  // is still false, return OFFLOAD_FAIL.
  if (IsInit)
    return OFFLOAD_SUCCESS;
  else
    return OFFLOAD_FAIL;
}

// Load binary to device.
__tgt_target_table *DeviceTy::load_binary(void *Img) {
  RTL->Mtx.lock();
  __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
  RTL->Mtx.unlock();
  return rc;
}

void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
  return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
}

int32_t DeviceTy::deleteData(void *TgtPtrBegin) {
  return RTL->data_delete(RTLDeviceID, TgtPtrBegin);
}

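// The data-movement and kernel-launch wrappers below share one dispatch
// pattern: if the plugin provides the asynchronous entry point *and* a
// synchronize() hook, the asynchronous form is used and completion is deferred
// to AsyncInfo; otherwise libomptarget falls back to the blocking entry point.
// Illustrative shape of the pattern ('foo' is a placeholder, not a real
// interface function):
//
//   if (!RTL->foo_async || !RTL->synchronize)
//     return RTL->foo(RTLDeviceID, ...);            // blocking fallback
//   return RTL->foo_async(RTLDeviceID, ..., AsyncInfo);
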
// Submit data to device.
int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
                             AsyncInfoTy &AsyncInfo) {
  if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
    LookupResult LR = lookupMapping(HstPtrBegin, Size);
    auto *HT = &*LR.Entry;

    INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
         "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
         ", Size=%" PRId64 ", Name=%s\n",
         DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size,
         (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
                                : "unknown");
  }

  if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
    return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
  else
    return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
                                  AsyncInfo);
}

// Retrieve data from device.
int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
                               int64_t Size, AsyncInfoTy &AsyncInfo) {
  if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
    LookupResult LR = lookupMapping(HstPtrBegin, Size);
    auto *HT = &*LR.Entry;
    INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
         "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD
         ", Size=%" PRId64 ", Name=%s\n",
         DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size,
         (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
                                : "unknown");
  }

  if (!RTL->data_retrieve_async || !RTL->synchronize)
    return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
  else
    return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
                                    AsyncInfo);
}

// Copy data from the current device to the destination device directly.
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
                               int64_t Size, AsyncInfoTy &AsyncInfo) {
  if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
    assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
    return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
                              Size);
  } else
    return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
                                    DstPtr, Size, AsyncInfo);
}

// Run region on device.
int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
                            ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
                            AsyncInfoTy &AsyncInfo) {
  if (!RTL->run_region_async || !RTL->synchronize)
    return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
                           TgtVarsSize);
  else
    return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
                                 TgtOffsets, TgtVarsSize, AsyncInfo);
}

// Print device info.
bool DeviceTy::printDeviceInfo(int32_t RTLDevId) {
  if (!RTL->print_device_info)
    return false;
  RTL->print_device_info(RTLDevId);
  return true;
}

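// Note on the parameters of runTeamRegion() below: NumTeams and ThreadLimit
// typically come from the num_teams/thread_limit clauses (a value of 0 lets
// the plugin pick its own default), and LoopTripCount is the trip count the
// compiler pushes for the offloaded loop so the plugin can size its launch.
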
// Run team region on device.
int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
                                ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
                                int32_t NumTeams, int32_t ThreadLimit,
                                uint64_t LoopTripCount,
                                AsyncInfoTy &AsyncInfo) {
  if (!RTL->run_team_region_async || !RTL->synchronize)
    return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
                                TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
                                LoopTripCount);
  else
    return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
                                      TgtOffsets, TgtVarsSize, NumTeams,
                                      ThreadLimit, LoopTripCount, AsyncInfo);
}

// Whether data can be copied to DstDevice directly.
bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
  if (RTL != DstDevice.RTL || !RTL->is_data_exchangable)
    return false;

  if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
    return (RTL->data_exchange != nullptr) ||
           (RTL->data_exchange_async != nullptr);

  return false;
}

int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
  if (RTL->synchronize)
    return RTL->synchronize(RTLDeviceID, AsyncInfo);
  return OFFLOAD_SUCCESS;
}

int32_t DeviceTy::createEvent(void **Event) {
  if (RTL->create_event)
    return RTL->create_event(RTLDeviceID, Event);

  return OFFLOAD_SUCCESS;
}

int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
  if (RTL->record_event)
    return RTL->record_event(RTLDeviceID, Event, AsyncInfo);

  return OFFLOAD_SUCCESS;
}

int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
  if (RTL->wait_event)
    return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);

  return OFFLOAD_SUCCESS;
}

int32_t DeviceTy::syncEvent(void *Event) {
  if (RTL->sync_event)
    return RTL->sync_event(RTLDeviceID, Event);

  return OFFLOAD_SUCCESS;
}

int32_t DeviceTy::destroyEvent(void *Event) {
  if (RTL->destroy_event)
    return RTL->destroy_event(RTLDeviceID, Event);

  return OFFLOAD_SUCCESS;
}

/// Check whether a device has an associated RTL and initialize it if it's not
/// already initialized.
bool device_is_ready(int device_num) {
  DP("Checking whether device %d is ready.\n", device_num);
  // Devices.size() can only change while registering a new
  // library, so try to acquire the lock of RTLs' mutex.
  PM->RTLsMtx.lock();
  size_t DevicesSize = PM->Devices.size();
  PM->RTLsMtx.unlock();
  if (DevicesSize <= (size_t)device_num) {
    DP("Device ID %d does not have a matching RTL\n", device_num);
    return false;
  }

  // Get device info.
  DeviceTy &Device = *PM->Devices[device_num];

  DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
     Device.RTLDeviceID, Device.IsInit);

  // Init the device if not done before.
  if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
    DP("Failed to init device %d\n", device_num);
    return false;
  }

  DP("Device %d is ready to use.\n", device_num);

  return true;
}

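// Illustrative end-to-end sequence (greatly simplified; error handling and
// argument marshalling omitted, and names such as HstPtr, DeviceNum, and
// HasFromFlag are placeholders) for one "#pragma omp target map(tofrom: x)"
// region in terms of the methods defined in this file:
//
//   if (!device_is_ready(DeviceNum))
//     ... fall back to host execution ...
//   DeviceTy &Device = *PM->Devices[DeviceNum];
//   // targetDataBegin: map 'x' and copy it to the device.
//   auto TPR = Device.getTargetPointer(HstPtr, HstPtr, Size, ...);
//   // target: launch the offloaded kernel.
//   Device.runTeamRegion(TgtEntryPtr, TgtArgs, TgtOffsets, NumArgs, NumTeams,
//                        ThreadLimit, TripCount, AsyncInfo);
//   // targetDataEnd: copy 'x' back if this was the last reference, then
//   // release the mapping.
//   Device.getTgtPtrBegin(HstPtr, Size, IsLast, /*UpdateRefCount=*/true,
//                         /*UseHoldRefCount=*/false, IsHostPtr);
//   if (IsLast) {
//     Device.retrieveData(HstPtr, TPR.TargetPointer, Size, AsyncInfo);
//     Device.deallocTgtPtr(HstPtr, Size, /*HasHoldModifier=*/false);
//   }
//   Device.synchronize(AsyncInfo);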