1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "device.h"
14 #include "omptarget.h"
15 #include "private.h"
16 #include "rtl.h"
17 
18 #include <cassert>
19 #include <climits>
20 #include <cstdint>
21 #include <cstdio>
22 #include <string>
23 #include <thread>
24 
addEventIfNecessary(DeviceTy & Device,AsyncInfoTy & AsyncInfo) const25 int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
26                                             AsyncInfoTy &AsyncInfo) const {
27   // First, check if the user disabled atomic map transfer/malloc/dealloc.
28   if (!PM->UseEventsForAtomicTransfers)
29     return OFFLOAD_SUCCESS;
30 
31   void *Event = getEvent();
32   bool NeedNewEvent = Event == nullptr;
33   if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
34     REPORT("Failed to create event\n");
35     return OFFLOAD_FAIL;
36   }
37 
38   // We cannot assume the event should not be nullptr because we don't
39   // know if the target support event. But if a target doesn't,
40   // recordEvent should always return success.
41   if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
42     REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
43     return OFFLOAD_FAIL;
44   }
45 
46   if (NeedNewEvent)
47     setEvent(Event);
48 
49   return OFFLOAD_SUCCESS;
50 }
51 
DeviceTy(RTLInfoTy * RTL)52 DeviceTy::DeviceTy(RTLInfoTy *RTL)
53     : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
54       HasPendingGlobals(false), PendingCtorsDtors(), ShadowPtrMap(),
55       PendingGlobalsMtx(), ShadowMtx() {}
56 
~DeviceTy()57 DeviceTy::~DeviceTy() {
58   if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
59     return;
60 
61   ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
62   dumpTargetPointerMappings(&Loc, *this);
63 }
64 
associatePtr(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size)65 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
66   HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
67 
68   // Check if entry exists
69   auto It = HDTTMap->find(HstPtrBegin);
70   if (It != HDTTMap->end()) {
71     HostDataToTargetTy &HDTT = *It->HDTT;
72     // Mapping already exists
73     bool IsValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
74                    HDTT.TgtPtrBegin == (uintptr_t)TgtPtrBegin;
75     if (IsValid) {
76       DP("Attempt to re-associate the same device ptr+offset with the same "
77          "host ptr, nothing to do\n");
78       return OFFLOAD_SUCCESS;
79     }
80     REPORT("Not allowed to re-associate a different device ptr+offset with "
81            "the same host ptr\n");
82     return OFFLOAD_FAIL;
83   }
84 
85   // Mapping does not exist, allocate it with refCount=INF
86   const HostDataToTargetTy &NewEntry =
87       *HDTTMap
88            ->emplace(new HostDataToTargetTy(
89                /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
90                /*HstPtrBegin=*/(uintptr_t)HstPtrBegin,
91                /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size,
92                /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin,
93                /*UseHoldRefCount=*/false, /*Name=*/nullptr,
94                /*IsRefCountINF=*/true))
95            .first->HDTT;
96   DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD
97      ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, "
98      "HoldRefCount=%s\n",
99      DPxPTR(NewEntry.HstPtrBase), DPxPTR(NewEntry.HstPtrBegin),
100      DPxPTR(NewEntry.HstPtrEnd), DPxPTR(NewEntry.TgtPtrBegin),
101      NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str());
102   (void)NewEntry;
103 
104   return OFFLOAD_SUCCESS;
105 }
106 
disassociatePtr(void * HstPtrBegin)107 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
108   HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
109 
110   auto It = HDTTMap->find(HstPtrBegin);
111   if (It != HDTTMap->end()) {
112     HostDataToTargetTy &HDTT = *It->HDTT;
113     // Mapping exists
114     if (HDTT.getHoldRefCount()) {
115       // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657:
116       // "It is an error to call acc_unmap_data if the structured reference
117       // count for the pointer is not zero."
118       REPORT("Trying to disassociate a pointer with a non-zero hold reference "
119              "count\n");
120     } else if (HDTT.isDynRefCountInf()) {
121       DP("Association found, removing it\n");
122       void *Event = HDTT.getEvent();
123       delete &HDTT;
124       if (Event)
125         destroyEvent(Event);
126       HDTTMap->erase(It);
127       return OFFLOAD_SUCCESS;
128     } else {
129       REPORT("Trying to disassociate a pointer which was not mapped via "
130              "omp_target_associate_ptr\n");
131     }
132   } else {
133     REPORT("Association not found\n");
134   }
135 
136   // Mapping not found
137   return OFFLOAD_FAIL;
138 }
139 
lookupMapping(HDTTMapAccessorTy & HDTTMap,void * HstPtrBegin,int64_t Size)140 LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
141                                      void *HstPtrBegin, int64_t Size) {
142 
143   uintptr_t HP = (uintptr_t)HstPtrBegin;
144   LookupResult LR;
145 
146   DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n",
147      DPxPTR(HP), Size);
148 
149   if (HDTTMap->empty())
150     return LR;
151 
152   auto Upper = HDTTMap->upper_bound(HP);
153 
154   if (Size == 0) {
155     // specification v5.1 Pointer Initialization for Device Data Environments
156     // upper_bound satisfies
157     //   std::prev(upper)->HDTT.HstPtrBegin <= hp < upper->HDTT.HstPtrBegin
158     if (Upper != HDTTMap->begin()) {
159       LR.Entry = std::prev(Upper)->HDTT;
160       auto &HT = *LR.Entry;
161       // the left side of extended address range is satisified.
162       // hp >= HT.HstPtrBegin || hp >= HT.HstPtrBase
163       LR.Flags.IsContained = HP < HT.HstPtrEnd || HP < HT.HstPtrBase;
164     }
165 
166     if (!LR.Flags.IsContained && Upper != HDTTMap->end()) {
167       LR.Entry = Upper->HDTT;
168       auto &HT = *LR.Entry;
169       // the right side of extended address range is satisified.
170       // hp < HT.HstPtrEnd || hp < HT.HstPtrBase
171       LR.Flags.IsContained = HP >= HT.HstPtrBase;
172     }
173   } else {
174     // check the left bin
175     if (Upper != HDTTMap->begin()) {
176       LR.Entry = std::prev(Upper)->HDTT;
177       auto &HT = *LR.Entry;
178       // Is it contained?
179       LR.Flags.IsContained = HP >= HT.HstPtrBegin && HP < HT.HstPtrEnd &&
180                              (HP + Size) <= HT.HstPtrEnd;
181       // Does it extend beyond the mapped region?
182       LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
183     }
184 
185     // check the right bin
186     if (!(LR.Flags.IsContained || LR.Flags.ExtendsAfter) &&
187         Upper != HDTTMap->end()) {
188       LR.Entry = Upper->HDTT;
189       auto &HT = *LR.Entry;
190       // Does it extend into an already mapped region?
191       LR.Flags.ExtendsBefore =
192           HP < HT.HstPtrBegin && (HP + Size) > HT.HstPtrBegin;
193       // Does it extend beyond the mapped region?
194       LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
195     }
196 
197     if (LR.Flags.ExtendsBefore) {
198       DP("WARNING: Pointer is not mapped but section extends into already "
199          "mapped data\n");
200     }
201     if (LR.Flags.ExtendsAfter) {
202       DP("WARNING: Pointer is already mapped but section extends beyond mapped "
203          "region\n");
204     }
205   }
206 
207   return LR;
208 }
209 
getTargetPointer(void * HstPtrBegin,void * HstPtrBase,int64_t Size,map_var_info_t HstPtrName,bool HasFlagTo,bool HasFlagAlways,bool IsImplicit,bool UpdateRefCount,bool HasCloseModifier,bool HasPresentModifier,bool HasHoldModifier,AsyncInfoTy & AsyncInfo)210 TargetPointerResultTy DeviceTy::getTargetPointer(
211     void *HstPtrBegin, void *HstPtrBase, int64_t Size,
212     map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways,
213     bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
214     bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) {
215   HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
216 
217   void *TargetPointer = nullptr;
218   bool IsHostPtr = false;
219   bool IsNew = false;
220 
221   LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
222   auto *Entry = LR.Entry;
223 
224   // Check if the pointer is contained.
225   // If a variable is mapped to the device manually by the user - which would
226   // lead to the IsContained flag to be true - then we must ensure that the
227   // device address is returned even under unified memory conditions.
228   if (LR.Flags.IsContained ||
229       ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) {
230     auto &HT = *LR.Entry;
231     const char *RefCountAction;
232     if (UpdateRefCount) {
233       // After this, reference count >= 1. If the reference count was 0 but the
234       // entry was still there we can reuse the data on the device and avoid a
235       // new submission.
236       HT.incRefCount(HasHoldModifier);
237       RefCountAction = " (incremented)";
238     } else {
239       // It might have been allocated with the parent, but it's still new.
240       IsNew = HT.getTotalRefCount() == 1;
241       RefCountAction = " (update suppressed)";
242     }
243     const char *DynRefCountAction = HasHoldModifier ? "" : RefCountAction;
244     const char *HoldRefCountAction = HasHoldModifier ? RefCountAction : "";
245     uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
246     INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
247          "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
248          ", Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n",
249          (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr),
250          Size, HT.dynRefCountToStr().c_str(), DynRefCountAction,
251          HT.holdRefCountToStr().c_str(), HoldRefCountAction,
252          (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
253     TargetPointer = (void *)Ptr;
254   } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) {
255     // Explicit extension of mapped data - not allowed.
256     MESSAGE("explicit extension not allowed: host address specified is " DPxMOD
257             " (%" PRId64
258             " bytes), but device allocation maps to host at " DPxMOD
259             " (%" PRId64 " bytes)",
260             DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin),
261             Entry->HstPtrEnd - Entry->HstPtrBegin);
262     if (HasPresentModifier)
263       MESSAGE("device mapping required by 'present' map type modifier does not "
264               "exist for host address " DPxMOD " (%" PRId64 " bytes)",
265               DPxPTR(HstPtrBegin), Size);
266   } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
267              !HasCloseModifier) {
268     // If unified shared memory is active, implicitly mapped variables that are
269     // not privatized use host address. Any explicitly mapped variables also use
270     // host address where correctness is not impeded. In all other cases maps
271     // are respected.
272     // In addition to the mapping rules above, the close map modifier forces the
273     // mapping of the variable to the device.
274     if (Size) {
275       DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
276          "memory\n",
277          DPxPTR((uintptr_t)HstPtrBegin), Size);
278       IsHostPtr = true;
279       TargetPointer = HstPtrBegin;
280     }
281   } else if (HasPresentModifier) {
282     DP("Mapping required by 'present' map type modifier does not exist for "
283        "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n",
284        DPxPTR(HstPtrBegin), Size);
285     MESSAGE("device mapping required by 'present' map type modifier does not "
286             "exist for host address " DPxMOD " (%" PRId64 " bytes)",
287             DPxPTR(HstPtrBegin), Size);
288   } else if (Size) {
289     // If it is not contained and Size > 0, we should create a new entry for it.
290     IsNew = true;
291     uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
292     Entry = HDTTMap
293                 ->emplace(new HostDataToTargetTy(
294                     (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
295                     (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
296                     HstPtrName))
297                 .first->HDTT;
298     INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
299          "Creating new map entry with HstPtrBase=" DPxMOD
300          ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, "
301          "DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
302          DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size,
303          Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(),
304          (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
305     TargetPointer = (void *)Ptr;
306   }
307 
308   // If the target pointer is valid, and we need to transfer data, issue the
309   // data transfer.
310   if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways)) {
311     // Lock the entry before releasing the mapping table lock such that another
312     // thread that could issue data movement will get the right result.
313     std::lock_guard<decltype(*Entry)> LG(*Entry);
314     // Release the mapping table lock right after the entry is locked.
315     HDTTMap.destroy();
316 
317     DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
318        DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));
319 
320     int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
321     if (Ret != OFFLOAD_SUCCESS) {
322       REPORT("Copying data to device failed.\n");
323       // We will also return nullptr if the data movement fails because that
324       // pointer points to a corrupted memory region so it doesn't make any
325       // sense to continue to use it.
326       TargetPointer = nullptr;
327     } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != OFFLOAD_SUCCESS)
328       return {{false /* IsNewEntry */, false /* IsHostPointer */},
329               nullptr /* Entry */,
330               nullptr /* TargetPointer */};
331   } else {
332     // Release the mapping table lock directly.
333     HDTTMap.destroy();
334     // If not a host pointer and no present modifier, we need to wait for the
335     // event if it exists.
336     // Note: Entry might be nullptr because of zero length array section.
337     if (Entry && !IsHostPtr && !HasPresentModifier) {
338       std::lock_guard<decltype(*Entry)> LG(*Entry);
339       void *Event = Entry->getEvent();
340       if (Event) {
341         int Ret = waitEvent(Event, AsyncInfo);
342         if (Ret != OFFLOAD_SUCCESS) {
343           // If it fails to wait for the event, we need to return nullptr in
344           // case of any data race.
345           REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
346           return {{false /* IsNewEntry */, false /* IsHostPointer */},
347                   nullptr /* Entry */,
348                   nullptr /* TargetPointer */};
349         }
350       }
351     }
352   }
353 
354   return {{IsNew, IsHostPtr}, Entry, TargetPointer};
355 }
356 
357 // Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
358 // Return the target pointer begin (where the data will be moved).
359 // Decrement the reference counter if called from targetDataEnd.
360 TargetPointerResultTy
getTgtPtrBegin(void * HstPtrBegin,int64_t Size,bool & IsLast,bool UpdateRefCount,bool UseHoldRefCount,bool & IsHostPtr,bool MustContain,bool ForceDelete)361 DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
362                          bool UpdateRefCount, bool UseHoldRefCount,
363                          bool &IsHostPtr, bool MustContain, bool ForceDelete) {
364   HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
365 
366   void *TargetPointer = NULL;
367   bool IsNew = false;
368   IsHostPtr = false;
369   IsLast = false;
370   LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
371 
372   if (LR.Flags.IsContained ||
373       (!MustContain && (LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter))) {
374     auto &HT = *LR.Entry;
375     IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);
376 
377     if (ForceDelete) {
378       HT.resetRefCount(UseHoldRefCount);
379       assert(IsLast == HT.decShouldRemove(UseHoldRefCount) &&
380              "expected correct IsLast prediction for reset");
381     }
382 
383     const char *RefCountAction;
384     if (!UpdateRefCount) {
385       RefCountAction = " (update suppressed)";
386     } else if (IsLast) {
387       // Mark the entry as to be deleted by this thread. Another thread might
388       // reuse the entry and take "ownership" for the deletion while this thread
389       // is waiting for data transfers. That is fine and the current thread will
390       // simply skip the deletion step then.
391       HT.setDeleteThreadId();
392       HT.decRefCount(UseHoldRefCount);
393       assert(HT.getTotalRefCount() == 0 &&
394              "Expected zero reference count when deletion is scheduled");
395       if (ForceDelete)
396         RefCountAction = " (reset, delayed deletion)";
397       else
398         RefCountAction = " (decremented, delayed deletion)";
399     } else {
400       HT.decRefCount(UseHoldRefCount);
401       RefCountAction = " (decremented)";
402     }
403     const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
404     const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";
405     uintptr_t TP = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
406     INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
407          "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
408          "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
409          DPxPTR(HstPtrBegin), DPxPTR(TP), Size, HT.dynRefCountToStr().c_str(),
410          DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
411     TargetPointer = (void *)TP;
412   } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
413     // If the value isn't found in the mapping and unified shared memory
414     // is on then it means we have stumbled upon a value which we need to
415     // use directly from the host.
416     DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
417        "memory\n",
418        DPxPTR((uintptr_t)HstPtrBegin), Size);
419     IsHostPtr = true;
420     TargetPointer = HstPtrBegin;
421   }
422 
423   return {{IsNew, IsHostPtr}, LR.Entry, TargetPointer};
424 }
425 
426 // Return the target pointer begin (where the data will be moved).
getTgtPtrBegin(HDTTMapAccessorTy & HDTTMap,void * HstPtrBegin,int64_t Size)427 void *DeviceTy::getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
428                                int64_t Size) {
429   uintptr_t HP = (uintptr_t)HstPtrBegin;
430   LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
431   if (LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) {
432     auto &HT = *LR.Entry;
433     uintptr_t TP = HT.TgtPtrBegin + (HP - HT.HstPtrBegin);
434     return (void *)TP;
435   }
436 
437   return NULL;
438 }
439 
deallocTgtPtr(HDTTMapAccessorTy & HDTTMap,LookupResult LR,int64_t Size)440 int DeviceTy::deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR,
441                             int64_t Size) {
442   // Check if the pointer is contained in any sub-nodes.
443   if (!(LR.Flags.IsContained || LR.Flags.ExtendsBefore ||
444         LR.Flags.ExtendsAfter)) {
445     REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the"
446            " allocated memory\n",
447            DPxPTR(LR.Entry->HstPtrBegin));
448     return OFFLOAD_FAIL;
449   }
450 
451   auto &HT = *LR.Entry;
452   // Verify this thread is still in charge of deleting the entry.
453   assert(HT.getTotalRefCount() == 0 &&
454          HT.getDeleteThreadId() == std::this_thread::get_id() &&
455          "Trying to delete entry that is in use or owned by another thread.");
456 
457   DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
458      DPxPTR(HT.TgtPtrBegin), Size);
459   deleteData((void *)HT.TgtPtrBegin);
460   INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
461        "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
462        ", Size=%" PRId64 ", Name=%s\n",
463        DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size,
464        (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str() : "unknown");
465   void *Event = LR.Entry->getEvent();
466   HDTTMap->erase(LR.Entry);
467   delete LR.Entry;
468 
469   int Ret = OFFLOAD_SUCCESS;
470   if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
471     REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event));
472     Ret = OFFLOAD_FAIL;
473   }
474 
475   return Ret;
476 }
477 
478 /// Init device, should not be called directly.
init()479 void DeviceTy::init() {
480   // Make call to init_requires if it exists for this plugin.
481   if (RTL->init_requires)
482     RTL->init_requires(PM->RTLs.RequiresFlags);
483   int32_t Ret = RTL->init_device(RTLDeviceID);
484   if (Ret != OFFLOAD_SUCCESS)
485     return;
486 
487   IsInit = true;
488 }
489 
490 /// Thread-safe method to initialize the device only once.
initOnce()491 int32_t DeviceTy::initOnce() {
492   std::call_once(InitFlag, &DeviceTy::init, this);
493 
494   // At this point, if IsInit is true, then either this thread or some other
495   // thread in the past successfully initialized the device, so we can return
496   // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
497   // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
498   // that some other thread already attempted to execute init() and if IsInit
499   // is still false, return OFFLOAD_FAIL.
500   if (IsInit)
501     return OFFLOAD_SUCCESS;
502   return OFFLOAD_FAIL;
503 }
504 
deinit()505 void DeviceTy::deinit() {
506   if (RTL->deinit_device)
507     RTL->deinit_device(RTLDeviceID);
508 }
509 
510 // Load binary to device.
loadBinary(void * Img)511 __tgt_target_table *DeviceTy::loadBinary(void *Img) {
512   std::lock_guard<decltype(RTL->Mtx)> LG(RTL->Mtx);
513   return RTL->load_binary(RTLDeviceID, Img);
514 }
515 
allocData(int64_t Size,void * HstPtr,int32_t Kind)516 void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
517   return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
518 }
519 
deleteData(void * TgtPtrBegin)520 int32_t DeviceTy::deleteData(void *TgtPtrBegin) {
521   return RTL->data_delete(RTLDeviceID, TgtPtrBegin);
522 }
523 
524 // Submit data to device
submitData(void * TgtPtrBegin,void * HstPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo)525 int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
526                              AsyncInfoTy &AsyncInfo) {
527   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
528     HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
529     LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
530     auto *HT = &*LR.Entry;
531 
532     INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
533          "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
534          ", Size=%" PRId64 ", Name=%s\n",
535          DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size,
536          (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
537                                 : "unknown");
538   }
539 
540   if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
541     return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
542   return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
543                                 AsyncInfo);
544 }
545 
546 // Retrieve data from device
retrieveData(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo)547 int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
548                                int64_t Size, AsyncInfoTy &AsyncInfo) {
549   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
550     HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
551     LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
552     auto *HT = &*LR.Entry;
553     INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
554          "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD
555          ", Size=%" PRId64 ", Name=%s\n",
556          DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size,
557          (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
558                                 : "unknown");
559   }
560 
561   if (!RTL->data_retrieve_async || !RTL->synchronize)
562     return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
563   return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
564                                   AsyncInfo);
565 }
566 
567 // Copy data from current device to destination device directly
dataExchange(void * SrcPtr,DeviceTy & DstDev,void * DstPtr,int64_t Size,AsyncInfoTy & AsyncInfo)568 int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
569                                int64_t Size, AsyncInfoTy &AsyncInfo) {
570   if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
571     assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
572     return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
573                               Size);
574   }
575   return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
576                                   DstPtr, Size, AsyncInfo);
577 }
578 
579 // Run region on device
runRegion(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,AsyncInfoTy & AsyncInfo)580 int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
581                             ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
582                             AsyncInfoTy &AsyncInfo) {
583   if (!RTL->run_region || !RTL->synchronize)
584     return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
585                            TgtVarsSize);
586   return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
587                                TgtVarsSize, AsyncInfo);
588 }
589 
590 // Run region on device
printDeviceInfo(int32_t RTLDevId)591 bool DeviceTy::printDeviceInfo(int32_t RTLDevId) {
592   if (!RTL->print_device_info)
593     return false;
594   RTL->print_device_info(RTLDevId);
595   return true;
596 }
597 
598 // Run team region on device.
runTeamRegion(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,int32_t NumTeams,int32_t ThreadLimit,uint64_t LoopTripCount,AsyncInfoTy & AsyncInfo)599 int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
600                                 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
601                                 int32_t NumTeams, int32_t ThreadLimit,
602                                 uint64_t LoopTripCount,
603                                 AsyncInfoTy &AsyncInfo) {
604   if (!RTL->run_team_region_async || !RTL->synchronize)
605     return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
606                                 TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
607                                 LoopTripCount);
608   return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
609                                     TgtOffsets, TgtVarsSize, NumTeams,
610                                     ThreadLimit, LoopTripCount, AsyncInfo);
611 }
612 
613 // Whether data can be copied to DstDevice directly
isDataExchangable(const DeviceTy & DstDevice)614 bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
615   if (RTL != DstDevice.RTL || !RTL->is_data_exchangable)
616     return false;
617 
618   if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
619     return (RTL->data_exchange != nullptr) ||
620            (RTL->data_exchange_async != nullptr);
621 
622   return false;
623 }
624 
synchronize(AsyncInfoTy & AsyncInfo)625 int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
626   if (RTL->synchronize)
627     return RTL->synchronize(RTLDeviceID, AsyncInfo);
628   return OFFLOAD_SUCCESS;
629 }
630 
createEvent(void ** Event)631 int32_t DeviceTy::createEvent(void **Event) {
632   if (RTL->create_event)
633     return RTL->create_event(RTLDeviceID, Event);
634 
635   return OFFLOAD_SUCCESS;
636 }
637 
recordEvent(void * Event,AsyncInfoTy & AsyncInfo)638 int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
639   if (RTL->record_event)
640     return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
641 
642   return OFFLOAD_SUCCESS;
643 }
644 
waitEvent(void * Event,AsyncInfoTy & AsyncInfo)645 int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
646   if (RTL->wait_event)
647     return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
648 
649   return OFFLOAD_SUCCESS;
650 }
651 
syncEvent(void * Event)652 int32_t DeviceTy::syncEvent(void *Event) {
653   if (RTL->sync_event)
654     return RTL->sync_event(RTLDeviceID, Event);
655 
656   return OFFLOAD_SUCCESS;
657 }
658 
destroyEvent(void * Event)659 int32_t DeviceTy::destroyEvent(void *Event) {
660   if (RTL->create_event)
661     return RTL->destroy_event(RTLDeviceID, Event);
662 
663   return OFFLOAD_SUCCESS;
664 }
665 
666 /// Check whether a device has an associated RTL and initialize it if it's not
667 /// already initialized.
deviceIsReady(int DeviceNum)668 bool deviceIsReady(int DeviceNum) {
669   DP("Checking whether device %d is ready.\n", DeviceNum);
670   // Devices.size() can only change while registering a new
671   // library, so try to acquire the lock of RTLs' mutex.
672   size_t DevicesSize;
673   {
674     std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
675     DevicesSize = PM->Devices.size();
676   }
677   if (DevicesSize <= (size_t)DeviceNum) {
678     DP("Device ID  %d does not have a matching RTL\n", DeviceNum);
679     return false;
680   }
681 
682   // Get device info
683   DeviceTy &Device = *PM->Devices[DeviceNum];
684 
685   DP("Is the device %d (local ID %d) initialized? %d\n", DeviceNum,
686      Device.RTLDeviceID, Device.IsInit);
687 
688   // Init the device if not done before
689   if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
690     DP("Failed to init device %d\n", DeviceNum);
691     return false;
692   }
693 
694   DP("Device %d is ready to use.\n", DeviceNum);
695 
696   return true;
697 }
698