1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "device.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <climits>
#include <mutex>
#include <string>
20 
/// Global table of offload devices, indexed by the OpenMP device ID; each
/// element is the DeviceTy describing that device.
DevicesTy Devices;
23 
24 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
25   DataMapMtx.lock();
26 
27   // Check if entry exists
28   for (auto &HT : HostDataToTargetMap) {
29     if ((uintptr_t)HstPtrBegin == HT.HstPtrBegin) {
30       // Mapping already exists
31       bool isValid = HT.HstPtrBegin == (uintptr_t) HstPtrBegin &&
32                      HT.HstPtrEnd == (uintptr_t) HstPtrBegin + Size &&
33                      HT.TgtPtrBegin == (uintptr_t) TgtPtrBegin;
34       DataMapMtx.unlock();
35       if (isValid) {
36         DP("Attempt to re-associate the same device ptr+offset with the same "
37             "host ptr, nothing to do\n");
38         return OFFLOAD_SUCCESS;
39       } else {
40         DP("Not allowed to re-associate a different device ptr+offset with the "
41             "same host ptr\n");
42         return OFFLOAD_FAIL;
43       }
44     }
45   }
46 
47   // Mapping does not exist, allocate it with refCount=INF
48   HostDataToTargetTy newEntry((uintptr_t) HstPtrBegin /*HstPtrBase*/,
49                               (uintptr_t) HstPtrBegin /*HstPtrBegin*/,
50                               (uintptr_t) HstPtrBegin + Size /*HstPtrEnd*/,
51                               (uintptr_t) TgtPtrBegin /*TgtPtrBegin*/,
52                               true /*IsRefCountINF*/);
53 
54   DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", HstEnd="
55       DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
56       DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
57       DPxPTR(newEntry.TgtPtrBegin));
58   HostDataToTargetMap.push_front(newEntry);
59 
60   DataMapMtx.unlock();
61 
62   return OFFLOAD_SUCCESS;
63 }
64 
65 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
66   DataMapMtx.lock();
67 
68   // Check if entry exists
69   for (HostDataToTargetListTy::iterator ii = HostDataToTargetMap.begin();
70       ii != HostDataToTargetMap.end(); ++ii) {
71     if ((uintptr_t)HstPtrBegin == ii->HstPtrBegin) {
72       // Mapping exists
73       if (ii->isRefCountInf()) {
74         DP("Association found, removing it\n");
75         HostDataToTargetMap.erase(ii);
76         DataMapMtx.unlock();
77         return OFFLOAD_SUCCESS;
78       } else {
79         DP("Trying to disassociate a pointer which was not mapped via "
80             "omp_target_associate_ptr\n");
81         break;
82       }
83     }
84   }
85 
86   // Mapping not found
87   DataMapMtx.unlock();
88   DP("Association not found\n");
89   return OFFLOAD_FAIL;
90 }
91 
92 // Get ref count of map entry containing HstPtrBegin
93 uint64_t DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
94   uintptr_t hp = (uintptr_t)HstPtrBegin;
95   uint64_t RefCnt = 0;
96 
97   DataMapMtx.lock();
98   for (auto &HT : HostDataToTargetMap) {
99     if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) {
100       DP("DeviceTy::getMapEntry: requested entry found\n");
101       RefCnt = HT.getRefCount();
102       break;
103     }
104   }
105   DataMapMtx.unlock();
106 
107   if (RefCnt == 0) {
108     DP("DeviceTy::getMapEntry: requested entry not found\n");
109   }
110 
111   return RefCnt;
112 }
113 
114 LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
115   uintptr_t hp = (uintptr_t)HstPtrBegin;
116   LookupResult lr;
117 
118   DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
119       Size);
120   for (lr.Entry = HostDataToTargetMap.begin();
121       lr.Entry != HostDataToTargetMap.end(); ++lr.Entry) {
122     auto &HT = *lr.Entry;
123     // Is it contained?
124     lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
125         (hp+Size) <= HT.HstPtrEnd;
126     // Does it extend into an already mapped region?
127     lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
128     // Does it extend beyond the mapped region?
129     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
130 
131     if (lr.Flags.IsContained || lr.Flags.ExtendsBefore ||
132         lr.Flags.ExtendsAfter) {
133       break;
134     }
135   }
136 
137   if (lr.Flags.ExtendsBefore) {
138     DP("WARNING: Pointer is not mapped but section extends into already "
139         "mapped data\n");
140   }
141   if (lr.Flags.ExtendsAfter) {
142     DP("WARNING: Pointer is already mapped but section extends beyond mapped "
143         "region\n");
144   }
145 
146   return lr;
147 }
148 
149 // Used by target_data_begin
150 // Return the target pointer begin (where the data will be moved).
151 // Allocate memory if this is the first occurrence of this mapping.
152 // Increment the reference counter.
153 // If NULL is returned, then either data allocation failed or the user tried
154 // to do an illegal mapping.
155 void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
156     int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
157     bool UpdateRefCount, bool HasCloseModifier) {
158   void *rc = NULL;
159   IsHostPtr = false;
160   IsNew = false;
161   DataMapMtx.lock();
162   LookupResult lr = lookupMapping(HstPtrBegin, Size);
163 
164   // Check if the pointer is contained.
165   // If a variable is mapped to the device manually by the user - which would
166   // lead to the IsContained flag to be true - then we must ensure that the
167   // device address is returned even under unified memory conditions.
168   if (lr.Flags.IsContained ||
169       ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
170     auto &HT = *lr.Entry;
171     IsNew = false;
172 
173     if (UpdateRefCount)
174       HT.incRefCount();
175 
176     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
177     DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
178         "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""),
179         DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
180         (UpdateRefCount ? " updated" : ""),
181         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
182     rc = (void *)tp;
183   } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
184     // Explicit extension of mapped data - not allowed.
185     DP("Explicit extension of mapping is not allowed.\n");
186   } else if (Size) {
187     // If unified shared memory is active, implicitly mapped variables that are not
188     // privatized use host address. Any explicitly mapped variables also use
189     // host address where correctness is not impeded. In all other cases
190     // maps are respected.
191     // In addition to the mapping rules above, the close map
192     // modifier forces the mapping of the variable to the device.
193     if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
194         !HasCloseModifier) {
195       DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
196          DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
197       IsHostPtr = true;
198       rc = HstPtrBegin;
199     } else {
200       // If it is not contained and Size > 0 we should create a new entry for it.
201       IsNew = true;
202       uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
203       DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
204          "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
205          DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
206       HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
207           (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
208       rc = (void *)tp;
209     }
210   }
211 
212   DataMapMtx.unlock();
213   return rc;
214 }
215 
216 // Used by target_data_begin, target_data_end, target_data_update and target.
217 // Return the target pointer begin (where the data will be moved).
218 // Decrement the reference counter if called from target_data_end.
219 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
220     bool UpdateRefCount, bool &IsHostPtr) {
221   void *rc = NULL;
222   IsHostPtr = false;
223   IsLast = false;
224   DataMapMtx.lock();
225   LookupResult lr = lookupMapping(HstPtrBegin, Size);
226 
227   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
228     auto &HT = *lr.Entry;
229     IsLast = HT.getRefCount() == 1;
230 
231     if (!IsLast && UpdateRefCount)
232       HT.decRefCount();
233 
234     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
235     DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
236         "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
237         (UpdateRefCount ? " updated" : ""),
238         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
239     rc = (void *)tp;
240   } else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
241     // If the value isn't found in the mapping and unified shared memory
242     // is on then it means we have stumbled upon a value which we need to
243     // use directly from the host.
244     DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
245        DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
246     IsHostPtr = true;
247     rc = HstPtrBegin;
248   }
249 
250   DataMapMtx.unlock();
251   return rc;
252 }
253 
254 // Return the target pointer begin (where the data will be moved).
255 // Lock-free version called when loading global symbols from the fat binary.
256 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
257   uintptr_t hp = (uintptr_t)HstPtrBegin;
258   LookupResult lr = lookupMapping(HstPtrBegin, Size);
259   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
260     auto &HT = *lr.Entry;
261     uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
262     return (void *)tp;
263   }
264 
265   return NULL;
266 }
267 
268 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
269                             bool HasCloseModifier) {
270   if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
271     return OFFLOAD_SUCCESS;
272   // Check if the pointer is contained in any sub-nodes.
273   int rc;
274   DataMapMtx.lock();
275   LookupResult lr = lookupMapping(HstPtrBegin, Size);
276   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
277     auto &HT = *lr.Entry;
278     if (ForceDelete)
279       HT.resetRefCount();
280     if (HT.decRefCount() == 0) {
281       DP("Deleting tgt data " DPxMOD " of size %ld\n",
282           DPxPTR(HT.TgtPtrBegin), Size);
283       RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
284       DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
285           ", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
286           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
287       HostDataToTargetMap.erase(lr.Entry);
288     }
289     rc = OFFLOAD_SUCCESS;
290   } else {
291     DP("Section to delete (hst addr " DPxMOD ") does not exist in the allocated"
292        " memory\n", DPxPTR(HstPtrBegin));
293     rc = OFFLOAD_FAIL;
294   }
295 
296   DataMapMtx.unlock();
297   return rc;
298 }
299 
300 /// Init device, should not be called directly.
301 void DeviceTy::init() {
302   // Make call to init_requires if it exists for this plugin.
303   if (RTL->init_requires)
304     RTL->init_requires(RTLs->RequiresFlags);
305   int32_t rc = RTL->init_device(RTLDeviceID);
306   if (rc == OFFLOAD_SUCCESS) {
307     IsInit = true;
308   }
309 }
310 
311 /// Thread-safe method to initialize the device only once.
312 int32_t DeviceTy::initOnce() {
313   std::call_once(InitFlag, &DeviceTy::init, this);
314 
315   // At this point, if IsInit is true, then either this thread or some other
316   // thread in the past successfully initialized the device, so we can return
317   // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
318   // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
319   // that some other thread already attempted to execute init() and if IsInit
320   // is still false, return OFFLOAD_FAIL.
321   if (IsInit)
322     return OFFLOAD_SUCCESS;
323   else
324     return OFFLOAD_FAIL;
325 }
326 
327 // Load binary to device.
328 __tgt_target_table *DeviceTy::load_binary(void *Img) {
329   RTL->Mtx.lock();
330   __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
331   RTL->Mtx.unlock();
332   return rc;
333 }
334 
335 // Submit data to device
336 int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
337                               int64_t Size, __tgt_async_info *AsyncInfoPtr) {
338   if (!AsyncInfoPtr || !RTL->data_submit_async || !RTL->synchronize)
339     return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
340   else
341     return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
342                                   AsyncInfoPtr);
343 }
344 
345 // Retrieve data from device
346 int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
347                                 int64_t Size, __tgt_async_info *AsyncInfoPtr) {
348   if (!AsyncInfoPtr || !RTL->data_retrieve_async || !RTL->synchronize)
349     return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
350   else
351     return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
352                                     AsyncInfoPtr);
353 }
354 
355 // Run region on device
356 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
357                              ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
358                              __tgt_async_info *AsyncInfoPtr) {
359   if (!AsyncInfoPtr || !RTL->run_region || !RTL->synchronize)
360     return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
361                            TgtVarsSize);
362   else
363     return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
364                                  TgtOffsets, TgtVarsSize, AsyncInfoPtr);
365 }
366 
367 // Run team region on device.
368 int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
369                                   ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
370                                   int32_t NumTeams, int32_t ThreadLimit,
371                                   uint64_t LoopTripCount,
372                                   __tgt_async_info *AsyncInfoPtr) {
373   if (!AsyncInfoPtr || !RTL->run_team_region_async || !RTL->synchronize)
374     return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
375                                 TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
376                                 LoopTripCount);
377   else
378     return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
379                                       TgtOffsets, TgtVarsSize, NumTeams,
380                                       ThreadLimit, LoopTripCount, AsyncInfoPtr);
381 }
382 
383 /// Check whether a device has an associated RTL and initialize it if it's not
384 /// already initialized.
385 bool device_is_ready(int device_num) {
386   DP("Checking whether device %d is ready.\n", device_num);
387   // Devices.size() can only change while registering a new
388   // library, so try to acquire the lock of RTLs' mutex.
389   RTLsMtx->lock();
390   size_t Devices_size = Devices.size();
391   RTLsMtx->unlock();
392   if (Devices_size <= (size_t)device_num) {
393     DP("Device ID  %d does not have a matching RTL\n", device_num);
394     return false;
395   }
396 
397   // Get device info
398   DeviceTy &Device = Devices[device_num];
399 
400   DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
401        Device.RTLDeviceID, Device.IsInit);
402 
403   // Init the device if not done before
404   if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
405     DP("Failed to init device %d\n", device_num);
406     return false;
407   }
408 
409   DP("Device %d is ready to use.\n", device_num);
410 
411   return true;
412 }
413