1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "device.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <cinttypes>
#include <climits>
#include <string>
20 
/// Global table of every device known to the runtime. It is indexed by the
/// OpenMP device id and each element is the DeviceTy describing that device.
DevicesTy Devices;
23 
24 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
25   DataMapMtx.lock();
26 
27   // Check if entry exists
28   for (auto &HT : HostDataToTargetMap) {
29     if ((uintptr_t)HstPtrBegin == HT.HstPtrBegin) {
30       // Mapping already exists
31       bool isValid = HT.HstPtrBegin == (uintptr_t) HstPtrBegin &&
32                      HT.HstPtrEnd == (uintptr_t) HstPtrBegin + Size &&
33                      HT.TgtPtrBegin == (uintptr_t) TgtPtrBegin;
34       DataMapMtx.unlock();
35       if (isValid) {
36         DP("Attempt to re-associate the same device ptr+offset with the same "
37             "host ptr, nothing to do\n");
38         return OFFLOAD_SUCCESS;
39       } else {
40         DP("Not allowed to re-associate a different device ptr+offset with the "
41             "same host ptr\n");
42         return OFFLOAD_FAIL;
43       }
44     }
45   }
46 
47   // Mapping does not exist, allocate it with refCount=INF
48   HostDataToTargetTy newEntry((uintptr_t) HstPtrBegin /*HstPtrBase*/,
49                               (uintptr_t) HstPtrBegin /*HstPtrBegin*/,
50                               (uintptr_t) HstPtrBegin + Size /*HstPtrEnd*/,
51                               (uintptr_t) TgtPtrBegin /*TgtPtrBegin*/,
52                               true /*IsRefCountINF*/);
53 
54   DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", HstEnd="
55       DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
56       DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
57       DPxPTR(newEntry.TgtPtrBegin));
58   HostDataToTargetMap.push_front(newEntry);
59 
60   DataMapMtx.unlock();
61 
62   return OFFLOAD_SUCCESS;
63 }
64 
65 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
66   DataMapMtx.lock();
67 
68   // Check if entry exists
69   for (HostDataToTargetListTy::iterator ii = HostDataToTargetMap.begin();
70       ii != HostDataToTargetMap.end(); ++ii) {
71     if ((uintptr_t)HstPtrBegin == ii->HstPtrBegin) {
72       // Mapping exists
73       if (ii->isRefCountInf()) {
74         DP("Association found, removing it\n");
75         HostDataToTargetMap.erase(ii);
76         DataMapMtx.unlock();
77         return OFFLOAD_SUCCESS;
78       } else {
79         DP("Trying to disassociate a pointer which was not mapped via "
80             "omp_target_associate_ptr\n");
81         break;
82       }
83     }
84   }
85 
86   // Mapping not found
87   DataMapMtx.unlock();
88   DP("Association not found\n");
89   return OFFLOAD_FAIL;
90 }
91 
92 // Get ref count of map entry containing HstPtrBegin
93 uint64_t DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
94   uintptr_t hp = (uintptr_t)HstPtrBegin;
95   uint64_t RefCnt = 0;
96 
97   DataMapMtx.lock();
98   for (auto &HT : HostDataToTargetMap) {
99     if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) {
100       DP("DeviceTy::getMapEntry: requested entry found\n");
101       RefCnt = HT.getRefCount();
102       break;
103     }
104   }
105   DataMapMtx.unlock();
106 
107   if (RefCnt == 0) {
108     DP("DeviceTy::getMapEntry: requested entry not found\n");
109   }
110 
111   return RefCnt;
112 }
113 
114 LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
115   uintptr_t hp = (uintptr_t)HstPtrBegin;
116   LookupResult lr;
117 
118   DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
119       Size);
120   for (lr.Entry = HostDataToTargetMap.begin();
121       lr.Entry != HostDataToTargetMap.end(); ++lr.Entry) {
122     auto &HT = *lr.Entry;
123     // Is it contained?
124     lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
125         (hp+Size) <= HT.HstPtrEnd;
126     // Does it extend into an already mapped region?
127     lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
128     // Does it extend beyond the mapped region?
129     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
130 
131     if (lr.Flags.IsContained || lr.Flags.ExtendsBefore ||
132         lr.Flags.ExtendsAfter) {
133       break;
134     }
135   }
136 
137   if (lr.Flags.ExtendsBefore) {
138     DP("WARNING: Pointer is not mapped but section extends into already "
139         "mapped data\n");
140   }
141   if (lr.Flags.ExtendsAfter) {
142     DP("WARNING: Pointer is already mapped but section extends beyond mapped "
143         "region\n");
144   }
145 
146   return lr;
147 }
148 
149 // Used by target_data_begin
150 // Return the target pointer begin (where the data will be moved).
151 // Allocate memory if this is the first occurrence of this mapping.
152 // Increment the reference counter.
153 // If NULL is returned, then either data allocation failed or the user tried
154 // to do an illegal mapping.
155 void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
156     int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
157     bool UpdateRefCount, bool HasCloseModifier) {
158   void *rc = NULL;
159   IsHostPtr = false;
160   DataMapMtx.lock();
161   LookupResult lr = lookupMapping(HstPtrBegin, Size);
162 
163   // Check if the pointer is contained.
164   // If a variable is mapped to the device manually by the user - which would
165   // lead to the IsContained flag to be true - then we must ensure that the
166   // device address is returned even under unified memory conditions.
167   if (lr.Flags.IsContained ||
168       ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
169     auto &HT = *lr.Entry;
170     IsNew = false;
171 
172     if (UpdateRefCount)
173       HT.incRefCount();
174 
175     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
176     DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
177         "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""),
178         DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
179         (UpdateRefCount ? " updated" : ""),
180         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
181     rc = (void *)tp;
182   } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
183     // Explicit extension of mapped data - not allowed.
184     DP("Explicit extension of mapping is not allowed.\n");
185   } else if (Size) {
186     // If unified shared memory is active, implicitly mapped variables that are not
187     // privatized use host address. Any explicitly mapped variables also use
188     // host address where correctness is not impeded. In all other cases
189     // maps are respected.
190     // In addition to the mapping rules above, the close map
191     // modifier forces the mapping of the variable to the device.
192     if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
193         !HasCloseModifier) {
194       DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
195          DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
196       IsHostPtr = true;
197       rc = HstPtrBegin;
198     } else {
199       // If it is not contained and Size > 0 we should create a new entry for it.
200       IsNew = true;
201       uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
202       DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
203          "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
204          DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
205       HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
206           (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
207       rc = (void *)tp;
208     }
209   }
210 
211   DataMapMtx.unlock();
212   return rc;
213 }
214 
215 // Used by target_data_begin, target_data_end, target_data_update and target.
216 // Return the target pointer begin (where the data will be moved).
217 // Decrement the reference counter if called from target_data_end.
218 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
219     bool UpdateRefCount, bool &IsHostPtr) {
220   void *rc = NULL;
221   IsHostPtr = false;
222   IsLast = false;
223   DataMapMtx.lock();
224   LookupResult lr = lookupMapping(HstPtrBegin, Size);
225 
226   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
227     auto &HT = *lr.Entry;
228     IsLast = HT.getRefCount() == 1;
229 
230     if (!IsLast && UpdateRefCount)
231       HT.decRefCount();
232 
233     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
234     DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
235         "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
236         (UpdateRefCount ? " updated" : ""),
237         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
238     rc = (void *)tp;
239   } else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
240     // If the value isn't found in the mapping and unified shared memory
241     // is on then it means we have stumbled upon a value which we need to
242     // use directly from the host.
243     DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
244        DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
245     IsHostPtr = true;
246     rc = HstPtrBegin;
247   }
248 
249   DataMapMtx.unlock();
250   return rc;
251 }
252 
253 // Return the target pointer begin (where the data will be moved).
254 // Lock-free version called when loading global symbols from the fat binary.
255 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
256   uintptr_t hp = (uintptr_t)HstPtrBegin;
257   LookupResult lr = lookupMapping(HstPtrBegin, Size);
258   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
259     auto &HT = *lr.Entry;
260     uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
261     return (void *)tp;
262   }
263 
264   return NULL;
265 }
266 
267 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
268                             bool HasCloseModifier) {
269   if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
270     return OFFLOAD_SUCCESS;
271   // Check if the pointer is contained in any sub-nodes.
272   int rc;
273   DataMapMtx.lock();
274   LookupResult lr = lookupMapping(HstPtrBegin, Size);
275   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
276     auto &HT = *lr.Entry;
277     if (ForceDelete)
278       HT.resetRefCount();
279     if (HT.decRefCount() == 0) {
280       DP("Deleting tgt data " DPxMOD " of size %ld\n",
281           DPxPTR(HT.TgtPtrBegin), Size);
282       RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
283       DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
284           ", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
285           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
286       HostDataToTargetMap.erase(lr.Entry);
287     }
288     rc = OFFLOAD_SUCCESS;
289   } else {
290     DP("Section to delete (hst addr " DPxMOD ") does not exist in the allocated"
291        " memory\n", DPxPTR(HstPtrBegin));
292     rc = OFFLOAD_FAIL;
293   }
294 
295   DataMapMtx.unlock();
296   return rc;
297 }
298 
299 /// Init device, should not be called directly.
300 void DeviceTy::init() {
301   // Make call to init_requires if it exists for this plugin.
302   if (RTL->init_requires)
303     RTL->init_requires(RTLs->RequiresFlags);
304   int32_t rc = RTL->init_device(RTLDeviceID);
305   if (rc == OFFLOAD_SUCCESS) {
306     IsInit = true;
307   }
308 }
309 
310 /// Thread-safe method to initialize the device only once.
311 int32_t DeviceTy::initOnce() {
312   std::call_once(InitFlag, &DeviceTy::init, this);
313 
314   // At this point, if IsInit is true, then either this thread or some other
315   // thread in the past successfully initialized the device, so we can return
316   // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
317   // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
318   // that some other thread already attempted to execute init() and if IsInit
319   // is still false, return OFFLOAD_FAIL.
320   if (IsInit)
321     return OFFLOAD_SUCCESS;
322   else
323     return OFFLOAD_FAIL;
324 }
325 
326 // Load binary to device.
327 __tgt_target_table *DeviceTy::load_binary(void *Img) {
328   RTL->Mtx.lock();
329   __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
330   RTL->Mtx.unlock();
331   return rc;
332 }
333 
334 // Submit data to device.
335 int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
336     int64_t Size) {
337   return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
338 }
339 
340 // Retrieve data from device.
341 int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
342     int64_t Size) {
343   return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
344 }
345 
346 // Run region on device
347 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
348     ptrdiff_t *TgtOffsets, int32_t TgtVarsSize) {
349   return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
350       TgtVarsSize);
351 }
352 
353 // Run team region on device.
354 int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
355     ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams,
356     int32_t ThreadLimit, uint64_t LoopTripCount) {
357   return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
358       TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount);
359 }
360 
361 /// Check whether a device has an associated RTL and initialize it if it's not
362 /// already initialized.
363 bool device_is_ready(int device_num) {
364   DP("Checking whether device %d is ready.\n", device_num);
365   // Devices.size() can only change while registering a new
366   // library, so try to acquire the lock of RTLs' mutex.
367   RTLsMtx->lock();
368   size_t Devices_size = Devices.size();
369   RTLsMtx->unlock();
370   if (Devices_size <= (size_t)device_num) {
371     DP("Device ID  %d does not have a matching RTL\n", device_num);
372     return false;
373   }
374 
375   // Get device info
376   DeviceTy &Device = Devices[device_num];
377 
378   DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
379        Device.RTLDeviceID, Device.IsInit);
380 
381   // Init the device if not done before
382   if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
383     DP("Failed to init device %d\n", device_num);
384     return false;
385   }
386 
387   DP("Device %d is ready to use.\n", device_num);
388 
389   return true;
390 }
391