1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "device.h"
14 #include "private.h"
15 #include "rtl.h"
16 
17 #include <cassert>
18 #include <climits>
19 #include <string>
20 
/// Map between Device ID (i.e. OpenMP device id) and its DeviceTy.
/// device_is_ready() indexes this container by device number and reads its
/// size while holding RTLsMtx.
DevicesTy Devices;
23 
24 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
25   DataMapMtx.lock();
26 
27   // Check if entry exists
28   for (auto &HT : HostDataToTargetMap) {
29     if ((uintptr_t)HstPtrBegin == HT.HstPtrBegin) {
30       // Mapping already exists
31       bool isValid = HT.HstPtrBegin == (uintptr_t) HstPtrBegin &&
32                      HT.HstPtrEnd == (uintptr_t) HstPtrBegin + Size &&
33                      HT.TgtPtrBegin == (uintptr_t) TgtPtrBegin;
34       DataMapMtx.unlock();
35       if (isValid) {
36         DP("Attempt to re-associate the same device ptr+offset with the same "
37             "host ptr, nothing to do\n");
38         return OFFLOAD_SUCCESS;
39       } else {
40         DP("Not allowed to re-associate a different device ptr+offset with the "
41             "same host ptr\n");
42         return OFFLOAD_FAIL;
43       }
44     }
45   }
46 
47   // Mapping does not exist, allocate it with refCount=INF
48   HostDataToTargetTy newEntry((uintptr_t) HstPtrBegin /*HstPtrBase*/,
49                               (uintptr_t) HstPtrBegin /*HstPtrBegin*/,
50                               (uintptr_t) HstPtrBegin + Size /*HstPtrEnd*/,
51                               (uintptr_t) TgtPtrBegin /*TgtPtrBegin*/,
52                               true /*IsRefCountINF*/);
53 
54   DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", HstEnd="
55       DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
56       DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
57       DPxPTR(newEntry.TgtPtrBegin));
58   HostDataToTargetMap.push_front(newEntry);
59 
60   DataMapMtx.unlock();
61 
62   return OFFLOAD_SUCCESS;
63 }
64 
65 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
66   DataMapMtx.lock();
67 
68   // Check if entry exists
69   for (HostDataToTargetListTy::iterator ii = HostDataToTargetMap.begin();
70       ii != HostDataToTargetMap.end(); ++ii) {
71     if ((uintptr_t)HstPtrBegin == ii->HstPtrBegin) {
72       // Mapping exists
73       if (ii->isRefCountInf()) {
74         DP("Association found, removing it\n");
75         HostDataToTargetMap.erase(ii);
76         DataMapMtx.unlock();
77         return OFFLOAD_SUCCESS;
78       } else {
79         DP("Trying to disassociate a pointer which was not mapped via "
80             "omp_target_associate_ptr\n");
81         break;
82       }
83     }
84   }
85 
86   // Mapping not found
87   DataMapMtx.unlock();
88   DP("Association not found\n");
89   return OFFLOAD_FAIL;
90 }
91 
92 // Get ref count of map entry containing HstPtrBegin
93 uint64_t DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
94   uintptr_t hp = (uintptr_t)HstPtrBegin;
95   uint64_t RefCnt = 0;
96 
97   DataMapMtx.lock();
98   for (auto &HT : HostDataToTargetMap) {
99     if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) {
100       DP("DeviceTy::getMapEntry: requested entry found\n");
101       RefCnt = HT.getRefCount();
102       break;
103     }
104   }
105   DataMapMtx.unlock();
106 
107   if (RefCnt == 0) {
108     DP("DeviceTy::getMapEntry: requested entry not found\n");
109   }
110 
111   return RefCnt;
112 }
113 
114 LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
115   uintptr_t hp = (uintptr_t)HstPtrBegin;
116   LookupResult lr;
117 
118   DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
119       Size);
120   for (lr.Entry = HostDataToTargetMap.begin();
121       lr.Entry != HostDataToTargetMap.end(); ++lr.Entry) {
122     auto &HT = *lr.Entry;
123     // Is it contained?
124     lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
125         (hp+Size) <= HT.HstPtrEnd;
126     // Does it extend into an already mapped region?
127     lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
128     // Does it extend beyond the mapped region?
129     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
130 
131     if (lr.Flags.IsContained || lr.Flags.ExtendsBefore ||
132         lr.Flags.ExtendsAfter) {
133       break;
134     }
135   }
136 
137   if (lr.Flags.ExtendsBefore) {
138     DP("WARNING: Pointer is not mapped but section extends into already "
139         "mapped data\n");
140   }
141   if (lr.Flags.ExtendsAfter) {
142     DP("WARNING: Pointer is already mapped but section extends beyond mapped "
143         "region\n");
144   }
145 
146   return lr;
147 }
148 
149 // Used by target_data_begin
150 // Return the target pointer begin (where the data will be moved).
151 // Allocate memory if this is the first occurrence of this mapping.
152 // Increment the reference counter.
153 // If NULL is returned, then either data allocation failed or the user tried
154 // to do an illegal mapping.
155 void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
156     int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
157     bool UpdateRefCount, bool HasCloseModifier) {
158   void *rc = NULL;
159   IsHostPtr = false;
160   DataMapMtx.lock();
161   LookupResult lr = lookupMapping(HstPtrBegin, Size);
162 
163   // Check if the pointer is contained.
164   // If a variable is mapped to the device manually by the user - which would
165   // lead to the IsContained flag to be true - then we must ensure that the
166   // device address is returned even under unified memory conditions.
167   if (lr.Flags.IsContained ||
168       ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
169     auto &HT = *lr.Entry;
170     IsNew = false;
171 
172     if (UpdateRefCount)
173       HT.incRefCount();
174 
175     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
176     DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
177         "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""),
178         DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
179         (UpdateRefCount ? " updated" : ""),
180         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
181     rc = (void *)tp;
182   } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
183     // Explicit extension of mapped data - not allowed.
184     DP("Explicit extension of mapping is not allowed.\n");
185   } else if (Size) {
186     // If unified shared memory is active, implicitly mapped variables that are not
187     // privatized use host address. Any explicitly mapped variables also use
188     // host address where correctness is not impeded. In all other cases
189     // maps are respected.
190     // In addition to the mapping rules above, the close map
191     // modifier forces the mapping of the variable to the device.
192     if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) {
193       DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
194          DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
195       IsHostPtr = true;
196       rc = HstPtrBegin;
197     } else {
198       // If it is not contained and Size > 0 we should create a new entry for it.
199       IsNew = true;
200       uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
201       DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
202          "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
203          DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
204       HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
205           (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
206       rc = (void *)tp;
207     }
208   }
209 
210   DataMapMtx.unlock();
211   return rc;
212 }
213 
214 // Used by target_data_begin, target_data_end, target_data_update and target.
215 // Return the target pointer begin (where the data will be moved).
216 // Decrement the reference counter if called from target_data_end.
217 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
218     bool UpdateRefCount, bool &IsHostPtr) {
219   void *rc = NULL;
220   IsHostPtr = false;
221   IsLast = false;
222   DataMapMtx.lock();
223   LookupResult lr = lookupMapping(HstPtrBegin, Size);
224 
225   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
226     auto &HT = *lr.Entry;
227     IsLast = HT.getRefCount() == 1;
228 
229     if (!IsLast && UpdateRefCount)
230       HT.decRefCount();
231 
232     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
233     DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
234         "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
235         (UpdateRefCount ? " updated" : ""),
236         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
237     rc = (void *)tp;
238   } else if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
239     // If the value isn't found in the mapping and unified shared memory
240     // is on then it means we have stumbled upon a value which we need to
241     // use directly from the host.
242     DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
243        DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
244     IsHostPtr = true;
245     rc = HstPtrBegin;
246   }
247 
248   DataMapMtx.unlock();
249   return rc;
250 }
251 
252 // Return the target pointer begin (where the data will be moved).
253 // Lock-free version called when loading global symbols from the fat binary.
254 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
255   uintptr_t hp = (uintptr_t)HstPtrBegin;
256   LookupResult lr = lookupMapping(HstPtrBegin, Size);
257   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
258     auto &HT = *lr.Entry;
259     uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
260     return (void *)tp;
261   }
262 
263   return NULL;
264 }
265 
266 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
267                             bool HasCloseModifier) {
268   if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
269     return OFFLOAD_SUCCESS;
270   // Check if the pointer is contained in any sub-nodes.
271   int rc;
272   DataMapMtx.lock();
273   LookupResult lr = lookupMapping(HstPtrBegin, Size);
274   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
275     auto &HT = *lr.Entry;
276     if (ForceDelete)
277       HT.resetRefCount();
278     if (HT.decRefCount() == 0) {
279       DP("Deleting tgt data " DPxMOD " of size %ld\n",
280           DPxPTR(HT.TgtPtrBegin), Size);
281       RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
282       DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
283           ", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
284           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
285       HostDataToTargetMap.erase(lr.Entry);
286     }
287     rc = OFFLOAD_SUCCESS;
288   } else {
289     DP("Section to delete (hst addr " DPxMOD ") does not exist in the allocated"
290        " memory\n", DPxPTR(HstPtrBegin));
291     rc = OFFLOAD_FAIL;
292   }
293 
294   DataMapMtx.unlock();
295   return rc;
296 }
297 
298 /// Init device, should not be called directly.
299 void DeviceTy::init() {
300   // Make call to init_requires if it exists for this plugin.
301   if (RTL->init_requires)
302     RTL->init_requires(RTLs.RequiresFlags);
303   int32_t rc = RTL->init_device(RTLDeviceID);
304   if (rc == OFFLOAD_SUCCESS) {
305     IsInit = true;
306   }
307 }
308 
309 /// Thread-safe method to initialize the device only once.
310 int32_t DeviceTy::initOnce() {
311   std::call_once(InitFlag, &DeviceTy::init, this);
312 
313   // At this point, if IsInit is true, then either this thread or some other
314   // thread in the past successfully initialized the device, so we can return
315   // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
316   // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
317   // that some other thread already attempted to execute init() and if IsInit
318   // is still false, return OFFLOAD_FAIL.
319   if (IsInit)
320     return OFFLOAD_SUCCESS;
321   else
322     return OFFLOAD_FAIL;
323 }
324 
325 // Load binary to device.
326 __tgt_target_table *DeviceTy::load_binary(void *Img) {
327   RTL->Mtx.lock();
328   __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
329   RTL->Mtx.unlock();
330   return rc;
331 }
332 
333 // Submit data to device.
334 int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
335     int64_t Size) {
336   return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
337 }
338 
339 // Retrieve data from device.
340 int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
341     int64_t Size) {
342   return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
343 }
344 
345 // Run region on device
346 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
347     ptrdiff_t *TgtOffsets, int32_t TgtVarsSize) {
348   return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
349       TgtVarsSize);
350 }
351 
352 // Run team region on device.
353 int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
354     ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams,
355     int32_t ThreadLimit, uint64_t LoopTripCount) {
356   return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
357       TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount);
358 }
359 
360 /// Check whether a device has an associated RTL and initialize it if it's not
361 /// already initialized.
362 bool device_is_ready(int device_num) {
363   DP("Checking whether device %d is ready.\n", device_num);
364   // Devices.size() can only change while registering a new
365   // library, so try to acquire the lock of RTLs' mutex.
366   RTLsMtx.lock();
367   size_t Devices_size = Devices.size();
368   RTLsMtx.unlock();
369   if (Devices_size <= (size_t)device_num) {
370     DP("Device ID  %d does not have a matching RTL\n", device_num);
371     return false;
372   }
373 
374   // Get device info
375   DeviceTy &Device = Devices[device_num];
376 
377   DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
378        Device.RTLDeviceID, Device.IsInit);
379 
380   // Init the device if not done before
381   if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
382     DP("Failed to init device %d\n", device_num);
383     return false;
384   }
385 
386   DP("Device %d is ready to use.\n", device_num);
387 
388   return true;
389 }
390