1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "device.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <cinttypes>
#include <climits>
#include <string>
20 
/// Global table mapping a device ID (i.e. the OpenMP device id) to its
/// DeviceTy. In this file it is indexed by device number in
/// device_is_ready(), which reads its size while holding RTLsMtx.
DevicesTy Devices;
23 
24 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
25   DataMapMtx.lock();
26 
27   // Check if entry exists
28   for (auto &HT : HostDataToTargetMap) {
29     if ((uintptr_t)HstPtrBegin == HT.HstPtrBegin) {
30       // Mapping already exists
31       bool isValid = HT.HstPtrBegin == (uintptr_t) HstPtrBegin &&
32                      HT.HstPtrEnd == (uintptr_t) HstPtrBegin + Size &&
33                      HT.TgtPtrBegin == (uintptr_t) TgtPtrBegin;
34       DataMapMtx.unlock();
35       if (isValid) {
36         DP("Attempt to re-associate the same device ptr+offset with the same "
37             "host ptr, nothing to do\n");
38         return OFFLOAD_SUCCESS;
39       } else {
40         DP("Not allowed to re-associate a different device ptr+offset with the "
41             "same host ptr\n");
42         return OFFLOAD_FAIL;
43       }
44     }
45   }
46 
47   // Mapping does not exist, allocate it
48   HostDataToTargetTy newEntry;
49 
50   // Set up missing fields
51   newEntry.HstPtrBase = (uintptr_t) HstPtrBegin;
52   newEntry.HstPtrBegin = (uintptr_t) HstPtrBegin;
53   newEntry.HstPtrEnd = (uintptr_t) HstPtrBegin + Size;
54   newEntry.TgtPtrBegin = (uintptr_t) TgtPtrBegin;
55   // refCount must be infinite
56   newEntry.RefCount = INF_REF_CNT;
57 
58   DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", HstEnd="
59       DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
60       DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
61       DPxPTR(newEntry.TgtPtrBegin));
62   HostDataToTargetMap.push_front(newEntry);
63 
64   DataMapMtx.unlock();
65 
66   return OFFLOAD_SUCCESS;
67 }
68 
69 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
70   DataMapMtx.lock();
71 
72   // Check if entry exists
73   for (HostDataToTargetListTy::iterator ii = HostDataToTargetMap.begin();
74       ii != HostDataToTargetMap.end(); ++ii) {
75     if ((uintptr_t)HstPtrBegin == ii->HstPtrBegin) {
76       // Mapping exists
77       if (CONSIDERED_INF(ii->RefCount)) {
78         DP("Association found, removing it\n");
79         HostDataToTargetMap.erase(ii);
80         DataMapMtx.unlock();
81         return OFFLOAD_SUCCESS;
82       } else {
83         DP("Trying to disassociate a pointer which was not mapped via "
84             "omp_target_associate_ptr\n");
85         break;
86       }
87     }
88   }
89 
90   // Mapping not found
91   DataMapMtx.unlock();
92   DP("Association not found\n");
93   return OFFLOAD_FAIL;
94 }
95 
96 // Get ref count of map entry containing HstPtrBegin
97 long DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
98   uintptr_t hp = (uintptr_t)HstPtrBegin;
99   long RefCnt = -1;
100 
101   DataMapMtx.lock();
102   for (auto &HT : HostDataToTargetMap) {
103     if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) {
104       DP("DeviceTy::getMapEntry: requested entry found\n");
105       RefCnt = HT.RefCount;
106       break;
107     }
108   }
109   DataMapMtx.unlock();
110 
111   if (RefCnt < 0) {
112     DP("DeviceTy::getMapEntry: requested entry not found\n");
113   }
114 
115   return RefCnt;
116 }
117 
118 LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
119   uintptr_t hp = (uintptr_t)HstPtrBegin;
120   LookupResult lr;
121 
122   DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
123       Size);
124   for (lr.Entry = HostDataToTargetMap.begin();
125       lr.Entry != HostDataToTargetMap.end(); ++lr.Entry) {
126     auto &HT = *lr.Entry;
127     // Is it contained?
128     lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
129         (hp+Size) <= HT.HstPtrEnd;
130     // Does it extend into an already mapped region?
131     lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
132     // Does it extend beyond the mapped region?
133     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
134 
135     if (lr.Flags.IsContained || lr.Flags.ExtendsBefore ||
136         lr.Flags.ExtendsAfter) {
137       break;
138     }
139   }
140 
141   if (lr.Flags.ExtendsBefore) {
142     DP("WARNING: Pointer is not mapped but section extends into already "
143         "mapped data\n");
144   }
145   if (lr.Flags.ExtendsAfter) {
146     DP("WARNING: Pointer is already mapped but section extends beyond mapped "
147         "region\n");
148   }
149 
150   return lr;
151 }
152 
153 // Used by target_data_begin
154 // Return the target pointer begin (where the data will be moved).
155 // Allocate memory if this is the first occurrence of this mapping.
156 // Increment the reference counter.
157 // If NULL is returned, then either data allocation failed or the user tried
158 // to do an illegal mapping.
159 void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
160     int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
161     bool UpdateRefCount, bool HasCloseModifier) {
162   void *rc = NULL;
163   IsHostPtr = false;
164   DataMapMtx.lock();
165   LookupResult lr = lookupMapping(HstPtrBegin, Size);
166 
167   // Check if the pointer is contained.
168   // If a variable is mapped to the device manually by the user - which would
169   // lead to the IsContained flag to be true - then we must ensure that the
170   // device address is returned even under unified memory conditions.
171   if (lr.Flags.IsContained ||
172       ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
173     auto &HT = *lr.Entry;
174     IsNew = false;
175 
176     if (UpdateRefCount)
177       ++HT.RefCount;
178 
179     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
180     DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
181         "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""),
182         DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
183         (UpdateRefCount ? " updated" : ""),
184         (CONSIDERED_INF(HT.RefCount)) ? "INF" :
185             std::to_string(HT.RefCount).c_str());
186     rc = (void *)tp;
187   } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
188     // Explicit extension of mapped data - not allowed.
189     DP("Explicit extension of mapping is not allowed.\n");
190   } else if (Size) {
191     // If unified shared memory is active, implicitly mapped variables that are not
192     // privatized use host address. Any explicitly mapped variables also use
193     // host address where correctness is not impeded. In all other cases
194     // maps are respected.
195     // In addition to the mapping rules above, the close map
196     // modifier forces the mapping of the variable to the device.
197     if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) {
198       DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
199          DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
200       IsHostPtr = true;
201       rc = HstPtrBegin;
202     } else {
203       // If it is not contained and Size > 0 we should create a new entry for it.
204       IsNew = true;
205       uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
206       DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
207          "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
208          DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
209       HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
210           (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
211       rc = (void *)tp;
212     }
213   }
214 
215   DataMapMtx.unlock();
216   return rc;
217 }
218 
219 // Used by target_data_begin, target_data_end, target_data_update and target.
220 // Return the target pointer begin (where the data will be moved).
221 // Decrement the reference counter if called from target_data_end.
222 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
223     bool UpdateRefCount, bool &IsHostPtr) {
224   void *rc = NULL;
225   IsHostPtr = false;
226   IsLast = false;
227   DataMapMtx.lock();
228   LookupResult lr = lookupMapping(HstPtrBegin, Size);
229 
230   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
231     auto &HT = *lr.Entry;
232     IsLast = !(HT.RefCount > 1);
233 
234     if (HT.RefCount > 1 && UpdateRefCount)
235       --HT.RefCount;
236 
237     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
238     DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
239         "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
240         (UpdateRefCount ? " updated" : ""),
241         (CONSIDERED_INF(HT.RefCount)) ? "INF" :
242             std::to_string(HT.RefCount).c_str());
243     rc = (void *)tp;
244   } else if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
245     // If the value isn't found in the mapping and unified shared memory
246     // is on then it means we have stumbled upon a value which we need to
247     // use directly from the host.
248     DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
249        DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
250     IsHostPtr = true;
251     rc = HstPtrBegin;
252   }
253 
254   DataMapMtx.unlock();
255   return rc;
256 }
257 
258 // Return the target pointer begin (where the data will be moved).
259 // Lock-free version called when loading global symbols from the fat binary.
260 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
261   uintptr_t hp = (uintptr_t)HstPtrBegin;
262   LookupResult lr = lookupMapping(HstPtrBegin, Size);
263   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
264     auto &HT = *lr.Entry;
265     uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
266     return (void *)tp;
267   }
268 
269   return NULL;
270 }
271 
272 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
273                             bool HasCloseModifier) {
274   if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
275     return OFFLOAD_SUCCESS;
276   // Check if the pointer is contained in any sub-nodes.
277   int rc;
278   DataMapMtx.lock();
279   LookupResult lr = lookupMapping(HstPtrBegin, Size);
280   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
281     auto &HT = *lr.Entry;
282     if (ForceDelete)
283       HT.RefCount = 1;
284     if (--HT.RefCount <= 0) {
285       assert(HT.RefCount == 0 && "did not expect a negative ref count");
286       DP("Deleting tgt data " DPxMOD " of size %ld\n",
287           DPxPTR(HT.TgtPtrBegin), Size);
288       RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
289       DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
290           ", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
291           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
292       HostDataToTargetMap.erase(lr.Entry);
293     }
294     rc = OFFLOAD_SUCCESS;
295   } else {
296     DP("Section to delete (hst addr " DPxMOD ") does not exist in the allocated"
297        " memory\n", DPxPTR(HstPtrBegin));
298     rc = OFFLOAD_FAIL;
299   }
300 
301   DataMapMtx.unlock();
302   return rc;
303 }
304 
305 /// Init device, should not be called directly.
306 void DeviceTy::init() {
307   // Make call to init_requires if it exists for this plugin.
308   if (RTL->init_requires)
309     RTL->init_requires(RTLs.RequiresFlags);
310   int32_t rc = RTL->init_device(RTLDeviceID);
311   if (rc == OFFLOAD_SUCCESS) {
312     IsInit = true;
313   }
314 }
315 
316 /// Thread-safe method to initialize the device only once.
317 int32_t DeviceTy::initOnce() {
318   std::call_once(InitFlag, &DeviceTy::init, this);
319 
320   // At this point, if IsInit is true, then either this thread or some other
321   // thread in the past successfully initialized the device, so we can return
322   // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
323   // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
324   // that some other thread already attempted to execute init() and if IsInit
325   // is still false, return OFFLOAD_FAIL.
326   if (IsInit)
327     return OFFLOAD_SUCCESS;
328   else
329     return OFFLOAD_FAIL;
330 }
331 
332 // Load binary to device.
333 __tgt_target_table *DeviceTy::load_binary(void *Img) {
334   RTL->Mtx.lock();
335   __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
336   RTL->Mtx.unlock();
337   return rc;
338 }
339 
340 // Submit data to device.
341 int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
342     int64_t Size) {
343   return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
344 }
345 
346 // Retrieve data from device.
347 int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
348     int64_t Size) {
349   return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
350 }
351 
352 // Run region on device
353 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
354     ptrdiff_t *TgtOffsets, int32_t TgtVarsSize) {
355   return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
356       TgtVarsSize);
357 }
358 
359 // Run team region on device.
360 int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
361     ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams,
362     int32_t ThreadLimit, uint64_t LoopTripCount) {
363   return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
364       TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount);
365 }
366 
367 /// Check whether a device has an associated RTL and initialize it if it's not
368 /// already initialized.
369 bool device_is_ready(int device_num) {
370   DP("Checking whether device %d is ready.\n", device_num);
371   // Devices.size() can only change while registering a new
372   // library, so try to acquire the lock of RTLs' mutex.
373   RTLsMtx.lock();
374   size_t Devices_size = Devices.size();
375   RTLsMtx.unlock();
376   if (Devices_size <= (size_t)device_num) {
377     DP("Device ID  %d does not have a matching RTL\n", device_num);
378     return false;
379   }
380 
381   // Get device info
382   DeviceTy &Device = Devices[device_num];
383 
384   DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
385        Device.RTLDeviceID, Device.IsInit);
386 
387   // Init the device if not done before
388   if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
389     DP("Failed to init device %d\n", device_num);
390     return false;
391   }
392 
393   DP("Device %d is ready to use.\n", device_num);
394 
395   return true;
396 }
397