1 //===----------- api.cpp - Target independent OpenMP target RTL -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of OpenMP API interface functions.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "device.h"
14 #include "omptarget.h"
15 #include "private.h"
16 #include "rtl.h"
17
18 #include <climits>
19 #include <cstdlib>
20 #include <cstring>
21
omp_get_num_devices(void)22 EXTERN int omp_get_num_devices(void) {
23 TIMESCOPE();
24 PM->RTLsMtx.lock();
25 size_t DevicesSize = PM->Devices.size();
26 PM->RTLsMtx.unlock();
27
28 DP("Call to omp_get_num_devices returning %zd\n", DevicesSize);
29
30 return DevicesSize;
31 }
32
omp_get_device_num(void)33 EXTERN int omp_get_device_num(void) {
34 TIMESCOPE();
35 int HostDevice = omp_get_initial_device();
36
37 DP("Call to omp_get_device_num returning %d\n", HostDevice);
38
39 return HostDevice;
40 }
41
omp_get_initial_device(void)42 EXTERN int omp_get_initial_device(void) {
43 TIMESCOPE();
44 int HostDevice = omp_get_num_devices();
45 DP("Call to omp_get_initial_device returning %d\n", HostDevice);
46 return HostDevice;
47 }
48
omp_target_alloc(size_t Size,int DeviceNum)49 EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
50 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
51 }
52
llvm_omp_target_alloc_device(size_t Size,int DeviceNum)53 EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) {
54 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
55 }
56
llvm_omp_target_alloc_host(size_t Size,int DeviceNum)57 EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) {
58 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__);
59 }
60
llvm_omp_target_alloc_shared(size_t Size,int DeviceNum)61 EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) {
62 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__);
63 }
64
llvm_omp_target_dynamic_shared_alloc()65 EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; }
llvm_omp_get_dynamic_shared()66 EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
67
omp_target_free(void * DevicePtr,int DeviceNum)68 EXTERN void omp_target_free(void *DevicePtr, int DeviceNum) {
69 TIMESCOPE();
70 DP("Call to omp_target_free for device %d and address " DPxMOD "\n",
71 DeviceNum, DPxPTR(DevicePtr));
72
73 if (!DevicePtr) {
74 DP("Call to omp_target_free with NULL ptr\n");
75 return;
76 }
77
78 if (DeviceNum == omp_get_initial_device()) {
79 free(DevicePtr);
80 DP("omp_target_free deallocated host ptr\n");
81 return;
82 }
83
84 if (!deviceIsReady(DeviceNum)) {
85 DP("omp_target_free returns, nothing to do\n");
86 return;
87 }
88
89 PM->Devices[DeviceNum]->deleteData(DevicePtr);
90 DP("omp_target_free deallocated device ptr\n");
91 }
92
omp_target_is_present(const void * Ptr,int DeviceNum)93 EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
94 TIMESCOPE();
95 DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
96 DeviceNum, DPxPTR(Ptr));
97
98 if (!Ptr) {
99 DP("Call to omp_target_is_present with NULL ptr, returning false\n");
100 return false;
101 }
102
103 if (DeviceNum == omp_get_initial_device()) {
104 DP("Call to omp_target_is_present on host, returning true\n");
105 return true;
106 }
107
108 PM->RTLsMtx.lock();
109 size_t DevicesSize = PM->Devices.size();
110 PM->RTLsMtx.unlock();
111 if (DevicesSize <= (size_t)DeviceNum) {
112 DP("Call to omp_target_is_present with invalid device ID, returning "
113 "false\n");
114 return false;
115 }
116
117 DeviceTy &Device = *PM->Devices[DeviceNum];
118 bool IsLast; // not used
119 bool IsHostPtr;
120 // omp_target_is_present tests whether a host pointer refers to storage that
121 // is mapped to a given device. However, due to the lack of the storage size,
122 // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
123 // lengh array) is mapped instead of the referred storage.
124 TargetPointerResultTy TPR =
125 Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
126 /*UpdateRefCount=*/false,
127 /*UseHoldRefCount=*/false, IsHostPtr);
128 int Rc = (TPR.TargetPointer != NULL);
129 // Under unified memory the host pointer can be returned by the
130 // getTgtPtrBegin() function which means that there is no device
131 // corresponding point for ptr. This function should return false
132 // in that situation.
133 if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
134 Rc = !IsHostPtr;
135 DP("Call to omp_target_is_present returns %d\n", Rc);
136 return Rc;
137 }
138
omp_target_memcpy(void * Dst,const void * Src,size_t Length,size_t DstOffset,size_t SrcOffset,int DstDevice,int SrcDevice)139 EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
140 size_t DstOffset, size_t SrcOffset, int DstDevice,
141 int SrcDevice) {
142 TIMESCOPE();
143 DP("Call to omp_target_memcpy, dst device %d, src device %d, "
144 "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
145 "src offset %zu, length %zu\n",
146 DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
147 Length);
148
149 if (!Dst || !Src || Length <= 0) {
150 if (Length == 0) {
151 DP("Call to omp_target_memcpy with zero length, nothing to do\n");
152 return OFFLOAD_SUCCESS;
153 }
154
155 REPORT("Call to omp_target_memcpy with invalid arguments\n");
156 return OFFLOAD_FAIL;
157 }
158
159 if (SrcDevice != omp_get_initial_device() && !deviceIsReady(SrcDevice)) {
160 REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n");
161 return OFFLOAD_FAIL;
162 }
163
164 if (DstDevice != omp_get_initial_device() && !deviceIsReady(DstDevice)) {
165 REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n");
166 return OFFLOAD_FAIL;
167 }
168
169 int Rc = OFFLOAD_SUCCESS;
170 void *SrcAddr = (char *)const_cast<void *>(Src) + SrcOffset;
171 void *DstAddr = (char *)Dst + DstOffset;
172
173 if (SrcDevice == omp_get_initial_device() &&
174 DstDevice == omp_get_initial_device()) {
175 DP("copy from host to host\n");
176 const void *P = memcpy(DstAddr, SrcAddr, Length);
177 if (P == NULL)
178 Rc = OFFLOAD_FAIL;
179 } else if (SrcDevice == omp_get_initial_device()) {
180 DP("copy from host to device\n");
181 DeviceTy &DstDev = *PM->Devices[DstDevice];
182 AsyncInfoTy AsyncInfo(DstDev);
183 Rc = DstDev.submitData(DstAddr, SrcAddr, Length, AsyncInfo);
184 } else if (DstDevice == omp_get_initial_device()) {
185 DP("copy from device to host\n");
186 DeviceTy &SrcDev = *PM->Devices[SrcDevice];
187 AsyncInfoTy AsyncInfo(SrcDev);
188 Rc = SrcDev.retrieveData(DstAddr, SrcAddr, Length, AsyncInfo);
189 } else {
190 DP("copy from device to device\n");
191 DeviceTy &SrcDev = *PM->Devices[SrcDevice];
192 DeviceTy &DstDev = *PM->Devices[DstDevice];
193 // First try to use D2D memcpy which is more efficient. If fails, fall back
194 // to unefficient way.
195 if (SrcDev.isDataExchangable(DstDev)) {
196 AsyncInfoTy AsyncInfo(SrcDev);
197 Rc = SrcDev.dataExchange(SrcAddr, DstDev, DstAddr, Length, AsyncInfo);
198 if (Rc == OFFLOAD_SUCCESS)
199 return OFFLOAD_SUCCESS;
200 }
201
202 void *Buffer = malloc(Length);
203 {
204 AsyncInfoTy AsyncInfo(SrcDev);
205 Rc = SrcDev.retrieveData(Buffer, SrcAddr, Length, AsyncInfo);
206 }
207 if (Rc == OFFLOAD_SUCCESS) {
208 AsyncInfoTy AsyncInfo(SrcDev);
209 Rc = DstDev.submitData(DstAddr, Buffer, Length, AsyncInfo);
210 }
211 free(Buffer);
212 }
213
214 DP("omp_target_memcpy returns %d\n", Rc);
215 return Rc;
216 }
217
218 EXTERN int
omp_target_memcpy_rect(void * Dst,const void * Src,size_t ElementSize,int NumDims,const size_t * Volume,const size_t * DstOffsets,const size_t * SrcOffsets,const size_t * DstDimensions,const size_t * SrcDimensions,int DstDevice,int SrcDevice)219 omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
220 int NumDims, const size_t *Volume,
221 const size_t *DstOffsets, const size_t *SrcOffsets,
222 const size_t *DstDimensions, const size_t *SrcDimensions,
223 int DstDevice, int SrcDevice) {
224 TIMESCOPE();
225 DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, "
226 "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
227 "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
228 "volume " DPxMOD ", element size %zu, num_dims %d\n",
229 DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
230 DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
231 DPxPTR(Volume), ElementSize, NumDims);
232
233 if (!(Dst || Src)) {
234 DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n",
235 INT_MAX);
236 return INT_MAX;
237 }
238
239 if (!Dst || !Src || ElementSize < 1 || NumDims < 1 || !Volume ||
240 !DstOffsets || !SrcOffsets || !DstDimensions || !SrcDimensions) {
241 REPORT("Call to omp_target_memcpy_rect with invalid arguments\n");
242 return OFFLOAD_FAIL;
243 }
244
245 int Rc;
246 if (NumDims == 1) {
247 Rc = omp_target_memcpy(Dst, Src, ElementSize * Volume[0],
248 ElementSize * DstOffsets[0],
249 ElementSize * SrcOffsets[0], DstDevice, SrcDevice);
250 } else {
251 size_t DstSliceSize = ElementSize;
252 size_t SrcSliceSize = ElementSize;
253 for (int I = 1; I < NumDims; ++I) {
254 DstSliceSize *= DstDimensions[I];
255 SrcSliceSize *= SrcDimensions[I];
256 }
257
258 size_t DstOff = DstOffsets[0] * DstSliceSize;
259 size_t SrcOff = SrcOffsets[0] * SrcSliceSize;
260 for (size_t I = 0; I < Volume[0]; ++I) {
261 Rc = omp_target_memcpy_rect(
262 (char *)Dst + DstOff + DstSliceSize * I,
263 (char *)const_cast<void *>(Src) + SrcOff + SrcSliceSize * I,
264 ElementSize, NumDims - 1, Volume + 1, DstOffsets + 1, SrcOffsets + 1,
265 DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice);
266
267 if (Rc) {
268 DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n");
269 return Rc;
270 }
271 }
272 }
273
274 DP("omp_target_memcpy_rect returns %d\n", Rc);
275 return Rc;
276 }
277
omp_target_associate_ptr(const void * HostPtr,const void * DevicePtr,size_t Size,size_t DeviceOffset,int DeviceNum)278 EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
279 size_t Size, size_t DeviceOffset,
280 int DeviceNum) {
281 TIMESCOPE();
282 DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", "
283 "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n",
284 DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum);
285
286 if (!HostPtr || !DevicePtr || Size <= 0) {
287 REPORT("Call to omp_target_associate_ptr with invalid arguments\n");
288 return OFFLOAD_FAIL;
289 }
290
291 if (DeviceNum == omp_get_initial_device()) {
292 REPORT("omp_target_associate_ptr: no association possible on the host\n");
293 return OFFLOAD_FAIL;
294 }
295
296 if (!deviceIsReady(DeviceNum)) {
297 REPORT("omp_target_associate_ptr returns OFFLOAD_FAIL\n");
298 return OFFLOAD_FAIL;
299 }
300
301 DeviceTy &Device = *PM->Devices[DeviceNum];
302 void *DeviceAddr = (void *)((uint64_t)DevicePtr + (uint64_t)DeviceOffset);
303 int Rc = Device.associatePtr(const_cast<void *>(HostPtr),
304 const_cast<void *>(DeviceAddr), Size);
305 DP("omp_target_associate_ptr returns %d\n", Rc);
306 return Rc;
307 }
308
omp_target_disassociate_ptr(const void * HostPtr,int DeviceNum)309 EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) {
310 TIMESCOPE();
311 DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", "
312 "device_num %d\n",
313 DPxPTR(HostPtr), DeviceNum);
314
315 if (!HostPtr) {
316 REPORT("Call to omp_target_associate_ptr with invalid host_ptr\n");
317 return OFFLOAD_FAIL;
318 }
319
320 if (DeviceNum == omp_get_initial_device()) {
321 REPORT(
322 "omp_target_disassociate_ptr: no association possible on the host\n");
323 return OFFLOAD_FAIL;
324 }
325
326 if (!deviceIsReady(DeviceNum)) {
327 REPORT("omp_target_disassociate_ptr returns OFFLOAD_FAIL\n");
328 return OFFLOAD_FAIL;
329 }
330
331 DeviceTy &Device = *PM->Devices[DeviceNum];
332 int Rc = Device.disassociatePtr(const_cast<void *>(HostPtr));
333 DP("omp_target_disassociate_ptr returns %d\n", Rc);
334 return Rc;
335 }
336