1 //===--- amdgpu/impl/impl.cpp ------------------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "hsa_api.h"
9 #include "impl_runtime.h"
10 #include "internal.h"
11 #include "rt.h"
12 #include <memory>
13 
14 /*
15  * Data
16  */
17 
18 static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
19                                     const void *src, size_t size,
20                                     hsa_agent_t agent) {
21   const hsa_signal_value_t init = 1;
22   const hsa_signal_value_t success = 0;
23   hsa_signal_store_screlease(sig, init);
24 
25   hsa_status_t err =
26       hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, sig);
27   if (err != HSA_STATUS_SUCCESS) {
28     return err;
29   }
30 
31   // async_copy reports success by decrementing and failure by setting to < 0
32   hsa_signal_value_t got = init;
33   while (got == init) {
34     got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init,
35                                     UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
36   }
37 
38   if (got != success) {
39     return HSA_STATUS_ERROR;
40   }
41 
42   return err;
43 }
44 
45 struct implFreePtrDeletor {
46   void operator()(void *p) {
47     core::Runtime::Memfree(p); // ignore failure to free
48   }
49 };
50 
51 hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
52                              const void *hostSrc, size_t size,
53                              hsa_agent_t agent,
54                              hsa_amd_memory_pool_t MemoryPool) {
55   hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size);
56 
57   // hsa_memory_copy sometimes fails in situations where
58   // allocate + copy succeeds. Looks like it might be related to
59   // locking part of a read only segment. Fall back for now.
60   if (rc == HSA_STATUS_SUCCESS) {
61     return HSA_STATUS_SUCCESS;
62   }
63 
64   void *tempHostPtr;
65   hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
66   if (ret != HSA_STATUS_SUCCESS) {
67     DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
68     return ret;
69   }
70   std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
71   memcpy(tempHostPtr, hostSrc, size);
72 
73   if (invoke_hsa_copy(signal, deviceDest, tempHostPtr, size, agent) !=
74       HSA_STATUS_SUCCESS) {
75     return HSA_STATUS_ERROR;
76   }
77   return HSA_STATUS_SUCCESS;
78 }
79 
80 hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *dest,
81                              const void *deviceSrc, size_t size,
82                              hsa_agent_t agent,
83                              hsa_amd_memory_pool_t MemoryPool) {
84   hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size);
85 
86   // hsa_memory_copy sometimes fails in situations where
87   // allocate + copy succeeds. Looks like it might be related to
88   // locking part of a read only segment. Fall back for now.
89   if (rc == HSA_STATUS_SUCCESS) {
90     return HSA_STATUS_SUCCESS;
91   }
92 
93   void *tempHostPtr;
94   hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
95   if (ret != HSA_STATUS_SUCCESS) {
96     DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
97     return ret;
98   }
99   std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
100 
101   if (invoke_hsa_copy(signal, tempHostPtr, deviceSrc, size, agent) !=
102       HSA_STATUS_SUCCESS) {
103     return HSA_STATUS_ERROR;
104   }
105 
106   memcpy(dest, tempHostPtr, size);
107   return HSA_STATUS_SUCCESS;
108 }
109