1 //===--- amdgpu/impl/impl.cpp ------------------------------------- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "hsa_api.h" 9 #include "impl_runtime.h" 10 #include "internal.h" 11 #include "rt.h" 12 #include <memory> 13 14 /* 15 * Data 16 */ 17 18 static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest, 19 const void *src, size_t size, 20 hsa_agent_t agent) { 21 const hsa_signal_value_t init = 1; 22 const hsa_signal_value_t success = 0; 23 hsa_signal_store_screlease(sig, init); 24 25 hsa_status_t err = 26 hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, sig); 27 if (err != HSA_STATUS_SUCCESS) { 28 return err; 29 } 30 31 // async_copy reports success by decrementing and failure by setting to < 0 32 hsa_signal_value_t got = init; 33 while (got == init) { 34 got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init, 35 UINT64_MAX, HSA_WAIT_STATE_BLOCKED); 36 } 37 38 if (got != success) { 39 return HSA_STATUS_ERROR; 40 } 41 42 return err; 43 } 44 45 struct implFreePtrDeletor { 46 void operator()(void *p) { 47 core::Runtime::Memfree(p); // ignore failure to free 48 } 49 }; 50 51 hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest, 52 const void *hostSrc, size_t size, 53 hsa_agent_t agent, 54 hsa_amd_memory_pool_t MemoryPool) { 55 hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size); 56 57 // hsa_memory_copy sometimes fails in situations where 58 // allocate + copy succeeds. Looks like it might be related to 59 // locking part of a read only segment. Fall back for now. 60 if (rc == HSA_STATUS_SUCCESS) { 61 return HSA_STATUS_SUCCESS; 62 } 63 64 void *tempHostPtr; 65 hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool); 66 if (ret != HSA_STATUS_SUCCESS) { 67 DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size); 68 return ret; 69 } 70 std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr); 71 memcpy(tempHostPtr, hostSrc, size); 72 73 if (invoke_hsa_copy(signal, deviceDest, tempHostPtr, size, agent) != 74 HSA_STATUS_SUCCESS) { 75 return HSA_STATUS_ERROR; 76 } 77 return HSA_STATUS_SUCCESS; 78 } 79 80 hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *dest, 81 const void *deviceSrc, size_t size, 82 hsa_agent_t agent, 83 hsa_amd_memory_pool_t MemoryPool) { 84 hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size); 85 86 // hsa_memory_copy sometimes fails in situations where 87 // allocate + copy succeeds. Looks like it might be related to 88 // locking part of a read only segment. Fall back for now. 89 if (rc == HSA_STATUS_SUCCESS) { 90 return HSA_STATUS_SUCCESS; 91 } 92 93 void *tempHostPtr; 94 hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool); 95 if (ret != HSA_STATUS_SUCCESS) { 96 DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size); 97 return ret; 98 } 99 std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr); 100 101 if (invoke_hsa_copy(signal, tempHostPtr, deviceSrc, size, agent) != 102 HSA_STATUS_SUCCESS) { 103 return HSA_STATUS_ERROR; 104 } 105 106 memcpy(dest, tempHostPtr, size); 107 return HSA_STATUS_SUCCESS; 108 } 109