1 //===--- amdgpu/impl/impl.cpp ------------------------------------- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "rt.h" 9 #include <memory> 10 11 /* 12 * Data 13 */ 14 15 // host pointer (either src or dest) must be locked via hsa_amd_memory_lock 16 static hsa_status_t invoke_hsa_copy(hsa_signal_t signal, void *dest, 17 hsa_agent_t agent, const void *src, 18 size_t size) { 19 const hsa_signal_value_t init = 1; 20 const hsa_signal_value_t success = 0; 21 hsa_signal_store_screlease(signal, init); 22 23 hsa_status_t err = hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, 24 nullptr, signal); 25 if (err != HSA_STATUS_SUCCESS) 26 return err; 27 28 // async_copy reports success by decrementing and failure by setting to < 0 29 hsa_signal_value_t got = init; 30 while (got == init) 31 got = hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_NE, init, 32 UINT64_MAX, HSA_WAIT_STATE_BLOCKED); 33 34 if (got != success) 35 return HSA_STATUS_ERROR; 36 37 return err; 38 } 39 40 struct implFreePtrDeletor { 41 void operator()(void *p) { 42 core::Runtime::Memfree(p); // ignore failure to free 43 } 44 }; 45 46 enum CopyDirection { H2D, D2H }; 47 48 static hsa_status_t locking_async_memcpy(enum CopyDirection direction, 49 hsa_signal_t signal, void *dest, 50 hsa_agent_t agent, void *src, 51 void *lockingPtr, size_t size) { 52 hsa_status_t err; 53 54 void *lockedPtr = nullptr; 55 err = hsa_amd_memory_lock(lockingPtr, size, nullptr, 0, (void **)&lockedPtr); 56 if (err != HSA_STATUS_SUCCESS) 57 return err; 58 59 switch (direction) { 60 case H2D: 61 err = invoke_hsa_copy(signal, dest, agent, lockedPtr, size); 62 break; 63 case D2H: 64 err = invoke_hsa_copy(signal, lockedPtr, agent, src, size); 65 break; 66 default: 67 err = HSA_STATUS_ERROR; // fall into unlock before returning 68 } 69 70 if (err != HSA_STATUS_SUCCESS) { 71 // do not leak locked host pointers, but discard potential error message 72 hsa_amd_memory_unlock(lockingPtr); 73 return err; 74 } 75 76 err = hsa_amd_memory_unlock(lockingPtr); 77 if (err != HSA_STATUS_SUCCESS) 78 return err; 79 80 return HSA_STATUS_SUCCESS; 81 } 82 83 hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest, 84 void *hostSrc, size_t size, 85 hsa_agent_t device_agent, 86 hsa_amd_memory_pool_t MemoryPool) { 87 hsa_status_t err; 88 89 err = locking_async_memcpy(CopyDirection::H2D, signal, deviceDest, 90 device_agent, hostSrc, hostSrc, size); 91 92 if (err == HSA_STATUS_SUCCESS) 93 return err; 94 95 // async memcpy sometimes fails in situations where 96 // allocate + copy succeeds. Looks like it might be related to 97 // locking part of a read only segment. Fall back for now. 98 void *tempHostPtr; 99 hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool); 100 if (ret != HSA_STATUS_SUCCESS) { 101 DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size); 102 return ret; 103 } 104 std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr); 105 memcpy(tempHostPtr, hostSrc, size); 106 107 return locking_async_memcpy(CopyDirection::H2D, signal, deviceDest, 108 device_agent, tempHostPtr, tempHostPtr, size); 109 } 110 111 hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *hostDest, 112 void *deviceSrc, size_t size, 113 hsa_agent_t deviceAgent, 114 hsa_amd_memory_pool_t MemoryPool) { 115 hsa_status_t err; 116 117 // device has always visibility over both pointers, so use that 118 err = locking_async_memcpy(CopyDirection::D2H, signal, hostDest, deviceAgent, 119 deviceSrc, hostDest, size); 120 121 if (err == HSA_STATUS_SUCCESS) 122 return err; 123 124 // hsa_memory_copy sometimes fails in situations where 125 // allocate + copy succeeds. Looks like it might be related to 126 // locking part of a read only segment. Fall back for now. 127 void *tempHostPtr; 128 hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool); 129 if (ret != HSA_STATUS_SUCCESS) { 130 DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size); 131 return ret; 132 } 133 std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr); 134 135 err = locking_async_memcpy(CopyDirection::D2H, signal, tempHostPtr, 136 deviceAgent, deviceSrc, tempHostPtr, size); 137 if (err != HSA_STATUS_SUCCESS) 138 return HSA_STATUS_ERROR; 139 140 memcpy(hostDest, tempHostPtr, size); 141 return HSA_STATUS_SUCCESS; 142 } 143