1 //===--- amdgpu/impl/internal.h ----------------------------------- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #ifndef SRC_RUNTIME_INCLUDE_INTERNAL_H_ 9 #define SRC_RUNTIME_INCLUDE_INTERNAL_H_ 10 #include <inttypes.h> 11 #include <pthread.h> 12 #include <stddef.h> 13 #include <stdint.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 17 #include <cstring> 18 #include <map> 19 #include <queue> 20 #include <string> 21 #include <utility> 22 #include <vector> 23 24 #include "hsa_api.h" 25 26 #include "impl_runtime.h" 27 28 #ifndef TARGET_NAME 29 #error "Missing TARGET_NAME macro" 30 #endif 31 #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL" 32 #include "Debug.h" 33 34 #define MAX_NUM_KERNELS (1024 * 16) 35 36 typedef struct impl_implicit_args_s { 37 uint64_t offset_x; 38 uint64_t offset_y; 39 uint64_t offset_z; 40 uint64_t hostcall_ptr; 41 uint64_t unused0; 42 uint64_t unused1; 43 uint64_t unused2; 44 } impl_implicit_args_t; 45 static_assert(sizeof(impl_implicit_args_t) == 56, ""); 46 47 // ---------------------- Kernel Start ------------- 48 typedef struct atl_kernel_info_s { 49 uint64_t kernel_object; 50 uint32_t group_segment_size; 51 uint32_t private_segment_size; 52 uint32_t sgpr_count; 53 uint32_t vgpr_count; 54 uint32_t sgpr_spill_count; 55 uint32_t vgpr_spill_count; 56 uint32_t kernel_segment_size; 57 uint32_t explicit_argument_count; 58 uint32_t implicit_argument_count; 59 } atl_kernel_info_t; 60 61 typedef struct atl_symbol_info_s { 62 uint64_t addr; 63 uint32_t size; 64 } atl_symbol_info_t; 65 66 // ---------------------- Kernel End ------------- 67 68 namespace core { 69 class TaskgroupImpl; 70 class TaskImpl; 71 class Kernel; 72 class KernelImpl; 73 } // namespace core 74 75 struct SignalPoolT { 76 SignalPoolT() {} 77 SignalPoolT(const SignalPoolT &) = delete; 78 SignalPoolT(SignalPoolT &&) = delete; 79 ~SignalPoolT() { 80 size_t N = state.size(); 81 for (size_t i = 0; i < N; i++) { 82 hsa_signal_t signal = state.front(); 83 state.pop(); 84 hsa_status_t rc = hsa_signal_destroy(signal); 85 if (rc != HSA_STATUS_SUCCESS) { 86 DP("Signal pool destruction failed\n"); 87 } 88 } 89 } 90 size_t size() { 91 lock l(&mutex); 92 return state.size(); 93 } 94 void push(hsa_signal_t s) { 95 lock l(&mutex); 96 state.push(s); 97 } 98 hsa_signal_t pop(void) { 99 lock l(&mutex); 100 if (!state.empty()) { 101 hsa_signal_t res = state.front(); 102 state.pop(); 103 return res; 104 } 105 106 // Pool empty, attempt to create another signal 107 hsa_signal_t new_signal; 108 hsa_status_t err = hsa_signal_create(0, 0, NULL, &new_signal); 109 if (err == HSA_STATUS_SUCCESS) { 110 return new_signal; 111 } 112 113 // Fail 114 return {0}; 115 } 116 117 private: 118 static pthread_mutex_t mutex; 119 std::queue<hsa_signal_t> state; 120 struct lock { 121 lock(pthread_mutex_t *m) : m(m) { pthread_mutex_lock(m); } 122 ~lock() { pthread_mutex_unlock(m); } 123 pthread_mutex_t *m; 124 }; 125 }; 126 127 namespace core { 128 hsa_status_t atl_init_gpu_context(); 129 130 hsa_status_t init_hsa(); 131 hsa_status_t finalize_hsa(); 132 /* 133 * Generic utils 134 */ 135 template <typename T> inline T alignDown(T value, size_t alignment) { 136 return (T)(value & ~(alignment - 1)); 137 } 138 139 template <typename T> inline T *alignDown(T *value, size_t alignment) { 140 return reinterpret_cast<T *>(alignDown((intptr_t)value, alignment)); 141 } 142 143 template <typename T> inline T alignUp(T value, size_t alignment) { 144 return alignDown((T)(value + alignment - 1), alignment); 145 } 146 147 template <typename T> inline T *alignUp(T *value, size_t alignment) { 148 return reinterpret_cast<T *>( 149 alignDown((intptr_t)(value + alignment - 1), alignment)); 150 } 151 152 extern bool atl_is_impl_initialized(); 153 154 bool handle_group_signal(hsa_signal_value_t value, void *arg); 155 156 hsa_status_t allow_access_to_all_gpu_agents(void *ptr); 157 } // namespace core 158 159 inline const char *get_error_string(hsa_status_t err) { 160 const char *res; 161 hsa_status_t rc = hsa_status_string(err, &res); 162 return (rc == HSA_STATUS_SUCCESS) ? res : "HSA_STATUS UNKNOWN."; 163 } 164 165 #endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_ 166