1 //===--- amdgpu/impl/internal.h ----------------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef SRC_RUNTIME_INCLUDE_INTERNAL_H_
9 #define SRC_RUNTIME_INCLUDE_INTERNAL_H_
10 #include <inttypes.h>
11 #include <pthread.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16
17 #include <cstring>
18 #include <map>
19 #include <queue>
20 #include <string>
21 #include <utility>
22 #include <vector>
23
24 #include "hsa_api.h"
25
26 #include "impl_runtime.h"
27
28 #ifndef TARGET_NAME
29 #error "Missing TARGET_NAME macro"
30 #endif
31 #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
32 #include "Debug.h"
33
34 #define MAX_NUM_KERNELS (1024 * 16)
35
// Record of seven 64-bit slots: three offsets (x, y, z), a hostcall pointer
// and three slots named as unused. The assertion below pins the total size at
// 56 bytes so an accidental field change fails at compile time.
// NOTE(review): presumably mirrors the implicit kernel-argument layout that
// device code expects -- confirm before changing any field.
struct impl_implicit_args_s {
  uint64_t offset_x;
  uint64_t offset_y;
  uint64_t offset_z;
  uint64_t hostcall_ptr;
  uint64_t unused0;
  uint64_t unused1;
  uint64_t unused2;
};
using impl_implicit_args_t = impl_implicit_args_s;
static_assert(sizeof(impl_implicit_args_t) == 56, "");
46
47 // ---------------------- Kernel Start -------------
// Per-kernel record: one 64-bit kernel object value followed by nine 32-bit
// values (segment sizes, register and spill counts, argument counts).
// Field order is part of the layout and must not be rearranged.
struct atl_kernel_info_s {
  uint64_t kernel_object;
  uint32_t group_segment_size;
  uint32_t private_segment_size;
  uint32_t sgpr_count;
  uint32_t vgpr_count;
  uint32_t sgpr_spill_count;
  uint32_t vgpr_spill_count;
  uint32_t kernel_segment_size;
  uint32_t explicit_argument_count;
  uint32_t implicit_argument_count;
};
using atl_kernel_info_t = atl_kernel_info_s;
60
// Symbol record: a 64-bit address paired with a 32-bit size.
struct atl_symbol_info_s {
  uint64_t addr;
  uint32_t size;
};
using atl_symbol_info_t = atl_symbol_info_s;
65
66 // ---------------------- Kernel End -------------
67
// Forward declarations only: these names are introduced as incomplete types so
// they can be referred to by pointer or reference; no definition is provided
// in this header.
namespace core {
class Kernel;
class KernelImpl;
class TaskImpl;
class TaskgroupImpl;
} // namespace core
74
75 struct SignalPoolT {
SignalPoolTSignalPoolT76 SignalPoolT() {}
77 SignalPoolT(const SignalPoolT &) = delete;
78 SignalPoolT(SignalPoolT &&) = delete;
~SignalPoolTSignalPoolT79 ~SignalPoolT() {
80 size_t N = state.size();
81 for (size_t i = 0; i < N; i++) {
82 hsa_signal_t signal = state.front();
83 state.pop();
84 hsa_status_t rc = hsa_signal_destroy(signal);
85 if (rc != HSA_STATUS_SUCCESS) {
86 DP("Signal pool destruction failed\n");
87 }
88 }
89 }
sizeSignalPoolT90 size_t size() {
91 lock l(&mutex);
92 return state.size();
93 }
pushSignalPoolT94 void push(hsa_signal_t s) {
95 lock l(&mutex);
96 state.push(s);
97 }
popSignalPoolT98 hsa_signal_t pop(void) {
99 lock l(&mutex);
100 if (!state.empty()) {
101 hsa_signal_t res = state.front();
102 state.pop();
103 return res;
104 }
105
106 // Pool empty, attempt to create another signal
107 hsa_signal_t new_signal;
108 hsa_status_t err = hsa_signal_create(0, 0, NULL, &new_signal);
109 if (err == HSA_STATUS_SUCCESS) {
110 return new_signal;
111 }
112
113 // Fail
114 return {0};
115 }
116
117 private:
118 static pthread_mutex_t mutex;
119 std::queue<hsa_signal_t> state;
120 struct lock {
lockSignalPoolT::lock121 lock(pthread_mutex_t *m) : m(m) { pthread_mutex_lock(m); }
~lockSignalPoolT::lock122 ~lock() { pthread_mutex_unlock(m); }
123 pthread_mutex_t *m;
124 };
125 };
126
127 namespace core {
// Entry points declared here and defined outside this header. Each takes no
// arguments and reports its outcome as an hsa_status_t.
// NOTE(review): judging by the names, these set up the GPU context and
// initialize / shut down HSA -- confirm against the definitions.
hsa_status_t atl_init_gpu_context();

hsa_status_t init_hsa();
hsa_status_t finalize_hsa();
132 /*
133 * Generic utils
134 */
// Rounds value down by clearing its low bits: value & ~(alignment - 1).
// The mask arithmetic only yields a multiple of alignment when alignment is a
// non-zero power of two; no check is performed.
template <typename T> inline T alignDown(T value, size_t alignment) {
  const size_t low_bits = alignment - 1;
  return static_cast<T>(value & ~low_bits);
}
138
alignDown(T * value,size_t alignment)139 template <typename T> inline T *alignDown(T *value, size_t alignment) {
140 return reinterpret_cast<T *>(alignDown((intptr_t)value, alignment));
141 }
142
// Rounds value up: adds (alignment - 1), converts the sum back to T, then
// rounds that down with alignDown. As with alignDown, the result is only a
// multiple of alignment when alignment is a non-zero power of two.
template <typename T> inline T alignUp(T value, size_t alignment) {
  const T biased = static_cast<T>(value + alignment - 1);
  return alignDown(biased, alignment);
}
146
// Rounds the pointer value up to the next address that is a multiple of
// alignment bytes; an already aligned pointer is returned unchanged.
//
// The bias (alignment - 1) is added to the integer address, not to the typed
// pointer. Adding it to a T * would advance by (alignment - 1) * sizeof(T)
// bytes and overshoot the intended boundary for every element type wider than
// one byte (and would not compile at all for void *).
//
// value:     pointer to round up.
// alignment: alignment in bytes; the mask arithmetic is only meaningful for a
//            non-zero power of two, which is not checked here.
// returns:   the rounded address, reinterpreted as a T *.
template <typename T> inline T *alignUp(T *value, size_t alignment) {
  const uintptr_t mask = static_cast<uintptr_t>(alignment) - 1;
  const uintptr_t address = reinterpret_cast<uintptr_t>(value);
  return reinterpret_cast<T *>((address + mask) & ~mask);
}
151
// Declared here, defined outside this header; returns a bool.
// NOTE(review): by its name this reports whether the runtime has been
// initialized -- confirm against the definition.
extern bool atl_is_impl_initialized();

// Takes a signal value and an opaque argument pointer and returns a bool.
// NOTE(review): this shape matches an HSA asynchronous signal-handler
// callback; confirm how it is registered and what the return value means.
bool handle_group_signal(hsa_signal_value_t value, void *arg);

// Takes a raw pointer and reports its outcome as an hsa_status_t.
// NOTE(review): presumably grants every GPU agent access to the memory at
// ptr -- confirm against the definition.
hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
157 } // namespace core
158
get_error_string(hsa_status_t err)159 inline const char *get_error_string(hsa_status_t err) {
160 const char *res;
161 hsa_status_t rc = hsa_status_string(err, &res);
162 return (rc == HSA_STATUS_SUCCESS) ? res : "HSA_STATUS UNKNOWN.";
163 }
164
165 #endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_
166