1 //===--- amdgpu/impl/internal.h ----------------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef SRC_RUNTIME_INCLUDE_INTERNAL_H_
9 #define SRC_RUNTIME_INCLUDE_INTERNAL_H_
10 #include <inttypes.h>
11 #include <pthread.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 
17 #include <cstring>
18 #include <map>
19 #include <queue>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "hsa_api.h"
25 
26 #include "impl_runtime.h"
27 
28 #ifndef TARGET_NAME
29 #error "Missing TARGET_NAME macro"
30 #endif
31 #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
32 #include "Debug.h"
33 
// Evaluates to 16384.
// NOTE(review): where this limit is applied is not visible in this header.
#define MAX_NUM_KERNELS (16 * 1024)
35 
// Seven consecutive 64-bit slots: three offsets, a hostcall pointer value and
// three slots named as unused. Both the struct tag and the _t alias are
// available to users of this header.
// NOTE(review): the name suggests this is the implicit kernel-argument block;
// how it is consumed is not visible here.
struct impl_implicit_args_s {
  uint64_t offset_x;
  uint64_t offset_y;
  uint64_t offset_z;
  uint64_t hostcall_ptr;
  uint64_t unused0;
  uint64_t unused1;
  uint64_t unused2;
};
using impl_implicit_args_t = impl_implicit_args_s;
// The layout is pinned at compile time to exactly 56 bytes (7 * 8).
static_assert(sizeof(impl_implicit_args_t) == 56, "");
46 
47 // ---------------------- Kernel Start -------------
// Per-kernel record: one 64-bit kernel_object followed by ten 32-bit values.
// NOTE(review): the meaning of each field (segment sizes, register and spill
// counts, argument counts) is inferred from its name only; confirm against
// the code that fills this structure in.
struct atl_kernel_info_s {
  uint64_t kernel_object;
  uint32_t group_segment_size;
  uint32_t private_segment_size;
  uint32_t sgpr_count;
  uint32_t vgpr_count;
  uint32_t sgpr_spill_count;
  uint32_t vgpr_spill_count;
  uint32_t kernel_segment_size;
  uint32_t explicit_argument_count;
  uint32_t implicit_argument_count;
};
using atl_kernel_info_t = atl_kernel_info_s;
60 
// A 64-bit addr paired with a 32-bit size for one symbol.
// NOTE(review): presumably a device address and a size in bytes - confirm
// against the code that populates it.
struct atl_symbol_info_s {
  uint64_t addr;
  uint32_t size;
};
using atl_symbol_info_t = atl_symbol_info_s;
65 
66 // ---------------------- Kernel End -------------
67 
// Forward declarations only: the class definitions are not part of this
// header, so here the types can be used solely through pointers/references.
namespace core {
class Kernel;
class KernelImpl;
class TaskImpl;
class TaskgroupImpl;
} // namespace core
74 
75 struct SignalPoolT {
SignalPoolTSignalPoolT76   SignalPoolT() {}
77   SignalPoolT(const SignalPoolT &) = delete;
78   SignalPoolT(SignalPoolT &&) = delete;
~SignalPoolTSignalPoolT79   ~SignalPoolT() {
80     size_t N = state.size();
81     for (size_t i = 0; i < N; i++) {
82       hsa_signal_t signal = state.front();
83       state.pop();
84       hsa_status_t rc = hsa_signal_destroy(signal);
85       if (rc != HSA_STATUS_SUCCESS) {
86         DP("Signal pool destruction failed\n");
87       }
88     }
89   }
sizeSignalPoolT90   size_t size() {
91     lock l(&mutex);
92     return state.size();
93   }
pushSignalPoolT94   void push(hsa_signal_t s) {
95     lock l(&mutex);
96     state.push(s);
97   }
popSignalPoolT98   hsa_signal_t pop(void) {
99     lock l(&mutex);
100     if (!state.empty()) {
101       hsa_signal_t res = state.front();
102       state.pop();
103       return res;
104     }
105 
106     // Pool empty, attempt to create another signal
107     hsa_signal_t new_signal;
108     hsa_status_t err = hsa_signal_create(0, 0, NULL, &new_signal);
109     if (err == HSA_STATUS_SUCCESS) {
110       return new_signal;
111     }
112 
113     // Fail
114     return {0};
115   }
116 
117 private:
118   static pthread_mutex_t mutex;
119   std::queue<hsa_signal_t> state;
120   struct lock {
lockSignalPoolT::lock121     lock(pthread_mutex_t *m) : m(m) { pthread_mutex_lock(m); }
~lockSignalPoolT::lock122     ~lock() { pthread_mutex_unlock(m); }
123     pthread_mutex_t *m;
124   };
125 };
126 
127 namespace core {
128 hsa_status_t atl_init_gpu_context();
129 
130 hsa_status_t init_hsa();
131 hsa_status_t finalize_hsa();
/*
 * Generic utils
 *
 * Alignment helpers. All of them round with the bit mask ~(alignment - 1), so
 * `alignment` has to be a non-zero power of two. For the pointer overloads the
 * alignment is expressed in bytes. No overflow checking is performed.
 */

// Rounds `value` down to the nearest multiple of `alignment`.
template <typename T> inline T alignDown(T value, size_t alignment) {
  return static_cast<T>(value & ~(alignment - 1));
}

// Rounds the address stored in `value` down to a multiple of `alignment`
// bytes. The result is <= value.
template <typename T> inline T *alignDown(T *value, size_t alignment) {
  return reinterpret_cast<T *>(
      alignDown(reinterpret_cast<intptr_t>(value), alignment));
}

// Rounds `value` up to the nearest multiple of `alignment`; a value that is
// already a multiple is returned unchanged.
template <typename T> inline T alignUp(T value, size_t alignment) {
  return alignDown(static_cast<T>(value + alignment - 1), alignment);
}

// Rounds the address stored in `value` up to a multiple of `alignment` bytes;
// an already aligned pointer is returned unchanged.
template <typename T> inline T *alignUp(T *value, size_t alignment) {
  // The bias has to be added to the integer address. Adding `alignment - 1`
  // to the T* itself is pointer arithmetic and would advance the pointer by
  // (alignment - 1) * sizeof(T) bytes, overshooting for every T larger than
  // one byte (and not compiling at all for void*).
  return reinterpret_cast<T *>(
      alignUp(reinterpret_cast<uintptr_t>(value), alignment));
}
151 
152 extern bool atl_is_impl_initialized();
153 
154 bool handle_group_signal(hsa_signal_value_t value, void *arg);
155 
156 hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
157 } // namespace core
158 
get_error_string(hsa_status_t err)159 inline const char *get_error_string(hsa_status_t err) {
160   const char *res;
161   hsa_status_t rc = hsa_status_string(err, &res);
162   return (rc == HSA_STATUS_SUCCESS) ? res : "HSA_STATUS UNKNOWN.";
163 }
164 
165 #endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_
166