1 //===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "Mapping.h"
13 #include "Interface.h"
14 #include "State.h"
15 #include "Types.h"
16 #include "Utils.h"
17 
18 #pragma omp declare target
19 
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 
22 using namespace _OMP;
23 
24 namespace _OMP {
25 namespace impl {
26 
27 /// AMDGCN Implementation
28 ///
29 ///{
30 #pragma omp begin declare variant match(device = {arch(amdgcn)})
31 
32 static const llvm::omp::GV &getGridValue() {
33   return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
34 }
35 
36 uint32_t getGridDim(uint32_t n, uint16_t d) {
37   uint32_t q = n / d;
38   return q + (n > q * d);
39 }
40 
41 uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
42                          uint16_t group_size) {
43   uint32_t r = grid_size - group_id * group_size;
44   return (r < group_size) ? r : group_size;
45 }
46 
47 uint32_t getNumHardwareThreadsInBlock() {
48   return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
49                          __builtin_amdgcn_grid_size_x(),
50                          __builtin_amdgcn_workgroup_size_x());
51 }
52 
53 LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
54 
55 LaneMaskTy lanemaskLT() {
56   uint32_t Lane = mapping::getThreadIdInWarp();
57   int64_t Ballot = mapping::activemask();
58   uint64_t Mask = ((uint64_t)1 << Lane) - (uint64_t)1;
59   return Mask & Ballot;
60 }
61 
62 LaneMaskTy lanemaskGT() {
63   uint32_t Lane = mapping::getThreadIdInWarp();
64   if (Lane == (mapping::getWarpSize() - 1))
65     return 0;
66   int64_t Ballot = mapping::activemask();
67   uint64_t Mask = (~((uint64_t)0)) << (Lane + 1);
68   return Mask & Ballot;
69 }
70 
71 uint32_t getThreadIdInWarp() {
72   return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
73 }
74 
75 uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
76 
77 uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
78 
79 uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
80 
81 uint32_t getNumberOfBlocks() {
82   return getGridDim(__builtin_amdgcn_grid_size_x(),
83                     __builtin_amdgcn_workgroup_size_x());
84 }
85 
86 uint32_t getWarpId() {
87   return impl::getThreadIdInBlock() / mapping::getWarpSize();
88 }
89 
90 uint32_t getNumberOfWarpsInBlock() {
91   return mapping::getBlockSize() / mapping::getWarpSize();
92 }
93 
94 #pragma omp end declare variant
95 ///}
96 
97 /// NVPTX Implementation
98 ///
99 ///{
100 #pragma omp begin declare variant match(                                       \
101     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
102 
103 uint32_t getNumHardwareThreadsInBlock() {
104   return __nvvm_read_ptx_sreg_ntid_x();
105 }
106 
107 static const llvm::omp::GV &getGridValue() {
108   return llvm::omp::NVPTXGridValues;
109 }
110 
111 LaneMaskTy activemask() {
112   unsigned int Mask;
113   asm("activemask.b32 %0;" : "=r"(Mask));
114   return Mask;
115 }
116 
117 LaneMaskTy lanemaskLT() {
118   __kmpc_impl_lanemask_t Res;
119   asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
120   return Res;
121 }
122 
123 LaneMaskTy lanemaskGT() {
124   __kmpc_impl_lanemask_t Res;
125   asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
126   return Res;
127 }
128 
129 uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
130 
131 uint32_t getThreadIdInWarp() {
132   return impl::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
133 }
134 
135 uint32_t getKernelSize() {
136   return __nvvm_read_ptx_sreg_nctaid_x() *
137          mapping::getNumberOfProcessorElements();
138 }
139 
140 uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
141 
142 uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
143 
144 uint32_t getWarpId() {
145   return impl::getThreadIdInBlock() / mapping::getWarpSize();
146 }
147 
148 uint32_t getNumberOfWarpsInBlock() {
149   return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
150          mapping::getWarpSize();
151 }
152 
153 #pragma omp end declare variant
154 ///}
155 
156 uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
157 
158 } // namespace impl
159 } // namespace _OMP
160 
161 /// We have to be deliberate about the distinction of `mapping::` and `impl::`
162 /// below to avoid repeating assumptions or including irrelevant ones.
163 ///{
164 
165 static bool isInLastWarp() {
166   uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
167                      ~(mapping::getWarpSize() - 1);
168   return mapping::getThreadIdInBlock() == MainTId;
169 }
170 
171 bool mapping::isMainThreadInGenericMode(bool IsSPMD) {
172   if (IsSPMD || icv::Level)
173     return false;
174 
175   // Check if this is the last warp in the block.
176   return isInLastWarp();
177 }
178 
179 bool mapping::isMainThreadInGenericMode() {
180   return mapping::isMainThreadInGenericMode(mapping::isSPMDMode());
181 }
182 
183 bool mapping::isInitialThreadInLevel0(bool IsSPMD) {
184   if (IsSPMD)
185     return mapping::getThreadIdInBlock() == 0;
186   return isInLastWarp();
187 }
188 
189 bool mapping::isLeaderInWarp() {
190   __kmpc_impl_lanemask_t Active = mapping::activemask();
191   __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT();
192   return utils::popc(Active & LaneMaskLT) == 0;
193 }
194 
195 LaneMaskTy mapping::activemask() { return impl::activemask(); }
196 
197 LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
198 
199 LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
200 
201 uint32_t mapping::getThreadIdInWarp() {
202   uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
203   ASSERT(ThreadIdInWarp < impl::getWarpSize());
204   return ThreadIdInWarp;
205 }
206 
207 uint32_t mapping::getThreadIdInBlock() {
208   uint32_t ThreadIdInBlock = impl::getThreadIdInBlock();
209   ASSERT(ThreadIdInBlock < impl::getNumHardwareThreadsInBlock());
210   return ThreadIdInBlock;
211 }
212 
213 uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
214 
215 uint32_t mapping::getBlockSize(bool IsSPMD) {
216   uint32_t BlockSize = mapping::getNumberOfProcessorElements() -
217                        (!IsSPMD * impl::getWarpSize());
218   return BlockSize;
219 }
220 uint32_t mapping::getBlockSize() {
221   return mapping::getBlockSize(mapping::isSPMDMode());
222 }
223 
224 uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
225 
226 uint32_t mapping::getWarpId() {
227   uint32_t WarpID = impl::getWarpId();
228   ASSERT(WarpID < impl::getNumberOfWarpsInBlock());
229   return WarpID;
230 }
231 
232 uint32_t mapping::getBlockId() {
233   uint32_t BlockId = impl::getBlockId();
234   ASSERT(BlockId < impl::getNumberOfBlocks());
235   return BlockId;
236 }
237 
238 uint32_t mapping::getNumberOfWarpsInBlock() {
239   uint32_t NumberOfWarpsInBlocks = impl::getNumberOfWarpsInBlock();
240   ASSERT(impl::getWarpId() < NumberOfWarpsInBlocks);
241   return NumberOfWarpsInBlocks;
242 }
243 
244 uint32_t mapping::getNumberOfBlocks() {
245   uint32_t NumberOfBlocks = impl::getNumberOfBlocks();
246   ASSERT(impl::getBlockId() < NumberOfBlocks);
247   return NumberOfBlocks;
248 }
249 
250 uint32_t mapping::getNumberOfProcessorElements() {
251   uint32_t NumberOfProcessorElements = impl::getNumHardwareThreadsInBlock();
252   ASSERT(impl::getThreadIdInBlock() < NumberOfProcessorElements);
253   return NumberOfProcessorElements;
254 }
255 
256 ///}
257 
258 /// Execution mode
259 ///
260 ///{
261 static int SHARED(IsSPMDMode);
262 
263 void mapping::init(bool IsSPMD) {
264   if (mapping::isInitialThreadInLevel0(IsSPMD))
265     IsSPMDMode = IsSPMD;
266 }
267 
268 bool mapping::isSPMDMode() { return IsSPMDMode; }
269 
270 bool mapping::isGenericMode() { return !isSPMDMode(); }
271 ///}
272 
273 extern "C" {
274 __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
275   FunctionTracingRAII();
276   return mapping::getThreadIdInBlock();
277 }
278 
279 __attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
280   FunctionTracingRAII();
281   return impl::getNumHardwareThreadsInBlock();
282 }
283 
284 __attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
285   FunctionTracingRAII();
286   return impl::getWarpSize();
287 }
288 }
289 #pragma omp end declare target
290