1 //===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "Mapping.h"
13 #include "State.h"
14 #include "Types.h"
15 #include "Utils.h"
16 
17 #pragma omp declare target
18 
19 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
20 
21 using namespace _OMP;
22 
23 namespace _OMP {
24 namespace impl {
25 
26 /// AMDGCN Implementation
27 ///
28 ///{
29 #pragma omp begin declare variant match(device = {arch(amdgcn)})
30 
31 constexpr const llvm::omp::GV &getGridValue() {
32   return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
33 }
34 
35 uint32_t getGridDim(uint32_t n, uint16_t d) {
36   uint32_t q = n / d;
37   return q + (n > q * d);
38 }
39 
40 uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
41                          uint16_t group_size) {
42   uint32_t r = grid_size - group_id * group_size;
43   return (r < group_size) ? r : group_size;
44 }
45 
46 LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
47 
48 LaneMaskTy lanemaskLT() {
49   uint32_t Lane = mapping::getThreadIdInWarp();
50   int64_t Ballot = mapping::activemask();
51   uint64_t Mask = ((uint64_t)1 << Lane) - (uint64_t)1;
52   return Mask & Ballot;
53 }
54 
55 LaneMaskTy lanemaskGT() {
56   uint32_t Lane = mapping::getThreadIdInWarp();
57   if (Lane == (mapping::getWarpSize() - 1))
58     return 0;
59   int64_t Ballot = mapping::activemask();
60   uint64_t Mask = (~((uint64_t)0)) << (Lane + 1);
61   return Mask & Ballot;
62 }
63 
64 uint32_t getThreadIdInWarp() {
65   return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
66 }
67 
68 uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
69 
70 uint32_t getBlockSize() {
71   // TODO: verify this logic for generic mode.
72   return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
73                          __builtin_amdgcn_grid_size_x(),
74                          __builtin_amdgcn_workgroup_size_x());
75 }
76 
77 uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
78 
79 uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
80 
81 uint32_t getNumberOfBlocks() {
82   return getGridDim(__builtin_amdgcn_grid_size_x(),
83                     __builtin_amdgcn_workgroup_size_x());
84 }
85 
86 uint32_t getNumberOfProcessorElements() {
87   return getBlockSize();
88 }
89 
90 uint32_t getWarpId() {
91   return mapping::getThreadIdInBlock() / mapping::getWarpSize();
92 }
93 
94 uint32_t getNumberOfWarpsInBlock() {
95   return mapping::getBlockSize() / mapping::getWarpSize();
96 }
97 
98 #pragma omp end declare variant
99 ///}
100 
101 /// NVPTX Implementation
102 ///
103 ///{
104 #pragma omp begin declare variant match(                                       \
105     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
106 
107 constexpr const llvm::omp::GV &getGridValue() {
108   return llvm::omp::NVPTXGridValues;
109 }
110 
111 LaneMaskTy activemask() {
112   unsigned int Mask;
113   asm("activemask.b32 %0;" : "=r"(Mask));
114   return Mask;
115 }
116 
117 LaneMaskTy lanemaskLT() {
118   __kmpc_impl_lanemask_t Res;
119   asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
120   return Res;
121 }
122 
123 LaneMaskTy lanemaskGT() {
124   __kmpc_impl_lanemask_t Res;
125   asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
126   return Res;
127 }
128 
129 uint32_t getThreadIdInWarp() {
130   return mapping::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
131 }
132 
133 uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
134 
135 uint32_t getBlockSize() {
136   return __nvvm_read_ptx_sreg_ntid_x() -
137          (!mapping::isSPMDMode() * mapping::getWarpSize());
138 }
139 
140 uint32_t getKernelSize() { return __nvvm_read_ptx_sreg_nctaid_x(); }
141 
142 uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
143 
144 uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
145 
146 uint32_t getNumberOfProcessorElements() {
147   return __nvvm_read_ptx_sreg_ntid_x();
148 }
149 
150 uint32_t getWarpId() {
151   return mapping::getThreadIdInBlock() / mapping::getWarpSize();
152 }
153 
154 uint32_t getNumberOfWarpsInBlock() {
155   return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
156          mapping::getWarpSize();
157 }
158 
159 #pragma omp end declare variant
160 ///}
161 
162 uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
163 
164 } // namespace impl
165 } // namespace _OMP
166 
167 static bool isInLastWarp() {
168   uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
169                      ~(mapping::getWarpSize() - 1);
170   return mapping::getThreadIdInBlock() == MainTId;
171 }
172 
173 bool mapping::isMainThreadInGenericMode(bool IsSPMD) {
174   if (IsSPMD || icv::Level)
175     return false;
176 
177   // Check if this is the last warp in the block.
178   return isInLastWarp();
179 }
180 
181 bool mapping::isMainThreadInGenericMode() {
182   return mapping::isMainThreadInGenericMode(mapping::isSPMDMode());
183 }
184 
185 bool mapping::isInitialThreadInLevel0(bool IsSPMD) {
186   if (IsSPMD)
187     return mapping::getThreadIdInBlock() == 0;
188   return isInLastWarp();
189 }
190 
191 bool mapping::isLeaderInWarp() {
192   __kmpc_impl_lanemask_t Active = mapping::activemask();
193   __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT();
194   return utils::popc(Active & LaneMaskLT) == 0;
195 }
196 
197 LaneMaskTy mapping::activemask() { return impl::activemask(); }
198 
199 LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
200 
201 LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
202 
203 uint32_t mapping::getThreadIdInWarp() { return impl::getThreadIdInWarp(); }
204 
205 uint32_t mapping::getThreadIdInBlock() { return impl::getThreadIdInBlock(); }
206 
207 uint32_t mapping::getBlockSize() { return impl::getBlockSize(); }
208 
209 uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
210 
211 uint32_t mapping::getBlockId() { return impl::getBlockId(); }
212 
213 uint32_t mapping::getNumberOfBlocks() { return impl::getNumberOfBlocks(); }
214 
215 uint32_t mapping::getNumberOfProcessorElements() {
216   return impl::getNumberOfProcessorElements();
217 }
218 
219 uint32_t mapping::getWarpId() { return impl::getWarpId(); }
220 
221 uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
222 
223 uint32_t mapping::getNumberOfWarpsInBlock() {
224   return impl::getNumberOfWarpsInBlock();
225 }
226 
227 /// Execution mode
228 ///
229 ///{
230 static int SHARED(IsSPMDMode);
231 
232 void mapping::init(bool IsSPMD) {
233   if (mapping::isInitialThreadInLevel0(IsSPMD))
234     IsSPMDMode = IsSPMD;
235 }
236 
237 bool mapping::isSPMDMode() { return IsSPMDMode; }
238 
239 bool mapping::isGenericMode() { return !isSPMDMode(); }
240 ///}
241 
242 extern "C" {
243 __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
244   FunctionTracingRAII();
245   return mapping::getThreadIdInBlock();
246 }
247 
248 __attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
249   FunctionTracingRAII();
250   return mapping::getNumberOfProcessorElements();
251 }
252 }
253 #pragma omp end declare target
254