1 //===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
11
12 #ifndef OMPTARGET_STATE_H
13 #define OMPTARGET_STATE_H
14
15 #include "Debug.h"
16 #include "Mapping.h"
17 #include "Types.h"
18 #include "Utils.h"
19
20 #pragma omp begin declare target device_type(nohost)
21
22 namespace _OMP {
23
24 namespace memory {
25
/// Allocate \p Size bytes in shared memory, if possible, for \p Reason.
27 ///
28 /// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
29 void *allocShared(uint64_t Size, const char *Reason);
30
/// Free \p Ptr, allocated via allocShared, for \p Reason.
32 ///
33 /// Note: See the restrictions on __kmpc_free_shared for proper usage.
34 void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
35
/// Allocate \p Size bytes in global memory, if possible, for \p Reason.
37 void *allocGlobal(uint64_t Size, const char *Reason);
38
39 /// Return a pointer to the dynamic shared memory buffer.
40 void *getDynamicBuffer();
41
/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
43 void freeGlobal(void *Ptr, const char *Reason);
44
45 } // namespace memory
46
47 namespace state {
48
/// Typed compile-time constant holding the value of the
/// SHARED_SCRATCHPAD_SIZE macro.
/// NOTE(review): presumably the size of a shared-memory scratchpad in bytes;
/// the macro is defined outside this header - confirm against the build setup.
inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
50
/// The set of 32-bit ICV values tracked by the runtime.
///
/// One instance is embedded in TeamStateTy and one in every ThreadStateTy.
/// Each member is reachable through lookup32() via the ValueKind enumerator
/// named next to it.
struct ICVStateTy {
  /// Equality comparison against \p Other; defined out of line.
  bool operator==(const ICVStateTy &Other) const;

  /// Check that this state matches \p Other; defined out of line.
  void assertEqual(const ICVStateTy &Other) const;

  uint32_t NThreadsVar;        ///< Selected by VK_NThreads.
  uint32_t LevelVar;           ///< Selected by VK_Level.
  uint32_t ActiveLevelVar;     ///< Selected by VK_ActiveLevel.
  uint32_t MaxActiveLevelsVar; ///< Selected by VK_MaxActiveLevels.
  uint32_t RunSchedVar;        ///< Selected by VK_RunSched.
  uint32_t RunSchedChunkVar;   ///< Selected by VK_RunSchedChunk.
};
63
/// State that exists once per team: the team-wide ICV values plus a few
/// additional team-level values that lookup32() and lookupPtr() hand out.
struct TeamStateTy {
  /// Initialize the team state; defined out of line.
  /// NOTE(review): \p IsSPMD presumably tells whether the kernel runs in SPMD
  /// mode - confirm against the definition.
  void init(bool IsSPMD);

  /// Equality comparison against another team state; defined out of line.
  bool operator==(const TeamStateTy &) const;

  /// Check that this state matches \p Other; defined out of line.
  /// NOTE(review): \p Other is taken by non-const reference, unlike
  /// ICVStateTy::assertEqual. Changing it requires touching the out-of-line
  /// definition as well.
  void assertEqual(TeamStateTy &Other) const;

  /// ICVs
  ///
  /// Preallocated storage for ICV values that are used if the threads have not
  /// set a custom default. The latter is supported but unlikely and slow(er).
  ///
  ///{
  ICVStateTy ICVState;
  ///}

  /// Returned by lookup32() for VK_ParallelTeamSize.
  uint32_t ParallelTeamSize;
  /// Non-zero once a ThreadStateTy has been allocated; lookupImpl() and
  /// lookupForModify32Impl() only consult ThreadStates when this is set.
  /// Returned by lookup32() for VK_HasThreadState.
  uint32_t HasThreadState;
  /// Returned by lookupPtr() for VK_ParallelRegionFn.
  ParallelRegionFnTy ParallelRegionFnVar;
};
84
85 extern TeamStateTy TeamState;
86 #pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)
87
88 struct ThreadStateTy {
89
90 /// ICVs have preallocated storage in the TeamStateTy which is used if a
91 /// thread has not set a custom value. The latter is supported but unlikely.
92 /// When it happens we will allocate dynamic memory to hold the values of all
93 /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
94 /// ICV struct to hold them all. This is slower than alternatives but allows
95 /// users to pay only for what they use.
96 ///
97 state::ICVStateTy ICVState;
98
99 ThreadStateTy *PreviousThreadState;
100
initThreadStateTy101 void init() {
102 ICVState = TeamState.ICVState;
103 PreviousThreadState = nullptr;
104 }
105
initThreadStateTy106 void init(ThreadStateTy *PreviousTS) {
107 ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
108 PreviousThreadState = PreviousTS;
109 }
110 };
111
112 extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
113 #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
114
115 /// Initialize the state machinery. Must be called by all threads.
116 void init(bool IsSPMD);
117
/// Identifies which value a lookup refers to. lookup32() handles every
/// enumerator except VK_ParallelRegionFn, which is handled by lookupPtr().
enum ValueKind {
  // Kinds whose accessor objects are declared in namespace icv.
  VK_NThreads,
  VK_Level,
  VK_ActiveLevel,
  VK_MaxActiveLevels,
  VK_RunSched,
  // ---
  // Kinds whose accessor objects are declared in namespace state.
  VK_RunSchedChunk,
  VK_ParallelRegionFn,
  VK_ParallelTeamSize,
  VK_HasThreadState,
};
131
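/// Enter a data environment. Intended to be balanced by a later call to
/// exitDataEnvironment(); DateEnvironmentRAII pairs the two calls.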
133 void enterDataEnvironment(IdentTy *Ident);
134
/// Exit the data environment entered by the matching enterDataEnvironment()
/// call.
136 void exitDataEnvironment();
137
138 /// TODO
139 struct DateEnvironmentRAII {
DateEnvironmentRAIIDateEnvironmentRAII140 DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
~DateEnvironmentRAIIDateEnvironmentRAII141 ~DateEnvironmentRAII() { exitDataEnvironment(); }
142 };
143
144 /// TODO
145 void resetStateForThread(uint32_t TId);
146
lookupForModify32Impl(uint32_t state::ICVStateTy::* Var,IdentTy * Ident,bool ForceTeamState)147 inline uint32_t &lookupForModify32Impl(uint32_t state::ICVStateTy::*Var,
148 IdentTy *Ident, bool ForceTeamState) {
149 if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() ||
150 !TeamState.HasThreadState))
151 return TeamState.ICVState.*Var;
152 uint32_t TId = mapping::getThreadIdInBlock();
153 if (OMP_UNLIKELY(!ThreadStates[TId])) {
154 ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(memory::allocGlobal(
155 sizeof(ThreadStateTy), "ICV modification outside data environment"));
156 ASSERT(ThreadStates[TId] != nullptr && "Nullptr returned by malloc!");
157 TeamState.HasThreadState = true;
158 ThreadStates[TId]->init();
159 }
160 return ThreadStates[TId]->ICVState.*Var;
161 }
162
lookupImpl(uint32_t state::ICVStateTy::* Var,bool ForceTeamState)163 inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var,
164 bool ForceTeamState) {
165 auto TId = mapping::getThreadIdInBlock();
166 if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() &&
167 TeamState.HasThreadState && ThreadStates[TId]))
168 return ThreadStates[TId]->ICVState.*Var;
169 return TeamState.ICVState.*Var;
170 }
171
172 __attribute__((always_inline, flatten)) inline uint32_t &
lookup32(ValueKind Kind,bool IsReadonly,IdentTy * Ident,bool ForceTeamState)173 lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
174 switch (Kind) {
175 case state::VK_NThreads:
176 if (IsReadonly)
177 return lookupImpl(&ICVStateTy::NThreadsVar, ForceTeamState);
178 return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident,
179 ForceTeamState);
180 case state::VK_Level:
181 if (IsReadonly)
182 return lookupImpl(&ICVStateTy::LevelVar, ForceTeamState);
183 return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident, ForceTeamState);
184 case state::VK_ActiveLevel:
185 if (IsReadonly)
186 return lookupImpl(&ICVStateTy::ActiveLevelVar, ForceTeamState);
187 return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident,
188 ForceTeamState);
189 case state::VK_MaxActiveLevels:
190 if (IsReadonly)
191 return lookupImpl(&ICVStateTy::MaxActiveLevelsVar, ForceTeamState);
192 return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident,
193 ForceTeamState);
194 case state::VK_RunSched:
195 if (IsReadonly)
196 return lookupImpl(&ICVStateTy::RunSchedVar, ForceTeamState);
197 return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident,
198 ForceTeamState);
199 case state::VK_RunSchedChunk:
200 if (IsReadonly)
201 return lookupImpl(&ICVStateTy::RunSchedChunkVar, ForceTeamState);
202 return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident,
203 ForceTeamState);
204 case state::VK_ParallelTeamSize:
205 return TeamState.ParallelTeamSize;
206 case state::VK_HasThreadState:
207 return TeamState.HasThreadState;
208 default:
209 break;
210 }
211 __builtin_unreachable();
212 }
213
214 __attribute__((always_inline, flatten)) inline void *&
lookupPtr(ValueKind Kind,bool IsReadonly,bool ForceTeamState)215 lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
216 switch (Kind) {
217 case state::VK_ParallelRegionFn:
218 return TeamState.ParallelRegionFnVar;
219 default:
220 break;
221 }
222 __builtin_unreachable();
223 }
224
225 /// A class without actual state used to provide a nice interface to lookup and
226 /// update ICV values we can declare in global scope.
227 template <typename Ty, ValueKind Kind> struct Value {
TyValue228 __attribute__((flatten, always_inline)) operator Ty() {
229 return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
230 /* ForceTeamState */ false);
231 }
232
233 __attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) {
234 set(Other, /* IdentTy */ nullptr);
235 return *this;
236 }
237
238 __attribute__((flatten, always_inline)) Value &operator++() {
239 inc(1, /* IdentTy */ nullptr);
240 return *this;
241 }
242
243 __attribute__((flatten, always_inline)) Value &operator--() {
244 inc(-1, /* IdentTy */ nullptr);
245 return *this;
246 }
247
248 __attribute__((flatten, always_inline)) void
249 assert_eq(const Ty &V, IdentTy *Ident = nullptr,
250 bool ForceTeamState = false) {
251 ASSERT(lookup(/* IsReadonly */ true, Ident, ForceTeamState) == V);
252 }
253
254 private:
255 __attribute__((flatten, always_inline)) Ty &
lookupValue256 lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
257 Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
258 return t;
259 }
260
incValue261 __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal,
262 IdentTy *Ident) {
263 return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) +=
264 UpdateVal);
265 }
266
setValue267 __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal,
268 IdentTy *Ident) {
269 return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) =
270 UpdateVal);
271 }
272
273 template <typename VTy, typename Ty2> friend struct ValueRAII;
274 };
275
276 /// A mookup class without actual state used to provide
277 /// a nice interface to lookup and update ICV values
278 /// we can declare in global scope.
279 template <typename Ty, ValueKind Kind> struct PtrValue {
TyPtrValue280 __attribute__((flatten, always_inline)) operator Ty() {
281 return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
282 /* ForceTeamState */ false);
283 }
284
285 __attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) {
286 set(Other);
287 return *this;
288 }
289
290 private:
lookupPtrValue291 Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) {
292 return lookupPtr(Kind, IsReadonly, ForceTeamState);
293 }
294
setPtrValue295 Ty &set(Ty UpdateVal) {
296 return (lookup(/* IsReadonly */ false, /* IdentTy */ nullptr,
297 /* ForceTeamState */ false) = UpdateVal);
298 }
299
300 template <typename VTy, typename Ty2> friend struct ValueRAII;
301 };
302
303 template <typename VTy, typename Ty> struct ValueRAII {
304 ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident,
305 bool ForceTeamState = false)
306 : Ptr(Active ? &V.lookup(/* IsReadonly */ false, Ident, ForceTeamState)
307 : (Ty *)utils::UndefPtr),
308 Val(OldValue), Active(Active) {
309 if (!Active)
310 return;
311 ASSERT(*Ptr == OldValue &&
312 "ValueRAII initialization with wrong old value!");
313 *Ptr = NewValue;
314 }
~ValueRAIIValueRAII315 ~ValueRAII() {
316 if (Active)
317 *Ptr = Val;
318 }
319
320 private:
321 Ty *Ptr;
322 Ty Val;
323 bool Active;
324 };
325
326 /// TODO
327 inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;
328
329 /// TODO
330 inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize;
331
332 /// TODO
333 inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState;
334
335 /// TODO
336 inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn>
337 ParallelRegionFn;
338
339 void runAndCheckState(void(Func(void)));
340
341 void assumeInitialState(bool IsSPMD);
342
343 } // namespace state
344
namespace icv {

/// Accessor for the value selected by state::VK_NThreads
/// (ICVStateTy::NThreadsVar).
inline state::Value<uint32_t, state::VK_NThreads> NThreads;

/// Accessor for the value selected by state::VK_Level
/// (ICVStateTy::LevelVar).
inline state::Value<uint32_t, state::VK_Level> Level;

/// The `active-level` describes which of the parallel levels counted by
/// `level-var` is active. There can only be one.
///
/// active-level-var is 1 if ActiveLevelVar is not 0, otherwise it is 0.
inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;

/// Accessor for the value selected by state::VK_MaxActiveLevels
/// (ICVStateTy::MaxActiveLevelsVar).
inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels;

/// Accessor for the value selected by state::VK_RunSched
/// (ICVStateTy::RunSchedVar).
inline state::Value<uint32_t, state::VK_RunSched> RunSched;

} // namespace icv
366
367 } // namespace _OMP
368
369 #pragma omp end declare target
370
371 #endif
372