1 //===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #ifndef OMPTARGET_STATE_H
13 #define OMPTARGET_STATE_H
14 
15 #include "Debug.h"
16 #include "Mapping.h"
17 #include "Types.h"
18 #include "Utils.h"
19 
20 #pragma omp begin declare target device_type(nohost)
21 
22 namespace _OMP {
23 
namespace memory {

/// Allocate \p Size bytes in shared memory, if possible, for \p Reason.
///
/// \p Reason is a human-readable string describing why the memory is needed.
///
/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
void *allocShared(uint64_t Size, const char *Reason);

/// Free \p Ptr, allocated via allocShared, for \p Reason.
///
/// NOTE(review): \p Bytes is presumably the size that was passed to the
/// matching allocShared call - confirm against the definition.
///
/// Note: See the restrictions on __kmpc_free_shared for proper usage.
void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);

/// Allocate \p Size bytes in global memory, if possible, for \p Reason.
void *allocGlobal(uint64_t Size, const char *Reason);

/// Return a pointer to the dynamic shared memory buffer.
void *getDynamicBuffer();

/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
void freeGlobal(void *Ptr, const char *Reason);

} // namespace memory
46 
47 namespace state {
48 
/// Size of the shared scratchpad, taken verbatim from the
/// SHARED_SCRATCHPAD_SIZE macro.
/// NOTE(review): the unit is presumably bytes - confirm where the macro is
/// defined.
inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
50 
/// The 32-bit ICVs that exist once per team (TeamStateTy::ICVState) and,
/// optionally, once per thread (ThreadStateTy::ICVState). lookup32 selects
/// one of the members below through a pointer-to-member based on the
/// requested ValueKind.
struct ICVStateTy {
  /// Storage behind VK_NThreads.
  uint32_t NThreadsVar;
  /// Storage behind VK_Level.
  uint32_t LevelVar;
  /// Storage behind VK_ActiveLevel.
  uint32_t ActiveLevelVar;
  /// Storage behind VK_MaxActiveLevels.
  uint32_t MaxActiveLevelsVar;
  /// Storage behind VK_RunSched.
  uint32_t RunSchedVar;
  /// Storage behind VK_RunSchedChunk.
  uint32_t RunSchedChunkVar;

  /// Compare with \p Other. Defined out of line.
  bool operator==(const ICVStateTy &Other) const;

  /// NOTE(review): presumably asserts that this state matches \p Other -
  /// defined out of line, confirm there.
  void assertEqual(const ICVStateTy &Other) const;
};
63 
/// State kept once per team: the default ICV values plus a few values that
/// are never thread specific (see lookup32 and lookupPtr, which always
/// resolve them to the TeamState object).
struct TeamStateTy {
  /// Initialize the team state. Defined out of line.
  /// NOTE(review): \p IsSPMD presumably selects SPMD vs. generic execution
  /// mode - confirm against the definition.
  void init(bool IsSPMD);

  /// Compare with another team state. Defined out of line.
  bool operator==(const TeamStateTy &) const;

  /// NOTE(review): presumably asserts that this state matches \p Other -
  /// defined out of line, confirm there. Note that \p Other is taken by
  /// non-const reference, unlike ICVStateTy::assertEqual.
  void assertEqual(TeamStateTy &Other) const;

  /// ICVs
  ///
  /// Preallocated storage for ICV values that are used if the threads have not
  /// set a custom default. The latter is supported but unlikely and slow(er).
  ///
  ///{
  ICVStateTy ICVState;
  ///}

  /// Storage behind VK_ParallelTeamSize.
  uint32_t ParallelTeamSize;
  /// Storage behind VK_HasThreadState. The lookup helpers only consult the
  /// per-thread ThreadStates array when this is non-zero.
  uint32_t HasThreadState;
  /// Storage behind VK_ParallelRegionFn.
  ParallelRegionFnTy ParallelRegionFnVar;
};
84 
/// The team state object used by all lookup helpers in this header. It is
/// defined elsewhere and allocated with the omp_pteam_mem_alloc allocator.
extern TeamStateTy TeamState;
#pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)
87 
88 struct ThreadStateTy {
89 
90   /// ICVs have preallocated storage in the TeamStateTy which is used if a
91   /// thread has not set a custom value. The latter is supported but unlikely.
92   /// When it happens we will allocate dynamic memory to hold the values of all
93   /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
94   /// ICV struct to hold them all. This is slower than alternatives but allows
95   /// users to pay only for what they use.
96   ///
97   state::ICVStateTy ICVState;
98 
99   ThreadStateTy *PreviousThreadState;
100 
initThreadStateTy101   void init() {
102     ICVState = TeamState.ICVState;
103     PreviousThreadState = nullptr;
104   }
105 
initThreadStateTy106   void init(ThreadStateTy *PreviousTS) {
107     ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
108     PreviousThreadState = PreviousTS;
109   }
110 };
111 
/// One ThreadStateTy pointer per thread in the team, indexed by
/// mapping::getThreadIdInBlock() (see lookupImpl). A null entry means the
/// thread has no private state and the lookup helpers fall back to TeamState.
/// Allocated with the omp_pteam_mem_alloc allocator.
extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)

/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD);
117 
/// Identifies a runtime value that can be resolved through lookup32 (all
/// 32-bit kinds) or lookupPtr (VK_ParallelRegionFn). The first six kinds map
/// to members of ICVStateTy; the remaining ones map to members of
/// TeamStateTy.
enum ValueKind {
  VK_NThreads,
  VK_Level,
  VK_ActiveLevel,
  VK_MaxActiveLevels,
  VK_RunSched,
  // ---
  VK_RunSchedChunk,
  VK_ParallelRegionFn,
  VK_ParallelTeamSize,
  VK_HasThreadState,
};
131 
/// Enter a new data environment. Defined out of line.
/// NOTE(review): presumably creates a fresh thread state for the calling
/// thread that chains to the previous one (see
/// ThreadStateTy::init(ThreadStateTy *)) - confirm against the definition.
void enterDataEnvironment(IdentTy *Ident);

/// Leave the data environment entered by the matching enterDataEnvironment
/// call. Defined out of line.
/// NOTE(review): presumably restores the previous thread state - confirm.
void exitDataEnvironment();
137 
138 /// TODO
139 struct DateEnvironmentRAII {
DateEnvironmentRAIIDateEnvironmentRAII140   DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
~DateEnvironmentRAIIDateEnvironmentRAII141   ~DateEnvironmentRAII() { exitDataEnvironment(); }
142 };
143 
/// Reset the state associated with thread \p TId. Defined out of line.
/// NOTE(review): presumably releases or clears the ThreadStates entry of
/// \p TId - confirm against the definition.
void resetStateForThread(uint32_t TId);
146 
lookupForModify32Impl(uint32_t state::ICVStateTy::* Var,IdentTy * Ident,bool ForceTeamState)147 inline uint32_t &lookupForModify32Impl(uint32_t state::ICVStateTy::*Var,
148                                        IdentTy *Ident, bool ForceTeamState) {
149   if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() ||
150                  !TeamState.HasThreadState))
151     return TeamState.ICVState.*Var;
152   uint32_t TId = mapping::getThreadIdInBlock();
153   if (OMP_UNLIKELY(!ThreadStates[TId])) {
154     ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(memory::allocGlobal(
155         sizeof(ThreadStateTy), "ICV modification outside data environment"));
156     ASSERT(ThreadStates[TId] != nullptr && "Nullptr returned by malloc!");
157     TeamState.HasThreadState = true;
158     ThreadStates[TId]->init();
159   }
160   return ThreadStates[TId]->ICVState.*Var;
161 }
162 
lookupImpl(uint32_t state::ICVStateTy::* Var,bool ForceTeamState)163 inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var,
164                             bool ForceTeamState) {
165   auto TId = mapping::getThreadIdInBlock();
166   if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() &&
167                    TeamState.HasThreadState && ThreadStates[TId]))
168     return ThreadStates[TId]->ICVState.*Var;
169   return TeamState.ICVState.*Var;
170 }
171 
172 __attribute__((always_inline, flatten)) inline uint32_t &
lookup32(ValueKind Kind,bool IsReadonly,IdentTy * Ident,bool ForceTeamState)173 lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
174   switch (Kind) {
175   case state::VK_NThreads:
176     if (IsReadonly)
177       return lookupImpl(&ICVStateTy::NThreadsVar, ForceTeamState);
178     return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident,
179                                  ForceTeamState);
180   case state::VK_Level:
181     if (IsReadonly)
182       return lookupImpl(&ICVStateTy::LevelVar, ForceTeamState);
183     return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident, ForceTeamState);
184   case state::VK_ActiveLevel:
185     if (IsReadonly)
186       return lookupImpl(&ICVStateTy::ActiveLevelVar, ForceTeamState);
187     return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident,
188                                  ForceTeamState);
189   case state::VK_MaxActiveLevels:
190     if (IsReadonly)
191       return lookupImpl(&ICVStateTy::MaxActiveLevelsVar, ForceTeamState);
192     return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident,
193                                  ForceTeamState);
194   case state::VK_RunSched:
195     if (IsReadonly)
196       return lookupImpl(&ICVStateTy::RunSchedVar, ForceTeamState);
197     return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident,
198                                  ForceTeamState);
199   case state::VK_RunSchedChunk:
200     if (IsReadonly)
201       return lookupImpl(&ICVStateTy::RunSchedChunkVar, ForceTeamState);
202     return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident,
203                                  ForceTeamState);
204   case state::VK_ParallelTeamSize:
205     return TeamState.ParallelTeamSize;
206   case state::VK_HasThreadState:
207     return TeamState.HasThreadState;
208   default:
209     break;
210   }
211   __builtin_unreachable();
212 }
213 
214 __attribute__((always_inline, flatten)) inline void *&
lookupPtr(ValueKind Kind,bool IsReadonly,bool ForceTeamState)215 lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
216   switch (Kind) {
217   case state::VK_ParallelRegionFn:
218     return TeamState.ParallelRegionFnVar;
219   default:
220     break;
221   }
222   __builtin_unreachable();
223 }
224 
225 /// A class without actual state used to provide a nice interface to lookup and
226 /// update ICV values we can declare in global scope.
227 template <typename Ty, ValueKind Kind> struct Value {
TyValue228   __attribute__((flatten, always_inline)) operator Ty() {
229     return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
230                   /* ForceTeamState */ false);
231   }
232 
233   __attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) {
234     set(Other, /* IdentTy */ nullptr);
235     return *this;
236   }
237 
238   __attribute__((flatten, always_inline)) Value &operator++() {
239     inc(1, /* IdentTy */ nullptr);
240     return *this;
241   }
242 
243   __attribute__((flatten, always_inline)) Value &operator--() {
244     inc(-1, /* IdentTy */ nullptr);
245     return *this;
246   }
247 
248   __attribute__((flatten, always_inline)) void
249   assert_eq(const Ty &V, IdentTy *Ident = nullptr,
250             bool ForceTeamState = false) {
251     ASSERT(lookup(/* IsReadonly */ true, Ident, ForceTeamState) == V);
252   }
253 
254 private:
255   __attribute__((flatten, always_inline)) Ty &
lookupValue256   lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
257     Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
258     return t;
259   }
260 
incValue261   __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal,
262                                                   IdentTy *Ident) {
263     return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) +=
264             UpdateVal);
265   }
266 
setValue267   __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal,
268                                                   IdentTy *Ident) {
269     return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) =
270                 UpdateVal);
271   }
272 
273   template <typename VTy, typename Ty2> friend struct ValueRAII;
274 };
275 
276 /// A mookup class without actual state used to provide
277 /// a nice interface to lookup and update ICV values
278 /// we can declare in global scope.
279 template <typename Ty, ValueKind Kind> struct PtrValue {
TyPtrValue280   __attribute__((flatten, always_inline)) operator Ty() {
281     return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
282                   /* ForceTeamState */ false);
283   }
284 
285   __attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) {
286     set(Other);
287     return *this;
288   }
289 
290 private:
lookupPtrValue291   Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) {
292     return lookupPtr(Kind, IsReadonly, ForceTeamState);
293   }
294 
setPtrValue295   Ty &set(Ty UpdateVal) {
296     return (lookup(/* IsReadonly */ false, /* IdentTy */ nullptr,
297                    /* ForceTeamState */ false) = UpdateVal);
298   }
299 
300   template <typename VTy, typename Ty2> friend struct ValueRAII;
301 };
302 
303 template <typename VTy, typename Ty> struct ValueRAII {
304   ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident,
305             bool ForceTeamState = false)
306       : Ptr(Active ? &V.lookup(/* IsReadonly */ false, Ident, ForceTeamState)
307                    : (Ty *)utils::UndefPtr),
308         Val(OldValue), Active(Active) {
309     if (!Active)
310       return;
311     ASSERT(*Ptr == OldValue &&
312            "ValueRAII initialization with wrong old value!");
313     *Ptr = NewValue;
314   }
~ValueRAIIValueRAII315   ~ValueRAII() {
316     if (Active)
317       *Ptr = Val;
318   }
319 
320 private:
321   Ty *Ptr;
322   Ty Val;
323   bool Active;
324 };
325 
/// Handle for VK_RunSchedChunk. Resolves through lookup32 to
/// ICVStateTy::RunSchedChunkVar of either the team state or the calling
/// thread's state.
inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;

/// Handle for VK_ParallelTeamSize. Always resolves to
/// TeamState.ParallelTeamSize.
inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize;

/// Handle for VK_HasThreadState. Always resolves to TeamState.HasThreadState.
inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState;

/// Handle for VK_ParallelRegionFn. Always resolves to
/// TeamState.ParallelRegionFnVar.
inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn>
    ParallelRegionFn;
338 
/// Run \p Func. Defined out of line.
/// NOTE(review): the name suggests the state is compared before and after
/// the call (see the assertEqual members) - confirm against the definition.
void runAndCheckState(void(Func(void)));

/// Defined out of line.
/// NOTE(review): presumably tells the optimizer that the state still holds
/// the values set up by init(\p IsSPMD) - confirm against the definition.
void assumeInitialState(bool IsSPMD);
342 
343 } // namespace state
344 
345 namespace icv {
346 
/// Handle for VK_NThreads. Resolves through lookup32 to
/// ICVStateTy::NThreadsVar of either the team state or the calling thread's
/// state.
inline state::Value<uint32_t, state::VK_NThreads> NThreads;

/// Handle for VK_Level. Resolves through lookup32 to ICVStateTy::LevelVar.
inline state::Value<uint32_t, state::VK_Level> Level;

/// The `active-level` describes which of the parallel levels counted with the
/// `level-var` is active. There can only be one.
///
/// active-level-var is 1 if ActiveLevelVar is not 0, otherwise it is 0.
inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;

/// Handle for VK_MaxActiveLevels. Resolves through lookup32 to
/// ICVStateTy::MaxActiveLevelsVar.
inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels;

/// Handle for VK_RunSched. Resolves through lookup32 to
/// ICVStateTy::RunSchedVar.
inline state::Value<uint32_t, state::VK_RunSched> RunSched;
364 
365 } // namespace icv
366 
367 } // namespace _OMP
368 
369 #pragma omp end declare target
370 
371 #endif
372