167ab875fSJohannes Doerfert //===------ State.cpp - OpenMP State & ICV interface ------------- C++ -*-===//
267ab875fSJohannes Doerfert //
367ab875fSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
467ab875fSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
567ab875fSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
667ab875fSJohannes Doerfert //
767ab875fSJohannes Doerfert //===----------------------------------------------------------------------===//
867ab875fSJohannes Doerfert //
967ab875fSJohannes Doerfert //===----------------------------------------------------------------------===//
1067ab875fSJohannes Doerfert 
1167ab875fSJohannes Doerfert #include "State.h"
1267ab875fSJohannes Doerfert #include "Configuration.h"
1367ab875fSJohannes Doerfert #include "Debug.h"
1467ab875fSJohannes Doerfert #include "Interface.h"
1567ab875fSJohannes Doerfert #include "Synchronization.h"
1667ab875fSJohannes Doerfert #include "Types.h"
1767ab875fSJohannes Doerfert 
1867ab875fSJohannes Doerfert using namespace _OMP;
1967ab875fSJohannes Doerfert 
20b4f8443dSJoseph Huber #pragma omp begin declare target device_type(nohost)
2167ab875fSJohannes Doerfert 
2267ab875fSJohannes Doerfert /// Memory implementation
2367ab875fSJohannes Doerfert ///
2467ab875fSJohannes Doerfert ///{
2567ab875fSJohannes Doerfert 
/// Worst-case alignment, in bytes. Allocation sizes are padded to a multiple of
/// this value so that future allocations are properly aligned.
/// FIXME: The stack shouldn't require worst-case padding. Alignment needs to be
/// passed in as an argument and the stack rewritten to support it.
constexpr uint32_t Alignment = 16;

/// External symbol to access dynamic shared memory. Only declared here; the
/// storage itself is provided elsewhere.
extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
34f1c821faSJoseph Huber 
3567ab875fSJohannes Doerfert namespace {
3667ab875fSJohannes Doerfert 
3767ab875fSJohannes Doerfert /// Fallback implementations are missing to trigger a link time error.
3867ab875fSJohannes Doerfert /// Implementations for new devices, including the host, should go into a
3967ab875fSJohannes Doerfert /// dedicated begin/end declare variant.
4067ab875fSJohannes Doerfert ///
4167ab875fSJohannes Doerfert ///{
4267ab875fSJohannes Doerfert 
4367ab875fSJohannes Doerfert extern "C" {
44b16aadf0SJohannes Doerfert __attribute__((leaf)) void *malloc(uint64_t Size);
45b16aadf0SJohannes Doerfert __attribute__((leaf)) void free(void *Ptr);
4667ab875fSJohannes Doerfert }
4767ab875fSJohannes Doerfert 
4867ab875fSJohannes Doerfert ///}
4967ab875fSJohannes Doerfert 
/// AMDGCN implementations of malloc and free.
5167ab875fSJohannes Doerfert ///
5267ab875fSJohannes Doerfert ///{
5367ab875fSJohannes Doerfert #pragma omp begin declare variant match(device = {arch(amdgcn)})
5467ab875fSJohannes Doerfert 
5567ab875fSJohannes Doerfert extern "C" {
malloc(uint64_t Size)5667ab875fSJohannes Doerfert void *malloc(uint64_t Size) {
5767ab875fSJohannes Doerfert   // TODO: Use some preallocated space for dynamic malloc.
5867ab875fSJohannes Doerfert   return nullptr;
5967ab875fSJohannes Doerfert }
6067ab875fSJohannes Doerfert 
free(void * Ptr)6167ab875fSJohannes Doerfert void free(void *Ptr) {}
6267ab875fSJohannes Doerfert }
6367ab875fSJohannes Doerfert 
6467ab875fSJohannes Doerfert #pragma omp end declare variant
6567ab875fSJohannes Doerfert ///}
6667ab875fSJohannes Doerfert 
6767ab875fSJohannes Doerfert /// A "smart" stack in shared memory.
6867ab875fSJohannes Doerfert ///
6967ab875fSJohannes Doerfert /// The stack exposes a malloc/free interface but works like a stack internally.
7067ab875fSJohannes Doerfert /// In fact, it is a separate stack *per warp*. That means, each warp must push
7167ab875fSJohannes Doerfert /// and pop symmetrically or this breaks, badly. The implementation will (aim
7267ab875fSJohannes Doerfert /// to) detect non-lock-step warps and fallback to malloc/free. The same will
7367ab875fSJohannes Doerfert /// happen if a warp runs out of memory. The master warp in generic memory is
7467ab875fSJohannes Doerfert /// special and is given more memory than the rest.
7567ab875fSJohannes Doerfert ///
7667ab875fSJohannes Doerfert struct SharedMemorySmartStackTy {
7767ab875fSJohannes Doerfert   /// Initialize the stack. Must be called by all threads.
7867ab875fSJohannes Doerfert   void init(bool IsSPMD);
7967ab875fSJohannes Doerfert 
8067ab875fSJohannes Doerfert   /// Allocate \p Bytes on the stack for the encountering thread. Each thread
8167ab875fSJohannes Doerfert   /// can call this function.
8267ab875fSJohannes Doerfert   void *push(uint64_t Bytes);
8367ab875fSJohannes Doerfert 
8467ab875fSJohannes Doerfert   /// Deallocate the last allocation made by the encountering thread and pointed
8567ab875fSJohannes Doerfert   /// to by \p Ptr from the stack. Each thread can call this function.
8667ab875fSJohannes Doerfert   void pop(void *Ptr, uint32_t Bytes);
8767ab875fSJohannes Doerfert 
8867ab875fSJohannes Doerfert private:
8967ab875fSJohannes Doerfert   /// Compute the size of the storage space reserved for a thread.
computeThreadStorageTotal__anonb8aa7d100111::SharedMemorySmartStackTy9067ab875fSJohannes Doerfert   uint32_t computeThreadStorageTotal() {
9167ab875fSJohannes Doerfert     uint32_t NumLanesInBlock = mapping::getNumberOfProcessorElements();
92ed7ec860SJohannes Doerfert     return utils::align_down((state::SharedScratchpadSize / NumLanesInBlock),
93ed7ec860SJohannes Doerfert                              Alignment);
9467ab875fSJohannes Doerfert   }
9567ab875fSJohannes Doerfert 
9667ab875fSJohannes Doerfert   /// Return the top address of the warp data stack, that is the first address
9767ab875fSJohannes Doerfert   /// this warp will allocate memory at next.
getThreadDataTop__anonb8aa7d100111::SharedMemorySmartStackTy9867ab875fSJohannes Doerfert   void *getThreadDataTop(uint32_t TId) {
9967ab875fSJohannes Doerfert     return &Data[computeThreadStorageTotal() * TId + Usage[TId]];
10067ab875fSJohannes Doerfert   }
10167ab875fSJohannes Doerfert 
10267ab875fSJohannes Doerfert   /// The actual storage, shared among all warps.
10367ab875fSJohannes Doerfert   unsigned char Data[state::SharedScratchpadSize]
10467ab875fSJohannes Doerfert       __attribute__((aligned(Alignment)));
10567ab875fSJohannes Doerfert   unsigned char Usage[mapping::MaxThreadsPerTeam]
10667ab875fSJohannes Doerfert       __attribute__((aligned(Alignment)));
10767ab875fSJohannes Doerfert };
10867ab875fSJohannes Doerfert 
10967ab875fSJohannes Doerfert static_assert(state::SharedScratchpadSize / mapping::MaxThreadsPerTeam <= 256,
11067ab875fSJohannes Doerfert               "Shared scratchpad of this size not supported yet.");
11167ab875fSJohannes Doerfert 
11267ab875fSJohannes Doerfert /// The allocation of a single shared memory scratchpad.
11367ab875fSJohannes Doerfert static SharedMemorySmartStackTy SHARED(SharedMemorySmartStack);
11467ab875fSJohannes Doerfert 
init(bool IsSPMD)11567ab875fSJohannes Doerfert void SharedMemorySmartStackTy::init(bool IsSPMD) {
11667ab875fSJohannes Doerfert   Usage[mapping::getThreadIdInBlock()] = 0;
11767ab875fSJohannes Doerfert }
11867ab875fSJohannes Doerfert 
push(uint64_t Bytes)11967ab875fSJohannes Doerfert void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
12067ab875fSJohannes Doerfert   // First align the number of requested bytes.
121ed7ec860SJohannes Doerfert   uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
12267ab875fSJohannes Doerfert 
12367ab875fSJohannes Doerfert   uint32_t StorageTotal = computeThreadStorageTotal();
12467ab875fSJohannes Doerfert 
12567ab875fSJohannes Doerfert   // The main thread in generic mode gets the space of its entire warp as the
12667ab875fSJohannes Doerfert   // other threads do not participate in any computation at all.
12767ab875fSJohannes Doerfert   if (mapping::isMainThreadInGenericMode())
12867ab875fSJohannes Doerfert     StorageTotal *= mapping::getWarpSize();
12967ab875fSJohannes Doerfert 
13067ab875fSJohannes Doerfert   int TId = mapping::getThreadIdInBlock();
13167ab875fSJohannes Doerfert   if (Usage[TId] + AlignedBytes <= StorageTotal) {
13267ab875fSJohannes Doerfert     void *Ptr = getThreadDataTop(TId);
13367ab875fSJohannes Doerfert     Usage[TId] += AlignedBytes;
13467ab875fSJohannes Doerfert     return Ptr;
13567ab875fSJohannes Doerfert   }
13667ab875fSJohannes Doerfert 
137fd5853daSJoseph Huber   if (config::isDebugMode(config::DebugKind::CommonIssues))
138fd5853daSJoseph Huber     PRINT("Shared memory stack full, fallback to dynamic allocation of global "
1395dd0c396SJoseph Huber           "memory will negatively impact performance.\n");
1406dd791bcSJoseph Huber   void *GlobalMemory = memory::allocGlobal(
1416dd791bcSJoseph Huber       AlignedBytes, "Slow path shared memory allocation, insufficient "
14267ab875fSJohannes Doerfert                     "shared memory stack memory!");
1436dd791bcSJoseph Huber   ASSERT(GlobalMemory != nullptr && "nullptr returned by malloc!");
1446dd791bcSJoseph Huber 
1456dd791bcSJoseph Huber   return GlobalMemory;
14667ab875fSJohannes Doerfert }
14767ab875fSJohannes Doerfert 
pop(void * Ptr,uint32_t Bytes)14867ab875fSJohannes Doerfert void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) {
149ed7ec860SJohannes Doerfert   uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
15067ab875fSJohannes Doerfert   if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) {
15167ab875fSJohannes Doerfert     int TId = mapping::getThreadIdInBlock();
15267ab875fSJohannes Doerfert     Usage[TId] -= AlignedBytes;
15367ab875fSJohannes Doerfert     return;
15467ab875fSJohannes Doerfert   }
15567ab875fSJohannes Doerfert   memory::freeGlobal(Ptr, "Slow path shared memory deallocation");
15667ab875fSJohannes Doerfert }
15767ab875fSJohannes Doerfert 
15867ab875fSJohannes Doerfert } // namespace
15967ab875fSJohannes Doerfert 
getDynamicBuffer()160f1c821faSJoseph Huber void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }
161f1c821faSJoseph Huber 
allocShared(uint64_t Bytes,const char * Reason)16267ab875fSJohannes Doerfert void *memory::allocShared(uint64_t Bytes, const char *Reason) {
16367ab875fSJohannes Doerfert   return SharedMemorySmartStack.push(Bytes);
16467ab875fSJohannes Doerfert }
16567ab875fSJohannes Doerfert 
freeShared(void * Ptr,uint64_t Bytes,const char * Reason)16667ab875fSJohannes Doerfert void memory::freeShared(void *Ptr, uint64_t Bytes, const char *Reason) {
16767ab875fSJohannes Doerfert   SharedMemorySmartStack.pop(Ptr, Bytes);
16867ab875fSJohannes Doerfert }
16967ab875fSJohannes Doerfert 
allocGlobal(uint64_t Bytes,const char * Reason)17067ab875fSJohannes Doerfert void *memory::allocGlobal(uint64_t Bytes, const char *Reason) {
1716dd791bcSJoseph Huber   void *Ptr = malloc(Bytes);
1726dd791bcSJoseph Huber   if (config::isDebugMode(config::DebugKind::CommonIssues) && Ptr == nullptr)
1736dd791bcSJoseph Huber     PRINT("nullptr returned by malloc!\n");
1746dd791bcSJoseph Huber   return Ptr;
17567ab875fSJohannes Doerfert }
17667ab875fSJohannes Doerfert 
freeGlobal(void * Ptr,const char * Reason)17767ab875fSJohannes Doerfert void memory::freeGlobal(void *Ptr, const char *Reason) { free(Ptr); }
17867ab875fSJohannes Doerfert 
17967ab875fSJohannes Doerfert ///}
18067ab875fSJohannes Doerfert 
operator ==(const ICVStateTy & Other) const181a42361dcSJohannes Doerfert bool state::ICVStateTy::operator==(const ICVStateTy &Other) const {
18267ab875fSJohannes Doerfert   return (NThreadsVar == Other.NThreadsVar) & (LevelVar == Other.LevelVar) &
18367ab875fSJohannes Doerfert          (ActiveLevelVar == Other.ActiveLevelVar) &
18467ab875fSJohannes Doerfert          (MaxActiveLevelsVar == Other.MaxActiveLevelsVar) &
18567ab875fSJohannes Doerfert          (RunSchedVar == Other.RunSchedVar) &
18667ab875fSJohannes Doerfert          (RunSchedChunkVar == Other.RunSchedChunkVar);
18767ab875fSJohannes Doerfert }
18867ab875fSJohannes Doerfert 
assertEqual(const ICVStateTy & Other) const189a42361dcSJohannes Doerfert void state::ICVStateTy::assertEqual(const ICVStateTy &Other) const {
19067ab875fSJohannes Doerfert   ASSERT(NThreadsVar == Other.NThreadsVar);
19167ab875fSJohannes Doerfert   ASSERT(LevelVar == Other.LevelVar);
19267ab875fSJohannes Doerfert   ASSERT(ActiveLevelVar == Other.ActiveLevelVar);
19367ab875fSJohannes Doerfert   ASSERT(MaxActiveLevelsVar == Other.MaxActiveLevelsVar);
19467ab875fSJohannes Doerfert   ASSERT(RunSchedVar == Other.RunSchedVar);
19567ab875fSJohannes Doerfert   ASSERT(RunSchedChunkVar == Other.RunSchedChunkVar);
19667ab875fSJohannes Doerfert }
19767ab875fSJohannes Doerfert 
init(bool IsSPMD)198a42361dcSJohannes Doerfert void state::TeamStateTy::init(bool IsSPMD) {
19957b4c526SJohannes Doerfert   ICVState.NThreadsVar = mapping::getBlockSize(IsSPMD);
20067ab875fSJohannes Doerfert   ICVState.LevelVar = 0;
20167ab875fSJohannes Doerfert   ICVState.ActiveLevelVar = 0;
20267ab875fSJohannes Doerfert   ICVState.MaxActiveLevelsVar = 1;
20367ab875fSJohannes Doerfert   ICVState.RunSchedVar = omp_sched_static;
20467ab875fSJohannes Doerfert   ICVState.RunSchedChunkVar = 1;
20567ab875fSJohannes Doerfert   ParallelTeamSize = 1;
206d1501526SJohannes Doerfert   HasThreadState = false;
20767ab875fSJohannes Doerfert   ParallelRegionFnVar = nullptr;
20867ab875fSJohannes Doerfert }
20967ab875fSJohannes Doerfert 
operator ==(const TeamStateTy & Other) const210a42361dcSJohannes Doerfert bool state::TeamStateTy::operator==(const TeamStateTy &Other) const {
21167ab875fSJohannes Doerfert   return (ICVState == Other.ICVState) &
212d1501526SJohannes Doerfert          (HasThreadState == Other.HasThreadState) &
21367ab875fSJohannes Doerfert          (ParallelTeamSize == Other.ParallelTeamSize);
21467ab875fSJohannes Doerfert }
21567ab875fSJohannes Doerfert 
assertEqual(TeamStateTy & Other) const216a42361dcSJohannes Doerfert void state::TeamStateTy::assertEqual(TeamStateTy &Other) const {
21767ab875fSJohannes Doerfert   ICVState.assertEqual(Other.ICVState);
21867ab875fSJohannes Doerfert   ASSERT(ParallelTeamSize == Other.ParallelTeamSize);
219d1501526SJohannes Doerfert   ASSERT(HasThreadState == Other.HasThreadState);
22067ab875fSJohannes Doerfert }
22167ab875fSJohannes Doerfert 
2221da6ae4bSJohannes Doerfert state::TeamStateTy SHARED(_OMP::state::TeamState);
22367ab875fSJohannes Doerfert 
22467ab875fSJohannes Doerfert __attribute__((loader_uninitialized))
2251da6ae4bSJohannes Doerfert state::ThreadStateTy *_OMP::state::ThreadStates[mapping::MaxThreadsPerTeam];
2261da6ae4bSJohannes Doerfert #pragma omp allocate(_OMP::state::ThreadStates) allocator(omp_pteam_mem_alloc)
2271da6ae4bSJohannes Doerfert 
2281da6ae4bSJohannes Doerfert namespace {
22967ab875fSJohannes Doerfert 
returnValIfLevelIsActive(int Level,int Val,int DefaultVal,int OutOfBoundsVal=-1)23067ab875fSJohannes Doerfert int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
23167ab875fSJohannes Doerfert                              int OutOfBoundsVal = -1) {
23267ab875fSJohannes Doerfert   if (Level == 0)
23367ab875fSJohannes Doerfert     return DefaultVal;
23467ab875fSJohannes Doerfert   int LevelVar = omp_get_level();
23567ab875fSJohannes Doerfert   if (OMP_UNLIKELY(Level < 0 || Level > LevelVar))
23667ab875fSJohannes Doerfert     return OutOfBoundsVal;
23767ab875fSJohannes Doerfert   int ActiveLevel = icv::ActiveLevel;
23867ab875fSJohannes Doerfert   if (OMP_UNLIKELY(Level != ActiveLevel))
23967ab875fSJohannes Doerfert     return DefaultVal;
24067ab875fSJohannes Doerfert   return Val;
24167ab875fSJohannes Doerfert }
24267ab875fSJohannes Doerfert 
24367ab875fSJohannes Doerfert } // namespace
24467ab875fSJohannes Doerfert 
init(bool IsSPMD)24567ab875fSJohannes Doerfert void state::init(bool IsSPMD) {
24667ab875fSJohannes Doerfert   SharedMemorySmartStack.init(IsSPMD);
247374cd0fbSJoseph Huber   if (mapping::isInitialThreadInLevel0(IsSPMD)) {
24867ab875fSJohannes Doerfert     TeamState.init(IsSPMD);
249374cd0fbSJoseph Huber     DebugEntryRAII::init();
250374cd0fbSJoseph Huber   }
25167ab875fSJohannes Doerfert 
25267ab875fSJohannes Doerfert   ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
25367ab875fSJohannes Doerfert }
25467ab875fSJohannes Doerfert 
enterDataEnvironment(IdentTy * Ident)2551e121568SJohannes Doerfert void state::enterDataEnvironment(IdentTy *Ident) {
2560870a4f5SJoseph Huber   ASSERT(config::mayUseThreadStates() &&
2570870a4f5SJoseph Huber          "Thread state modified while explicitly disabled!");
2580870a4f5SJoseph Huber 
25967ab875fSJohannes Doerfert   unsigned TId = mapping::getThreadIdInBlock();
26067ab875fSJohannes Doerfert   ThreadStateTy *NewThreadState =
26167ab875fSJohannes Doerfert       static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
262dc729609SJohannes Doerfert   NewThreadState->init(ThreadStates[TId]);
263d1501526SJohannes Doerfert   TeamState.HasThreadState = true;
26467ab875fSJohannes Doerfert   ThreadStates[TId] = NewThreadState;
26567ab875fSJohannes Doerfert }
26667ab875fSJohannes Doerfert 
exitDataEnvironment()26767ab875fSJohannes Doerfert void state::exitDataEnvironment() {
2680870a4f5SJoseph Huber   ASSERT(config::mayUseThreadStates() &&
2690870a4f5SJoseph Huber          "Thread state modified while explicitly disabled!");
2700870a4f5SJoseph Huber 
27167ab875fSJohannes Doerfert   unsigned TId = mapping::getThreadIdInBlock();
27267ab875fSJohannes Doerfert   resetStateForThread(TId);
27367ab875fSJohannes Doerfert }
27467ab875fSJohannes Doerfert 
resetStateForThread(uint32_t TId)27567ab875fSJohannes Doerfert void state::resetStateForThread(uint32_t TId) {
276d1501526SJohannes Doerfert   if (OMP_LIKELY(!TeamState.HasThreadState || !ThreadStates[TId]))
27767ab875fSJohannes Doerfert     return;
27867ab875fSJohannes Doerfert 
27967ab875fSJohannes Doerfert   ThreadStateTy *PreviousThreadState = ThreadStates[TId]->PreviousThreadState;
28067ab875fSJohannes Doerfert   __kmpc_free_shared(ThreadStates[TId], sizeof(ThreadStateTy));
28167ab875fSJohannes Doerfert   ThreadStates[TId] = PreviousThreadState;
28267ab875fSJohannes Doerfert }
28367ab875fSJohannes Doerfert 
runAndCheckState(void (Func (void)))28467ab875fSJohannes Doerfert void state::runAndCheckState(void(Func(void))) {
28567ab875fSJohannes Doerfert   TeamStateTy OldTeamState = TeamState;
28667ab875fSJohannes Doerfert   OldTeamState.assertEqual(TeamState);
28767ab875fSJohannes Doerfert 
28867ab875fSJohannes Doerfert   Func();
28967ab875fSJohannes Doerfert 
29067ab875fSJohannes Doerfert   OldTeamState.assertEqual(TeamState);
29167ab875fSJohannes Doerfert }
29267ab875fSJohannes Doerfert 
assumeInitialState(bool IsSPMD)29367ab875fSJohannes Doerfert void state::assumeInitialState(bool IsSPMD) {
29467ab875fSJohannes Doerfert   TeamStateTy InitialTeamState;
29567ab875fSJohannes Doerfert   InitialTeamState.init(IsSPMD);
29667ab875fSJohannes Doerfert   InitialTeamState.assertEqual(TeamState);
29767ab875fSJohannes Doerfert   ASSERT(!ThreadStates[mapping::getThreadIdInBlock()]);
29867ab875fSJohannes Doerfert   ASSERT(mapping::isSPMDMode() == IsSPMD);
29967ab875fSJohannes Doerfert }
30067ab875fSJohannes Doerfert 
30167ab875fSJohannes Doerfert extern "C" {
omp_set_dynamic(int V)30267ab875fSJohannes Doerfert void omp_set_dynamic(int V) {}
30367ab875fSJohannes Doerfert 
omp_get_dynamic(void)30467ab875fSJohannes Doerfert int omp_get_dynamic(void) { return 0; }
30567ab875fSJohannes Doerfert 
omp_set_num_threads(int V)30667ab875fSJohannes Doerfert void omp_set_num_threads(int V) { icv::NThreads = V; }
30767ab875fSJohannes Doerfert 
omp_get_max_threads(void)30867ab875fSJohannes Doerfert int omp_get_max_threads(void) { return icv::NThreads; }
30967ab875fSJohannes Doerfert 
omp_get_level(void)31067ab875fSJohannes Doerfert int omp_get_level(void) {
31167ab875fSJohannes Doerfert   int LevelVar = icv::Level;
31267ab875fSJohannes Doerfert   ASSERT(LevelVar >= 0);
31367ab875fSJohannes Doerfert   return LevelVar;
31467ab875fSJohannes Doerfert }
31567ab875fSJohannes Doerfert 
omp_get_active_level(void)31667ab875fSJohannes Doerfert int omp_get_active_level(void) { return !!icv::ActiveLevel; }
31767ab875fSJohannes Doerfert 
omp_in_parallel(void)31867ab875fSJohannes Doerfert int omp_in_parallel(void) { return !!icv::ActiveLevel; }
31967ab875fSJohannes Doerfert 
omp_get_schedule(omp_sched_t * ScheduleKind,int * ChunkSize)32067ab875fSJohannes Doerfert void omp_get_schedule(omp_sched_t *ScheduleKind, int *ChunkSize) {
32167ab875fSJohannes Doerfert   *ScheduleKind = static_cast<omp_sched_t>((int)icv::RunSched);
32267ab875fSJohannes Doerfert   *ChunkSize = state::RunSchedChunk;
32367ab875fSJohannes Doerfert }
32467ab875fSJohannes Doerfert 
omp_set_schedule(omp_sched_t ScheduleKind,int ChunkSize)32567ab875fSJohannes Doerfert void omp_set_schedule(omp_sched_t ScheduleKind, int ChunkSize) {
32667ab875fSJohannes Doerfert   icv::RunSched = (int)ScheduleKind;
32767ab875fSJohannes Doerfert   state::RunSchedChunk = ChunkSize;
32867ab875fSJohannes Doerfert }
32967ab875fSJohannes Doerfert 
omp_get_ancestor_thread_num(int Level)33067ab875fSJohannes Doerfert int omp_get_ancestor_thread_num(int Level) {
33167ab875fSJohannes Doerfert   return returnValIfLevelIsActive(Level, mapping::getThreadIdInBlock(), 0);
33267ab875fSJohannes Doerfert }
33367ab875fSJohannes Doerfert 
omp_get_thread_num(void)33467ab875fSJohannes Doerfert int omp_get_thread_num(void) {
33567ab875fSJohannes Doerfert   return omp_get_ancestor_thread_num(omp_get_level());
33667ab875fSJohannes Doerfert }
33767ab875fSJohannes Doerfert 
omp_get_team_size(int Level)33867ab875fSJohannes Doerfert int omp_get_team_size(int Level) {
33967ab875fSJohannes Doerfert   return returnValIfLevelIsActive(Level, state::ParallelTeamSize, 1);
34067ab875fSJohannes Doerfert }
34167ab875fSJohannes Doerfert 
omp_get_num_threads(void)3424c88341dSJohannes Doerfert int omp_get_num_threads(void) {
3434c88341dSJohannes Doerfert   return omp_get_level() > 1 ? 1 : state::ParallelTeamSize;
3444c88341dSJohannes Doerfert }
34567ab875fSJohannes Doerfert 
omp_get_thread_limit(void)34667ab875fSJohannes Doerfert int omp_get_thread_limit(void) { return mapping::getKernelSize(); }
34767ab875fSJohannes Doerfert 
omp_get_num_procs(void)34867ab875fSJohannes Doerfert int omp_get_num_procs(void) { return mapping::getNumberOfProcessorElements(); }
34967ab875fSJohannes Doerfert 
omp_set_nested(int)35067ab875fSJohannes Doerfert void omp_set_nested(int) {}
35167ab875fSJohannes Doerfert 
omp_get_nested(void)35267ab875fSJohannes Doerfert int omp_get_nested(void) { return false; }
35367ab875fSJohannes Doerfert 
omp_set_max_active_levels(int Levels)35467ab875fSJohannes Doerfert void omp_set_max_active_levels(int Levels) {
35567ab875fSJohannes Doerfert   icv::MaxActiveLevels = Levels > 0 ? 1 : 0;
35667ab875fSJohannes Doerfert }
35767ab875fSJohannes Doerfert 
omp_get_max_active_levels(void)35867ab875fSJohannes Doerfert int omp_get_max_active_levels(void) { return icv::MaxActiveLevels; }
35967ab875fSJohannes Doerfert 
omp_get_proc_bind(void)36067ab875fSJohannes Doerfert omp_proc_bind_t omp_get_proc_bind(void) { return omp_proc_bind_false; }
36167ab875fSJohannes Doerfert 
omp_get_num_places(void)36267ab875fSJohannes Doerfert int omp_get_num_places(void) { return 0; }
36367ab875fSJohannes Doerfert 
omp_get_place_num_procs(int)36467ab875fSJohannes Doerfert int omp_get_place_num_procs(int) { return omp_get_num_procs(); }
36567ab875fSJohannes Doerfert 
omp_get_place_proc_ids(int,int *)36667ab875fSJohannes Doerfert void omp_get_place_proc_ids(int, int *) {
36767ab875fSJohannes Doerfert   // TODO
36867ab875fSJohannes Doerfert }
36967ab875fSJohannes Doerfert 
omp_get_place_num(void)37067ab875fSJohannes Doerfert int omp_get_place_num(void) { return 0; }
37167ab875fSJohannes Doerfert 
omp_get_partition_num_places(void)37267ab875fSJohannes Doerfert int omp_get_partition_num_places(void) { return 0; }
37367ab875fSJohannes Doerfert 
omp_get_partition_place_nums(int *)37467ab875fSJohannes Doerfert void omp_get_partition_place_nums(int *) {
37567ab875fSJohannes Doerfert   // TODO
37667ab875fSJohannes Doerfert }
37767ab875fSJohannes Doerfert 
omp_get_cancellation(void)37867ab875fSJohannes Doerfert int omp_get_cancellation(void) { return 0; }
37967ab875fSJohannes Doerfert 
omp_set_default_device(int)38067ab875fSJohannes Doerfert void omp_set_default_device(int) {}
38167ab875fSJohannes Doerfert 
omp_get_default_device(void)38267ab875fSJohannes Doerfert int omp_get_default_device(void) { return -1; }
38367ab875fSJohannes Doerfert 
omp_get_num_devices(void)38467ab875fSJohannes Doerfert int omp_get_num_devices(void) { return config::getNumDevices(); }
38567ab875fSJohannes Doerfert 
omp_get_device_num(void)386616dd9aeSJose M Monsalve Diaz int omp_get_device_num(void) { return config::getDeviceNum(); }
387616dd9aeSJose M Monsalve Diaz 
omp_get_num_teams(void)38867ab875fSJohannes Doerfert int omp_get_num_teams(void) { return mapping::getNumberOfBlocks(); }
38967ab875fSJohannes Doerfert 
omp_get_team_num()39067ab875fSJohannes Doerfert int omp_get_team_num() { return mapping::getBlockId(); }
39167ab875fSJohannes Doerfert 
omp_get_initial_device(void)39267ab875fSJohannes Doerfert int omp_get_initial_device(void) { return -1; }
39367ab875fSJohannes Doerfert }
39467ab875fSJohannes Doerfert 
39567ab875fSJohannes Doerfert extern "C" {
__kmpc_alloc_shared(uint64_t Bytes)396*fd8fd9e5SJoseph Huber __attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
39774f91741SJoseph Huber   FunctionTracingRAII();
39867ab875fSJohannes Doerfert   return memory::allocShared(Bytes, "Frontend alloc shared");
39967ab875fSJohannes Doerfert }
40067ab875fSJohannes Doerfert 
__kmpc_free_shared(void * Ptr,uint64_t Bytes)401*fd8fd9e5SJoseph Huber __attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
40274f91741SJoseph Huber   FunctionTracingRAII();
40367ab875fSJohannes Doerfert   memory::freeShared(Ptr, Bytes, "Frontend free shared");
40467ab875fSJohannes Doerfert }
40567ab875fSJohannes Doerfert 
__kmpc_get_dynamic_shared()406208f9005SJoseph Huber void *__kmpc_get_dynamic_shared() { return memory::getDynamicBuffer(); }
407208f9005SJoseph Huber 
llvm_omp_target_dynamic_shared_alloc()408a3f423cfSJoseph Huber void *llvm_omp_target_dynamic_shared_alloc() {
409a3f423cfSJoseph Huber   return __kmpc_get_dynamic_shared();
410a3f423cfSJoseph Huber }
411a3f423cfSJoseph Huber 
llvm_omp_get_dynamic_shared()412208f9005SJoseph Huber void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
413f1c821faSJoseph Huber 
/// Number of pointer slots kept in shared memory to communicate arguments from
/// the main thread to the workers in generic mode. If more than
/// NUM_SHARED_VARIABLES_IN_SHARED_MEM slots are requested, space for
/// communication is malloc'ed instead.
constexpr uint64_t NUM_SHARED_VARIABLES_IN_SHARED_MEM = 64;

/// The fixed-size slot array.
[[clang::loader_uninitialized]] static void
    *SharedMemVariableSharingSpace[NUM_SHARED_VARIABLES_IN_SHARED_MEM];
#pragma omp allocate(SharedMemVariableSharingSpace)                            \
    allocator(omp_pteam_mem_alloc)

/// The slot array currently in use: either the fixed-size array above or a
/// global memory allocation.
[[clang::loader_uninitialized]] static void **SharedMemVariableSharingSpacePtr;
#pragma omp allocate(SharedMemVariableSharingSpacePtr)                         \
    allocator(omp_pteam_mem_alloc)
42667ab875fSJohannes Doerfert 
__kmpc_begin_sharing_variables(void *** GlobalArgs,uint64_t nArgs)427e3ee7624SJoseph Huber void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) {
42874f91741SJoseph Huber   FunctionTracingRAII();
429e3ee7624SJoseph Huber   if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) {
430e3ee7624SJoseph Huber     SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0];
431e3ee7624SJoseph Huber   } else {
432e3ee7624SJoseph Huber     SharedMemVariableSharingSpacePtr = (void **)memory::allocGlobal(
433e3ee7624SJoseph Huber         nArgs * sizeof(void *), "new extended args");
4346dd791bcSJoseph Huber     ASSERT(SharedMemVariableSharingSpacePtr != nullptr &&
4356dd791bcSJoseph Huber            "Nullptr returned by malloc!");
436e3ee7624SJoseph Huber   }
43767ab875fSJohannes Doerfert   *GlobalArgs = SharedMemVariableSharingSpacePtr;
43867ab875fSJohannes Doerfert }
43967ab875fSJohannes Doerfert 
__kmpc_end_sharing_variables()440e3ee7624SJoseph Huber void __kmpc_end_sharing_variables() {
44174f91741SJoseph Huber   FunctionTracingRAII();
442e3ee7624SJoseph Huber   if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0])
443e3ee7624SJoseph Huber     memory::freeGlobal(SharedMemVariableSharingSpacePtr, "new extended args");
44467ab875fSJohannes Doerfert }
44567ab875fSJohannes Doerfert 
__kmpc_get_shared_variables(void *** GlobalArgs)44667ab875fSJohannes Doerfert void __kmpc_get_shared_variables(void ***GlobalArgs) {
44774f91741SJoseph Huber   FunctionTracingRAII();
44867ab875fSJohannes Doerfert   *GlobalArgs = SharedMemVariableSharingSpacePtr;
44967ab875fSJohannes Doerfert }
45067ab875fSJohannes Doerfert }
45167ab875fSJohannes Doerfert #pragma omp end declare target
452