167ab875fSJohannes Doerfert //===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===//
267ab875fSJohannes Doerfert //
367ab875fSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
467ab875fSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
567ab875fSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
667ab875fSJohannes Doerfert //
767ab875fSJohannes Doerfert //===----------------------------------------------------------------------===//
867ab875fSJohannes Doerfert //
967ab875fSJohannes Doerfert // This file contains the kernel entry points for the device.
1067ab875fSJohannes Doerfert //
1167ab875fSJohannes Doerfert //===----------------------------------------------------------------------===//
1267ab875fSJohannes Doerfert 
1367ab875fSJohannes Doerfert #include "Debug.h"
1467ab875fSJohannes Doerfert #include "Interface.h"
1567ab875fSJohannes Doerfert #include "Mapping.h"
1667ab875fSJohannes Doerfert #include "State.h"
1767ab875fSJohannes Doerfert #include "Synchronization.h"
1867ab875fSJohannes Doerfert #include "Types.h"
1967ab875fSJohannes Doerfert 
2067ab875fSJohannes Doerfert using namespace _OMP;
2167ab875fSJohannes Doerfert 
22b4f8443dSJoseph Huber #pragma omp begin declare target device_type(nohost)
2367ab875fSJohannes Doerfert 
inititializeRuntime(bool IsSPMD)2467ab875fSJohannes Doerfert static void inititializeRuntime(bool IsSPMD) {
2567ab875fSJohannes Doerfert   // Order is important here.
2667ab875fSJohannes Doerfert   synchronize::init(IsSPMD);
2767ab875fSJohannes Doerfert   mapping::init(IsSPMD);
2867ab875fSJohannes Doerfert   state::init(IsSPMD);
2967ab875fSJohannes Doerfert }
3067ab875fSJohannes Doerfert 
3167ab875fSJohannes Doerfert /// Simple generic state machine for worker threads.
genericStateMachine(IdentTy * Ident)3267ab875fSJohannes Doerfert static void genericStateMachine(IdentTy *Ident) {
3374f91741SJoseph Huber   FunctionTracingRAII();
3467ab875fSJohannes Doerfert 
3567ab875fSJohannes Doerfert   uint32_t TId = mapping::getThreadIdInBlock();
3667ab875fSJohannes Doerfert 
3767ab875fSJohannes Doerfert   do {
38*6ce43697SJohannes Doerfert     ParallelRegionFnTy WorkFn = nullptr;
3967ab875fSJohannes Doerfert 
4067ab875fSJohannes Doerfert     // Wait for the signal that we have a new work function.
4167ab875fSJohannes Doerfert     synchronize::threads();
4267ab875fSJohannes Doerfert 
4367ab875fSJohannes Doerfert     // Retrieve the work function from the runtime.
4467ab875fSJohannes Doerfert     bool IsActive = __kmpc_kernel_parallel(&WorkFn);
4567ab875fSJohannes Doerfert 
4667ab875fSJohannes Doerfert     // If there is nothing more to do, break out of the state machine by
4767ab875fSJohannes Doerfert     // returning to the caller.
4867ab875fSJohannes Doerfert     if (!WorkFn)
4967ab875fSJohannes Doerfert       return;
5067ab875fSJohannes Doerfert 
5167ab875fSJohannes Doerfert     if (IsActive) {
5267ab875fSJohannes Doerfert       ASSERT(!mapping::isSPMDMode());
5367ab875fSJohannes Doerfert       ((void (*)(uint32_t, uint32_t))WorkFn)(0, TId);
5467ab875fSJohannes Doerfert       __kmpc_kernel_end_parallel();
5567ab875fSJohannes Doerfert     }
5667ab875fSJohannes Doerfert 
5767ab875fSJohannes Doerfert     synchronize::threads();
5867ab875fSJohannes Doerfert 
5967ab875fSJohannes Doerfert   } while (true);
6067ab875fSJohannes Doerfert }
6167ab875fSJohannes Doerfert 
6267ab875fSJohannes Doerfert extern "C" {
6367ab875fSJohannes Doerfert 
6467ab875fSJohannes Doerfert /// Initialization
6567ab875fSJohannes Doerfert ///
6667ab875fSJohannes Doerfert /// \param Ident               Source location identification, can be NULL.
6767ab875fSJohannes Doerfert ///
__kmpc_target_init(IdentTy * Ident,int8_t Mode,bool UseGenericStateMachine,bool)68423d34f7SShilei Tian int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
6967ab875fSJohannes Doerfert                            bool UseGenericStateMachine, bool) {
7074f91741SJoseph Huber   FunctionTracingRAII();
71423d34f7SShilei Tian   const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
7267ab875fSJohannes Doerfert   if (IsSPMD) {
7367ab875fSJohannes Doerfert     inititializeRuntime(/* IsSPMD */ true);
74b16aadf0SJohannes Doerfert     synchronize::threadsAligned();
7567ab875fSJohannes Doerfert   } else {
7667ab875fSJohannes Doerfert     inititializeRuntime(/* IsSPMD */ false);
7767ab875fSJohannes Doerfert     // No need to wait since only the main threads will execute user
7867ab875fSJohannes Doerfert     // code and workers will run into a barrier right away.
7967ab875fSJohannes Doerfert   }
8067ab875fSJohannes Doerfert 
8167ab875fSJohannes Doerfert   if (IsSPMD) {
8267ab875fSJohannes Doerfert     state::assumeInitialState(IsSPMD);
8367ab875fSJohannes Doerfert     return -1;
8467ab875fSJohannes Doerfert   }
8567ab875fSJohannes Doerfert 
86ccb5d272SJohannes Doerfert   if (mapping::isInitialThreadInLevel0(IsSPMD))
8767ab875fSJohannes Doerfert     return -1;
8867ab875fSJohannes Doerfert 
89c9dfe322SJoel E. Denny   // Enter the generic state machine if enabled and if this thread can possibly
90c9dfe322SJoel E. Denny   // be an active worker thread.
91c9dfe322SJoel E. Denny   //
92c9dfe322SJoel E. Denny   // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
93c9dfe322SJoel E. Denny   // GPU.  In those cases, a single thread can apparently satisfy a barrier on
94c9dfe322SJoel E. Denny   // behalf of all threads in the same warp.  Thus, it would not be safe for
95c9dfe322SJoel E. Denny   // other threads in the main thread's warp to reach the first
96c9dfe322SJoel E. Denny   // synchronize::threads call in genericStateMachine before the main thread
97c9dfe322SJoel E. Denny   // reaches its corresponding synchronize::threads call: that would permit all
98c9dfe322SJoel E. Denny   // active worker threads to proceed before the main thread has actually set
99c9dfe322SJoel E. Denny   // state::ParallelRegionFn, and then they would immediately quit without
100c9dfe322SJoel E. Denny   // doing any work.  mapping::getBlockSize() does not include any of the main
101c9dfe322SJoel E. Denny   // thread's warp, so none of its threads can ever be active worker threads.
102c9dfe322SJoel E. Denny   if (UseGenericStateMachine &&
103*6ce43697SJohannes Doerfert       mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) {
10467ab875fSJohannes Doerfert     genericStateMachine(Ident);
105*6ce43697SJohannes Doerfert   } else {
106*6ce43697SJohannes Doerfert     // Retrieve the work function just to ensure we always call
107*6ce43697SJohannes Doerfert     // __kmpc_kernel_parallel even if a custom state machine is used.
108*6ce43697SJohannes Doerfert     // TODO: this is not super pretty. The problem is we create the call to
109*6ce43697SJohannes Doerfert     // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it is
110*6ce43697SJohannes Doerfert     // not there yet. Thus, we assume we never reach it from
111*6ce43697SJohannes Doerfert     // __kmpc_target_deinit. That allows us to remove the store in there to
112*6ce43697SJohannes Doerfert     // ParallelRegionFn, which leads to bad results later on.
113*6ce43697SJohannes Doerfert     ParallelRegionFnTy WorkFn = nullptr;
114*6ce43697SJohannes Doerfert     __kmpc_kernel_parallel(&WorkFn);
115*6ce43697SJohannes Doerfert     ASSERT(WorkFn == nullptr);
116*6ce43697SJohannes Doerfert   }
11767ab875fSJohannes Doerfert 
11867ab875fSJohannes Doerfert   return mapping::getThreadIdInBlock();
11967ab875fSJohannes Doerfert }
12067ab875fSJohannes Doerfert 
12167ab875fSJohannes Doerfert /// De-Initialization
12267ab875fSJohannes Doerfert ///
12367ab875fSJohannes Doerfert /// In non-SPMD, this function releases the workers trapped in a state machine
12467ab875fSJohannes Doerfert /// and also any memory dynamically allocated by the runtime.
12567ab875fSJohannes Doerfert ///
12667ab875fSJohannes Doerfert /// \param Ident Source location identification, can be NULL.
12767ab875fSJohannes Doerfert ///
__kmpc_target_deinit(IdentTy * Ident,int8_t Mode,bool)128423d34f7SShilei Tian void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) {
12974f91741SJoseph Huber   FunctionTracingRAII();
130423d34f7SShilei Tian   const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
13167ab875fSJohannes Doerfert   state::assumeInitialState(IsSPMD);
13267ab875fSJohannes Doerfert   if (IsSPMD)
13367ab875fSJohannes Doerfert     return;
13467ab875fSJohannes Doerfert 
13567ab875fSJohannes Doerfert   // Signal the workers to exit the state machine and exit the kernel.
13667ab875fSJohannes Doerfert   state::ParallelRegionFn = nullptr;
13767ab875fSJohannes Doerfert }
13867ab875fSJohannes Doerfert 
__kmpc_is_spmd_exec_mode()13974f91741SJoseph Huber int8_t __kmpc_is_spmd_exec_mode() {
14074f91741SJoseph Huber   FunctionTracingRAII();
14174f91741SJoseph Huber   return mapping::isSPMDMode();
14274f91741SJoseph Huber }
14367ab875fSJohannes Doerfert }
14467ab875fSJohannes Doerfert 
14567ab875fSJohannes Doerfert #pragma omp end declare target
146