1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #ifndef KMP_WAIT_RELEASE_H
17 #define KMP_WAIT_RELEASE_H
18 
19 #include "kmp.h"
20 #include "kmp_itt.h"
21 #include "kmp_stats.h"
22 
23 /*!
24 @defgroup WAIT_RELEASE Wait/Release operations
25 
The definitions and functions here implement the lowest-level thread
synchronization operations of suspending a thread and waking it. They are
used to build higher-level operations such as barriers and fork/join.
29 */
30 
31 /*!
32 @ingroup WAIT_RELEASE
33 @{
34 */
35 
36 /*!
37  * The flag_type describes the storage used for the flag.
38  */
39 enum flag_type {
40     flag32,        /**< 32 bit flags */
41     flag64,        /**< 64 bit flags */
42     flag_oncore    /**< special 64-bit flag for on-core barrier (hierarchical) */
43 };
44 
45 /*!
46  * Base class for wait/release volatile flag
47  */
48 template <typename P>
49 class kmp_flag {
50     volatile P * loc;  /**< Pointer to the flag storage that is modified by another thread */
51     flag_type t;       /**< "Type" of the flag in loc */
52  public:
53     typedef P flag_t;
54     kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
55     /*!
56      * @result the pointer to the actual flag
57      */
58     volatile P * get() { return loc; }
59     /*!
60      * @param new_loc in   set loc to point at new_loc
61      */
62     void set(volatile P *new_loc) { loc = new_loc; }
63     /*!
64      * @result the flag_type
65      */
66     flag_type get_type() { return t; }
67     // Derived classes must provide the following:
68     /*
69     kmp_info_t * get_waiter(kmp_uint32 i);
70     kmp_uint32 get_num_waiters();
71     bool done_check();
72     bool done_check_val(P old_loc);
73     bool notdone_check();
    void internal_release();
75     void suspend(int th_gtid);
76     void resume(int th_gtid);
77     P set_sleeping();
78     P unset_sleeping();
79     bool is_sleeping();
80     bool is_any_sleeping();
81     bool is_sleeping_val(P old_loc);
82     int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
83                       USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained);
84     */
85 };
86 
87 #if ! KMP_USE_MONITOR
88 # if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
89    // HW TSC is used to reduce overhead (clock tick instead of nanosecond).
90    extern double __kmp_ticks_per_nsec;
91 #  define KMP_NOW() __kmp_hardware_timestamp()
92 #  define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC * __kmp_ticks_per_nsec)
93 #  define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
94 # else
95    // System time is retrieved sporadically while blocking.
96    extern kmp_uint64 __kmp_now_nsec();
97 #  define KMP_NOW() __kmp_now_nsec()
98 #  define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC)
99 #  define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
100 # endif
101 #endif
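
/* Illustrative sketch (non-monitor build assumed) of how the macros above
   combine inside __kmp_wait_template:

       kmp_uint64 hibernate_goal = KMP_NOW() + KMP_BLOCKTIME_INTERVAL();
       kmp_uint64 poll_count = 0;
       while (flag_not_done) {
           if (KMP_BLOCKING(hibernate_goal, poll_count++))
               continue;   // still inside the blocktime window: keep spinning
           suspend();      // window expired: go to sleep
       }

   On the non-TSC path, KMP_BLOCKING reads the clock only on every 1000th
   poll, keeping the system-clock call out of the hot spin loop. */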
102 
/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
   must make certain that another thread calls __kmp_release_template to wake it back up to prevent deadlocks!  */
105 template <class C>
106 static inline void
107 __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
108                     USE_ITT_BUILD_ARG(void * itt_sync_obj) )
109 {
110     // NOTE: We may not belong to a team at this point.
111     volatile typename C::flag_t *spin = flag->get();
112     kmp_uint32 spins;
113     kmp_uint32 hibernate;
114     int th_gtid;
115     int tasks_completed = FALSE;
116     int oversubscribed;
117 #if ! KMP_USE_MONITOR
118     kmp_uint64 poll_count;
119     kmp_uint64 hibernate_goal;
120 #endif
121 
122     KMP_FSYNC_SPIN_INIT(spin, NULL);
123     if (flag->done_check()) {
124         KMP_FSYNC_SPIN_ACQUIRED(spin);
125         return;
126     }
127     th_gtid = this_thr->th.th_info.ds.ds_gtid;
128     KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
129 #if KMP_STATS_ENABLED
130     stats_state_e thread_state = KMP_GET_THREAD_STATE();
131 #endif
132 
133 #if OMPT_SUPPORT && OMPT_BLAME
134     ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
135     if (ompt_enabled &&
136         ompt_state != ompt_state_undefined) {
137         if (ompt_state == ompt_state_idle) {
138             if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
139                 ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
140             }
141         } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
142             KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
143                              ompt_state == ompt_state_wait_barrier_implicit ||
144                              ompt_state == ompt_state_wait_barrier_explicit);
145 
146             ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
147             ompt_parallel_id_t pId;
148             ompt_task_id_t tId;
149             if (team){
150                 pId = team->ompt_team_info.parallel_id;
151                 tId = team->ompt_task_info.task_id;
152             } else {
153                 pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
154                 tId = this_thr->th.th_current_task->ompt_task_info.task_id;
155             }
156             ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
157         }
158     }
159 #endif
160 
161     // Setup for waiting
162     KMP_INIT_YIELD(spins);
163 
164     if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
165 #if KMP_USE_MONITOR
166         // The worker threads cannot rely on the team struct existing at this point.
167         // Use the bt values cached in the thread struct instead.
168 #ifdef KMP_ADJUST_BLOCKTIME
169         if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
170             // Force immediate suspend if not set by user and more threads than available procs
171             hibernate = 0;
172         else
173             hibernate = this_thr->th.th_team_bt_intervals;
174 #else
175         hibernate = this_thr->th.th_team_bt_intervals;
176 #endif /* KMP_ADJUST_BLOCKTIME */
177 
        /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
           of the specified #intervals, plus up to one interval more.  This increment makes
           certain that this thread doesn't go to sleep too soon.  */
181         if (hibernate != 0)
182             hibernate++;
183 
184         // Add in the current time value.
185         hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
186         KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
187                       th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
188                       hibernate - __kmp_global.g.g_time.dt.t_value));
189 #else
190         hibernate_goal = KMP_NOW() + KMP_BLOCKTIME_INTERVAL();
191         poll_count = 0;
192 #endif // KMP_USE_MONITOR
193     }
194 
195     oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
196     KMP_MB();
197 
198     // Main wait spin loop
199     while (flag->notdone_check()) {
200         int in_pool;
201         kmp_task_team_t * task_team = NULL;
202         if (__kmp_tasking_mode != tskm_immediate_exec) {
203             task_team = this_thr->th.th_task_team;
            /* If the thread's task team pointer is NULL, it means one of 3 things:
               1) A newly-created thread is first being released by __kmp_fork_barrier(), and
                  its task team has not been set up yet.
               2) All tasks have been executed to completion.
               3) Tasking is off for this region.  This could be because we are in a serialized region
                  (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0).  */
210             if (task_team != NULL) {
211                 if (TCR_SYNC_4(task_team->tt.tt_active)) {
212                     if (KMP_TASKING_ENABLED(task_team))
213                         flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
214                                             USE_ITT_BUILD_ARG(itt_sync_obj), 0);
215                 }
216                 else {
217                     KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
218                     this_thr->th.th_task_team = NULL;
219                 }
220             } // if
221         } // if
222 
223         KMP_FSYNC_SPIN_PREPARE(spin);
224         if (TCR_4(__kmp_global.g.g_done)) {
225             if (__kmp_global.g.g_abort)
226                 __kmp_abort_thread();
227             break;
228         }
229 
230         // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
231         KMP_YIELD(oversubscribed);
232         // TODO: Should it be number of cores instead of thread contexts? Like:
233         // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
234         // Need performance improvement data to make the change...
235         KMP_YIELD_SPIN(spins);
236 
237         // Check if this thread was transferred from a team
238         // to the thread pool (or vice-versa) while spinning.
239         in_pool = !!TCR_4(this_thr->th.th_in_pool);
240         if (in_pool != !!this_thr->th.th_active_in_pool) {
241             if (in_pool) { // Recently transferred from team to pool
242                 KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
243                 this_thr->th.th_active_in_pool = TRUE;
244                 /* Here, we cannot assert that:
245                    KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
246                    __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
247                    lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
248                    by the workers.  The two can get out of sync for brief periods of time.  */
249             }
250             else { // Recently transferred from pool to team
251                 KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
252                 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
253                 this_thr->th.th_active_in_pool = FALSE;
254             }
255         }
256 
257 #if KMP_STATS_ENABLED
258         // Check if thread has been signalled to idle state
259         // This indicates that the logical "join-barrier" has finished
260         if (this_thr->th.th_stats->isIdle() && KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
261             KMP_SET_THREAD_STATE(IDLE);
262             KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
263         }
264 #endif
265 
266         // Don't suspend if KMP_BLOCKTIME is set to "infinite"
267         if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
268             continue;
269 
270         // Don't suspend if there is a likelihood of new tasks being spawned.
271         if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
272             continue;
273 
274 #if KMP_USE_MONITOR
275         // If we have waited a bit more, fall asleep
276         if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
277             continue;
278 #else
279         if (KMP_BLOCKING(hibernate_goal, poll_count++))
280             continue;
281 #endif
282 
283         KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
284 
285         flag->suspend(th_gtid);
286 
287         if (TCR_4(__kmp_global.g.g_done)) {
288             if (__kmp_global.g.g_abort)
289                 __kmp_abort_thread();
290             break;
291         }
292         // TODO: If thread is done with work and times out, disband/free
293     }
294 
295 #if OMPT_SUPPORT && OMPT_BLAME
296     if (ompt_enabled &&
297         ompt_state != ompt_state_undefined) {
298         if (ompt_state == ompt_state_idle) {
299             if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
300                 ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
301             }
302         } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
303             KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
304                              ompt_state == ompt_state_wait_barrier_implicit ||
305                              ompt_state == ompt_state_wait_barrier_explicit);
306 
307             ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
308             ompt_parallel_id_t pId;
309             ompt_task_id_t tId;
310             if (team){
311                 pId = team->ompt_team_info.parallel_id;
312                 tId = team->ompt_task_info.task_id;
313             } else {
314                 pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
315                 tId = this_thr->th.th_current_task->ompt_task_info.task_id;
316             }
317             ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
318         }
319     }
320 #endif
321 #if KMP_STATS_ENABLED
322     // If we were put into idle state, pop that off the state stack
323     if (KMP_GET_THREAD_STATE() == IDLE) {
324         KMP_POP_PARTITIONED_TIMER();
325         KMP_SET_THREAD_STATE(thread_state);
326         this_thr->th.th_stats->resetIdleFlag();
327     }
328 #endif
329 
330     KMP_FSYNC_SPIN_ACQUIRED(spin);
331 }
332 
/* Release the threads waiting on the flag by updating the flag value, and resume any waiter
   that the sleep bit(s) indicate is asleep. Every call to __kmp_wait_template must be paired
   with a call to this function by another thread to wake the potentially sleeping waiter and
   prevent deadlocks!  */
336 template <class C>
337 static inline void
338 __kmp_release_template(C *flag)
339 {
340 #ifdef KMP_DEBUG
341     int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
342 #endif
343     KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
344     KMP_DEBUG_ASSERT(flag->get());
345     KMP_FSYNC_RELEASING(flag->get());
346 
347     flag->internal_release();
348 
    KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid, flag->get(), *(flag->get())));
350 
351     if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
352         // Only need to check sleep stuff if infinite block time not set
353         if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping?
354             for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
355                 kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag
356                 if (waiter) {
357                     int wait_gtid = waiter->th.th_info.ds.ds_gtid;
358                     // Wake up thread if needed
359                     KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n",
360                                   gtid, wait_gtid, flag->get()));
361                     flag->resume(wait_gtid); // unsets flag's current_waiter when done
362                 }
363             }
364         }
365     }
366 }
367 
368 template <typename FlagType>
369 struct flag_traits {};
370 
371 template <>
372 struct flag_traits<kmp_uint32> {
373     typedef kmp_uint32 flag_t;
374     static const flag_type t = flag32;
375     static inline flag_t tcr(flag_t f) { return TCR_4(f); }
376     static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
377     static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
378     static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
379 };
380 
381 template <>
382 struct flag_traits<kmp_uint64> {
383     typedef kmp_uint64 flag_t;
384     static const flag_type t = flag64;
385     static inline flag_t tcr(flag_t f) { return TCR_8(f); }
386     static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
387     static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
388     static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
389 };
390 
391 template <typename FlagType>
392 class kmp_basic_flag : public kmp_flag<FlagType> {
393     typedef flag_traits<FlagType> traits_type;
    FlagType checker;  /**< Value the flag is compared against to check whether it has been released. */
    kmp_info_t * waiting_threads[1];  /**< Array of threads sleeping on this flag. */
    kmp_uint32 num_waiting_threads;       /**< Number of threads sleeping on this flag. */
397  public:
398     kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
399     kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
400         waiting_threads[0] = thr;
401     }
402     kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
403     /*!
404      * param i in   index into waiting_threads
405      * @result the thread that is waiting at index i
406      */
407     kmp_info_t * get_waiter(kmp_uint32 i) {
408         KMP_DEBUG_ASSERT(i<num_waiting_threads);
409         return waiting_threads[i];
410     }
411     /*!
412      * @result num_waiting_threads
413      */
414     kmp_uint32 get_num_waiters() { return num_waiting_threads; }
415     /*!
416      * @param thr in   the thread which is now waiting
417      *
418      * Insert a waiting thread at index 0.
419      */
420     void set_waiter(kmp_info_t *thr) {
421         waiting_threads[0] = thr;
422         num_waiting_threads = 1;
423     }
424     /*!
425      * @result true if the flag object has been released.
426      */
427     bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
428     /*!
429      * @param old_loc in   old value of flag
430      * @result true if the flag's old value indicates it was released.
431      */
432     bool done_check_val(FlagType old_loc) { return old_loc == checker; }
433     /*!
434      * @result true if the flag object is not yet released.
435      * Used in __kmp_wait_template like:
436      * @code
437      * while (flag.notdone_check()) { pause(); }
438      * @endcode
439      */
440     bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
441     /*!
442      * @result Actual flag value before release was applied.
443      * Trigger all waiting threads to run by modifying flag to release state.
444      */
445     void internal_release() {
446         (void) traits_type::test_then_add4((volatile FlagType *)this->get());
447     }
448     /*!
449      * @result Actual flag value before sleep bit(s) set.
450      * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s).
451      */
452     FlagType set_sleeping() {
453         return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
454     }
455     /*!
456      * @result Actual flag value before sleep bit(s) cleared.
457      * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s).
458      */
459     FlagType unset_sleeping() {
460         return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
461     }
462     /*!
463      * @param old_loc in   old value of flag
464      * Test whether there are threads sleeping on the flag's old value in old_loc.
465      */
466     bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
467     /*!
468      * Test whether there are threads sleeping on the flag.
469      */
470     bool is_sleeping() { return is_sleeping_val(*(this->get())); }
471     bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
472     kmp_uint8 *get_stolen() { return NULL; }
473     enum barrier_type get_bt() { return bs_last_barrier; }
474 };
475 
476 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
477  public:
478     kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
479     kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
480     kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
481     void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
482     void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
483     int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
484                       USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
485         return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
486                                       USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
487     }
488     void wait(kmp_info_t *this_thr, int final_spin
489               USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
490         __kmp_wait_template(this_thr, this, final_spin
491                             USE_ITT_BUILD_ARG(itt_sync_obj));
492     }
493     void release() { __kmp_release_template(this); }
494     flag_type get_ptr_type() { return flag32; }
495 };
496 
497 class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
498  public:
499     kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
500     kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
501     kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
502     void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
503     void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
504     int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
505                       USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
506         return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
507                                       USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
508     }
509     void wait(kmp_info_t *this_thr, int final_spin
510               USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
511         __kmp_wait_template(this_thr, this, final_spin
512                             USE_ITT_BUILD_ARG(itt_sync_obj));
513     }
514     void release() { __kmp_release_template(this); }
515     flag_type get_ptr_type() { return flag64; }
516 };
517 
518 // Hierarchical 64-bit on-core barrier instantiation
519 class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
520     kmp_uint64 checker;
521     kmp_info_t * waiting_threads[1];
522     kmp_uint32 num_waiting_threads;
523     kmp_uint32 offset;      /**< Portion of flag that is of interest for an operation. */
524     bool flag_switch;       /**< Indicates a switch in flag location. */
525     enum barrier_type bt;   /**< Barrier type. */
526     kmp_info_t * this_thr;  /**< Thread that may be redirected to different flag location. */
527 #if USE_ITT_BUILD
528     void *itt_sync_obj;     /**< ITT object that must be passed to new flag location. */
529 #endif
530     unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
531 public:
532     kmp_flag_oncore(volatile kmp_uint64 *p)
533         : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
534     kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
535         : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
536     kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
537                     kmp_info_t * thr
538 #if USE_ITT_BUILD
539                     , void *itt
540 #endif
541                     )
542         : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
543           flag_switch(false), bt(bar_t), this_thr(thr)
544 #if USE_ITT_BUILD
545         , itt_sync_obj(itt)
546 #endif
547         {}
548     kmp_info_t * get_waiter(kmp_uint32 i) {
549         KMP_DEBUG_ASSERT(i<num_waiting_threads);
550         return waiting_threads[i];
551     }
552     kmp_uint32 get_num_waiters() { return num_waiting_threads; }
553     void set_waiter(kmp_info_t *thr) {
554         waiting_threads[0] = thr;
555         num_waiting_threads = 1;
556     }
557     bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
558     bool done_check() { return done_check_val(*get()); }
559     bool notdone_check() {
560         // Calculate flag_switch
561         if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
562             flag_switch = true;
563         if (byteref(get(),offset) != 1 && !flag_switch)
564             return true;
565         else if (flag_switch) {
566             this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
567             kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
568             __kmp_wait_64(this_thr, &flag, TRUE
569 #if USE_ITT_BUILD
570                           , itt_sync_obj
571 #endif
572                           );
573         }
574         return false;
575     }
576     void internal_release() {
577         if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
578             byteref(get(),offset) = 1;
579         }
580         else {
581             kmp_uint64 mask=0;
582             byteref(&mask,offset) = 1;
583             (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
584         }
585     }
586     kmp_uint64 set_sleeping() {
587         return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
588     }
589     kmp_uint64 unset_sleeping() {
590         return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
591     }
592     bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
593     bool is_sleeping() { return is_sleeping_val(*get()); }
594     bool is_any_sleeping() { return is_sleeping_val(*get()); }
595     void wait(kmp_info_t *this_thr, int final_spin) {
596         __kmp_wait_template<kmp_flag_oncore>(this_thr, this, final_spin
597                             USE_ITT_BUILD_ARG(itt_sync_obj));
598     }
599     void release() { __kmp_release_template(this); }
600     void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
601     void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
602     int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
603                       USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
604         return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
605                                           USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
606     }
607     kmp_uint8 *get_stolen() { return NULL; }
608     enum barrier_type get_bt() { return bt; }
609     flag_type get_ptr_type() { return flag_oncore; }
610 };
611 
// Used to wake up threads; the volatile void* flag is usually the th_sleep_loc
// associated with gtid. The cast to kmp_flag_64 below is arbitrary: get_type()
// is defined in the kmp_flag base class, so any concrete flag type would do.
614 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
615     if (!flag) return;
616 
617     switch (((kmp_flag_64 *)flag)->get_type()) {
618     case flag32: __kmp_resume_32(gtid, NULL); break;
619     case flag64: __kmp_resume_64(gtid, NULL); break;
620     case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
621     }
622 }
623 
624 /*!
625 @}
626 */
627 
628 #endif // KMP_WAIT_RELEASE_H
629