15e8470afSJim Cownie /*
25e8470afSJim Cownie * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
35e8470afSJim Cownie */
45e8470afSJim Cownie
55e8470afSJim Cownie //===----------------------------------------------------------------------===//
65e8470afSJim Cownie //
757b08b09SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
857b08b09SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
957b08b09SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
105e8470afSJim Cownie //
115e8470afSJim Cownie //===----------------------------------------------------------------------===//
125e8470afSJim Cownie
133041982dSJonathan Peyton /* Dynamic scheduling initialization and dispatch.
145e8470afSJim Cownie *
155e8470afSJim Cownie * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
165e8470afSJim Cownie * it may change values between parallel regions. __kmp_max_nth
175e8470afSJim Cownie * is the largest value __kmp_nth may take, 1 is the smallest.
185e8470afSJim Cownie */
195e8470afSJim Cownie
205e8470afSJim Cownie #include "kmp.h"
213041982dSJonathan Peyton #include "kmp_error.h"
225e8470afSJim Cownie #include "kmp_i18n.h"
235e8470afSJim Cownie #include "kmp_itt.h"
244cc4bb4cSJim Cownie #include "kmp_stats.h"
253041982dSJonathan Peyton #include "kmp_str.h"
26f700e9edSAndrey Churbanov #if KMP_USE_X87CONTROL
275e8470afSJim Cownie #include <float.h>
285e8470afSJim Cownie #endif
2939ada854SJonathan Peyton #include "kmp_lock.h"
3039ada854SJonathan Peyton #include "kmp_dispatch.h"
31f6399367SJonathan Peyton #if KMP_USE_HIER_SCHED
32f6399367SJonathan Peyton #include "kmp_dispatch_hier.h"
33f6399367SJonathan Peyton #endif
345e8470afSJim Cownie
35d7d088f8SAndrey Churbanov #if OMPT_SUPPORT
36d7d088f8SAndrey Churbanov #include "ompt-specific.h"
37d7d088f8SAndrey Churbanov #endif
38d7d088f8SAndrey Churbanov
395e8470afSJim Cownie /* ------------------------------------------------------------------------ */
405e8470afSJim Cownie /* ------------------------------------------------------------------------ */
415e8470afSJim Cownie
__kmp_dispatch_deo_error(int * gtid_ref,int * cid_ref,ident_t * loc_ref)4239ada854SJonathan Peyton void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
435e8470afSJim Cownie kmp_info_t *th;
445e8470afSJim Cownie
455e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid_ref);
465e8470afSJim Cownie
475e8470afSJim Cownie if (__kmp_env_consistency_check) {
485e8470afSJim Cownie th = __kmp_threads[*gtid_ref];
493041982dSJonathan Peyton if (th->th.th_root->r.r_active &&
503041982dSJonathan Peyton (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
515c56fb55SAndrey Churbanov #if KMP_USE_DYNAMIC_LOCK
525c56fb55SAndrey Churbanov __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
535c56fb55SAndrey Churbanov #else
545e8470afSJim Cownie __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
555c56fb55SAndrey Churbanov #endif
565e8470afSJim Cownie }
575e8470afSJim Cownie }
585e8470afSJim Cownie }
595e8470afSJim Cownie
__kmp_dispatch_dxo_error(int * gtid_ref,int * cid_ref,ident_t * loc_ref)6039ada854SJonathan Peyton void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
615e8470afSJim Cownie kmp_info_t *th;
625e8470afSJim Cownie
635e8470afSJim Cownie if (__kmp_env_consistency_check) {
645e8470afSJim Cownie th = __kmp_threads[*gtid_ref];
655e8470afSJim Cownie if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
665e8470afSJim Cownie __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
675e8470afSJim Cownie }
685e8470afSJim Cownie }
695e8470afSJim Cownie }
705e8470afSJim Cownie
7171abe28eSJonathan Peyton // Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC
__kmp_get_monotonicity(ident_t * loc,enum sched_type schedule,bool use_hier=false)725e348774SPeyton, Jonathan L static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
7371abe28eSJonathan Peyton bool use_hier = false) {
7471abe28eSJonathan Peyton // Pick up the nonmonotonic/monotonic bits from the scheduling type
75c24da72fSNawrin Sultana // Nonmonotonic as default for dynamic schedule when no modifier is specified
76c24da72fSNawrin Sultana int monotonicity = SCHEDULE_NONMONOTONIC;
775e348774SPeyton, Jonathan L
785e348774SPeyton, Jonathan L // Let default be monotonic for executables
795e348774SPeyton, Jonathan L // compiled with OpenMP* 4.5 or less compilers
8054f059c9SBryan Chan if (loc != NULL && loc->get_openmp_version() < 50)
8171abe28eSJonathan Peyton monotonicity = SCHEDULE_MONOTONIC;
825e348774SPeyton, Jonathan L
8367773681SJonathan Peyton if (use_hier || __kmp_force_monotonic)
845e348774SPeyton, Jonathan L monotonicity = SCHEDULE_MONOTONIC;
855e348774SPeyton, Jonathan L else if (SCHEDULE_HAS_NONMONOTONIC(schedule))
8671abe28eSJonathan Peyton monotonicity = SCHEDULE_NONMONOTONIC;
8771abe28eSJonathan Peyton else if (SCHEDULE_HAS_MONOTONIC(schedule))
8871abe28eSJonathan Peyton monotonicity = SCHEDULE_MONOTONIC;
895e348774SPeyton, Jonathan L
9071abe28eSJonathan Peyton return monotonicity;
9171abe28eSJonathan Peyton }
9271abe28eSJonathan Peyton
#if KMP_STATIC_STEAL_ENABLED
// Values for steal_flag: the lifecycle states of a thread's private
// per-loop buffer under the static-steal schedule.
enum {
  UNUSED = 0, // buffer not yet touched for this loop instance
  CLAIMED = 1, // owner thread started initialization
  READY = 2, // available for stealing
  THIEF = 3 // finished by owner, or claimed by thief
  // Allowed state transitions ("sync" = via CAS, "async" = plain store):
  // 0 -> 1 owner only, sync
  // 0 -> 3 thief only, sync
  // 1 -> 2 owner only, async
  // 2 -> 3 owner only, async
  // 3 -> 2 owner only, async
  // 3 -> 0 last thread finishing the loop, async
};
#endif
1085dd4d0d4SAndreyChurbanov
10939ada854SJonathan Peyton // Initialize a dispatch_private_info_template<T> buffer for a particular
11039ada854SJonathan Peyton // type of schedule,chunk. The loop description is found in lb (lower bound),
11139ada854SJonathan Peyton // ub (upper bound), and st (stride). nproc is the number of threads relevant
11239ada854SJonathan Peyton // to the scheduling (often the number of threads in a team, but not always if
11339ada854SJonathan Peyton // hierarchical scheduling is used). tid is the id of the thread calling
11439ada854SJonathan Peyton // the function within the group of nproc threads. It will have a value
11539ada854SJonathan Peyton // between 0 and nproc - 1. This is often just the thread id within a team, but
11639ada854SJonathan Peyton // is not necessarily the case when using hierarchical scheduling.
11739ada854SJonathan Peyton // loc is the source file location of the corresponding loop
11839ada854SJonathan Peyton // gtid is the global thread id
1195e8470afSJim Cownie template <typename T>
__kmp_dispatch_init_algorithm(ident_t * loc,int gtid,dispatch_private_info_template<T> * pr,enum sched_type schedule,T lb,T ub,typename traits_t<T>::signed_t st,kmp_uint64 * cur_chunk,typename traits_t<T>::signed_t chunk,T nproc,T tid)12039ada854SJonathan Peyton void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
12139ada854SJonathan Peyton dispatch_private_info_template<T> *pr,
12239ada854SJonathan Peyton enum sched_type schedule, T lb, T ub,
12339ada854SJonathan Peyton typename traits_t<T>::signed_t st,
12439ada854SJonathan Peyton #if USE_ITT_BUILD
12539ada854SJonathan Peyton kmp_uint64 *cur_chunk,
12639ada854SJonathan Peyton #endif
12739ada854SJonathan Peyton typename traits_t<T>::signed_t chunk,
12839ada854SJonathan Peyton T nproc, T tid) {
1295e8470afSJim Cownie typedef typename traits_t<T>::unsigned_t UT;
1305e8470afSJim Cownie typedef typename traits_t<T>::floating_t DBL;
1315e8470afSJim Cownie
1325e8470afSJim Cownie int active;
1335e8470afSJim Cownie T tc;
1345e8470afSJim Cownie kmp_info_t *th;
1355e8470afSJim Cownie kmp_team_t *team;
13671abe28eSJonathan Peyton int monotonicity;
13771abe28eSJonathan Peyton bool use_hier;
1385e8470afSJim Cownie
1395e8470afSJim Cownie #ifdef KMP_DEBUG
140baad3f60SJonathan Peyton typedef typename traits_t<T>::signed_t ST;
1415e8470afSJim Cownie {
142aeb40adaSJonas Hahnfeld char *buff;
1435e8470afSJim Cownie // create format specifiers before the debug output
14439ada854SJonathan Peyton buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called "
14539ada854SJonathan Peyton "pr:%%p lb:%%%s ub:%%%s st:%%%s "
14639ada854SJonathan Peyton "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
14739ada854SJonathan Peyton traits_t<T>::spec, traits_t<T>::spec,
14839ada854SJonathan Peyton traits_t<ST>::spec, traits_t<ST>::spec,
14939ada854SJonathan Peyton traits_t<T>::spec, traits_t<T>::spec);
15039ada854SJonathan Peyton KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid));
1515e8470afSJim Cownie __kmp_str_free(&buff);
1525e8470afSJim Cownie }
1535e8470afSJim Cownie #endif
1545e8470afSJim Cownie /* setup data */
1555e8470afSJim Cownie th = __kmp_threads[gtid];
1565e8470afSJim Cownie team = th->th.th_team;
1575e8470afSJim Cownie active = !team->t.t_serialized;
1585e8470afSJim Cownie
1594cc4bb4cSJim Cownie #if USE_ITT_BUILD
160e4b4f994SJonathan Peyton int itt_need_metadata_reporting =
161e4b4f994SJonathan Peyton __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
162e4b4f994SJonathan Peyton KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
16351aecb82SAndrey Churbanov team->t.t_active_level == 1;
1644cc4bb4cSJim Cownie #endif
16571abe28eSJonathan Peyton
16671abe28eSJonathan Peyton #if KMP_USE_HIER_SCHED
16771abe28eSJonathan Peyton use_hier = pr->flags.use_hier;
16871abe28eSJonathan Peyton #else
16971abe28eSJonathan Peyton use_hier = false;
170429dbc2aSAndrey Churbanov #endif
17171abe28eSJonathan Peyton
17271abe28eSJonathan Peyton /* Pick up the nonmonotonic/monotonic bits from the scheduling type */
1735e348774SPeyton, Jonathan L monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
174ea0fe1dfSJonathan Peyton schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
175ea0fe1dfSJonathan Peyton
1765e8470afSJim Cownie /* Pick up the nomerge/ordered bits from the scheduling type */
1775e8470afSJim Cownie if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) {
17839ada854SJonathan Peyton pr->flags.nomerge = TRUE;
1793041982dSJonathan Peyton schedule =
1803041982dSJonathan Peyton (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
1815e8470afSJim Cownie } else {
18239ada854SJonathan Peyton pr->flags.nomerge = FALSE;
1835e8470afSJim Cownie }
18412313d44SJonathan Peyton pr->type_size = traits_t<T>::type_size; // remember the size of variables
1855e8470afSJim Cownie if (kmp_ord_lower & schedule) {
18639ada854SJonathan Peyton pr->flags.ordered = TRUE;
1873041982dSJonathan Peyton schedule =
1883041982dSJonathan Peyton (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
1895e8470afSJim Cownie } else {
19039ada854SJonathan Peyton pr->flags.ordered = FALSE;
1915e8470afSJim Cownie }
19271abe28eSJonathan Peyton // Ordered overrides nonmonotonic
19371abe28eSJonathan Peyton if (pr->flags.ordered) {
19471abe28eSJonathan Peyton monotonicity = SCHEDULE_MONOTONIC;
19571abe28eSJonathan Peyton }
19645be4500SJonathan Peyton
1975e8470afSJim Cownie if (schedule == kmp_sch_static) {
1985e8470afSJim Cownie schedule = __kmp_static;
1995e8470afSJim Cownie } else {
2005e8470afSJim Cownie if (schedule == kmp_sch_runtime) {
2013041982dSJonathan Peyton // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if
2023041982dSJonathan Peyton // not specified)
2035e8470afSJim Cownie schedule = team->t.t_sched.r_sched_type;
2045e348774SPeyton, Jonathan L monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
20571abe28eSJonathan Peyton schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
2065dd4d0d4SAndreyChurbanov if (pr->flags.ordered) // correct monotonicity for ordered loop if needed
2075dd4d0d4SAndreyChurbanov monotonicity = SCHEDULE_MONOTONIC;
2083041982dSJonathan Peyton // Detail the schedule if needed (global controls are differentiated
2093041982dSJonathan Peyton // appropriately)
2105e8470afSJim Cownie if (schedule == kmp_sch_guided_chunked) {
2115e8470afSJim Cownie schedule = __kmp_guided;
2125e8470afSJim Cownie } else if (schedule == kmp_sch_static) {
2135e8470afSJim Cownie schedule = __kmp_static;
2145e8470afSJim Cownie }
2153041982dSJonathan Peyton // Use the chunk size specified by OMP_SCHEDULE (or default if not
2163041982dSJonathan Peyton // specified)
2175e8470afSJim Cownie chunk = team->t.t_sched.chunk;
21800afbd01SJonathan Peyton #if USE_ITT_BUILD
21939ada854SJonathan Peyton if (cur_chunk)
22039ada854SJonathan Peyton *cur_chunk = chunk;
22100afbd01SJonathan Peyton #endif
2225e8470afSJim Cownie #ifdef KMP_DEBUG
2235e8470afSJim Cownie {
224aeb40adaSJonas Hahnfeld char *buff;
2255e8470afSJim Cownie // create format specifiers before the debug output
22639ada854SJonathan Peyton buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: "
22739ada854SJonathan Peyton "schedule:%%d chunk:%%%s\n",
2285e8470afSJim Cownie traits_t<ST>::spec);
2295e8470afSJim Cownie KD_TRACE(10, (buff, gtid, schedule, chunk));
2305e8470afSJim Cownie __kmp_str_free(&buff);
2315e8470afSJim Cownie }
2325e8470afSJim Cownie #endif
2335e8470afSJim Cownie } else {
2345e8470afSJim Cownie if (schedule == kmp_sch_guided_chunked) {
2355e8470afSJim Cownie schedule = __kmp_guided;
2365e8470afSJim Cownie }
2375e8470afSJim Cownie if (chunk <= 0) {
2385e8470afSJim Cownie chunk = KMP_DEFAULT_CHUNK;
2395e8470afSJim Cownie }
2405e8470afSJim Cownie }
2415e8470afSJim Cownie
2425e8470afSJim Cownie if (schedule == kmp_sch_auto) {
2435e8470afSJim Cownie // mapping and differentiation: in the __kmp_do_serial_initialize()
2445e8470afSJim Cownie schedule = __kmp_auto;
2455e8470afSJim Cownie #ifdef KMP_DEBUG
2465e8470afSJim Cownie {
247aeb40adaSJonas Hahnfeld char *buff;
2485e8470afSJim Cownie // create format specifiers before the debug output
24939ada854SJonathan Peyton buff = __kmp_str_format(
25039ada854SJonathan Peyton "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
2513041982dSJonathan Peyton "schedule:%%d chunk:%%%s\n",
2525e8470afSJim Cownie traits_t<ST>::spec);
2535e8470afSJim Cownie KD_TRACE(10, (buff, gtid, schedule, chunk));
2545e8470afSJim Cownie __kmp_str_free(&buff);
2555e8470afSJim Cownie }
2565e8470afSJim Cownie #endif
2575e8470afSJim Cownie }
25871abe28eSJonathan Peyton #if KMP_STATIC_STEAL_ENABLED
25971abe28eSJonathan Peyton // map nonmonotonic:dynamic to static steal
26071abe28eSJonathan Peyton if (schedule == kmp_sch_dynamic_chunked) {
26171abe28eSJonathan Peyton if (monotonicity == SCHEDULE_NONMONOTONIC)
26271abe28eSJonathan Peyton schedule = kmp_sch_static_steal;
26371abe28eSJonathan Peyton }
26471abe28eSJonathan Peyton #endif
2655e8470afSJim Cownie /* guided analytical not safe for too many threads */
26639ada854SJonathan Peyton if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
2675e8470afSJim Cownie schedule = kmp_sch_guided_iterative_chunked;
2685e8470afSJim Cownie KMP_WARNING(DispatchManyThreads);
2695e8470afSJim Cownie }
270d454c73cSAndrey Churbanov if (schedule == kmp_sch_runtime_simd) {
271d454c73cSAndrey Churbanov // compiler provides simd_width in the chunk parameter
272d454c73cSAndrey Churbanov schedule = team->t.t_sched.r_sched_type;
2735e348774SPeyton, Jonathan L monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
27471abe28eSJonathan Peyton schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
275d454c73cSAndrey Churbanov // Detail the schedule if needed (global controls are differentiated
276d454c73cSAndrey Churbanov // appropriately)
277d454c73cSAndrey Churbanov if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
278d454c73cSAndrey Churbanov schedule == __kmp_static) {
279d454c73cSAndrey Churbanov schedule = kmp_sch_static_balanced_chunked;
280d454c73cSAndrey Churbanov } else {
281d454c73cSAndrey Churbanov if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) {
282d454c73cSAndrey Churbanov schedule = kmp_sch_guided_simd;
283d454c73cSAndrey Churbanov }
284d454c73cSAndrey Churbanov chunk = team->t.t_sched.chunk * chunk;
285d454c73cSAndrey Churbanov }
286d454c73cSAndrey Churbanov #if USE_ITT_BUILD
28739ada854SJonathan Peyton if (cur_chunk)
28839ada854SJonathan Peyton *cur_chunk = chunk;
289d454c73cSAndrey Churbanov #endif
290d454c73cSAndrey Churbanov #ifdef KMP_DEBUG
291d454c73cSAndrey Churbanov {
292aeb40adaSJonas Hahnfeld char *buff;
293d454c73cSAndrey Churbanov // create format specifiers before the debug output
29471abe28eSJonathan Peyton buff = __kmp_str_format(
29571abe28eSJonathan Peyton "__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
296d454c73cSAndrey Churbanov " chunk:%%%s\n",
297d454c73cSAndrey Churbanov traits_t<ST>::spec);
298d454c73cSAndrey Churbanov KD_TRACE(10, (buff, gtid, schedule, chunk));
299d454c73cSAndrey Churbanov __kmp_str_free(&buff);
300d454c73cSAndrey Churbanov }
301d454c73cSAndrey Churbanov #endif
302d454c73cSAndrey Churbanov }
3035e8470afSJim Cownie pr->u.p.parm1 = chunk;
3045e8470afSJim Cownie }
3055e8470afSJim Cownie KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
3065e8470afSJim Cownie "unknown scheduling type");
3075e8470afSJim Cownie
3085e8470afSJim Cownie pr->u.p.count = 0;
3095e8470afSJim Cownie
3105e8470afSJim Cownie if (__kmp_env_consistency_check) {
3115e8470afSJim Cownie if (st == 0) {
3123041982dSJonathan Peyton __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
31339ada854SJonathan Peyton (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc);
3145e8470afSJim Cownie }
3155e8470afSJim Cownie }
3165235a1b6SJonathan Peyton // compute trip count
3175235a1b6SJonathan Peyton if (st == 1) { // most common case
3185235a1b6SJonathan Peyton if (ub >= lb) {
3195235a1b6SJonathan Peyton tc = ub - lb + 1;
3205235a1b6SJonathan Peyton } else { // ub < lb
3215e8470afSJim Cownie tc = 0; // zero-trip
3225235a1b6SJonathan Peyton }
3235235a1b6SJonathan Peyton } else if (st < 0) {
3245235a1b6SJonathan Peyton if (lb >= ub) {
3255235a1b6SJonathan Peyton // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
3265235a1b6SJonathan Peyton // where the division needs to be unsigned regardless of the result type
3275235a1b6SJonathan Peyton tc = (UT)(lb - ub) / (-st) + 1;
3285235a1b6SJonathan Peyton } else { // lb < ub
3295235a1b6SJonathan Peyton tc = 0; // zero-trip
3305e8470afSJim Cownie }
3315e8470afSJim Cownie } else { // st > 0
3325235a1b6SJonathan Peyton if (ub >= lb) {
3335235a1b6SJonathan Peyton // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
3345235a1b6SJonathan Peyton // where the division needs to be unsigned regardless of the result type
3355235a1b6SJonathan Peyton tc = (UT)(ub - lb) / st + 1;
3365235a1b6SJonathan Peyton } else { // ub < lb
3375e8470afSJim Cownie tc = 0; // zero-trip
3385e8470afSJim Cownie }
3395e8470afSJim Cownie }
3405e8470afSJim Cownie
341d2b53cadSJonathan Peyton #if KMP_STATS_ENABLED
342d2b53cadSJonathan Peyton if (KMP_MASTER_GTID(gtid)) {
343d2b53cadSJonathan Peyton KMP_COUNT_VALUE(OMP_loop_dynamic_total_iterations, tc);
344d2b53cadSJonathan Peyton }
345d2b53cadSJonathan Peyton #endif
346d2b53cadSJonathan Peyton
3475e8470afSJim Cownie pr->u.p.lb = lb;
3485e8470afSJim Cownie pr->u.p.ub = ub;
3495e8470afSJim Cownie pr->u.p.st = st;
3505e8470afSJim Cownie pr->u.p.tc = tc;
3515e8470afSJim Cownie
3525e8470afSJim Cownie #if KMP_OS_WINDOWS
3535e8470afSJim Cownie pr->u.p.last_upper = ub + st;
3545e8470afSJim Cownie #endif /* KMP_OS_WINDOWS */
3555e8470afSJim Cownie
3565e8470afSJim Cownie /* NOTE: only the active parallel region(s) has active ordered sections */
3575e8470afSJim Cownie
3585e8470afSJim Cownie if (active) {
35939ada854SJonathan Peyton if (pr->flags.ordered) {
3605e8470afSJim Cownie pr->ordered_bumped = 0;
3615e8470afSJim Cownie pr->u.p.ordered_lower = 1;
3625e8470afSJim Cownie pr->u.p.ordered_upper = 0;
3635e8470afSJim Cownie }
3645e8470afSJim Cownie }
3655e8470afSJim Cownie
3665e8470afSJim Cownie switch (schedule) {
3675dd4d0d4SAndreyChurbanov #if KMP_STATIC_STEAL_ENABLED
3683041982dSJonathan Peyton case kmp_sch_static_steal: {
3695e8470afSJim Cownie T ntc, init;
3705e8470afSJim Cownie
3713041982dSJonathan Peyton KD_TRACE(100,
37239ada854SJonathan Peyton ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
37339ada854SJonathan Peyton gtid));
3745e8470afSJim Cownie
3755e8470afSJim Cownie ntc = (tc % chunk ? 1 : 0) + tc / chunk;
3765e8470afSJim Cownie if (nproc > 1 && ntc >= nproc) {
377f0682ac4SJonathan Peyton KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL);
37839ada854SJonathan Peyton T id = tid;
3795e8470afSJim Cownie T small_chunk, extras;
3805dd4d0d4SAndreyChurbanov kmp_uint32 old = UNUSED;
3815dd4d0d4SAndreyChurbanov int claimed = pr->steal_flag.compare_exchange_strong(old, CLAIMED);
3825dd4d0d4SAndreyChurbanov if (traits_t<T>::type_size > 4) {
3835dd4d0d4SAndreyChurbanov // AC: TODO: check if 16-byte CAS available and use it to
3845dd4d0d4SAndreyChurbanov // improve performance (probably wait for explicit request
3855dd4d0d4SAndreyChurbanov // before spending time on this).
3865dd4d0d4SAndreyChurbanov // For now use dynamically allocated per-private-buffer lock,
3875dd4d0d4SAndreyChurbanov // free memory in __kmp_dispatch_next when status==0.
3885dd4d0d4SAndreyChurbanov pr->u.p.steal_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
3895dd4d0d4SAndreyChurbanov __kmp_init_lock(pr->u.p.steal_lock);
3905dd4d0d4SAndreyChurbanov }
3915e8470afSJim Cownie small_chunk = ntc / nproc;
3925e8470afSJim Cownie extras = ntc % nproc;
3935e8470afSJim Cownie
3945e8470afSJim Cownie init = id * small_chunk + (id < extras ? id : extras);
3955e8470afSJim Cownie pr->u.p.count = init;
3965dd4d0d4SAndreyChurbanov if (claimed) { // are we succeeded in claiming own buffer?
3975e8470afSJim Cownie pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
3985dd4d0d4SAndreyChurbanov // Other threads will inspect steal_flag when searching for a victim.
3995dd4d0d4SAndreyChurbanov // READY means other threads may steal from this thread from now on.
4005dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
4015dd4d0d4SAndreyChurbanov } else {
4025dd4d0d4SAndreyChurbanov // other thread has stolen whole our range
4035dd4d0d4SAndreyChurbanov KMP_DEBUG_ASSERT(pr->steal_flag == THIEF);
4045dd4d0d4SAndreyChurbanov pr->u.p.ub = init; // mark there is no iterations to work on
405429dbc2aSAndrey Churbanov }
4065dd4d0d4SAndreyChurbanov pr->u.p.parm2 = ntc; // save number of chunks
4075dd4d0d4SAndreyChurbanov // parm3 is the number of times to attempt stealing which is
4085dd4d0d4SAndreyChurbanov // nproc (just a heuristics, could be optimized later on).
4095dd4d0d4SAndreyChurbanov pr->u.p.parm3 = nproc;
4105dd4d0d4SAndreyChurbanov pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
4115e8470afSJim Cownie break;
4125e8470afSJim Cownie } else {
413bd2fb41cSAndreyChurbanov /* too few chunks: switching to kmp_sch_dynamic_chunked */
414bd2fb41cSAndreyChurbanov schedule = kmp_sch_dynamic_chunked;
415bd2fb41cSAndreyChurbanov KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "
416bd2fb41cSAndreyChurbanov "kmp_sch_dynamic_chunked\n",
4175e8470afSJim Cownie gtid));
418e2738b37SPeyton, Jonathan L goto dynamic_init;
419bd2fb41cSAndreyChurbanov break;
4205e8470afSJim Cownie } // if
4215e8470afSJim Cownie } // case
4225e8470afSJim Cownie #endif
4233041982dSJonathan Peyton case kmp_sch_static_balanced: {
4245e8470afSJim Cownie T init, limit;
4255e8470afSJim Cownie
42639ada854SJonathan Peyton KD_TRACE(
42739ada854SJonathan Peyton 100,
42839ada854SJonathan Peyton ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",
4295e8470afSJim Cownie gtid));
4305e8470afSJim Cownie
4315e8470afSJim Cownie if (nproc > 1) {
43239ada854SJonathan Peyton T id = tid;
4335e8470afSJim Cownie
4345e8470afSJim Cownie if (tc < nproc) {
4355e8470afSJim Cownie if (id < tc) {
4365e8470afSJim Cownie init = id;
4375e8470afSJim Cownie limit = id;
4385e8470afSJim Cownie pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
4395e8470afSJim Cownie } else {
4405e8470afSJim Cownie pr->u.p.count = 1; /* means no more chunks to execute */
4415e8470afSJim Cownie pr->u.p.parm1 = FALSE;
4425e8470afSJim Cownie break;
4435e8470afSJim Cownie }
4445e8470afSJim Cownie } else {
4455e8470afSJim Cownie T small_chunk = tc / nproc;
4465e8470afSJim Cownie T extras = tc % nproc;
4475e8470afSJim Cownie init = id * small_chunk + (id < extras ? id : extras);
4485e8470afSJim Cownie limit = init + small_chunk - (id < extras ? 0 : 1);
4495e8470afSJim Cownie pr->u.p.parm1 = (id == nproc - 1);
4505e8470afSJim Cownie }
4515e8470afSJim Cownie } else {
4525e8470afSJim Cownie if (tc > 0) {
4535e8470afSJim Cownie init = 0;
4545e8470afSJim Cownie limit = tc - 1;
4555e8470afSJim Cownie pr->u.p.parm1 = TRUE;
45639ada854SJonathan Peyton } else {
45739ada854SJonathan Peyton // zero trip count
4585e8470afSJim Cownie pr->u.p.count = 1; /* means no more chunks to execute */
4595e8470afSJim Cownie pr->u.p.parm1 = FALSE;
4605e8470afSJim Cownie break;
4615e8470afSJim Cownie }
4625e8470afSJim Cownie }
4634cc4bb4cSJim Cownie #if USE_ITT_BUILD
4644cc4bb4cSJim Cownie // Calculate chunk for metadata report
46551aecb82SAndrey Churbanov if (itt_need_metadata_reporting)
46639ada854SJonathan Peyton if (cur_chunk)
46739ada854SJonathan Peyton *cur_chunk = limit - init + 1;
4684cc4bb4cSJim Cownie #endif
4695e8470afSJim Cownie if (st == 1) {
4705e8470afSJim Cownie pr->u.p.lb = lb + init;
4715e8470afSJim Cownie pr->u.p.ub = lb + limit;
4725e8470afSJim Cownie } else {
4733041982dSJonathan Peyton // calculated upper bound, "ub" is user-defined upper bound
4743041982dSJonathan Peyton T ub_tmp = lb + limit * st;
4755e8470afSJim Cownie pr->u.p.lb = lb + init * st;
4763041982dSJonathan Peyton // adjust upper bound to "ub" if needed, so that MS lastprivate will match
4773041982dSJonathan Peyton // it exactly
4785e8470afSJim Cownie if (st > 0) {
4795e8470afSJim Cownie pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
4805e8470afSJim Cownie } else {
4815e8470afSJim Cownie pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
4825e8470afSJim Cownie }
4835e8470afSJim Cownie }
48439ada854SJonathan Peyton if (pr->flags.ordered) {
4855e8470afSJim Cownie pr->u.p.ordered_lower = init;
4865e8470afSJim Cownie pr->u.p.ordered_upper = limit;
4875e8470afSJim Cownie }
4885e8470afSJim Cownie break;
4895e8470afSJim Cownie } // case
490d454c73cSAndrey Churbanov case kmp_sch_static_balanced_chunked: {
491d454c73cSAndrey Churbanov // similar to balanced, but chunk adjusted to multiple of simd width
49239ada854SJonathan Peyton T nth = nproc;
49339ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
494d454c73cSAndrey Churbanov " -> falling-through to static_greedy\n",
495d454c73cSAndrey Churbanov gtid));
496d454c73cSAndrey Churbanov schedule = kmp_sch_static_greedy;
497d454c73cSAndrey Churbanov if (nth > 1)
498d454c73cSAndrey Churbanov pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
499d454c73cSAndrey Churbanov else
500d454c73cSAndrey Churbanov pr->u.p.parm1 = tc;
501d454c73cSAndrey Churbanov break;
502d454c73cSAndrey Churbanov } // case
50339ada854SJonathan Peyton case kmp_sch_guided_simd:
50439ada854SJonathan Peyton case kmp_sch_guided_iterative_chunked: {
50539ada854SJonathan Peyton KD_TRACE(
50639ada854SJonathan Peyton 100,
50739ada854SJonathan Peyton ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
5083041982dSJonathan Peyton " case\n",
5093041982dSJonathan Peyton gtid));
5105e8470afSJim Cownie
5115e8470afSJim Cownie if (nproc > 1) {
5125e8470afSJim Cownie if ((2L * chunk + 1) * nproc >= tc) {
5135e8470afSJim Cownie /* chunk size too large, switch to dynamic */
5145e8470afSJim Cownie schedule = kmp_sch_dynamic_chunked;
515e2738b37SPeyton, Jonathan L goto dynamic_init;
5165e8470afSJim Cownie } else {
5175e8470afSJim Cownie // when remaining iters become less than parm2 - switch to dynamic
5185e8470afSJim Cownie pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
5193041982dSJonathan Peyton *(double *)&pr->u.p.parm3 =
5206b316febSTerry Wilmarth guided_flt_param / (double)nproc; // may occupy parm3 and parm4
5215e8470afSJim Cownie }
5225e8470afSJim Cownie } else {
52339ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
5243041982dSJonathan Peyton "kmp_sch_static_greedy\n",
5253041982dSJonathan Peyton gtid));
5265e8470afSJim Cownie schedule = kmp_sch_static_greedy;
5275e8470afSJim Cownie /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
52839ada854SJonathan Peyton KD_TRACE(
52939ada854SJonathan Peyton 100,
53039ada854SJonathan Peyton ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
5313041982dSJonathan Peyton gtid));
5325e8470afSJim Cownie pr->u.p.parm1 = tc;
5335e8470afSJim Cownie } // if
5345e8470afSJim Cownie } // case
5355e8470afSJim Cownie break;
5363041982dSJonathan Peyton case kmp_sch_guided_analytical_chunked: {
53739ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
53839ada854SJonathan Peyton "kmp_sch_guided_analytical_chunked case\n",
5393041982dSJonathan Peyton gtid));
54039ada854SJonathan Peyton
5415e8470afSJim Cownie if (nproc > 1) {
5425e8470afSJim Cownie if ((2L * chunk + 1) * nproc >= tc) {
5435e8470afSJim Cownie /* chunk size too large, switch to dynamic */
5445e8470afSJim Cownie schedule = kmp_sch_dynamic_chunked;
545e2738b37SPeyton, Jonathan L goto dynamic_init;
5465e8470afSJim Cownie } else {
5475e8470afSJim Cownie /* commonly used term: (2 nproc - 1)/(2 nproc) */
5485e8470afSJim Cownie DBL x;
5495e8470afSJim Cownie
550f700e9edSAndrey Churbanov #if KMP_USE_X87CONTROL
5513041982dSJonathan Peyton /* Linux* OS already has 64-bit computation by default for long double,
5523041982dSJonathan Peyton and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On
5533041982dSJonathan Peyton Windows* OS on IA-32 architecture, we need to set precision to 64-bit
5543041982dSJonathan Peyton instead of the default 53-bit. Even though long double doesn't work
5553041982dSJonathan Peyton on Windows* OS on Intel(R) 64, the resulting lack of precision is not
5563041982dSJonathan Peyton expected to impact the correctness of the algorithm, but this has not
5573041982dSJonathan Peyton been mathematically proven. */
5585e8470afSJim Cownie // save original FPCW and set precision to 64-bit, as
5595e8470afSJim Cownie // Windows* OS on IA-32 architecture defaults to 53-bit
560181b4bb3SJim Cownie unsigned int oldFpcw = _control87(0, 0);
561181b4bb3SJim Cownie _control87(_PC_64, _MCW_PC); // 0,0x30000
5625e8470afSJim Cownie #endif
5635e8470afSJim Cownie /* value used for comparison in solver for cross-over point */
564b4a1f441SPeyton, Jonathan L KMP_ASSERT(tc > 0);
5655e8470afSJim Cownie long double target = ((long double)chunk * 2 + 1) * nproc / tc;
5665e8470afSJim Cownie
5675e8470afSJim Cownie /* crossover point--chunk indexes equal to or greater than
5685e8470afSJim Cownie this point switch to dynamic-style scheduling */
5695e8470afSJim Cownie UT cross;
5705e8470afSJim Cownie
5715e8470afSJim Cownie /* commonly used term: (2 nproc - 1)/(2 nproc) */
5726b316febSTerry Wilmarth x = 1.0 - 0.5 / (double)nproc;
5735e8470afSJim Cownie
5745e8470afSJim Cownie #ifdef KMP_DEBUG
5755e8470afSJim Cownie { // test natural alignment
5765e8470afSJim Cownie struct _test_a {
5775e8470afSJim Cownie char a;
5785e8470afSJim Cownie union {
5795e8470afSJim Cownie char b;
5805e8470afSJim Cownie DBL d;
5815e8470afSJim Cownie };
5825e8470afSJim Cownie } t;
5833041982dSJonathan Peyton ptrdiff_t natural_alignment =
5843041982dSJonathan Peyton (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
5853041982dSJonathan Peyton //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long
5863041982dSJonathan Peyton // long)natural_alignment );
5873041982dSJonathan Peyton KMP_DEBUG_ASSERT(
5883041982dSJonathan Peyton (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
5895e8470afSJim Cownie }
5905e8470afSJim Cownie #endif // KMP_DEBUG
5915e8470afSJim Cownie
5925e8470afSJim Cownie /* save the term in thread private dispatch structure */
5935e8470afSJim Cownie *(DBL *)&pr->u.p.parm3 = x;
5945e8470afSJim Cownie
5953041982dSJonathan Peyton /* solve for the crossover point to the nearest integer i for which C_i
5963041982dSJonathan Peyton <= chunk */
5975e8470afSJim Cownie {
5985e8470afSJim Cownie UT left, right, mid;
5995e8470afSJim Cownie long double p;
6005e8470afSJim Cownie
6015e8470afSJim Cownie /* estimate initial upper and lower bound */
6025e8470afSJim Cownie
6035e8470afSJim Cownie /* doesn't matter what value right is as long as it is positive, but
6043041982dSJonathan Peyton it affects performance of the solver */
6055e8470afSJim Cownie right = 229;
6065e8470afSJim Cownie p = __kmp_pow<UT>(x, right);
6075e8470afSJim Cownie if (p > target) {
6085e8470afSJim Cownie do {
6095e8470afSJim Cownie p *= p;
6105e8470afSJim Cownie right <<= 1;
6115e8470afSJim Cownie } while (p > target && right < (1 << 27));
6123041982dSJonathan Peyton /* lower bound is previous (failed) estimate of upper bound */
6133041982dSJonathan Peyton left = right >> 1;
6145e8470afSJim Cownie } else {
6155e8470afSJim Cownie left = 0;
6165e8470afSJim Cownie }
6175e8470afSJim Cownie
6185e8470afSJim Cownie /* bisection root-finding method */
6195e8470afSJim Cownie while (left + 1 < right) {
6205e8470afSJim Cownie mid = (left + right) / 2;
6215e8470afSJim Cownie if (__kmp_pow<UT>(x, mid) > target) {
6225e8470afSJim Cownie left = mid;
6235e8470afSJim Cownie } else {
6245e8470afSJim Cownie right = mid;
6255e8470afSJim Cownie }
6265e8470afSJim Cownie } // while
6275e8470afSJim Cownie cross = right;
6285e8470afSJim Cownie }
6295e8470afSJim Cownie /* assert sanity of computed crossover point */
6303041982dSJonathan Peyton KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
6313041982dSJonathan Peyton __kmp_pow<UT>(x, cross) <= target);
6325e8470afSJim Cownie
6335e8470afSJim Cownie /* save the crossover point in thread private dispatch structure */
6345e8470afSJim Cownie pr->u.p.parm2 = cross;
6355e8470afSJim Cownie
6365e8470afSJim Cownie // C75803
6375e8470afSJim Cownie #if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
6385e8470afSJim Cownie #define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
6395e8470afSJim Cownie #else
6405e8470afSJim Cownie #define GUIDED_ANALYTICAL_WORKAROUND (x)
6415e8470afSJim Cownie #endif
6425e8470afSJim Cownie /* dynamic-style scheduling offset */
643309b00a4SShilei Tian pr->u.p.count = tc -
644309b00a4SShilei Tian __kmp_dispatch_guided_remaining(
6453041982dSJonathan Peyton tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
6463041982dSJonathan Peyton cross * chunk;
647f700e9edSAndrey Churbanov #if KMP_USE_X87CONTROL
6485e8470afSJim Cownie // restore FPCW
649181b4bb3SJim Cownie _control87(oldFpcw, _MCW_PC);
6505e8470afSJim Cownie #endif
6515e8470afSJim Cownie } // if
6525e8470afSJim Cownie } else {
65339ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
6543041982dSJonathan Peyton "kmp_sch_static_greedy\n",
6555e8470afSJim Cownie gtid));
6565e8470afSJim Cownie schedule = kmp_sch_static_greedy;
6575e8470afSJim Cownie /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
6585e8470afSJim Cownie pr->u.p.parm1 = tc;
6595e8470afSJim Cownie } // if
6605e8470afSJim Cownie } // case
6615e8470afSJim Cownie break;
6625e8470afSJim Cownie case kmp_sch_static_greedy:
66339ada854SJonathan Peyton KD_TRACE(
66439ada854SJonathan Peyton 100,
66539ada854SJonathan Peyton ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
66639ada854SJonathan Peyton gtid));
66739ada854SJonathan Peyton pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
6685e8470afSJim Cownie break;
6695e8470afSJim Cownie case kmp_sch_static_chunked:
6705e8470afSJim Cownie case kmp_sch_dynamic_chunked:
671e2738b37SPeyton, Jonathan L dynamic_init:
67252cac541SAndreyChurbanov if (tc == 0)
67352cac541SAndreyChurbanov break;
67456223b1eSPeyton, Jonathan L if (pr->u.p.parm1 <= 0)
67570bda912SJonathan Peyton pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
67656223b1eSPeyton, Jonathan L else if (pr->u.p.parm1 > tc)
67756223b1eSPeyton, Jonathan L pr->u.p.parm1 = tc;
678e2738b37SPeyton, Jonathan L // Store the total number of chunks to prevent integer overflow during
679e2738b37SPeyton, Jonathan L // bounds calculations in the get next chunk routine.
680e2738b37SPeyton, Jonathan L pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
68139ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
6823041982dSJonathan Peyton "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
6833041982dSJonathan Peyton gtid));
6845e8470afSJim Cownie break;
6853041982dSJonathan Peyton case kmp_sch_trapezoidal: {
6865e8470afSJim Cownie /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
6875e8470afSJim Cownie
6885e8470afSJim Cownie T parm1, parm2, parm3, parm4;
6893041982dSJonathan Peyton KD_TRACE(100,
69039ada854SJonathan Peyton ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",
69139ada854SJonathan Peyton gtid));
6925e8470afSJim Cownie
6935e8470afSJim Cownie parm1 = chunk;
6945e8470afSJim Cownie
6955e8470afSJim Cownie /* F : size of the first cycle */
69639ada854SJonathan Peyton parm2 = (tc / (2 * nproc));
6975e8470afSJim Cownie
6985e8470afSJim Cownie if (parm2 < 1) {
6995e8470afSJim Cownie parm2 = 1;
7005e8470afSJim Cownie }
7015e8470afSJim Cownie
7023041982dSJonathan Peyton /* L : size of the last cycle. Make sure the last cycle is not larger
7033041982dSJonathan Peyton than the first cycle. */
7045e8470afSJim Cownie if (parm1 < 1) {
7055e8470afSJim Cownie parm1 = 1;
7065e8470afSJim Cownie } else if (parm1 > parm2) {
7075e8470afSJim Cownie parm1 = parm2;
7085e8470afSJim Cownie }
7095e8470afSJim Cownie
7105e8470afSJim Cownie /* N : number of cycles */
7115e8470afSJim Cownie parm3 = (parm2 + parm1);
7125e8470afSJim Cownie parm3 = (2 * tc + parm3 - 1) / parm3;
7135e8470afSJim Cownie
7145e8470afSJim Cownie if (parm3 < 2) {
7155e8470afSJim Cownie parm3 = 2;
7165e8470afSJim Cownie }
7175e8470afSJim Cownie
7185e8470afSJim Cownie /* sigma : decreasing incr of the trapezoid */
7195e8470afSJim Cownie parm4 = (parm3 - 1);
7205e8470afSJim Cownie parm4 = (parm2 - parm1) / parm4;
7215e8470afSJim Cownie
7225e8470afSJim Cownie // pointless check, because parm4 >= 0 always
7235e8470afSJim Cownie // if ( parm4 < 0 ) {
7245e8470afSJim Cownie // parm4 = 0;
7255e8470afSJim Cownie //}
7265e8470afSJim Cownie
7275e8470afSJim Cownie pr->u.p.parm1 = parm1;
7285e8470afSJim Cownie pr->u.p.parm2 = parm2;
7295e8470afSJim Cownie pr->u.p.parm3 = parm3;
7305e8470afSJim Cownie pr->u.p.parm4 = parm4;
7315e8470afSJim Cownie } // case
7325e8470afSJim Cownie break;
7335e8470afSJim Cownie
7343041982dSJonathan Peyton default: {
7356a393f75SJonathan Peyton __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
7365e8470afSJim Cownie KMP_HNT(GetNewerLibrary), // Hint
7375e8470afSJim Cownie __kmp_msg_null // Variadic argument list terminator
7385e8470afSJim Cownie );
7393041982dSJonathan Peyton } break;
7405e8470afSJim Cownie } // switch
7415e8470afSJim Cownie pr->schedule = schedule;
74239ada854SJonathan Peyton }
74339ada854SJonathan Peyton
744f6399367SJonathan Peyton #if KMP_USE_HIER_SCHED
745f6399367SJonathan Peyton template <typename T>
746f6399367SJonathan Peyton inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub,
747f6399367SJonathan Peyton typename traits_t<T>::signed_t st);
748f6399367SJonathan Peyton template <>
749f6399367SJonathan Peyton inline void
__kmp_dispatch_init_hier_runtime(ident_t * loc,kmp_int32 lb,kmp_int32 ub,kmp_int32 st)750f6399367SJonathan Peyton __kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb,
751f6399367SJonathan Peyton kmp_int32 ub, kmp_int32 st) {
752f6399367SJonathan Peyton __kmp_dispatch_init_hierarchy<kmp_int32>(
753f6399367SJonathan Peyton loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
754f6399367SJonathan Peyton __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
755f6399367SJonathan Peyton }
756f6399367SJonathan Peyton template <>
757f6399367SJonathan Peyton inline void
__kmp_dispatch_init_hier_runtime(ident_t * loc,kmp_uint32 lb,kmp_uint32 ub,kmp_int32 st)758f6399367SJonathan Peyton __kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb,
759f6399367SJonathan Peyton kmp_uint32 ub, kmp_int32 st) {
760f6399367SJonathan Peyton __kmp_dispatch_init_hierarchy<kmp_uint32>(
761f6399367SJonathan Peyton loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
762f6399367SJonathan Peyton __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
763f6399367SJonathan Peyton }
764f6399367SJonathan Peyton template <>
765f6399367SJonathan Peyton inline void
__kmp_dispatch_init_hier_runtime(ident_t * loc,kmp_int64 lb,kmp_int64 ub,kmp_int64 st)766f6399367SJonathan Peyton __kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb,
767f6399367SJonathan Peyton kmp_int64 ub, kmp_int64 st) {
768f6399367SJonathan Peyton __kmp_dispatch_init_hierarchy<kmp_int64>(
769f6399367SJonathan Peyton loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
770f6399367SJonathan Peyton __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
771f6399367SJonathan Peyton }
772f6399367SJonathan Peyton template <>
773f6399367SJonathan Peyton inline void
__kmp_dispatch_init_hier_runtime(ident_t * loc,kmp_uint64 lb,kmp_uint64 ub,kmp_int64 st)774f6399367SJonathan Peyton __kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb,
775f6399367SJonathan Peyton kmp_uint64 ub, kmp_int64 st) {
776f6399367SJonathan Peyton __kmp_dispatch_init_hierarchy<kmp_uint64>(
777f6399367SJonathan Peyton loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
778f6399367SJonathan Peyton __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
779f6399367SJonathan Peyton }
780f6399367SJonathan Peyton
781f6399367SJonathan Peyton // free all the hierarchy scheduling memory associated with the team
__kmp_dispatch_free_hierarchies(kmp_team_t * team)782f6399367SJonathan Peyton void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
783f6399367SJonathan Peyton int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
784f6399367SJonathan Peyton for (int i = 0; i < num_disp_buff; ++i) {
785f6399367SJonathan Peyton // type does not matter here so use kmp_int32
786f6399367SJonathan Peyton auto sh =
787f6399367SJonathan Peyton reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
788f6399367SJonathan Peyton &team->t.t_disp_buffer[i]);
789f6399367SJonathan Peyton if (sh->hier) {
790f6399367SJonathan Peyton sh->hier->deallocate();
791f6399367SJonathan Peyton __kmp_free(sh->hier);
792f6399367SJonathan Peyton }
793f6399367SJonathan Peyton }
794f6399367SJonathan Peyton }
795f6399367SJonathan Peyton #endif
796f6399367SJonathan Peyton
79739ada854SJonathan Peyton // UT - unsigned flavor of T, ST - signed flavor of T,
79839ada854SJonathan Peyton // DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
/* Top-level per-thread initialization of a dynamically scheduled loop.
   Selects this thread's dispatch buffer slot (waiting for the slot to be
   released by a still-running previous loop if necessary), then delegates
   the per-schedule setup to __kmp_dispatch_init_algorithm() and installs
   the ordered enter/exit handlers.

   loc      -- source location of the loop construct
   gtid     -- global thread id of the calling thread
   schedule -- requested schedule kind (possibly carrying modifiers)
   lb, ub   -- loop lower and upper bounds
   st       -- loop stride
   chunk    -- requested chunk size
   push_ws  -- not referenced in this body; presumably a legacy
               worksharing flag (NOTE(review): confirm against callers) */
template <typename T>
static void
__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
                    T ub, typename traits_t<T>::signed_t st,
                    typename traits_t<T>::signed_t chunk, int push_ws) {
  typedef typename traits_t<T>::unsigned_t UT;

  int active;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_private_info_template<T> *pr;
  dispatch_shared_info_template<T> volatile *sh;

  // The templated dispatch structures are overlaid on the untyped runtime
  // structures, so their sizes must match exactly.
  KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
                   sizeof(dispatch_private_info));
  KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
                   sizeof(dispatch_shared_info));
  __kmp_assert_valid_gtid(gtid);

  // Lazily complete runtime initialization on first use.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
                            "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
                            traits_t<ST>::spec, traits_t<T>::spec,
                            traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
    __kmp_str_free(&buff);
  }
#endif
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  // "active" == team is not serialized; serialized teams reuse the single
  // top-of-stack buffer below instead of the round-robin slots.
  active = !team->t.t_serialized;
  th->th.th_ident = loc;

  // Any half-decent optimizer will remove this test when the blocks are empty
  // since the macros expand to nothing
  // when statistics are disabled.
  if (schedule == __kmp_static) {
    KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  } else {
    KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC);
  }

#if KMP_USE_HIER_SCHED
  // Initialize the scheduling hierarchy if requested in the OMP_SCHEDULE
  // environment variable.
  // Hierarchical scheduling does not work with ordered, so if ordered is
  // detected, then revert back to threaded scheduling.
  bool ordered;
  enum sched_type my_sched = schedule;
  my_buffer_index = th->th.th_dispatch->th_disp_index;
  pr = reinterpret_cast<dispatch_private_info_template<T> *>(
      &th->th.th_dispatch
           ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
  // Strip modifiers, and map nonmonotonic schedule kinds back into the
  // plain kmp_sch_* range before testing for orderedness.
  my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched);
  if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper))
    my_sched =
        (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower));
  ordered = (kmp_ord_lower & my_sched);
  if (pr->flags.use_hier) {
    if (ordered) {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "
                     "Disabling hierarchical scheduling.\n",
                     gtid));
      pr->flags.use_hier = FALSE;
    }
  }
  if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
    // Don't use hierarchical for ordered parallel loops and don't
    // use the runtime hierarchy if one was specified in the program
    if (!ordered && !pr->flags.use_hier)
      __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
  }
#endif // KMP_USE_HIER_SCHED

#if USE_ITT_BUILD
  kmp_uint64 cur_chunk = chunk;
  // Metadata is reported only by the primary thread of an active team at
  // nesting level 1, and only when the ITT frame mode requests it.
  int itt_need_metadata_reporting =
      __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
      KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1;
#endif
  if (!active) {
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
  } else {
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    // Claim the next buffer slot; slots are recycled round-robin modulo
    // __kmp_dispatch_num_buffers.
    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    /* What happens when number of threads changes, need to resize buffer? */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        &th->th.th_dispatch
             ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
                  my_buffer_index));
    if (sh->buffer_index != my_buffer_index) { // too many loops in progress?
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
                     " sh->buffer_index:%d\n",
                     gtid, my_buffer_index, sh->buffer_index));
      // Block until the loop previously using this slot has been retired.
      __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                             __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
      // Note: KMP_WAIT() cannot be used there: buffer index and
      // my_buffer_index are *always* 32-bit integers.
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
                     "sh->buffer_index:%d\n",
                     gtid, my_buffer_index, sh->buffer_index));
    }
  }

  // Per-schedule setup (chunk sizes, crossover points, trip count, ...).
  __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
#if USE_ITT_BUILD
                                &cur_chunk,
#endif
                                chunk, (T)th->th.th_team_nproc,
                                (T)th->th.th_info.ds.ds_tid);
  if (active) {
    // Install the ordered enter/exit handlers: error stubs for unordered
    // loops, the real typed implementations for ordered ones.
    if (pr->flags.ordered == 0) {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
    }
    th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
#if USE_ITT_BUILD
    if (pr->flags.ordered) {
      __kmp_itt_ordered_init(gtid);
    }
    // Report loop metadata
    if (itt_need_metadata_reporting) {
      // Only report metadata by primary thread of active team at level 1
      kmp_uint64 schedtype = 0;
      switch (schedule) {
      case kmp_sch_static_chunked:
      case kmp_sch_static_balanced: // Chunk is calculated in the switch above
        break;
      case kmp_sch_static_greedy:
        cur_chunk = pr->u.p.parm1;
        break;
      case kmp_sch_dynamic_chunked:
        schedtype = 1;
        break;
      case kmp_sch_guided_iterative_chunked:
      case kmp_sch_guided_analytical_chunked:
      case kmp_sch_guided_simd:
        schedtype = 2;
        break;
      default:
        // Should we put this case under "static"?
        // case kmp_sch_static_steal:
        schedtype = 3;
        break;
      }
      __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
    }
#if KMP_USE_HIER_SCHED
    if (pr->flags.use_hier) {
      pr->u.p.count = 0;
      pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
    }
#endif // KMP_USE_HIER_SCHED
#endif /* USE_ITT_BUILD */
  }

#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
        "lb:%%%s ub:%%%s"
        " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
        " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
        traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
        traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
                  pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
                  pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
                  pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
    __kmp_str_free(&buff);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Notify an attached OMPT tool that a worksharing loop is beginning.
  if (ompt_enabled.ompt_callback_work) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
}
10125e8470afSJim Cownie
10133041982dSJonathan Peyton /* For ordered loops, either __kmp_dispatch_finish() should be called after
10145e8470afSJim Cownie * every iteration, or __kmp_dispatch_finish_chunk() should be called after
10155e8470afSJim Cownie * every chunk of iterations. If the ordered section(s) were not executed
10165e8470afSJim Cownie * for this iteration (or every iteration in this chunk), we need to set the
10173041982dSJonathan Peyton * ordered iteration counters so that the next thread can proceed. */
/* Per-iteration ordered bookkeeping: if the ordered section did not run for
   this iteration, wait for this thread's turn and then advance the shared
   ordered iteration counter so the next thread can proceed (see the comment
   block above).
   gtid -- global thread id of the calling thread
   loc  -- source location (unused in this body) */
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  // A serialized team has no cross-thread ordering to maintain.
  if (!th->th.th_team->t.t_serialized) {

    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    if (pr->ordered_bumped) {
      // The ordered counter was already advanced for this iteration
      // (presumably inside the ordered section via the deo/dxo handlers --
      // NOTE(review): confirm); only the flag needs clearing here.
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      // Wait until the shared ordered iteration counter reaches this
      // thread's lower bound, i.e. until it is this thread's turn.
      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
      KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      // Atomically advance the counter by one so the next thread may run.
      test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
    } // if
  } // if
  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
}
10795e8470afSJim Cownie
10805e8470afSJim Cownie #ifdef KMP_GOMP_COMPAT
10815e8470afSJim Cownie
/* Per-chunk ordered bookkeeping (GOMP compatibility path): like
   __kmp_dispatch_finish(), but advances the shared ordered iteration
   counter by the whole chunk size minus any iterations whose ordered
   sections already ran, instead of by one.
   gtid -- global thread id of the calling thread
   loc  -- source location (unused in this body) */
template <typename UT>
static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
  // A serialized team has no cross-thread ordering to maintain.
  if (!th->th.th_team->t.t_serialized) {
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    // inc = number of iterations in this thread's current chunk.
    UT lower = pr->u.p.ordered_lower;
    UT upper = pr->u.p.ordered_upper;
    UT inc = upper - lower + 1;

    if (pr->ordered_bumped == inc) {
      // Every iteration of the chunk already advanced the counter; only
      // the flag needs clearing.
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      // Advance only by the iterations whose ordered sections did not run.
      inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d before wait: "
            "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      // Wait until the shared ordered iteration counter reaches this
      // chunk's lower bound, i.e. until it is this thread's turn.
      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      KMP_MB(); /* is this necessary? */
      KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
                      "ordered_bumped to zero\n",
                      gtid));
      pr->ordered_bumped = 0;
      //!!!!! TODO check if the inc should be unsigned, or signed???
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d after wait: "
            "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
            traits_t<UT>::spec);
        KD_TRACE(1000,
                 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      // Atomically release the remaining iterations of the chunk so the
      // next thread may run.
      test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
    }
    // }
  }
  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
}
11575e8470afSJim Cownie
11585e8470afSJim Cownie #endif /* KMP_GOMP_COMPAT */
11595e8470afSJim Cownie
11605e8470afSJim Cownie template <typename T>
__kmp_dispatch_next_algorithm(int gtid,dispatch_private_info_template<T> * pr,dispatch_shared_info_template<T> volatile * sh,kmp_int32 * p_last,T * p_lb,T * p_ub,typename traits_t<T>::signed_t * p_st,T nproc,T tid)116139ada854SJonathan Peyton int __kmp_dispatch_next_algorithm(int gtid,
116239ada854SJonathan Peyton dispatch_private_info_template<T> *pr,
116339ada854SJonathan Peyton dispatch_shared_info_template<T> volatile *sh,
116439ada854SJonathan Peyton kmp_int32 *p_last, T *p_lb, T *p_ub,
116539ada854SJonathan Peyton typename traits_t<T>::signed_t *p_st, T nproc,
116639ada854SJonathan Peyton T tid) {
11675e8470afSJim Cownie typedef typename traits_t<T>::unsigned_t UT;
11685e8470afSJim Cownie typedef typename traits_t<T>::signed_t ST;
11695e8470afSJim Cownie typedef typename traits_t<T>::floating_t DBL;
117039ada854SJonathan Peyton int status = 0;
11716b316febSTerry Wilmarth bool last = false;
117239ada854SJonathan Peyton T start;
117339ada854SJonathan Peyton ST incr;
117439ada854SJonathan Peyton UT limit, trip, init;
11755e8470afSJim Cownie kmp_info_t *th = __kmp_threads[gtid];
11765e8470afSJim Cownie kmp_team_t *team = th->th.th_team;
11775e8470afSJim Cownie
11785e8470afSJim Cownie KMP_DEBUG_ASSERT(th->th.th_dispatch ==
11795e8470afSJim Cownie &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
11805e8470afSJim Cownie KMP_DEBUG_ASSERT(pr);
11815e8470afSJim Cownie KMP_DEBUG_ASSERT(sh);
118239ada854SJonathan Peyton KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc);
118339ada854SJonathan Peyton #ifdef KMP_DEBUG
118439ada854SJonathan Peyton {
118539ada854SJonathan Peyton char *buff;
118639ada854SJonathan Peyton // create format specifiers before the debug output
118739ada854SJonathan Peyton buff =
118839ada854SJonathan Peyton __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
118939ada854SJonathan Peyton "sh:%%p nproc:%%%s tid:%%%s\n",
119039ada854SJonathan Peyton traits_t<T>::spec, traits_t<T>::spec);
119139ada854SJonathan Peyton KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid));
119239ada854SJonathan Peyton __kmp_str_free(&buff);
119339ada854SJonathan Peyton }
119439ada854SJonathan Peyton #endif
11955e8470afSJim Cownie
11965e8470afSJim Cownie // zero trip count
119739ada854SJonathan Peyton if (pr->u.p.tc == 0) {
119839ada854SJonathan Peyton KD_TRACE(10,
119939ada854SJonathan Peyton ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
120039ada854SJonathan Peyton "zero status:%d\n",
120139ada854SJonathan Peyton gtid, status));
120239ada854SJonathan Peyton return 0;
120339ada854SJonathan Peyton }
120439ada854SJonathan Peyton
12055e8470afSJim Cownie switch (pr->schedule) {
12065dd4d0d4SAndreyChurbanov #if KMP_STATIC_STEAL_ENABLED
12073041982dSJonathan Peyton case kmp_sch_static_steal: {
12085e8470afSJim Cownie T chunk = pr->u.p.parm1;
12095dd4d0d4SAndreyChurbanov UT nchunks = pr->u.p.parm2;
121039ada854SJonathan Peyton KD_TRACE(100,
121139ada854SJonathan Peyton ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",
12123041982dSJonathan Peyton gtid));
12135e8470afSJim Cownie
12145e8470afSJim Cownie trip = pr->u.p.tc - 1;
12155e8470afSJim Cownie
121612313d44SJonathan Peyton if (traits_t<T>::type_size > 4) {
12175dd4d0d4SAndreyChurbanov // use lock for 8-byte induction variable.
12185dd4d0d4SAndreyChurbanov // TODO (optional): check presence and use 16-byte CAS
12195dd4d0d4SAndreyChurbanov kmp_lock_t *lck = pr->u.p.steal_lock;
1220429dbc2aSAndrey Churbanov KMP_DEBUG_ASSERT(lck != NULL);
1221429dbc2aSAndrey Churbanov if (pr->u.p.count < (UT)pr->u.p.ub) {
12225dd4d0d4SAndreyChurbanov KMP_DEBUG_ASSERT(pr->steal_flag == READY);
1223429dbc2aSAndrey Churbanov __kmp_acquire_lock(lck, gtid);
1224429dbc2aSAndrey Churbanov // try to get own chunk of iterations
12255e8470afSJim Cownie init = (pr->u.p.count)++;
12265e8470afSJim Cownie status = (init < (UT)pr->u.p.ub);
1227429dbc2aSAndrey Churbanov __kmp_release_lock(lck, gtid);
12285e8470afSJim Cownie } else {
1229429dbc2aSAndrey Churbanov status = 0; // no own chunks
1230429dbc2aSAndrey Churbanov }
1231429dbc2aSAndrey Churbanov if (!status) { // try to steal
12325dd4d0d4SAndreyChurbanov kmp_lock_t *lckv; // victim buffer's lock
12336b316febSTerry Wilmarth T while_limit = pr->u.p.parm3;
12346b316febSTerry Wilmarth T while_index = 0;
1235abe64360SAndreyChurbanov int idx = (th->th.th_dispatch->th_disp_index - 1) %
1236abe64360SAndreyChurbanov __kmp_dispatch_num_buffers; // current loop index
1237abe64360SAndreyChurbanov // note: victim thread can potentially execute another loop
12385dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF); // mark self buffer inactive
1239429dbc2aSAndrey Churbanov while ((!status) && (while_limit != ++while_index)) {
12405dd4d0d4SAndreyChurbanov dispatch_private_info_template<T> *v;
1241429dbc2aSAndrey Churbanov T remaining;
12425dd4d0d4SAndreyChurbanov T victimId = pr->u.p.parm4;
12435dd4d0d4SAndreyChurbanov T oldVictimId = victimId ? victimId - 1 : nproc - 1;
12445dd4d0d4SAndreyChurbanov v = reinterpret_cast<dispatch_private_info_template<T> *>(
12455dd4d0d4SAndreyChurbanov &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
12465dd4d0d4SAndreyChurbanov KMP_DEBUG_ASSERT(v);
12475dd4d0d4SAndreyChurbanov while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
12485dd4d0d4SAndreyChurbanov oldVictimId != victimId) {
12495dd4d0d4SAndreyChurbanov victimId = (victimId + 1) % nproc;
12505dd4d0d4SAndreyChurbanov v = reinterpret_cast<dispatch_private_info_template<T> *>(
12515dd4d0d4SAndreyChurbanov &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
12525dd4d0d4SAndreyChurbanov KMP_DEBUG_ASSERT(v);
1253bd3a7633SJonathan Peyton }
12545dd4d0d4SAndreyChurbanov if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
1255429dbc2aSAndrey Churbanov continue; // try once more (nproc attempts in total)
1256429dbc2aSAndrey Churbanov }
12575dd4d0d4SAndreyChurbanov if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
12585dd4d0d4SAndreyChurbanov kmp_uint32 old = UNUSED;
12595dd4d0d4SAndreyChurbanov // try to steal whole range from inactive victim
12605dd4d0d4SAndreyChurbanov status = v->steal_flag.compare_exchange_strong(old, THIEF);
12615dd4d0d4SAndreyChurbanov if (status) {
12625dd4d0d4SAndreyChurbanov // initialize self buffer with victim's whole range of chunks
12635dd4d0d4SAndreyChurbanov T id = victimId;
12645dd4d0d4SAndreyChurbanov T small_chunk, extras;
12655dd4d0d4SAndreyChurbanov small_chunk = nchunks / nproc; // chunks per thread
12665dd4d0d4SAndreyChurbanov extras = nchunks % nproc;
12675dd4d0d4SAndreyChurbanov init = id * small_chunk + (id < extras ? id : extras);
12685dd4d0d4SAndreyChurbanov __kmp_acquire_lock(lck, gtid);
12695dd4d0d4SAndreyChurbanov pr->u.p.count = init + 1; // exclude one we execute immediately
12705dd4d0d4SAndreyChurbanov pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
12715dd4d0d4SAndreyChurbanov __kmp_release_lock(lck, gtid);
12725dd4d0d4SAndreyChurbanov pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
12735dd4d0d4SAndreyChurbanov // no need to reinitialize other thread invariants: lb, st, etc.
12745dd4d0d4SAndreyChurbanov #ifdef KMP_DEBUG
12755dd4d0d4SAndreyChurbanov {
12765dd4d0d4SAndreyChurbanov char *buff;
12775dd4d0d4SAndreyChurbanov // create format specifiers before the debug output
12785dd4d0d4SAndreyChurbanov buff = __kmp_str_format(
12795dd4d0d4SAndreyChurbanov "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
12805dd4d0d4SAndreyChurbanov "count:%%%s ub:%%%s\n",
12815dd4d0d4SAndreyChurbanov traits_t<UT>::spec, traits_t<T>::spec);
12825dd4d0d4SAndreyChurbanov KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub));
12835dd4d0d4SAndreyChurbanov __kmp_str_free(&buff);
12845dd4d0d4SAndreyChurbanov }
12855dd4d0d4SAndreyChurbanov #endif
12865dd4d0d4SAndreyChurbanov // activate non-empty buffer and let others steal from us
12875dd4d0d4SAndreyChurbanov if (pr->u.p.count < (UT)pr->u.p.ub)
12885dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
12895dd4d0d4SAndreyChurbanov break;
12905dd4d0d4SAndreyChurbanov }
12915dd4d0d4SAndreyChurbanov }
12925dd4d0d4SAndreyChurbanov if (KMP_ATOMIC_LD_RLX(&v->steal_flag) != READY ||
12935dd4d0d4SAndreyChurbanov v->u.p.count >= (UT)v->u.p.ub) {
12945dd4d0d4SAndreyChurbanov pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim tid
12955dd4d0d4SAndreyChurbanov continue; // no chunks to steal, try next victim
12965dd4d0d4SAndreyChurbanov }
12975dd4d0d4SAndreyChurbanov lckv = v->u.p.steal_lock;
12985dd4d0d4SAndreyChurbanov KMP_ASSERT(lckv != NULL);
12995dd4d0d4SAndreyChurbanov __kmp_acquire_lock(lckv, gtid);
13005dd4d0d4SAndreyChurbanov limit = v->u.p.ub; // keep initial ub
13015dd4d0d4SAndreyChurbanov if (v->u.p.count >= limit) {
13025dd4d0d4SAndreyChurbanov __kmp_release_lock(lckv, gtid);
13035dd4d0d4SAndreyChurbanov pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim tid
13045dd4d0d4SAndreyChurbanov continue; // no chunks to steal, try next victim
1305429dbc2aSAndrey Churbanov }
1306429dbc2aSAndrey Churbanov
13075dd4d0d4SAndreyChurbanov // stealing succeeded, reduce victim's ub by 1/4 of undone chunks
13085dd4d0d4SAndreyChurbanov // TODO: is this heuristic good enough?
13095dd4d0d4SAndreyChurbanov remaining = limit - v->u.p.count;
13105dd4d0d4SAndreyChurbanov if (remaining > 7) {
131139ada854SJonathan Peyton // steal 1/4 of remaining
1312f0682ac4SJonathan Peyton KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2);
13135dd4d0d4SAndreyChurbanov init = (v->u.p.ub -= (remaining >> 2));
1314429dbc2aSAndrey Churbanov } else {
13155dd4d0d4SAndreyChurbanov // steal 1 chunk of 1..7 remaining
1316f0682ac4SJonathan Peyton KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1);
13175dd4d0d4SAndreyChurbanov init = (v->u.p.ub -= 1);
1318429dbc2aSAndrey Churbanov }
13195dd4d0d4SAndreyChurbanov __kmp_release_lock(lckv, gtid);
13205dd4d0d4SAndreyChurbanov #ifdef KMP_DEBUG
13215dd4d0d4SAndreyChurbanov {
13225dd4d0d4SAndreyChurbanov char *buff;
13235dd4d0d4SAndreyChurbanov // create format specifiers before the debug output
13245dd4d0d4SAndreyChurbanov buff = __kmp_str_format(
13255dd4d0d4SAndreyChurbanov "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
13265dd4d0d4SAndreyChurbanov "count:%%%s ub:%%%s\n",
13275dd4d0d4SAndreyChurbanov traits_t<UT>::spec, traits_t<UT>::spec);
13285dd4d0d4SAndreyChurbanov KD_TRACE(10, (buff, gtid, victimId, init, limit));
13295dd4d0d4SAndreyChurbanov __kmp_str_free(&buff);
13305dd4d0d4SAndreyChurbanov }
13315dd4d0d4SAndreyChurbanov #endif
1332429dbc2aSAndrey Churbanov KMP_DEBUG_ASSERT(init + 1 <= limit);
13335dd4d0d4SAndreyChurbanov pr->u.p.parm4 = victimId; // remember victim to steal from
1334429dbc2aSAndrey Churbanov status = 1;
13355dd4d0d4SAndreyChurbanov // now update own count and ub with stolen range excluding init chunk
13365dd4d0d4SAndreyChurbanov __kmp_acquire_lock(lck, gtid);
1337429dbc2aSAndrey Churbanov pr->u.p.count = init + 1;
1338429dbc2aSAndrey Churbanov pr->u.p.ub = limit;
13395dd4d0d4SAndreyChurbanov __kmp_release_lock(lck, gtid);
13405dd4d0d4SAndreyChurbanov // activate non-empty buffer and let others steal from us
13415dd4d0d4SAndreyChurbanov if (init + 1 < limit)
13425dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1343429dbc2aSAndrey Churbanov } // while (search for victim)
1344429dbc2aSAndrey Churbanov } // if (try to find victim and steal)
1345429dbc2aSAndrey Churbanov } else {
1346429dbc2aSAndrey Churbanov // 4-byte induction variable, use 8-byte CAS for pair (count, ub)
13475dd4d0d4SAndreyChurbanov // as all operations on pair (count, ub) must be done atomically
13485e8470afSJim Cownie typedef union {
13495e8470afSJim Cownie struct {
13505e8470afSJim Cownie UT count;
13515e8470afSJim Cownie T ub;
13525e8470afSJim Cownie } p;
13535e8470afSJim Cownie kmp_int64 b;
13545e8470afSJim Cownie } union_i4;
13555e8470afSJim Cownie union_i4 vold, vnew;
13565dd4d0d4SAndreyChurbanov if (pr->u.p.count < (UT)pr->u.p.ub) {
13575dd4d0d4SAndreyChurbanov KMP_DEBUG_ASSERT(pr->steal_flag == READY);
13585e8470afSJim Cownie vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
13595dd4d0d4SAndreyChurbanov vnew.b = vold.b;
13605dd4d0d4SAndreyChurbanov vnew.p.count++; // get chunk from head of self range
13615dd4d0d4SAndreyChurbanov while (!KMP_COMPARE_AND_STORE_REL64(
13625e8470afSJim Cownie (volatile kmp_int64 *)&pr->u.p.count,
13635e8470afSJim Cownie *VOLATILE_CAST(kmp_int64 *) & vold.b,
13645e8470afSJim Cownie *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
13655e8470afSJim Cownie KMP_CPU_PAUSE();
13665e8470afSJim Cownie vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
13675dd4d0d4SAndreyChurbanov vnew.b = vold.b;
13685e8470afSJim Cownie vnew.p.count++;
13695e8470afSJim Cownie }
13705dd4d0d4SAndreyChurbanov init = vold.p.count;
13715dd4d0d4SAndreyChurbanov status = (init < (UT)vold.p.ub);
13725dd4d0d4SAndreyChurbanov } else {
13735dd4d0d4SAndreyChurbanov status = 0; // no own chunks
13745e8470afSJim Cownie }
13755dd4d0d4SAndreyChurbanov if (!status) { // try to steal
13766b316febSTerry Wilmarth T while_limit = pr->u.p.parm3;
13776b316febSTerry Wilmarth T while_index = 0;
1378abe64360SAndreyChurbanov int idx = (th->th.th_dispatch->th_disp_index - 1) %
1379abe64360SAndreyChurbanov __kmp_dispatch_num_buffers; // current loop index
1380abe64360SAndreyChurbanov // note: victim thread can potentially execute another loop
13815dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF); // mark self buffer inactive
13825e8470afSJim Cownie while ((!status) && (while_limit != ++while_index)) {
13835dd4d0d4SAndreyChurbanov dispatch_private_info_template<T> *v;
13846b316febSTerry Wilmarth T remaining;
13855dd4d0d4SAndreyChurbanov T victimId = pr->u.p.parm4;
13865dd4d0d4SAndreyChurbanov T oldVictimId = victimId ? victimId - 1 : nproc - 1;
13875dd4d0d4SAndreyChurbanov v = reinterpret_cast<dispatch_private_info_template<T> *>(
13885dd4d0d4SAndreyChurbanov &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
13895dd4d0d4SAndreyChurbanov KMP_DEBUG_ASSERT(v);
13905dd4d0d4SAndreyChurbanov while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
13915dd4d0d4SAndreyChurbanov oldVictimId != victimId) {
13925dd4d0d4SAndreyChurbanov victimId = (victimId + 1) % nproc;
13935dd4d0d4SAndreyChurbanov v = reinterpret_cast<dispatch_private_info_template<T> *>(
13945dd4d0d4SAndreyChurbanov &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
13955dd4d0d4SAndreyChurbanov KMP_DEBUG_ASSERT(v);
1396bd3a7633SJonathan Peyton }
13975dd4d0d4SAndreyChurbanov if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
1398429dbc2aSAndrey Churbanov continue; // try once more (nproc attempts in total)
13995e8470afSJim Cownie }
14005dd4d0d4SAndreyChurbanov if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
14015dd4d0d4SAndreyChurbanov kmp_uint32 old = UNUSED;
14025dd4d0d4SAndreyChurbanov // try to steal whole range from inactive victim
14035dd4d0d4SAndreyChurbanov status = v->steal_flag.compare_exchange_strong(old, THIEF);
14045dd4d0d4SAndreyChurbanov if (status) {
14055dd4d0d4SAndreyChurbanov // initialize self buffer with victim's whole range of chunks
14065dd4d0d4SAndreyChurbanov T id = victimId;
14075dd4d0d4SAndreyChurbanov T small_chunk, extras;
14085dd4d0d4SAndreyChurbanov small_chunk = nchunks / nproc; // chunks per thread
14095dd4d0d4SAndreyChurbanov extras = nchunks % nproc;
14105dd4d0d4SAndreyChurbanov init = id * small_chunk + (id < extras ? id : extras);
14115dd4d0d4SAndreyChurbanov vnew.p.count = init + 1;
14125dd4d0d4SAndreyChurbanov vnew.p.ub = init + small_chunk + (id < extras ? 1 : 0);
14135dd4d0d4SAndreyChurbanov // write pair (count, ub) at once atomically
14145dd4d0d4SAndreyChurbanov #if KMP_ARCH_X86
14155dd4d0d4SAndreyChurbanov KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vnew.b);
14165dd4d0d4SAndreyChurbanov #else
14175dd4d0d4SAndreyChurbanov *(volatile kmp_int64 *)(&pr->u.p.count) = vnew.b;
14185dd4d0d4SAndreyChurbanov #endif
14195dd4d0d4SAndreyChurbanov pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
14205dd4d0d4SAndreyChurbanov // no need to initialize other thread invariants: lb, st, etc.
14215dd4d0d4SAndreyChurbanov #ifdef KMP_DEBUG
14225dd4d0d4SAndreyChurbanov {
14235dd4d0d4SAndreyChurbanov char *buff;
14245dd4d0d4SAndreyChurbanov // create format specifiers before the debug output
14255dd4d0d4SAndreyChurbanov buff = __kmp_str_format(
14265dd4d0d4SAndreyChurbanov "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
14275dd4d0d4SAndreyChurbanov "count:%%%s ub:%%%s\n",
14285dd4d0d4SAndreyChurbanov traits_t<UT>::spec, traits_t<T>::spec);
14295dd4d0d4SAndreyChurbanov KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub));
14305dd4d0d4SAndreyChurbanov __kmp_str_free(&buff);
14315e8470afSJim Cownie }
14325dd4d0d4SAndreyChurbanov #endif
14335dd4d0d4SAndreyChurbanov // activate non-empty buffer and let others steal from us
14345dd4d0d4SAndreyChurbanov if (pr->u.p.count < (UT)pr->u.p.ub)
14355dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
14365dd4d0d4SAndreyChurbanov break;
14375dd4d0d4SAndreyChurbanov }
14385dd4d0d4SAndreyChurbanov }
14395dd4d0d4SAndreyChurbanov while (1) { // CAS loop with check if victim still has enough chunks
14405dd4d0d4SAndreyChurbanov // many threads may be stealing concurrently from same victim
14415dd4d0d4SAndreyChurbanov vold.b = *(volatile kmp_int64 *)(&v->u.p.count);
14425dd4d0d4SAndreyChurbanov if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY ||
14435dd4d0d4SAndreyChurbanov vold.p.count >= (UT)vold.p.ub) {
14445dd4d0d4SAndreyChurbanov pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim id
14455dd4d0d4SAndreyChurbanov break; // no chunks to steal, try next victim
14465dd4d0d4SAndreyChurbanov }
14475dd4d0d4SAndreyChurbanov vnew.b = vold.b;
14485dd4d0d4SAndreyChurbanov remaining = vold.p.ub - vold.p.count;
14496b316febSTerry Wilmarth // try to steal 1/4 of remaining
14505dd4d0d4SAndreyChurbanov // TODO: is this heuristic good enough?
14515dd4d0d4SAndreyChurbanov if (remaining > 7) {
14525dd4d0d4SAndreyChurbanov vnew.p.ub -= remaining >> 2; // steal from tail of victim's range
1453429dbc2aSAndrey Churbanov } else {
14545dd4d0d4SAndreyChurbanov vnew.p.ub -= 1; // steal 1 chunk of 1..7 remaining
1455429dbc2aSAndrey Churbanov }
1456b2787945SAndreyChurbanov KMP_DEBUG_ASSERT(vnew.p.ub * (UT)chunk <= trip);
14575dd4d0d4SAndreyChurbanov if (KMP_COMPARE_AND_STORE_REL64(
14585dd4d0d4SAndreyChurbanov (volatile kmp_int64 *)&v->u.p.count,
14595e8470afSJim Cownie *VOLATILE_CAST(kmp_int64 *) & vold.b,
14605e8470afSJim Cownie *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
14615dd4d0d4SAndreyChurbanov // stealing succeeded
14625dd4d0d4SAndreyChurbanov #ifdef KMP_DEBUG
14635dd4d0d4SAndreyChurbanov {
14645dd4d0d4SAndreyChurbanov char *buff;
14655dd4d0d4SAndreyChurbanov // create format specifiers before the debug output
14665dd4d0d4SAndreyChurbanov buff = __kmp_str_format(
14675dd4d0d4SAndreyChurbanov "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
14685dd4d0d4SAndreyChurbanov "count:%%%s ub:%%%s\n",
14695dd4d0d4SAndreyChurbanov traits_t<T>::spec, traits_t<T>::spec);
14705dd4d0d4SAndreyChurbanov KD_TRACE(10, (buff, gtid, victimId, vnew.p.ub, vold.p.ub));
14715dd4d0d4SAndreyChurbanov __kmp_str_free(&buff);
14725dd4d0d4SAndreyChurbanov }
14735dd4d0d4SAndreyChurbanov #endif
1474f0682ac4SJonathan Peyton KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
1475f0682ac4SJonathan Peyton vold.p.ub - vnew.p.ub);
14765e8470afSJim Cownie status = 1;
14775dd4d0d4SAndreyChurbanov pr->u.p.parm4 = victimId; // keep victim id
14785e8470afSJim Cownie // now update own count and ub
14795e8470afSJim Cownie init = vnew.p.ub;
14805e8470afSJim Cownie vold.p.count = init + 1;
1481429dbc2aSAndrey Churbanov #if KMP_ARCH_X86
148239ada854SJonathan Peyton KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b);
1483429dbc2aSAndrey Churbanov #else
14845e8470afSJim Cownie *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
1485429dbc2aSAndrey Churbanov #endif
14865dd4d0d4SAndreyChurbanov // activate non-empty buffer and let others steal from us
14875dd4d0d4SAndreyChurbanov if (vold.p.count < (UT)vold.p.ub)
14885dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
14895e8470afSJim Cownie break;
1490429dbc2aSAndrey Churbanov } // if (check CAS result)
149142016791SKazuaki Ishizaki KMP_CPU_PAUSE(); // CAS failed, repeatedly attempt
1492429dbc2aSAndrey Churbanov } // while (try to steal from particular victim)
1493429dbc2aSAndrey Churbanov } // while (search for victim)
1494429dbc2aSAndrey Churbanov } // if (try to find victim and steal)
1495429dbc2aSAndrey Churbanov } // if (4-byte induction variable)
14965e8470afSJim Cownie if (!status) {
14975e8470afSJim Cownie *p_lb = 0;
14985e8470afSJim Cownie *p_ub = 0;
14993041982dSJonathan Peyton if (p_st != NULL)
15003041982dSJonathan Peyton *p_st = 0;
15015e8470afSJim Cownie } else {
15025dd4d0d4SAndreyChurbanov start = pr->u.p.lb;
15035e8470afSJim Cownie init *= chunk;
15045e8470afSJim Cownie limit = chunk + init - 1;
15055e8470afSJim Cownie incr = pr->u.p.st;
1506f0682ac4SJonathan Peyton KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1);
15075e8470afSJim Cownie
15085e8470afSJim Cownie KMP_DEBUG_ASSERT(init <= trip);
15095dd4d0d4SAndreyChurbanov // keep track of done chunks for possible early exit from stealing
15105dd4d0d4SAndreyChurbanov // TODO: count executed chunks locally with rare update of shared location
15115dd4d0d4SAndreyChurbanov // test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
15125e8470afSJim Cownie if ((last = (limit >= trip)) != 0)
15135e8470afSJim Cownie limit = trip;
15143041982dSJonathan Peyton if (p_st != NULL)
15153041982dSJonathan Peyton *p_st = incr;
15165e8470afSJim Cownie
15175e8470afSJim Cownie if (incr == 1) {
15185e8470afSJim Cownie *p_lb = start + init;
15195e8470afSJim Cownie *p_ub = start + limit;
15205e8470afSJim Cownie } else {
15215e8470afSJim Cownie *p_lb = start + init * incr;
15225e8470afSJim Cownie *p_ub = start + limit * incr;
15235e8470afSJim Cownie }
15245e8470afSJim Cownie } // if
15255e8470afSJim Cownie break;
15265e8470afSJim Cownie } // case
15275dd4d0d4SAndreyChurbanov #endif // KMP_STATIC_STEAL_ENABLED
15283041982dSJonathan Peyton case kmp_sch_static_balanced: {
15293041982dSJonathan Peyton KD_TRACE(
153039ada854SJonathan Peyton 10,
153139ada854SJonathan Peyton ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
153239ada854SJonathan Peyton gtid));
153339ada854SJonathan Peyton /* check if thread has any iteration to do */
153439ada854SJonathan Peyton if ((status = !pr->u.p.count) != 0) {
15355e8470afSJim Cownie pr->u.p.count = 1;
15365e8470afSJim Cownie *p_lb = pr->u.p.lb;
15375e8470afSJim Cownie *p_ub = pr->u.p.ub;
15386b316febSTerry Wilmarth last = (pr->u.p.parm1 != 0);
15394cc4bb4cSJim Cownie if (p_st != NULL)
15405e8470afSJim Cownie *p_st = pr->u.p.st;
15415e8470afSJim Cownie } else { /* no iterations to do */
15425e8470afSJim Cownie pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
15435e8470afSJim Cownie }
15445e8470afSJim Cownie } // case
15455e8470afSJim Cownie break;
15463041982dSJonathan Peyton case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was
15473041982dSJonathan Peyton merged here */
15483041982dSJonathan Peyton case kmp_sch_static_chunked: {
15495e8470afSJim Cownie T parm1;
15505e8470afSJim Cownie
155139ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
15523041982dSJonathan Peyton "kmp_sch_static_[affinity|chunked] case\n",
15535e8470afSJim Cownie gtid));
15545e8470afSJim Cownie parm1 = pr->u.p.parm1;
15555e8470afSJim Cownie
15565e8470afSJim Cownie trip = pr->u.p.tc - 1;
155739ada854SJonathan Peyton init = parm1 * (pr->u.p.count + tid);
15585e8470afSJim Cownie
15595e8470afSJim Cownie if ((status = (init <= trip)) != 0) {
15605e8470afSJim Cownie start = pr->u.p.lb;
15615e8470afSJim Cownie incr = pr->u.p.st;
15625e8470afSJim Cownie limit = parm1 + init - 1;
15635e8470afSJim Cownie
15645e8470afSJim Cownie if ((last = (limit >= trip)) != 0)
15655e8470afSJim Cownie limit = trip;
15665e8470afSJim Cownie
15673041982dSJonathan Peyton if (p_st != NULL)
15683041982dSJonathan Peyton *p_st = incr;
15695e8470afSJim Cownie
157039ada854SJonathan Peyton pr->u.p.count += nproc;
15715e8470afSJim Cownie
15725e8470afSJim Cownie if (incr == 1) {
15735e8470afSJim Cownie *p_lb = start + init;
15745e8470afSJim Cownie *p_ub = start + limit;
15753041982dSJonathan Peyton } else {
15765e8470afSJim Cownie *p_lb = start + init * incr;
15775e8470afSJim Cownie *p_ub = start + limit * incr;
15785e8470afSJim Cownie }
15795e8470afSJim Cownie
158039ada854SJonathan Peyton if (pr->flags.ordered) {
15815e8470afSJim Cownie pr->u.p.ordered_lower = init;
15825e8470afSJim Cownie pr->u.p.ordered_upper = limit;
15835e8470afSJim Cownie } // if
15845e8470afSJim Cownie } // if
15855e8470afSJim Cownie } // case
15865e8470afSJim Cownie break;
15875e8470afSJim Cownie
15883041982dSJonathan Peyton case kmp_sch_dynamic_chunked: {
1589e2738b37SPeyton, Jonathan L UT chunk_number;
1590e2738b37SPeyton, Jonathan L UT chunk_size = pr->u.p.parm1;
1591e2738b37SPeyton, Jonathan L UT nchunks = pr->u.p.parm2;
15925e8470afSJim Cownie
15933041982dSJonathan Peyton KD_TRACE(
15943041982dSJonathan Peyton 100,
159539ada854SJonathan Peyton ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
159639ada854SJonathan Peyton gtid));
15975e8470afSJim Cownie
1598e2738b37SPeyton, Jonathan L chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
1599e2738b37SPeyton, Jonathan L status = (chunk_number < nchunks);
1600e2738b37SPeyton, Jonathan L if (!status) {
16015e8470afSJim Cownie *p_lb = 0;
16025e8470afSJim Cownie *p_ub = 0;
16033041982dSJonathan Peyton if (p_st != NULL)
16043041982dSJonathan Peyton *p_st = 0;
16055e8470afSJim Cownie } else {
1606e2738b37SPeyton, Jonathan L init = chunk_size * chunk_number;
1607e2738b37SPeyton, Jonathan L trip = pr->u.p.tc - 1;
16085e8470afSJim Cownie start = pr->u.p.lb;
16095e8470afSJim Cownie incr = pr->u.p.st;
16105e8470afSJim Cownie
1611e2738b37SPeyton, Jonathan L if ((last = (trip - init < (UT)chunk_size)))
16125e8470afSJim Cownie limit = trip;
1613e2738b37SPeyton, Jonathan L else
1614e2738b37SPeyton, Jonathan L limit = chunk_size + init - 1;
16154cc4bb4cSJim Cownie
16163041982dSJonathan Peyton if (p_st != NULL)
16173041982dSJonathan Peyton *p_st = incr;
16185e8470afSJim Cownie
16195e8470afSJim Cownie if (incr == 1) {
16205e8470afSJim Cownie *p_lb = start + init;
16215e8470afSJim Cownie *p_ub = start + limit;
16225e8470afSJim Cownie } else {
16235e8470afSJim Cownie *p_lb = start + init * incr;
16245e8470afSJim Cownie *p_ub = start + limit * incr;
16255e8470afSJim Cownie }
16265e8470afSJim Cownie
162739ada854SJonathan Peyton if (pr->flags.ordered) {
16285e8470afSJim Cownie pr->u.p.ordered_lower = init;
16295e8470afSJim Cownie pr->u.p.ordered_upper = limit;
16305e8470afSJim Cownie } // if
16315e8470afSJim Cownie } // if
16325e8470afSJim Cownie } // case
16335e8470afSJim Cownie break;
16345e8470afSJim Cownie
16353041982dSJonathan Peyton case kmp_sch_guided_iterative_chunked: {
16365e8470afSJim Cownie T chunkspec = pr->u.p.parm1;
163739ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
16383041982dSJonathan Peyton "iterative case\n",
16393041982dSJonathan Peyton gtid));
16405e8470afSJim Cownie trip = pr->u.p.tc;
16415e8470afSJim Cownie // Start atomic part of calculations
16425e8470afSJim Cownie while (1) {
16435e8470afSJim Cownie ST remaining; // signed, because can be < 0
16445e8470afSJim Cownie init = sh->u.s.iteration; // shared value
16455e8470afSJim Cownie remaining = trip - init;
16465e8470afSJim Cownie if (remaining <= 0) { // AC: need to compare with 0 first
16475e8470afSJim Cownie // nothing to do, don't try atomic op
16485e8470afSJim Cownie status = 0;
16495e8470afSJim Cownie break;
16505e8470afSJim Cownie }
16513041982dSJonathan Peyton if ((T)remaining <
16523041982dSJonathan Peyton pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
165342016791SKazuaki Ishizaki // use dynamic-style schedule
16544c6a098aSKazuaki Ishizaki // atomically increment iterations, get old value
165594a114fcSJonathan Peyton init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
165694a114fcSJonathan Peyton (ST)chunkspec);
16575e8470afSJim Cownie remaining = trip - init;
16585e8470afSJim Cownie if (remaining <= 0) {
16595e8470afSJim Cownie status = 0; // all iterations got by other threads
166039ada854SJonathan Peyton } else {
166139ada854SJonathan Peyton // got some iterations to work on
16625e8470afSJim Cownie status = 1;
16635e8470afSJim Cownie if ((T)remaining > chunkspec) {
16645e8470afSJim Cownie limit = init + chunkspec - 1;
16655e8470afSJim Cownie } else {
16666b316febSTerry Wilmarth last = true; // the last chunk
16675e8470afSJim Cownie limit = init + remaining - 1;
16685e8470afSJim Cownie } // if
16695e8470afSJim Cownie } // if
16705e8470afSJim Cownie break;
16715e8470afSJim Cownie } // if
16726b316febSTerry Wilmarth limit = init + (UT)((double)remaining *
16736b316febSTerry Wilmarth *(double *)&pr->u.p.parm3); // divide by K*nproc
16745ba90c79SAndrey Churbanov if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
1675c47afcd9SAndrey Churbanov (ST)init, (ST)limit)) {
16765e8470afSJim Cownie // CAS was successful, chunk obtained
16775e8470afSJim Cownie status = 1;
16785e8470afSJim Cownie --limit;
16795e8470afSJim Cownie break;
16805e8470afSJim Cownie } // if
16815e8470afSJim Cownie } // while
16825e8470afSJim Cownie if (status != 0) {
16835e8470afSJim Cownie start = pr->u.p.lb;
16845e8470afSJim Cownie incr = pr->u.p.st;
16855e8470afSJim Cownie if (p_st != NULL)
16865e8470afSJim Cownie *p_st = incr;
16875e8470afSJim Cownie *p_lb = start + init * incr;
16885e8470afSJim Cownie *p_ub = start + limit * incr;
168939ada854SJonathan Peyton if (pr->flags.ordered) {
16905e8470afSJim Cownie pr->u.p.ordered_lower = init;
16915e8470afSJim Cownie pr->u.p.ordered_upper = limit;
16925e8470afSJim Cownie } // if
16935e8470afSJim Cownie } else {
16945e8470afSJim Cownie *p_lb = 0;
16955e8470afSJim Cownie *p_ub = 0;
16965e8470afSJim Cownie if (p_st != NULL)
16975e8470afSJim Cownie *p_st = 0;
16985e8470afSJim Cownie } // if
16995e8470afSJim Cownie } // case
17005e8470afSJim Cownie break;
17015e8470afSJim Cownie
1702d454c73cSAndrey Churbanov case kmp_sch_guided_simd: {
1703d454c73cSAndrey Churbanov // same as iterative but curr-chunk adjusted to be multiple of given
1704d454c73cSAndrey Churbanov // chunk
1705d454c73cSAndrey Churbanov T chunk = pr->u.p.parm1;
170639ada854SJonathan Peyton KD_TRACE(100,
170739ada854SJonathan Peyton ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",
1708d454c73cSAndrey Churbanov gtid));
1709d454c73cSAndrey Churbanov trip = pr->u.p.tc;
1710d454c73cSAndrey Churbanov // Start atomic part of calculations
1711d454c73cSAndrey Churbanov while (1) {
1712d454c73cSAndrey Churbanov ST remaining; // signed, because can be < 0
1713d454c73cSAndrey Churbanov init = sh->u.s.iteration; // shared value
1714d454c73cSAndrey Churbanov remaining = trip - init;
1715d454c73cSAndrey Churbanov if (remaining <= 0) { // AC: need to compare with 0 first
1716d454c73cSAndrey Churbanov status = 0; // nothing to do, don't try atomic op
1717d454c73cSAndrey Churbanov break;
1718d454c73cSAndrey Churbanov }
1719b4a1f441SPeyton, Jonathan L KMP_DEBUG_ASSERT(chunk && init % chunk == 0);
1720d454c73cSAndrey Churbanov // compare with K*nproc*(chunk+1), K=2 by default
1721d454c73cSAndrey Churbanov if ((T)remaining < pr->u.p.parm2) {
172242016791SKazuaki Ishizaki // use dynamic-style schedule
17234c6a098aSKazuaki Ishizaki // atomically increment iterations, get old value
172494a114fcSJonathan Peyton init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
172594a114fcSJonathan Peyton (ST)chunk);
1726d454c73cSAndrey Churbanov remaining = trip - init;
1727d454c73cSAndrey Churbanov if (remaining <= 0) {
1728d454c73cSAndrey Churbanov status = 0; // all iterations got by other threads
1729d454c73cSAndrey Churbanov } else {
1730d454c73cSAndrey Churbanov // got some iterations to work on
1731d454c73cSAndrey Churbanov status = 1;
1732d454c73cSAndrey Churbanov if ((T)remaining > chunk) {
1733d454c73cSAndrey Churbanov limit = init + chunk - 1;
1734d454c73cSAndrey Churbanov } else {
17356b316febSTerry Wilmarth last = true; // the last chunk
1736d454c73cSAndrey Churbanov limit = init + remaining - 1;
1737d454c73cSAndrey Churbanov } // if
1738d454c73cSAndrey Churbanov } // if
1739d454c73cSAndrey Churbanov break;
1740d454c73cSAndrey Churbanov } // if
1741d454c73cSAndrey Churbanov // divide by K*nproc
17426b316febSTerry Wilmarth UT span;
17436b316febSTerry Wilmarth __kmp_type_convert((double)remaining * (*(double *)&pr->u.p.parm3),
17446b316febSTerry Wilmarth &span);
1745d454c73cSAndrey Churbanov UT rem = span % chunk;
1746d454c73cSAndrey Churbanov if (rem) // adjust so that span%chunk == 0
1747d454c73cSAndrey Churbanov span += chunk - rem;
1748d454c73cSAndrey Churbanov limit = init + span;
17495ba90c79SAndrey Churbanov if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
1750c47afcd9SAndrey Churbanov (ST)init, (ST)limit)) {
1751d454c73cSAndrey Churbanov // CAS was successful, chunk obtained
1752d454c73cSAndrey Churbanov status = 1;
1753d454c73cSAndrey Churbanov --limit;
1754d454c73cSAndrey Churbanov break;
1755d454c73cSAndrey Churbanov } // if
1756d454c73cSAndrey Churbanov } // while
1757d454c73cSAndrey Churbanov if (status != 0) {
1758d454c73cSAndrey Churbanov start = pr->u.p.lb;
1759d454c73cSAndrey Churbanov incr = pr->u.p.st;
1760d454c73cSAndrey Churbanov if (p_st != NULL)
1761d454c73cSAndrey Churbanov *p_st = incr;
1762d454c73cSAndrey Churbanov *p_lb = start + init * incr;
1763d454c73cSAndrey Churbanov *p_ub = start + limit * incr;
176439ada854SJonathan Peyton if (pr->flags.ordered) {
1765d454c73cSAndrey Churbanov pr->u.p.ordered_lower = init;
1766d454c73cSAndrey Churbanov pr->u.p.ordered_upper = limit;
1767d454c73cSAndrey Churbanov } // if
1768d454c73cSAndrey Churbanov } else {
1769d454c73cSAndrey Churbanov *p_lb = 0;
1770d454c73cSAndrey Churbanov *p_ub = 0;
1771d454c73cSAndrey Churbanov if (p_st != NULL)
1772d454c73cSAndrey Churbanov *p_st = 0;
1773d454c73cSAndrey Churbanov } // if
1774d454c73cSAndrey Churbanov } // case
1775d454c73cSAndrey Churbanov break;
1776d454c73cSAndrey Churbanov
17773041982dSJonathan Peyton case kmp_sch_guided_analytical_chunked: {
17785e8470afSJim Cownie T chunkspec = pr->u.p.parm1;
17795e8470afSJim Cownie UT chunkIdx;
1780f700e9edSAndrey Churbanov #if KMP_USE_X87CONTROL
17815e8470afSJim Cownie /* for storing original FPCW value for Windows* OS on
17825e8470afSJim Cownie IA-32 architecture 8-byte version */
17835e8470afSJim Cownie unsigned int oldFpcw;
1784181b4bb3SJim Cownie unsigned int fpcwSet = 0;
17855e8470afSJim Cownie #endif
178639ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
178739ada854SJonathan Peyton "kmp_sch_guided_analytical_chunked case\n",
17885e8470afSJim Cownie gtid));
17895e8470afSJim Cownie
17905e8470afSJim Cownie trip = pr->u.p.tc;
17915e8470afSJim Cownie
179239ada854SJonathan Peyton KMP_DEBUG_ASSERT(nproc > 1);
179339ada854SJonathan Peyton KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip);
17945e8470afSJim Cownie
17953041982dSJonathan Peyton while (1) { /* this while loop is a safeguard against unexpected zero
17963041982dSJonathan Peyton chunk sizes */
17975e8470afSJim Cownie chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
17985e8470afSJim Cownie if (chunkIdx >= (UT)pr->u.p.parm2) {
17995e8470afSJim Cownie --trip;
18005e8470afSJim Cownie /* use dynamic-style scheduling */
18015e8470afSJim Cownie init = chunkIdx * chunkspec + pr->u.p.count;
18023041982dSJonathan Peyton /* need to verify init > 0 in case of overflow in the above
18033041982dSJonathan Peyton * calculation */
18045e8470afSJim Cownie if ((status = (init > 0 && init <= trip)) != 0) {
18055e8470afSJim Cownie limit = init + chunkspec - 1;
18065e8470afSJim Cownie
18075e8470afSJim Cownie if ((last = (limit >= trip)) != 0)
18085e8470afSJim Cownie limit = trip;
18095e8470afSJim Cownie }
18105e8470afSJim Cownie break;
18115e8470afSJim Cownie } else {
18125e8470afSJim Cownie /* use exponential-style scheduling */
18133041982dSJonathan Peyton /* The following check is to workaround the lack of long double precision on
18143041982dSJonathan Peyton Windows* OS.
18155e8470afSJim Cownie This check works around the possible effect that init != 0 for chunkIdx == 0.
18165e8470afSJim Cownie */
1817f700e9edSAndrey Churbanov #if KMP_USE_X87CONTROL
181839ada854SJonathan Peyton /* If we haven't already done so, save original
181939ada854SJonathan Peyton FPCW and set precision to 64-bit, as Windows* OS
182039ada854SJonathan Peyton on IA-32 architecture defaults to 53-bit */
18215e8470afSJim Cownie if (!fpcwSet) {
1822181b4bb3SJim Cownie oldFpcw = _control87(0, 0);
1823181b4bb3SJim Cownie _control87(_PC_64, _MCW_PC);
18245e8470afSJim Cownie fpcwSet = 0x30000;
18255e8470afSJim Cownie }
18265e8470afSJim Cownie #endif
18275e8470afSJim Cownie if (chunkIdx) {
18285e8470afSJim Cownie init = __kmp_dispatch_guided_remaining<T>(
18295e8470afSJim Cownie trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
18305e8470afSJim Cownie KMP_DEBUG_ASSERT(init);
18315e8470afSJim Cownie init = trip - init;
18325e8470afSJim Cownie } else
18335e8470afSJim Cownie init = 0;
18345e8470afSJim Cownie limit = trip - __kmp_dispatch_guided_remaining<T>(
18355e8470afSJim Cownie trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
18365e8470afSJim Cownie KMP_ASSERT(init <= limit);
18375e8470afSJim Cownie if (init < limit) {
18385e8470afSJim Cownie KMP_DEBUG_ASSERT(limit <= trip);
18395e8470afSJim Cownie --limit;
18405e8470afSJim Cownie status = 1;
18415e8470afSJim Cownie break;
18425e8470afSJim Cownie } // if
18435e8470afSJim Cownie } // if
18445e8470afSJim Cownie } // while (1)
1845f700e9edSAndrey Churbanov #if KMP_USE_X87CONTROL
1846181b4bb3SJim Cownie /* restore FPCW if necessary
184739ada854SJonathan Peyton AC: check fpcwSet flag first because oldFpcw can be uninitialized here
184839ada854SJonathan Peyton */
1849181b4bb3SJim Cownie if (fpcwSet && (oldFpcw & fpcwSet))
1850181b4bb3SJim Cownie _control87(oldFpcw, _MCW_PC);
18515e8470afSJim Cownie #endif
18525e8470afSJim Cownie if (status != 0) {
18535e8470afSJim Cownie start = pr->u.p.lb;
18545e8470afSJim Cownie incr = pr->u.p.st;
18555e8470afSJim Cownie if (p_st != NULL)
18565e8470afSJim Cownie *p_st = incr;
18575e8470afSJim Cownie *p_lb = start + init * incr;
18585e8470afSJim Cownie *p_ub = start + limit * incr;
185939ada854SJonathan Peyton if (pr->flags.ordered) {
18605e8470afSJim Cownie pr->u.p.ordered_lower = init;
18615e8470afSJim Cownie pr->u.p.ordered_upper = limit;
18625e8470afSJim Cownie }
18635e8470afSJim Cownie } else {
18645e8470afSJim Cownie *p_lb = 0;
18655e8470afSJim Cownie *p_ub = 0;
18665e8470afSJim Cownie if (p_st != NULL)
18675e8470afSJim Cownie *p_st = 0;
18685e8470afSJim Cownie }
18695e8470afSJim Cownie } // case
18705e8470afSJim Cownie break;
18715e8470afSJim Cownie
18723041982dSJonathan Peyton case kmp_sch_trapezoidal: {
18735e8470afSJim Cownie UT index;
18745e8470afSJim Cownie T parm2 = pr->u.p.parm2;
18755e8470afSJim Cownie T parm3 = pr->u.p.parm3;
18765e8470afSJim Cownie T parm4 = pr->u.p.parm4;
187739ada854SJonathan Peyton KD_TRACE(100,
187839ada854SJonathan Peyton ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",
18795e8470afSJim Cownie gtid));
18805e8470afSJim Cownie
18815e8470afSJim Cownie index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
18825e8470afSJim Cownie
18835e8470afSJim Cownie init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
18845e8470afSJim Cownie trip = pr->u.p.tc - 1;
18855e8470afSJim Cownie
18865e8470afSJim Cownie if ((status = ((T)index < parm3 && init <= trip)) == 0) {
18875e8470afSJim Cownie *p_lb = 0;
18885e8470afSJim Cownie *p_ub = 0;
18893041982dSJonathan Peyton if (p_st != NULL)
18903041982dSJonathan Peyton *p_st = 0;
18915e8470afSJim Cownie } else {
18925e8470afSJim Cownie start = pr->u.p.lb;
18935e8470afSJim Cownie limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
18945e8470afSJim Cownie incr = pr->u.p.st;
18955e8470afSJim Cownie
18965e8470afSJim Cownie if ((last = (limit >= trip)) != 0)
18975e8470afSJim Cownie limit = trip;
18985e8470afSJim Cownie
18993041982dSJonathan Peyton if (p_st != NULL)
19003041982dSJonathan Peyton *p_st = incr;
19015e8470afSJim Cownie
19025e8470afSJim Cownie if (incr == 1) {
19035e8470afSJim Cownie *p_lb = start + init;
19045e8470afSJim Cownie *p_ub = start + limit;
19055e8470afSJim Cownie } else {
19065e8470afSJim Cownie *p_lb = start + init * incr;
19075e8470afSJim Cownie *p_ub = start + limit * incr;
19085e8470afSJim Cownie }
19095e8470afSJim Cownie
191039ada854SJonathan Peyton if (pr->flags.ordered) {
191139ada854SJonathan Peyton pr->u.p.ordered_lower = init;
191239ada854SJonathan Peyton pr->u.p.ordered_upper = limit;
191339ada854SJonathan Peyton } // if
191439ada854SJonathan Peyton } // if
191539ada854SJonathan Peyton } // case
191639ada854SJonathan Peyton break;
191739ada854SJonathan Peyton default: {
191839ada854SJonathan Peyton status = 0; // to avoid complaints on uninitialized variable use
191939ada854SJonathan Peyton __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
192039ada854SJonathan Peyton KMP_HNT(GetNewerLibrary), // Hint
192139ada854SJonathan Peyton __kmp_msg_null // Variadic argument list terminator
192239ada854SJonathan Peyton );
192339ada854SJonathan Peyton } break;
192439ada854SJonathan Peyton } // switch
192539ada854SJonathan Peyton if (p_last)
192639ada854SJonathan Peyton *p_last = last;
192739ada854SJonathan Peyton #ifdef KMP_DEBUG
192839ada854SJonathan Peyton if (pr->flags.ordered) {
192939ada854SJonathan Peyton char *buff;
193039ada854SJonathan Peyton // create format specifiers before the debug output
193139ada854SJonathan Peyton buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
193239ada854SJonathan Peyton "ordered_lower:%%%s ordered_upper:%%%s\n",
193339ada854SJonathan Peyton traits_t<UT>::spec, traits_t<UT>::spec);
193439ada854SJonathan Peyton KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper));
193539ada854SJonathan Peyton __kmp_str_free(&buff);
193639ada854SJonathan Peyton }
193739ada854SJonathan Peyton {
193839ada854SJonathan Peyton char *buff;
193939ada854SJonathan Peyton // create format specifiers before the debug output
194039ada854SJonathan Peyton buff = __kmp_str_format(
194139ada854SJonathan Peyton "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
194239ada854SJonathan Peyton "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
194339ada854SJonathan Peyton traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1944467f3924SHansang Bae KMP_DEBUG_ASSERT(p_last);
1945467f3924SHansang Bae KMP_DEBUG_ASSERT(p_st);
194639ada854SJonathan Peyton KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st));
194739ada854SJonathan Peyton __kmp_str_free(&buff);
194839ada854SJonathan Peyton }
194939ada854SJonathan Peyton #endif
195039ada854SJonathan Peyton return status;
195139ada854SJonathan Peyton }
195239ada854SJonathan Peyton
195339ada854SJonathan Peyton /* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
195439ada854SJonathan Peyton work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
195539ada854SJonathan Peyton is not called. */
195639ada854SJonathan Peyton #if OMPT_SUPPORT && OMPT_OPTIONAL
195739ada854SJonathan Peyton #define OMPT_LOOP_END \
195839ada854SJonathan Peyton if (status == 0) { \
195939ada854SJonathan Peyton if (ompt_enabled.ompt_callback_work) { \
196039ada854SJonathan Peyton ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
196139ada854SJonathan Peyton ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
196239ada854SJonathan Peyton ompt_callbacks.ompt_callback(ompt_callback_work)( \
196339ada854SJonathan Peyton ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
196439ada854SJonathan Peyton &(task_info->task_data), 0, codeptr); \
196539ada854SJonathan Peyton } \
196639ada854SJonathan Peyton }
1967e4ac11beSHansang Bae #define OMPT_LOOP_DISPATCH(lb, ub, st, status) \
1968e4ac11beSHansang Bae if (ompt_enabled.ompt_callback_dispatch && status) { \
1969e4ac11beSHansang Bae ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
1970e4ac11beSHansang Bae ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1971e4ac11beSHansang Bae ompt_dispatch_chunk_t chunk; \
1972e4ac11beSHansang Bae ompt_data_t instance = ompt_data_none; \
1973e4ac11beSHansang Bae OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st); \
1974e4ac11beSHansang Bae instance.ptr = &chunk; \
1975e4ac11beSHansang Bae ompt_callbacks.ompt_callback(ompt_callback_dispatch)( \
1976e4ac11beSHansang Bae &(team_info->parallel_data), &(task_info->task_data), \
1977e4ac11beSHansang Bae ompt_dispatch_ws_loop_chunk, instance); \
1978e4ac11beSHansang Bae }
197939ada854SJonathan Peyton // TODO: implement count
198039ada854SJonathan Peyton #else
198139ada854SJonathan Peyton #define OMPT_LOOP_END // no-op
1982*482e2dc2SMartin Storsjö #define OMPT_LOOP_DISPATCH(lb, ub, st, status) // no-op
198339ada854SJonathan Peyton #endif
198439ada854SJonathan Peyton
1985f0682ac4SJonathan Peyton #if KMP_STATS_ENABLED
1986f0682ac4SJonathan Peyton #define KMP_STATS_LOOP_END \
1987f0682ac4SJonathan Peyton { \
1988f0682ac4SJonathan Peyton kmp_int64 u, l, t, i; \
1989f0682ac4SJonathan Peyton l = (kmp_int64)(*p_lb); \
1990f0682ac4SJonathan Peyton u = (kmp_int64)(*p_ub); \
1991f0682ac4SJonathan Peyton i = (kmp_int64)(pr->u.p.st); \
1992f0682ac4SJonathan Peyton if (status == 0) { \
1993f0682ac4SJonathan Peyton t = 0; \
1994f0682ac4SJonathan Peyton KMP_POP_PARTITIONED_TIMER(); \
1995f0682ac4SJonathan Peyton } else if (i == 1) { \
1996f0682ac4SJonathan Peyton if (u >= l) \
1997f0682ac4SJonathan Peyton t = u - l + 1; \
1998f0682ac4SJonathan Peyton else \
1999f0682ac4SJonathan Peyton t = 0; \
2000f0682ac4SJonathan Peyton } else if (i < 0) { \
2001f0682ac4SJonathan Peyton if (l >= u) \
2002f0682ac4SJonathan Peyton t = (l - u) / (-i) + 1; \
2003f0682ac4SJonathan Peyton else \
2004f0682ac4SJonathan Peyton t = 0; \
2005f0682ac4SJonathan Peyton } else { \
2006f0682ac4SJonathan Peyton if (u >= l) \
2007f0682ac4SJonathan Peyton t = (u - l) / i + 1; \
2008f0682ac4SJonathan Peyton else \
2009f0682ac4SJonathan Peyton t = 0; \
2010f0682ac4SJonathan Peyton } \
2011f0682ac4SJonathan Peyton KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t); \
2012f0682ac4SJonathan Peyton }
2013f0682ac4SJonathan Peyton #else
2014f0682ac4SJonathan Peyton #define KMP_STATS_LOOP_END /* Nothing */
2015f0682ac4SJonathan Peyton #endif
2016f0682ac4SJonathan Peyton
201739ada854SJonathan Peyton template <typename T>
__kmp_dispatch_next(ident_t * loc,int gtid,kmp_int32 * p_last,T * p_lb,T * p_ub,typename traits_t<T>::signed_t * p_st,void * codeptr)201839ada854SJonathan Peyton static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
201939ada854SJonathan Peyton T *p_lb, T *p_ub,
202039ada854SJonathan Peyton typename traits_t<T>::signed_t *p_st
202139ada854SJonathan Peyton #if OMPT_SUPPORT && OMPT_OPTIONAL
202239ada854SJonathan Peyton ,
202339ada854SJonathan Peyton void *codeptr
202439ada854SJonathan Peyton #endif
202539ada854SJonathan Peyton ) {
202639ada854SJonathan Peyton
202739ada854SJonathan Peyton typedef typename traits_t<T>::unsigned_t UT;
202839ada854SJonathan Peyton typedef typename traits_t<T>::signed_t ST;
202939ada854SJonathan Peyton // This is potentially slightly misleading, schedule(runtime) will appear here
203042016791SKazuaki Ishizaki // even if the actual runtime schedule is static. (Which points out a
20314c6a098aSKazuaki Ishizaki // disadvantage of schedule(runtime): even when static scheduling is used it
203239ada854SJonathan Peyton // costs more than a compile time choice to use static scheduling would.)
2033f0682ac4SJonathan Peyton KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);
203439ada854SJonathan Peyton
203539ada854SJonathan Peyton int status;
203639ada854SJonathan Peyton dispatch_private_info_template<T> *pr;
2037787eb0c6SAndreyChurbanov __kmp_assert_valid_gtid(gtid);
203839ada854SJonathan Peyton kmp_info_t *th = __kmp_threads[gtid];
203939ada854SJonathan Peyton kmp_team_t *team = th->th.th_team;
204039ada854SJonathan Peyton
204139ada854SJonathan Peyton KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL
204239ada854SJonathan Peyton KD_TRACE(
204339ada854SJonathan Peyton 1000,
204439ada854SJonathan Peyton ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
204539ada854SJonathan Peyton gtid, p_lb, p_ub, p_st, p_last));
204639ada854SJonathan Peyton
204739ada854SJonathan Peyton if (team->t.t_serialized) {
204842016791SKazuaki Ishizaki /* NOTE: serialize this dispatch because we are not at the active level */
204939ada854SJonathan Peyton pr = reinterpret_cast<dispatch_private_info_template<T> *>(
205039ada854SJonathan Peyton th->th.th_dispatch->th_disp_buffer); /* top of the stack */
205139ada854SJonathan Peyton KMP_DEBUG_ASSERT(pr);
205239ada854SJonathan Peyton
205339ada854SJonathan Peyton if ((status = (pr->u.p.tc != 0)) == 0) {
205439ada854SJonathan Peyton *p_lb = 0;
205539ada854SJonathan Peyton *p_ub = 0;
205639ada854SJonathan Peyton // if ( p_last != NULL )
205739ada854SJonathan Peyton // *p_last = 0;
205839ada854SJonathan Peyton if (p_st != NULL)
205939ada854SJonathan Peyton *p_st = 0;
206039ada854SJonathan Peyton if (__kmp_env_consistency_check) {
206139ada854SJonathan Peyton if (pr->pushed_ws != ct_none) {
206239ada854SJonathan Peyton pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
206339ada854SJonathan Peyton }
206439ada854SJonathan Peyton }
206539ada854SJonathan Peyton } else if (pr->flags.nomerge) {
206639ada854SJonathan Peyton kmp_int32 last;
206739ada854SJonathan Peyton T start;
206839ada854SJonathan Peyton UT limit, trip, init;
206939ada854SJonathan Peyton ST incr;
207039ada854SJonathan Peyton T chunk = pr->u.p.parm1;
207139ada854SJonathan Peyton
207239ada854SJonathan Peyton KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
207339ada854SJonathan Peyton gtid));
207439ada854SJonathan Peyton
207539ada854SJonathan Peyton init = chunk * pr->u.p.count++;
207639ada854SJonathan Peyton trip = pr->u.p.tc - 1;
207739ada854SJonathan Peyton
207839ada854SJonathan Peyton if ((status = (init <= trip)) == 0) {
207939ada854SJonathan Peyton *p_lb = 0;
208039ada854SJonathan Peyton *p_ub = 0;
208139ada854SJonathan Peyton // if ( p_last != NULL )
208239ada854SJonathan Peyton // *p_last = 0;
208339ada854SJonathan Peyton if (p_st != NULL)
208439ada854SJonathan Peyton *p_st = 0;
208539ada854SJonathan Peyton if (__kmp_env_consistency_check) {
208639ada854SJonathan Peyton if (pr->pushed_ws != ct_none) {
208739ada854SJonathan Peyton pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
208839ada854SJonathan Peyton }
208939ada854SJonathan Peyton }
209039ada854SJonathan Peyton } else {
209139ada854SJonathan Peyton start = pr->u.p.lb;
209239ada854SJonathan Peyton limit = chunk + init - 1;
209339ada854SJonathan Peyton incr = pr->u.p.st;
209439ada854SJonathan Peyton
209539ada854SJonathan Peyton if ((last = (limit >= trip)) != 0) {
209639ada854SJonathan Peyton limit = trip;
209739ada854SJonathan Peyton #if KMP_OS_WINDOWS
209839ada854SJonathan Peyton pr->u.p.last_upper = pr->u.p.ub;
209939ada854SJonathan Peyton #endif /* KMP_OS_WINDOWS */
210039ada854SJonathan Peyton }
210139ada854SJonathan Peyton if (p_last != NULL)
210239ada854SJonathan Peyton *p_last = last;
210339ada854SJonathan Peyton if (p_st != NULL)
210439ada854SJonathan Peyton *p_st = incr;
210539ada854SJonathan Peyton if (incr == 1) {
210639ada854SJonathan Peyton *p_lb = start + init;
210739ada854SJonathan Peyton *p_ub = start + limit;
210839ada854SJonathan Peyton } else {
210939ada854SJonathan Peyton *p_lb = start + init * incr;
211039ada854SJonathan Peyton *p_ub = start + limit * incr;
211139ada854SJonathan Peyton }
211239ada854SJonathan Peyton
211339ada854SJonathan Peyton if (pr->flags.ordered) {
21145e8470afSJim Cownie pr->u.p.ordered_lower = init;
21155e8470afSJim Cownie pr->u.p.ordered_upper = limit;
21165e8470afSJim Cownie #ifdef KMP_DEBUG
21175e8470afSJim Cownie {
2118aeb40adaSJonas Hahnfeld char *buff;
21195e8470afSJim Cownie // create format specifiers before the debug output
21203041982dSJonathan Peyton buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
21213041982dSJonathan Peyton "ordered_lower:%%%s ordered_upper:%%%s\n",
21225e8470afSJim Cownie traits_t<UT>::spec, traits_t<UT>::spec);
21233041982dSJonathan Peyton KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
21243041982dSJonathan Peyton pr->u.p.ordered_upper));
21255e8470afSJim Cownie __kmp_str_free(&buff);
21265e8470afSJim Cownie }
21275e8470afSJim Cownie #endif
21285e8470afSJim Cownie } // if
21295e8470afSJim Cownie } // if
213039ada854SJonathan Peyton } else {
213139ada854SJonathan Peyton pr->u.p.tc = 0;
213239ada854SJonathan Peyton *p_lb = pr->u.p.lb;
213339ada854SJonathan Peyton *p_ub = pr->u.p.ub;
213439ada854SJonathan Peyton #if KMP_OS_WINDOWS
213539ada854SJonathan Peyton pr->u.p.last_upper = *p_ub;
213639ada854SJonathan Peyton #endif /* KMP_OS_WINDOWS */
213739ada854SJonathan Peyton if (p_last != NULL)
213839ada854SJonathan Peyton *p_last = TRUE;
213939ada854SJonathan Peyton if (p_st != NULL)
214039ada854SJonathan Peyton *p_st = pr->u.p.st;
214139ada854SJonathan Peyton } // if
214239ada854SJonathan Peyton #ifdef KMP_DEBUG
214339ada854SJonathan Peyton {
214439ada854SJonathan Peyton char *buff;
214539ada854SJonathan Peyton // create format specifiers before the debug output
214639ada854SJonathan Peyton buff = __kmp_str_format(
214739ada854SJonathan Peyton "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
214839ada854SJonathan Peyton "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
214939ada854SJonathan Peyton traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2150771f0fb9SPeyton, Jonathan L KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,
2151771f0fb9SPeyton, Jonathan L (p_last ? *p_last : 0), status));
215239ada854SJonathan Peyton __kmp_str_free(&buff);
215339ada854SJonathan Peyton }
215439ada854SJonathan Peyton #endif
215539ada854SJonathan Peyton #if INCLUDE_SSC_MARKS
215639ada854SJonathan Peyton SSC_MARK_DISPATCH_NEXT();
215739ada854SJonathan Peyton #endif
2158e4ac11beSHansang Bae OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
215939ada854SJonathan Peyton OMPT_LOOP_END;
2160f0682ac4SJonathan Peyton KMP_STATS_LOOP_END;
216139ada854SJonathan Peyton return status;
216239ada854SJonathan Peyton } else {
216339ada854SJonathan Peyton kmp_int32 last = 0;
216439ada854SJonathan Peyton dispatch_shared_info_template<T> volatile *sh;
21655e8470afSJim Cownie
216639ada854SJonathan Peyton KMP_DEBUG_ASSERT(th->th.th_dispatch ==
216739ada854SJonathan Peyton &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
216839ada854SJonathan Peyton
216939ada854SJonathan Peyton pr = reinterpret_cast<dispatch_private_info_template<T> *>(
217039ada854SJonathan Peyton th->th.th_dispatch->th_dispatch_pr_current);
217139ada854SJonathan Peyton KMP_DEBUG_ASSERT(pr);
217239ada854SJonathan Peyton sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
217339ada854SJonathan Peyton th->th.th_dispatch->th_dispatch_sh_current);
217439ada854SJonathan Peyton KMP_DEBUG_ASSERT(sh);
217539ada854SJonathan Peyton
2176f6399367SJonathan Peyton #if KMP_USE_HIER_SCHED
2177f6399367SJonathan Peyton if (pr->flags.use_hier)
2178f6399367SJonathan Peyton status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
2179f6399367SJonathan Peyton else
2180f6399367SJonathan Peyton #endif // KMP_USE_HIER_SCHED
218139ada854SJonathan Peyton status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
218239ada854SJonathan Peyton p_st, th->th.th_team_nproc,
218339ada854SJonathan Peyton th->th.th_info.ds.ds_tid);
218439ada854SJonathan Peyton // status == 0: no more iterations to execute
21855e8470afSJim Cownie if (status == 0) {
21865dd4d0d4SAndreyChurbanov ST num_done;
21875dd4d0d4SAndreyChurbanov num_done = test_then_inc<ST>(&sh->u.s.num_done);
21885e8470afSJim Cownie #ifdef KMP_DEBUG
21895e8470afSJim Cownie {
2190aeb40adaSJonas Hahnfeld char *buff;
21915e8470afSJim Cownie // create format specifiers before the debug output
21925e8470afSJim Cownie buff = __kmp_str_format(
21935e8470afSJim Cownie "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
21945dd4d0d4SAndreyChurbanov traits_t<ST>::spec);
219539ada854SJonathan Peyton KD_TRACE(10, (buff, gtid, sh->u.s.num_done));
21965e8470afSJim Cownie __kmp_str_free(&buff);
21975e8470afSJim Cownie }
21985e8470afSJim Cownie #endif
21995e8470afSJim Cownie
2200f6399367SJonathan Peyton #if KMP_USE_HIER_SCHED
2201f6399367SJonathan Peyton pr->flags.use_hier = FALSE;
2202f6399367SJonathan Peyton #endif
22035dd4d0d4SAndreyChurbanov if (num_done == th->th.th_team_nproc - 1) {
22045dd4d0d4SAndreyChurbanov #if KMP_STATIC_STEAL_ENABLED
22055dd4d0d4SAndreyChurbanov if (pr->schedule == kmp_sch_static_steal) {
2206429dbc2aSAndrey Churbanov int i;
2207abe64360SAndreyChurbanov int idx = (th->th.th_dispatch->th_disp_index - 1) %
2208abe64360SAndreyChurbanov __kmp_dispatch_num_buffers; // current loop index
2209429dbc2aSAndrey Churbanov // loop complete, safe to destroy locks used for stealing
2210429dbc2aSAndrey Churbanov for (i = 0; i < th->th.th_team_nproc; ++i) {
2211abe64360SAndreyChurbanov dispatch_private_info_template<T> *buf =
2212abe64360SAndreyChurbanov reinterpret_cast<dispatch_private_info_template<T> *>(
22135dd4d0d4SAndreyChurbanov &team->t.t_dispatch[i].th_disp_buffer[idx]);
22145dd4d0d4SAndreyChurbanov KMP_ASSERT(buf->steal_flag == THIEF); // buffer must be inactive
22155dd4d0d4SAndreyChurbanov KMP_ATOMIC_ST_RLX(&buf->steal_flag, UNUSED);
22165dd4d0d4SAndreyChurbanov if (traits_t<T>::type_size > 4) {
22175dd4d0d4SAndreyChurbanov // destroy locks used for stealing
22185dd4d0d4SAndreyChurbanov kmp_lock_t *lck = buf->u.p.steal_lock;
2219429dbc2aSAndrey Churbanov KMP_ASSERT(lck != NULL);
2220429dbc2aSAndrey Churbanov __kmp_destroy_lock(lck);
2221429dbc2aSAndrey Churbanov __kmp_free(lck);
22225dd4d0d4SAndreyChurbanov buf->u.p.steal_lock = NULL;
22235dd4d0d4SAndreyChurbanov }
2224429dbc2aSAndrey Churbanov }
2225429dbc2aSAndrey Churbanov }
2226429dbc2aSAndrey Churbanov #endif
22275dd4d0d4SAndreyChurbanov /* NOTE: release shared buffer to be reused */
22285e8470afSJim Cownie
22295e8470afSJim Cownie KMP_MB(); /* Flush all pending memory write invalidates. */
22305e8470afSJim Cownie
22315e8470afSJim Cownie sh->u.s.num_done = 0;
22325e8470afSJim Cownie sh->u.s.iteration = 0;
22335e8470afSJim Cownie
22345e8470afSJim Cownie /* TODO replace with general release procedure? */
223539ada854SJonathan Peyton if (pr->flags.ordered) {
22365e8470afSJim Cownie sh->u.s.ordered_iteration = 0;
22375e8470afSJim Cownie }
22385e8470afSJim Cownie
2239067325f9SJonathan Peyton sh->buffer_index += __kmp_dispatch_num_buffers;
22405e8470afSJim Cownie KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
22415e8470afSJim Cownie gtid, sh->buffer_index));
22425e8470afSJim Cownie
22435e8470afSJim Cownie KMP_MB(); /* Flush all pending memory write invalidates. */
22445e8470afSJim Cownie
22455e8470afSJim Cownie } // if
22465e8470afSJim Cownie if (__kmp_env_consistency_check) {
22475e8470afSJim Cownie if (pr->pushed_ws != ct_none) {
22485e8470afSJim Cownie pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
22495e8470afSJim Cownie }
22505e8470afSJim Cownie }
22515e8470afSJim Cownie
22525e8470afSJim Cownie th->th.th_dispatch->th_deo_fcn = NULL;
22535e8470afSJim Cownie th->th.th_dispatch->th_dxo_fcn = NULL;
22545e8470afSJim Cownie th->th.th_dispatch->th_dispatch_sh_current = NULL;
22555e8470afSJim Cownie th->th.th_dispatch->th_dispatch_pr_current = NULL;
22565e8470afSJim Cownie } // if (status == 0)
22575e8470afSJim Cownie #if KMP_OS_WINDOWS
22585e8470afSJim Cownie else if (last) {
22595e8470afSJim Cownie pr->u.p.last_upper = pr->u.p.ub;
22605e8470afSJim Cownie }
22615e8470afSJim Cownie #endif /* KMP_OS_WINDOWS */
22624cc4bb4cSJim Cownie if (p_last != NULL && status != 0)
22634cc4bb4cSJim Cownie *p_last = last;
22645e8470afSJim Cownie } // if
22655e8470afSJim Cownie
22665e8470afSJim Cownie #ifdef KMP_DEBUG
22675e8470afSJim Cownie {
2268aeb40adaSJonas Hahnfeld char *buff;
22695e8470afSJim Cownie // create format specifiers before the debug output
22705e8470afSJim Cownie buff = __kmp_str_format(
22713041982dSJonathan Peyton "__kmp_dispatch_next: T#%%d normal case: "
227239ada854SJonathan Peyton "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
22735e8470afSJim Cownie traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
227439ada854SJonathan Peyton KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,
227539ada854SJonathan Peyton (p_last ? *p_last : 0), status));
22765e8470afSJim Cownie __kmp_str_free(&buff);
22775e8470afSJim Cownie }
22785e8470afSJim Cownie #endif
22794cc4bb4cSJim Cownie #if INCLUDE_SSC_MARKS
22804cc4bb4cSJim Cownie SSC_MARK_DISPATCH_NEXT();
22814cc4bb4cSJim Cownie #endif
2282e4ac11beSHansang Bae OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
2283d7d088f8SAndrey Churbanov OMPT_LOOP_END;
2284f0682ac4SJonathan Peyton KMP_STATS_LOOP_END;
22855e8470afSJim Cownie return status;
22865e8470afSJim Cownie }
22875e8470afSJim Cownie
228843d5c4d5SVadim Paretsky /*!
228943d5c4d5SVadim Paretsky @ingroup WORK_SHARING
229043d5c4d5SVadim Paretsky @param loc source location information
229143d5c4d5SVadim Paretsky @param global_tid global thread number
229243d5c4d5SVadim Paretsky @return Zero if the parallel region is not active and this thread should execute
229343d5c4d5SVadim Paretsky all sections, non-zero otherwise.
229443d5c4d5SVadim Paretsky
229543d5c4d5SVadim Paretsky Beginning of sections construct.
229643d5c4d5SVadim Paretsky There are no implicit barriers in the "sections" calls, rather the compiler
229743d5c4d5SVadim Paretsky should introduce an explicit barrier if it is required.
229843d5c4d5SVadim Paretsky
229943d5c4d5SVadim Paretsky This implementation is based on __kmp_dispatch_init, using same constructs for
230043d5c4d5SVadim Paretsky shared data (we can't have sections nested directly in omp for loop, there
230143d5c4d5SVadim Paretsky should be a parallel region in between)
230243d5c4d5SVadim Paretsky */
kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {

  int active;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_shared_info_template<kmp_int32> volatile *sh;

  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
  th->th.th_ident = loc;

  KMP_COUNT_BLOCK(OMP_SECTIONS);
  KD_TRACE(10, ("__kmpc_sections: called by T#%d\n", gtid));

  if (active) {
    // Setup sections in the same way as dynamic scheduled loops.
    // We need one shared data: which section is to execute next.
    // (in case parallel is not active, all sections will be executed on the
    // same thread)
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    // Claim the next dispatch buffer slot for this worksharing construct.
    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    // reuse shared data structures from dynamic sched loops:
    sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
                  my_buffer_index));

    // Sections do not support the ordered clause; install the error handlers
    // so that dispatch enter/exit-ordered calls report an error.
    th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
    th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;

    KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));
    // Wait until this buffer is released by the previous worksharing
    // construct that hashed to it (__kmpc_next_section advances buffer_index
    // when the last thread is done).
    __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                           __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
    // Note: KMP_WAIT() cannot be used there: buffer index and
    // my_buffer_index are *always* 32-bit integers.
    KMP_MB();
    KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));

    th->th.th_dispatch->th_dispatch_pr_current =
        nullptr; // sections construct doesn't need private data
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_sections);

  return active;
}
237643d5c4d5SVadim Paretsky
237743d5c4d5SVadim Paretsky /*!
237843d5c4d5SVadim Paretsky @ingroup WORK_SHARING
237943d5c4d5SVadim Paretsky @param loc source location information
@param gtid global thread number
238143d5c4d5SVadim Paretsky @param numberOfSections number of sections in the 'sections' construct
238243d5c4d5SVadim Paretsky @return unsigned [from 0 to n) - number (id) of the section to execute next on
238343d5c4d5SVadim Paretsky this thread. n (or any other number not in range) - nothing to execute on this
238443d5c4d5SVadim Paretsky thread
238543d5c4d5SVadim Paretsky */
238643d5c4d5SVadim Paretsky
__kmpc_next_section(ident_t * loc,kmp_int32 gtid,kmp_int32 numberOfSections)238743d5c4d5SVadim Paretsky kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
238843d5c4d5SVadim Paretsky kmp_int32 numberOfSections) {
238943d5c4d5SVadim Paretsky
239043d5c4d5SVadim Paretsky KMP_TIME_PARTITIONED_BLOCK(OMP_sections);
239143d5c4d5SVadim Paretsky
239243d5c4d5SVadim Paretsky kmp_info_t *th = __kmp_threads[gtid];
239343d5c4d5SVadim Paretsky #ifdef KMP_DEBUG
239443d5c4d5SVadim Paretsky kmp_team_t *team = th->th.th_team;
239543d5c4d5SVadim Paretsky #endif
239643d5c4d5SVadim Paretsky
239743d5c4d5SVadim Paretsky KD_TRACE(1000, ("__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,
239843d5c4d5SVadim Paretsky numberOfSections));
239943d5c4d5SVadim Paretsky
240043d5c4d5SVadim Paretsky // For serialized case we should not call this function:
240143d5c4d5SVadim Paretsky KMP_DEBUG_ASSERT(!team->t.t_serialized);
240243d5c4d5SVadim Paretsky
240343d5c4d5SVadim Paretsky dispatch_shared_info_template<kmp_int32> volatile *sh;
240443d5c4d5SVadim Paretsky
240543d5c4d5SVadim Paretsky KMP_DEBUG_ASSERT(th->th.th_dispatch ==
240643d5c4d5SVadim Paretsky &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
240743d5c4d5SVadim Paretsky
240843d5c4d5SVadim Paretsky KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
240943d5c4d5SVadim Paretsky sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
241043d5c4d5SVadim Paretsky th->th.th_dispatch->th_dispatch_sh_current);
241143d5c4d5SVadim Paretsky KMP_DEBUG_ASSERT(sh);
241243d5c4d5SVadim Paretsky
241343d5c4d5SVadim Paretsky kmp_int32 sectionIndex = 0;
241443d5c4d5SVadim Paretsky bool moreSectionsToExecute = true;
241543d5c4d5SVadim Paretsky
241643d5c4d5SVadim Paretsky // Find section to execute:
241743d5c4d5SVadim Paretsky sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
241843d5c4d5SVadim Paretsky if (sectionIndex >= numberOfSections) {
241943d5c4d5SVadim Paretsky moreSectionsToExecute = false;
242043d5c4d5SVadim Paretsky }
242143d5c4d5SVadim Paretsky
242243d5c4d5SVadim Paretsky // status == 0: no more sections to execute;
242343d5c4d5SVadim Paretsky // OMPTODO: __kmpc_end_sections could be bypassed?
242443d5c4d5SVadim Paretsky if (!moreSectionsToExecute) {
242543d5c4d5SVadim Paretsky kmp_int32 num_done;
242643d5c4d5SVadim Paretsky
242743d5c4d5SVadim Paretsky num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
242843d5c4d5SVadim Paretsky
242943d5c4d5SVadim Paretsky if (num_done == th->th.th_team_nproc - 1) {
243043d5c4d5SVadim Paretsky /* NOTE: release this buffer to be reused */
243143d5c4d5SVadim Paretsky
243243d5c4d5SVadim Paretsky KMP_MB(); /* Flush all pending memory write invalidates. */
243343d5c4d5SVadim Paretsky
243443d5c4d5SVadim Paretsky sh->u.s.num_done = 0;
243543d5c4d5SVadim Paretsky sh->u.s.iteration = 0;
243643d5c4d5SVadim Paretsky
243743d5c4d5SVadim Paretsky KMP_MB(); /* Flush all pending memory write invalidates. */
243843d5c4d5SVadim Paretsky
243943d5c4d5SVadim Paretsky sh->buffer_index += __kmp_dispatch_num_buffers;
244043d5c4d5SVadim Paretsky KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
244143d5c4d5SVadim Paretsky sh->buffer_index));
244243d5c4d5SVadim Paretsky
244343d5c4d5SVadim Paretsky KMP_MB(); /* Flush all pending memory write invalidates. */
244443d5c4d5SVadim Paretsky
244543d5c4d5SVadim Paretsky } // if
244643d5c4d5SVadim Paretsky
244743d5c4d5SVadim Paretsky th->th.th_dispatch->th_deo_fcn = NULL;
244843d5c4d5SVadim Paretsky th->th.th_dispatch->th_dxo_fcn = NULL;
244943d5c4d5SVadim Paretsky th->th.th_dispatch->th_dispatch_sh_current = NULL;
245043d5c4d5SVadim Paretsky th->th.th_dispatch->th_dispatch_pr_current = NULL;
245143d5c4d5SVadim Paretsky
245243d5c4d5SVadim Paretsky #if OMPT_SUPPORT && OMPT_OPTIONAL
245343d5c4d5SVadim Paretsky if (ompt_enabled.ompt_callback_dispatch) {
245443d5c4d5SVadim Paretsky ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
245543d5c4d5SVadim Paretsky ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
245643d5c4d5SVadim Paretsky ompt_data_t instance = ompt_data_none;
245743d5c4d5SVadim Paretsky instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
245843d5c4d5SVadim Paretsky ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
245943d5c4d5SVadim Paretsky &(team_info->parallel_data), &(task_info->task_data),
246043d5c4d5SVadim Paretsky ompt_dispatch_section, instance);
246143d5c4d5SVadim Paretsky }
246243d5c4d5SVadim Paretsky #endif
246343d5c4d5SVadim Paretsky KMP_POP_PARTITIONED_TIMER();
246443d5c4d5SVadim Paretsky }
246543d5c4d5SVadim Paretsky
246643d5c4d5SVadim Paretsky return sectionIndex;
246743d5c4d5SVadim Paretsky }
246843d5c4d5SVadim Paretsky
246943d5c4d5SVadim Paretsky /*!
247043d5c4d5SVadim Paretsky @ingroup WORK_SHARING
247143d5c4d5SVadim Paretsky @param loc source location information
@param gtid global thread number
247343d5c4d5SVadim Paretsky
247443d5c4d5SVadim Paretsky End of "sections" construct.
247543d5c4d5SVadim Paretsky Don't need to wait here: barrier is added separately when needed.
247643d5c4d5SVadim Paretsky */
__kmpc_end_sections(ident_t * loc,kmp_int32 gtid)247743d5c4d5SVadim Paretsky void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
247843d5c4d5SVadim Paretsky
247943d5c4d5SVadim Paretsky kmp_info_t *th = __kmp_threads[gtid];
248043d5c4d5SVadim Paretsky int active = !th->th.th_team->t.t_serialized;
248143d5c4d5SVadim Paretsky
248243d5c4d5SVadim Paretsky KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));
248343d5c4d5SVadim Paretsky
248443d5c4d5SVadim Paretsky if (!active) {
248543d5c4d5SVadim Paretsky // In active case call finalization is done in __kmpc_next_section
248643d5c4d5SVadim Paretsky #if OMPT_SUPPORT && OMPT_OPTIONAL
248743d5c4d5SVadim Paretsky if (ompt_enabled.ompt_callback_work) {
248843d5c4d5SVadim Paretsky ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
248943d5c4d5SVadim Paretsky ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
249043d5c4d5SVadim Paretsky ompt_callbacks.ompt_callback(ompt_callback_work)(
249143d5c4d5SVadim Paretsky ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
249243d5c4d5SVadim Paretsky &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
249343d5c4d5SVadim Paretsky }
249443d5c4d5SVadim Paretsky #endif
249543d5c4d5SVadim Paretsky KMP_POP_PARTITIONED_TIMER();
249643d5c4d5SVadim Paretsky }
249743d5c4d5SVadim Paretsky
249843d5c4d5SVadim Paretsky KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
249943d5c4d5SVadim Paretsky }
250043d5c4d5SVadim Paretsky
// Compute this team's sub-range of a distribute loop's iteration space.
// On entry *plower/*pupper describe the full loop; on exit they describe the
// chunk assigned to the calling team (per __kmp_static: balanced or greedy).
// *plastiter is set to nonzero if this team owns the last iteration.
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *plastiter, T *plower, T *pupper,
                                  typename traits_t<T>::signed_t incr) {
  typedef typename traits_t<T>::unsigned_t UT;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper);
  KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  __kmp_assert_valid_gtid(gtid);
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type; do the division in the
    // unsigned type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only some teams get single iteration, others get nothing
    if (team_id < trip_count) {
      *pupper = *plower = *plower + team_id * incr;
    } else {
      *plower = *pupper + incr; // zero-trip loop
    }
    if (plastiter != NULL)
      *plastiter = (team_id == trip_count - 1);
  } else {
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: each team gets trip_count/nteams iterations; the first
      // `extras` teams get one extra.
      UT chunk = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunk + (team_id < extras ? team_id : extras));
      *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: equal ceil(trip_count/nteams)-sized chunks; trailing teams
      // may overshoot the range and are clamped below.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupper = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupper < *plower)
          *pupper = traits_t<T>::max_value; // arithmetic wrapped; saturate
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupper > upper - incr;
        if (*pupper > upper)
          *pupper = upper; // tracker C73258
      } else {
        if (*pupper > *plower)
          *pupper = traits_t<T>::min_value; // arithmetic wrapped; saturate
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupper < upper - incr;
        if (*pupper < upper)
          *pupper = upper; // tracker C73258
      }
    }
  }
}
26154cc4bb4cSJim Cownie
26163041982dSJonathan Peyton //-----------------------------------------------------------------------------
26175e8470afSJim Cownie // Dispatch routines
26185e8470afSJim Cownie // Transfer call to template< type T >
26195e8470afSJim Cownie // __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
26205e8470afSJim Cownie // T lb, T ub, ST st, ST chunk )
26215e8470afSJim Cownie extern "C" {
26225e8470afSJim Cownie
26235e8470afSJim Cownie /*!
26245e8470afSJim Cownie @ingroup WORK_SHARING
26255e8470afSJim Cownie @{
26265e8470afSJim Cownie @param loc Source location
26275e8470afSJim Cownie @param gtid Global thread id
26285e8470afSJim Cownie @param schedule Schedule type
26295e8470afSJim Cownie @param lb Lower bound
26305e8470afSJim Cownie @param ub Upper bound
26315e8470afSJim Cownie @param st Step (or increment if you prefer)
26325e8470afSJim Cownie @param chunk The chunk size to block with
26335e8470afSJim Cownie
26343041982dSJonathan Peyton This function prepares the runtime to start a dynamically scheduled for loop,
26353041982dSJonathan Peyton saving the loop arguments.
26365e8470afSJim Cownie These functions are all identical apart from the types of the arguments.
26375e8470afSJim Cownie */
26385e8470afSJim Cownie
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int32 lb,
                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Capture the caller's return address for OMPT before dispatching.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Forward to the signed 32-bit instantiation of the dispatch template.
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
26485e8470afSJim Cownie /*!
26495e8470afSJim Cownie See @ref __kmpc_dispatch_init_4
26505e8470afSJim Cownie */
void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint32 lb,
                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Capture the caller's return address for OMPT before dispatching.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Forward to the unsigned 32-bit instantiation of the dispatch template.
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
26605e8470afSJim Cownie
26615e8470afSJim Cownie /*!
26625e8470afSJim Cownie See @ref __kmpc_dispatch_init_4
26635e8470afSJim Cownie */
void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int64 lb,
                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Capture the caller's return address for OMPT before dispatching.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Forward to the signed 64-bit instantiation of the dispatch template.
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
26735e8470afSJim Cownie
26745e8470afSJim Cownie /*!
26755e8470afSJim Cownie See @ref __kmpc_dispatch_init_4
26765e8470afSJim Cownie */
void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint64 lb,
                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Capture the caller's return address for OMPT before dispatching.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Forward to the unsigned 64-bit instantiation of the dispatch template.
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
26865e8470afSJim Cownie
26875e8470afSJim Cownie /*!
26884cc4bb4cSJim Cownie See @ref __kmpc_dispatch_init_4
26894cc4bb4cSJim Cownie
26904cc4bb4cSJim Cownie Difference from __kmpc_dispatch_init set of functions is these functions
26914cc4bb4cSJim Cownie are called for composite distribute parallel for construct. Thus before
26924cc4bb4cSJim Cownie regular iterations dispatching we need to calc per-team iteration space.
26934cc4bb4cSJim Cownie
26944cc4bb4cSJim Cownie These functions are all identical apart from the types of the arguments.
26954cc4bb4cSJim Cownie */
void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                                 kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Narrow lb/ub to this team's sub-range, then start regular dispatch.
  __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
27074cc4bb4cSJim Cownie
void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                                  kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Narrow lb/ub to this team's sub-range, then start regular dispatch.
  __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
27194cc4bb4cSJim Cownie
void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                                 kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Narrow lb/ub to this team's sub-range, then start regular dispatch.
  __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
27314cc4bb4cSJim Cownie
void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                                  kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  // Narrow lb/ub to this team's sub-range, then start regular dispatch.
  __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
27434cc4bb4cSJim Cownie
27444cc4bb4cSJim Cownie /*!
27455e8470afSJim Cownie @param loc Source code location
27465e8470afSJim Cownie @param gtid Global thread id
27473041982dSJonathan Peyton @param p_last Pointer to a flag set to one if this is the last chunk or zero
27483041982dSJonathan Peyton otherwise
27495e8470afSJim Cownie @param p_lb Pointer to the lower bound for the next chunk of work
27505e8470afSJim Cownie @param p_ub Pointer to the upper bound for the next chunk of work
27515e8470afSJim Cownie @param p_st Pointer to the stride for the next chunk of work
27525e8470afSJim Cownie @return one if there is work to be done, zero otherwise
27535e8470afSJim Cownie
27545e8470afSJim Cownie Get the next dynamically allocated chunk of work for this thread.
27555e8470afSJim Cownie If there is no more work, then the lb,ub and stride need not be modified.
27565e8470afSJim Cownie */
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Record the caller's return address so the template can pass it to OMPT.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}
27695e8470afSJim Cownie
27705e8470afSJim Cownie /*!
27715e8470afSJim Cownie See @ref __kmpc_dispatch_next_4
27725e8470afSJim Cownie */
int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                            kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Record the caller's return address so the template can pass it to OMPT.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}
27865e8470afSJim Cownie
27875e8470afSJim Cownie /*!
27885e8470afSJim Cownie See @ref __kmpc_dispatch_next_4
27895e8470afSJim Cownie */
int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Record the caller's return address so the template can pass it to OMPT.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}
28025e8470afSJim Cownie
28035e8470afSJim Cownie /*!
28045e8470afSJim Cownie See @ref __kmpc_dispatch_next_4
28055e8470afSJim Cownie */
int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                            kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Record the caller's return address so the template can pass it to OMPT.
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}
28195e8470afSJim Cownie
28205e8470afSJim Cownie /*!
28215e8470afSJim Cownie @param loc Source code location
28225e8470afSJim Cownie @param gtid Global thread id
28235e8470afSJim Cownie
28245e8470afSJim Cownie Mark the end of a dynamic loop.
28255e8470afSJim Cownie */
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
  // Signed and unsigned 32-bit variants share the unsigned instantiation.
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}
28295e8470afSJim Cownie
28305e8470afSJim Cownie /*!
28315e8470afSJim Cownie See @ref __kmpc_dispatch_fini_4
28325e8470afSJim Cownie */
void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
  // 64-bit loops (signed and unsigned) share the kmp_uint64 instantiation.
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}
28365e8470afSJim Cownie
28375e8470afSJim Cownie /*!
28385e8470afSJim Cownie See @ref __kmpc_dispatch_fini_4
28395e8470afSJim Cownie */
void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
  // Same instantiation as __kmpc_dispatch_fini_4; separate symbol kept for
  // the compiler-facing ABI.
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}
28435e8470afSJim Cownie
28445e8470afSJim Cownie /*!
28455e8470afSJim Cownie See @ref __kmpc_dispatch_fini_4
28465e8470afSJim Cownie */
void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
  // Same instantiation as __kmpc_dispatch_fini_8; separate symbol kept for
  // the compiler-facing ABI.
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}
28505e8470afSJim Cownie /*! @} */
28515e8470afSJim Cownie
28523041982dSJonathan Peyton //-----------------------------------------------------------------------------
2853de4749b7SJonathan Peyton // Non-template routines from kmp_dispatch.cpp used in other sources
28545e8470afSJim Cownie
__kmp_eq_4(kmp_uint32 value,kmp_uint32 checker)28555e8470afSJim Cownie kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
28565e8470afSJim Cownie return value == checker;
28575e8470afSJim Cownie }
28585e8470afSJim Cownie
__kmp_neq_4(kmp_uint32 value,kmp_uint32 checker)28595e8470afSJim Cownie kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
28605e8470afSJim Cownie return value != checker;
28615e8470afSJim Cownie }
28625e8470afSJim Cownie
__kmp_lt_4(kmp_uint32 value,kmp_uint32 checker)28635e8470afSJim Cownie kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
28645e8470afSJim Cownie return value < checker;
28655e8470afSJim Cownie }
28665e8470afSJim Cownie
__kmp_ge_4(kmp_uint32 value,kmp_uint32 checker)28675e8470afSJim Cownie kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
28685e8470afSJim Cownie return value >= checker;
28695e8470afSJim Cownie }
28705e8470afSJim Cownie
__kmp_le_4(kmp_uint32 value,kmp_uint32 checker)28715e8470afSJim Cownie kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
28725e8470afSJim Cownie return value <= checker;
28735e8470afSJim Cownie }
28745e8470afSJim Cownie
// Spin until pred(*spinner, checker) returns true, yielding or backing off
// between polls via KMP_YIELD_OVERSUB_ELSE_SPIN. Returns the last value
// read from the spin location (the one that satisfied the predicate).
kmp_uint32
__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
             kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
             void *obj // Higher-level synchronization object, or NULL.
) {
  // note: we may not belong to a team at this point
  volatile kmp_uint32 *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins; // yield counter, initialized by KMP_INIT_YIELD
  kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
  kmp_uint32 r; // last value observed at *spin
  kmp_uint64 time; // backoff state, initialized by KMP_INIT_BACKOFF

  KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  // main wait spin loop
  while (!f(r = TCR_4(*spin), check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    /* GEH - remove this since it was accidentally introduced when kmp_wait was
       split. It causes problems with infinite recursion because of exit lock */
    /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
        __kmp_abort_thread(); */
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}
29035e8470afSJim Cownie
// Variant of __kmp_wait_4 whose predicate receives the spin *pointer* itself
// (not a loaded value), letting the predicate perform its own access; no
// value is returned.
void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
                      kmp_uint32 (*pred)(void *, kmp_uint32),
                      void *obj // Higher-level synchronization object, or NULL.
) {
  // note: we may not belong to a team at this point
  void *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins; // yield counter, initialized by KMP_INIT_YIELD
  kmp_uint32 (*f)(void *, kmp_uint32) = pred;
  kmp_uint64 time; // backoff state, initialized by KMP_INIT_BACKOFF

  KMP_FSYNC_SPIN_INIT(obj, spin);
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  // main wait spin loop
  while (!f(spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    /* if we have waited a bit, or are oversubscribed, yield */
    /* pause is in the following code */
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
}
2927f7cc6affSPaul Osmialowski
29285e8470afSJim Cownie } // extern "C"
29295e8470afSJim Cownie
29305e8470afSJim Cownie #ifdef KMP_GOMP_COMPAT
29315e8470afSJim Cownie
// GOMP-compat helper: initialize dynamic scheduling for a loop with signed
// 32-bit bounds. Forwards everything to the templated __kmp_dispatch_init;
// push_ws is passed through unchanged (presumably whether to push a
// workshare — confirm against __kmp_dispatch_init).
void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int32 lb,
                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}
29395e8470afSJim Cownie
// GOMP-compat helper: as __kmp_aux_dispatch_init_4 but with unsigned 32-bit
// bounds (stride and chunk remain signed).
void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint32 lb,
                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}
29475e8470afSJim Cownie
// GOMP-compat helper: as __kmp_aux_dispatch_init_4 but with signed 64-bit
// bounds, stride, and chunk.
void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int64 lb,
                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}
29555e8470afSJim Cownie
// GOMP-compat helper: as __kmp_aux_dispatch_init_8 but with unsigned 64-bit
// bounds (stride and chunk remain signed).
void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint64 lb,
                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}
29635e8470afSJim Cownie
// GOMP-compat helper: finish the current chunk of a 32-bit loop; signed and
// unsigned share the kmp_uint32 instantiation. Note the template takes
// (gtid, loc) — reversed from this entry point.
void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}
29675e8470afSJim Cownie
// GOMP-compat helper: finish the current chunk of a 64-bit loop; signed and
// unsigned share the kmp_uint64 instantiation.
void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}
29715e8470afSJim Cownie
// GOMP-compat helper: same instantiation as __kmp_aux_dispatch_fini_chunk_4;
// separate symbol kept for the compiler-facing ABI.
void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}
29755e8470afSJim Cownie
// GOMP-compat helper: same instantiation as __kmp_aux_dispatch_fini_chunk_8;
// separate symbol kept for the compiler-facing ABI.
void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}
29795e8470afSJim Cownie
29805e8470afSJim Cownie #endif /* KMP_GOMP_COMPAT */
29815e8470afSJim Cownie
29825e8470afSJim Cownie /* ------------------------------------------------------------------------ */
2983