1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16         it may change values between parallel regions.  __kmp_max_nth
17         is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// These define the printf format specifier for each loop-index type; the
// KD_TRACE format strings below are built with __kmp_str_format using
// traits_t<T>::spec so the trace output matches the instantiated type T.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40 
#if KMP_STATS_ENABLED
// Record the number of iterations assigned to this thread (computed from the
// final *plower/*pupper/incr values at loop-partitioning exit) into 'stat',
// then pop the partitioned timer pushed at function entry.  Evaluated in a
// context where pupper, plower and incr are in scope.  The incr == +/-1 cases
// are special-cased so (u - l) cannot overflow before the division.
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63 
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
65 static inline void check_loc(ident_t *&loc) {
66   if (loc == NULL)
67     loc = &loc_stub; // may need to report location info to ittnotify
68 }
69 
// Compute the calling thread's share of a statically scheduled loop.
//
// On entry *plower/*pupper describe the whole iteration space (inclusive
// bounds) with increment 'incr' and optional chunk size 'chunk'.  On exit
// *plower/*pupper are narrowed in place to the sub-range this thread must
// execute, *pstride is set (meaningful for chunked schedules, where the
// thread advances by *pstride between chunks), and *plastiter is set to
// TRUE iff this thread executes the sequentially last iteration.
// 'schedtype' selects the static schedule; distribute schedule types
// (> kmp_ord_upper) are remapped to the corresponding static type below.
// T is the loop index type; traits_t<T> supplies signed/unsigned variants
// and debug-print format specifiers.
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // One-shot latch so the outdated-workshare warning below fires only once
  // per process (first thread to win the CAS reports it).
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident_t flags set by the compiler.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      // distribute partitions among the teams, so schedule relative to the
      // parent (league) team, using this team's position in it.
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  // Team of one thread: it owns the entire iteration space; bounds are left
  // untouched, same as the serialized case above.
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  // Unchunked static: either balanced (near-equal shares per thread) or
  // greedy (one big contiguous chunk per thread), per __kmp_static.
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        // one iteration per thread for the first trip_count threads
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // first 'extras' threads get small_chunk+1 iterations each
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          // clamp on overflow, then back to the real loop upper bound
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  // Chunked static: round-robin chunks of 'chunk' iterations; thread gets
  // its first chunk here and advances by *pstride for subsequent chunks.
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        // no chunk for this thread: empty range
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
446 
// Compute bounds for a loop in a "distribute parallel for" inside a teams
// construct: first partition the global iteration space among teams, then
// partition this team's chunk among its threads.
//
// On exit:
//   *pupperDist - upper bound of the calling team's chunk (distribute level)
//   *plower/*pupper - the calling thread's sub-range of that chunk
//   *pstride - stride for chunked schedules
//   *plastiter - TRUE iff this thread runs the globally last iteration
// 'schedule' is the schedule of the nested parallel-for level.
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  // this team's index within the league is its master's tid in the parent
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // first 'extras' teams get chunkD+1 iterations each
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        // clamp on overflow, then back to the real loop upper bound
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    // Partition the team's chunk among this team's threads; *plastiter is
    // only cleared here (never set), so it stays TRUE only when both the
    // team-level and thread-level conditions hold.
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
688 
// Partition a dist_schedule(static, chunk) loop among the teams of a teams
// construct: each team receives its first chunk and the stride to step to
// its subsequent chunks (round-robin over teams).
//
// On exit: *p_lb/*p_ub bound the team's first chunk, *p_st is the stride
// between this team's chunks, and *p_last is set for the team that will
// execute the last iteration of the loop.
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  // this team's index within the league is its master's tid in the parent
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    // the team owning the last chunk (round-robin) runs the last iteration
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb) // overflow?
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
800 
801 //------------------------------------------------------------------------------
802 extern "C" {
803 /*!
804 @ingroup WORK_SHARING
805 @param    loc       Source code location
806 @param    gtid      Global thread id of this thread
807 @param    schedtype  Scheduling type
808 @param    plastiter Pointer to the "last iteration" flag
809 @param    plower    Pointer to the lower bound
810 @param    pupper    Pointer to the upper bound
811 @param    pstride   Pointer to the stride
812 @param    incr      Loop increment
813 @param    chunk     The chunk size
814 
Each of the four functions here is identical apart from the argument types.
816 
817 The functions compute the upper and lower bounds and stride to be used for the
818 set of iterations to be executed by the current thread from the statically
819 scheduled loop that is described by the initial values of the bounds, stride,
820 increment and chunk size.
821 
822 @{
823 */
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  // Thin extern "C" entry point: delegates to the signed 32-bit instantiation
  // of the __kmp_for_static_init<> template (defined earlier in this file).
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   // Captured here, at the outermost runtime
                                   // frame, so the address presumably reflects
                                   // the user-code call site for OMPT tools
                                   // (see ompt-specific.h) — confirm there.
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
836 
837 /*!
838  See @ref __kmpc_for_static_init_4
839  */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  // Thin extern "C" entry point: delegates to the unsigned 32-bit
  // instantiation of __kmp_for_static_init<>. Note incr/chunk/stride stay
  // signed; only the loop bounds are unsigned.
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    // Captured at this frame so the address
                                    // presumably reflects the user-code call
                                    // site for OMPT tools — see
                                    // ompt-specific.h.
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
853 
854 /*!
855  See @ref __kmpc_for_static_init_4
856  */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  // Thin extern "C" entry point: delegates to the signed 64-bit instantiation
  // of the __kmp_for_static_init<> template.
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   // Captured here so the address presumably
                                   // reflects the user-code call site for
                                   // OMPT tools — see ompt-specific.h.
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
869 
870 /*!
871  See @ref __kmpc_for_static_init_4
872  */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  // Thin extern "C" entry point: delegates to the unsigned 64-bit
  // instantiation of __kmp_for_static_init<>. Note incr/chunk/stride stay
  // signed; only the loop bounds are unsigned.
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    // Captured at this frame so the address
                                    // presumably reflects the user-code call
                                    // site for OMPT tools — see
                                    // ompt-specific.h.
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
886 /*!
887 @}
888 */
889 
890 /*!
891 @ingroup WORK_SHARING
892 @param    loc       Source code location
893 @param    gtid      Global thread id of this thread
894 @param    schedule  Scheduling type for the parallel loop
895 @param    plastiter Pointer to the "last iteration" flag
896 @param    plower    Pointer to the lower bound
897 @param    pupper    Pointer to the upper bound of loop chunk
898 @param    pupperD   Pointer to the upper bound of dist_chunk
899 @param    pstride   Pointer to the stride for parallel loop
900 @param    incr      Loop increment
901 @param    chunk     The chunk size for the parallel loop
902 
Each of the four functions here is identical apart from the argument types.
904 
905 The functions compute the upper and lower bounds and strides to be used for the
906 set of iterations to be executed by the current thread from the statically
907 scheduled loop that is described by the initial values of the bounds, strides,
908 increment and chunks for parallel loop and distribute constructs.
909 
910 @{
911 */
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  // Thin extern "C" entry point: delegates to the signed 32-bit instantiation
  // of __kmp_dist_for_static_init<> (distribute parallel loop variant, which
  // additionally fills *pupperD, the dist_chunk upper bound).
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
920 
921 /*!
922  See @ref __kmpc_dist_for_static_init_4
923  */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  // Thin extern "C" entry point: delegates to the unsigned 32-bit
  // instantiation of __kmp_dist_for_static_init<>. Bounds are unsigned;
  // incr/chunk/stride remain signed.
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
932 
933 /*!
934  See @ref __kmpc_dist_for_static_init_4
935  */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  // Thin extern "C" entry point: delegates to the signed 64-bit instantiation
  // of __kmp_dist_for_static_init<>.
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
944 
945 /*!
946  See @ref __kmpc_dist_for_static_init_4
947  */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  // Thin extern "C" entry point: delegates to the unsigned 64-bit
  // instantiation of __kmp_dist_for_static_init<>. Bounds are unsigned;
  // incr/chunk/stride remain signed.
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
956 /*!
957 @}
958 */
959 
960 //------------------------------------------------------------------------------
961 // Auxiliary routines for Distribute Parallel Loop construct implementation
962 //    Transfer call to template< type T >
963 //    __kmp_team_static_init( ident_t *loc, int gtid,
964 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
965 
966 /*!
967 @ingroup WORK_SHARING
968 @{
969 @param loc Source location
970 @param gtid Global thread id
971 @param p_last pointer to last iteration flag
972 @param p_lb  pointer to Lower bound
973 @param p_ub  pointer to Upper bound
@param p_st  Pointer to the stride (output; distinct from the loop increment)
975 @param incr  Loop increment
976 @param chunk The chunk size to block with
977 
978 The functions compute the upper and lower bounds and stride to be used for the
979 set of iterations to be executed by the current team from the statically
980 scheduled loop that is described by the initial values of the bounds, stride,
981 increment and chunk for the distribute construct as part of composite distribute
982 parallel loop construct. These functions are all identical apart from the types
983 of the arguments.
984 */
985 
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  // Debug-only sanity check that serial runtime initialization has completed.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // Delegate to the signed 32-bit instantiation of __kmp_team_static_init<>
  // (per-team bounds for the distribute part of distribute parallel loop).
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
994 
995 /*!
996  See @ref __kmpc_team_static_init_4
997  */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  // Debug-only sanity check that serial runtime initialization has completed.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // Delegate to the unsigned 32-bit instantiation of __kmp_team_static_init<>.
  // Bounds are unsigned; incr/chunk/stride remain signed.
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
1006 
1007 /*!
1008  See @ref __kmpc_team_static_init_4
1009  */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  // Debug-only sanity check that serial runtime initialization has completed.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // Delegate to the signed 64-bit instantiation of __kmp_team_static_init<>.
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
1018 
1019 /*!
1020  See @ref __kmpc_team_static_init_4
1021  */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  // Debug-only sanity check that serial runtime initialization has completed.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // Delegate to the unsigned 64-bit instantiation of __kmp_team_static_init<>.
  // Bounds are unsigned; incr/chunk/stride remain signed.
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
1030 /*!
1031 @}
1032 */
1033 
1034 } // extern "C"
1035