1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16         it may change values between parallel regions.  __kmp_max_nth
17         is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// These printf format specifiers are substituted into KD_TRACE format
// strings built with __kmp_str_format below, so the debug output matches
// the loop induction-variable type T.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40 
#if KMP_STATS_ENABLED
// Record the number of iterations assigned to this thread (computed from
// *pupper, *plower and incr, which must be in scope at the expansion site)
// into `stat`, then pop the partitioned timer pushed on function entry.
// Comments cannot appear inside the backslash-continued body, hence this
// header comment.
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63 
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
65 static inline void check_loc(ident_t *&loc) {
66   if (loc == NULL)
67     loc = &loc_stub; // may need to report location info to ittnotify
68 }
69 
// Compute this thread's share of a statically scheduled worksharing loop.
//
// On entry *plower/*pupper describe the full iteration space; on exit they
// describe the sub-range this thread executes and *pstride the distance
// between this thread's successive chunks.  *plastiter is set nonzero iff
// this thread executes the sequentially last iteration.  T is the loop
// induction-variable type; incr must be nonzero (checked when consistency
// checking is enabled).  schedtype values above kmp_ord_upper indicate a
// DISTRIBUTE construct and are converted back to the corresponding static
// schedule, partitioning over the parent (teams) context instead.
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // Shared warn-once flag for outdated compilers that set none of the
  // KMP_IDENT_WORK_* bits; updated via compare-and-swap below.
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // Exactly one thread wins the CAS and emits the warning.
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // Partition over the parent (teams) context: use the team-master tid
    // within the parent team rather than this thread's own tid.
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  // Single-thread team: this thread owns the entire iteration space.
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  // kmp_sch_static: one contiguous block of iterations per thread.
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      // Fewer iterations than threads: threads 0..trip_count-1 each get
      // exactly one iteration, the rest get an empty range.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: spread the remainder one extra iteration at a time
        // over the first `extras` threads.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: every thread gets ceil(trip_count/nth) iterations; the
        // last thread's upper bound is clipped back below.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          // Saturate on wraparound before clipping to the original bound.
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  // kmp_sch_static_chunked: round-robin chunks of `chunk` iterations;
  // thread tid gets chunks tid, tid+nth, tid+2*nth, ...
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      // Fewer chunks than threads: threads beyond nchunks get an empty range.
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): this rounding only yields a multiple of chunk when
    // chunk is a power of two — confirm callers guarantee that.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
440 
// Compute bounds for a loop in a combined "distribute parallel for":
// first carve the calling team's chunk out of the global iteration space
// (team upper bound returned via *pupperDist), then carve this thread's
// share of the team chunk (*plower/*pupper) according to `schedule`.
// Must be called from inside a teams construct (asserted below).
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  // The team's id within the league is its master tid in the parent team.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: first `extras` teams get one extra iteration each.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: every team gets ceil(trip_count/nteams) iterations; the
      // upper bound is saturated and clipped below.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // Only clear *plastiter here: it may already hold the team-level
        // last-iteration result computed above.
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
682 
// Compute the first chunk of a dist_schedule(static,chunk) DISTRIBUTE loop
// for the calling team, plus the stride between the team's successive
// chunks.  See the comment block inside for the full contract.
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  // The team's id within the league is its master tid in the parent team.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  // Round-robin chunks over the teams: team team_id gets chunks
  // team_id, team_id+nteams, team_id+2*nteams, ...
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec,
                         traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
794 
795 //------------------------------------------------------------------------------
796 extern "C" {
797 /*!
798 @ingroup WORK_SHARING
799 @param    loc       Source code location
800 @param    gtid      Global thread id of this thread
801 @param    schedtype  Scheduling type
802 @param    plastiter Pointer to the "last iteration" flag
803 @param    plower    Pointer to the lower bound
804 @param    pupper    Pointer to the upper bound
805 @param    pstride   Pointer to the stride
806 @param    incr      Loop increment
807 @param    chunk     The chunk size
808 
809 Each of the four functions here are identical apart from the argument types.
810 
811 The functions compute the upper and lower bounds and stride to be used for the
812 set of iterations to be executed by the current thread from the statically
813 scheduled loop that is described by the initial values of the bounds, stride,
814 increment and chunk size.
815 
816 @{
817 */
818 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
819                               kmp_int32 *plastiter, kmp_int32 *plower,
820                               kmp_int32 *pupper, kmp_int32 *pstride,
821                               kmp_int32 incr, kmp_int32 chunk) {
822   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
823                                    pupper, pstride, incr, chunk
824 #if OMPT_SUPPORT && OMPT_OPTIONAL
825                                    ,
826                                    OMPT_GET_RETURN_ADDRESS(0)
827 #endif
828   );
829 }
830 
831 /*!
832  See @ref __kmpc_for_static_init_4
833  */
834 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
835                                kmp_int32 schedtype, kmp_int32 *plastiter,
836                                kmp_uint32 *plower, kmp_uint32 *pupper,
837                                kmp_int32 *pstride, kmp_int32 incr,
838                                kmp_int32 chunk) {
839   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
840                                     pupper, pstride, incr, chunk
841 #if OMPT_SUPPORT && OMPT_OPTIONAL
842                                     ,
843                                     OMPT_GET_RETURN_ADDRESS(0)
844 #endif
845   );
846 }
847 
848 /*!
849  See @ref __kmpc_for_static_init_4
850  */
851 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
852                               kmp_int32 *plastiter, kmp_int64 *plower,
853                               kmp_int64 *pupper, kmp_int64 *pstride,
854                               kmp_int64 incr, kmp_int64 chunk) {
855   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
856                                    pupper, pstride, incr, chunk
857 #if OMPT_SUPPORT && OMPT_OPTIONAL
858                                    ,
859                                    OMPT_GET_RETURN_ADDRESS(0)
860 #endif
861   );
862 }
863 
864 /*!
865  See @ref __kmpc_for_static_init_4
866  */
867 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
868                                kmp_int32 schedtype, kmp_int32 *plastiter,
869                                kmp_uint64 *plower, kmp_uint64 *pupper,
870                                kmp_int64 *pstride, kmp_int64 incr,
871                                kmp_int64 chunk) {
872   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
873                                     pupper, pstride, incr, chunk
874 #if OMPT_SUPPORT && OMPT_OPTIONAL
875                                     ,
876                                     OMPT_GET_RETURN_ADDRESS(0)
877 #endif
878   );
879 }
880 /*!
881 @}
882 */
883 
884 /*!
885 @ingroup WORK_SHARING
886 @param    loc       Source code location
887 @param    gtid      Global thread id of this thread
888 @param    schedule  Scheduling type for the parallel loop
889 @param    plastiter Pointer to the "last iteration" flag
890 @param    plower    Pointer to the lower bound
891 @param    pupper    Pointer to the upper bound of loop chunk
892 @param    pupperD   Pointer to the upper bound of dist_chunk
893 @param    pstride   Pointer to the stride for parallel loop
894 @param    incr      Loop increment
895 @param    chunk     The chunk size for the parallel loop
896 
897 Each of the four functions here are identical apart from the argument types.
898 
899 The functions compute the upper and lower bounds and strides to be used for the
900 set of iterations to be executed by the current thread from the statically
901 scheduled loop that is described by the initial values of the bounds, strides,
902 increment and chunks for parallel loop and distribute constructs.
903 
904 @{
905 */
906 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
907                                    kmp_int32 schedule, kmp_int32 *plastiter,
908                                    kmp_int32 *plower, kmp_int32 *pupper,
909                                    kmp_int32 *pupperD, kmp_int32 *pstride,
910                                    kmp_int32 incr, kmp_int32 chunk) {
911   __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
912                                         pupper, pupperD, pstride, incr, chunk);
913 }
914 
915 /*!
916  See @ref __kmpc_dist_for_static_init_4
917  */
918 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
919                                     kmp_int32 schedule, kmp_int32 *plastiter,
920                                     kmp_uint32 *plower, kmp_uint32 *pupper,
921                                     kmp_uint32 *pupperD, kmp_int32 *pstride,
922                                     kmp_int32 incr, kmp_int32 chunk) {
923   __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
924                                          pupper, pupperD, pstride, incr, chunk);
925 }
926 
927 /*!
928  See @ref __kmpc_dist_for_static_init_4
929  */
930 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
931                                    kmp_int32 schedule, kmp_int32 *plastiter,
932                                    kmp_int64 *plower, kmp_int64 *pupper,
933                                    kmp_int64 *pupperD, kmp_int64 *pstride,
934                                    kmp_int64 incr, kmp_int64 chunk) {
935   __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
936                                         pupper, pupperD, pstride, incr, chunk);
937 }
938 
939 /*!
940  See @ref __kmpc_dist_for_static_init_4
941  */
942 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
943                                     kmp_int32 schedule, kmp_int32 *plastiter,
944                                     kmp_uint64 *plower, kmp_uint64 *pupper,
945                                     kmp_uint64 *pupperD, kmp_int64 *pstride,
946                                     kmp_int64 incr, kmp_int64 chunk) {
947   __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
948                                          pupper, pupperD, pstride, incr, chunk);
949 }
950 /*!
951 @}
952 */
953 
954 //------------------------------------------------------------------------------
955 // Auxiliary routines for Distribute Parallel Loop construct implementation
956 //    Transfer call to template< type T >
957 //    __kmp_team_static_init( ident_t *loc, int gtid,
958 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
959 
960 /*!
961 @ingroup WORK_SHARING
962 @{
963 @param loc Source location
964 @param gtid Global thread id
965 @param p_last pointer to last iteration flag
966 @param p_lb  pointer to Lower bound
967 @param p_ub  pointer to Upper bound
968 @param p_st  Step (or increment if you prefer)
969 @param incr  Loop increment
970 @param chunk The chunk size to block with
971 
972 The functions compute the upper and lower bounds and stride to be used for the
973 set of iterations to be executed by the current team from the statically
974 scheduled loop that is described by the initial values of the bounds, stride,
975 increment and chunk for the distribute construct as part of composite distribute
976 parallel loop construct. These functions are all identical apart from the types
977 of the arguments.
978 */
979 
980 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
981                                kmp_int32 *p_lb, kmp_int32 *p_ub,
982                                kmp_int32 *p_st, kmp_int32 incr,
983                                kmp_int32 chunk) {
984   KMP_DEBUG_ASSERT(__kmp_init_serial);
985   __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
986                                     chunk);
987 }
988 
989 /*!
990  See @ref __kmpc_team_static_init_4
991  */
992 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
993                                 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
994                                 kmp_int32 *p_st, kmp_int32 incr,
995                                 kmp_int32 chunk) {
996   KMP_DEBUG_ASSERT(__kmp_init_serial);
997   __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
998                                      chunk);
999 }
1000 
1001 /*!
1002  See @ref __kmpc_team_static_init_4
1003  */
1004 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1005                                kmp_int64 *p_lb, kmp_int64 *p_ub,
1006                                kmp_int64 *p_st, kmp_int64 incr,
1007                                kmp_int64 chunk) {
1008   KMP_DEBUG_ASSERT(__kmp_init_serial);
1009   __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1010                                     chunk);
1011 }
1012 
1013 /*!
1014  See @ref __kmpc_team_static_init_4
1015  */
1016 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1017                                 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1018                                 kmp_int64 *p_st, kmp_int64 incr,
1019                                 kmp_int64 chunk) {
1020   KMP_DEBUG_ASSERT(__kmp_init_serial);
1021   __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1022                                      chunk);
1023 }
1024 /*!
1025 @}
1026 */
1027 
1028 } // extern "C"
1029