1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16         it may change values between parallel regions.  __kmp_max_nth
17         is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
30 #ifdef KMP_DEBUG
31 //-------------------------------------------------------------------------
32 // template for debug prints specification ( d, u, lld, llu )
// Definitions of the per-type printf format specifiers declared by
// traits_t<>; consumed only by the KD_TRACE debug output in this file.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
38 //-------------------------------------------------------------------------
39 #endif
40 
#if KMP_STATS_ENABLED
/* Fold the loop trip count into statistic `stat` and pop the partitioned
   timer pushed on entry to the *_static_init routines below.  Relies on
   `pupper`, `plower` and `incr` being in scope at every expansion site.
   Arithmetic is widened to kmp_int64 so narrow index types cannot overflow.
   (Comments are kept outside the macro body: a `//` comment would swallow
   the backslash line continuations.) */
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63 
// Compute the per-thread bounds of a statically scheduled worksharing
// construct (loop, sections, or a distribute lowered onto this entry point).
//
//   loc        Source location; its KMP_IDENT_WORK_* flags also select the
//              OMPT workshare type reported to tools.
//   global_tid Global thread id of the calling thread.
//   schedtype  kmp_sch_static / kmp_sch_static_chunked /
//              kmp_sch_static_balanced_chunked; values above kmp_ord_upper
//              indicate a DISTRIBUTE construct and are remapped below.
//   plastiter  Out: TRUE iff this thread executes the last iteration.
//   plower,
//   pupper     In: the entire iteration space; out: this thread's sub-range.
//   pstride    Out: distance to this thread's next chunk for the chunked
//              schedules (set to the trip count for kmp_sch_static; value
//              is unspecified/unused for zero-trip loops).
//   incr       Loop increment; must be non-zero.
//   chunk      Chunk size for the chunked schedules (values < 1 coerced to 1).
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // One-shot guard: the outdated-workshare warning below is issued at most
  // once per process (flag flipped via compare-and-swap).
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident flags the compiler set.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // Compiler did not tag the ident; warn once and keep the default.
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      // Zero-trip loop: report a count of 0 iterations to the tool.
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // Distribute partitions iterations among the league's team masters, so
    // use this team's master tid within the parent team, not our own tid.
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    // Single-thread team: this thread gets the whole iteration space.
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  // Record the total loop size once per team (master thread only).
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      // Fewer iterations than threads: threads tid < trip_count get exactly
      // one iteration each, the rest get an empty range.
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: every thread gets trip_count/nth iterations and the
        // first `extras` threads get one extra.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: ceil(trip_count/nth)-sized blocks; the last thread's block
        // may run past the original bound and is clamped below.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          // Detect wraparound of the computed upper bound, then clamp to the
          // original iteration space.
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    // Round-robin chunks of `chunk` iterations; *pstride advances a thread
    // from one of its chunks to the next.
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
426 
// Compute bounds for a combined "distribute parallel for" construct: first
// carve this team's chunk out of the whole iteration space (partitioned
// among the league's teams), then partition that chunk among the team's
// threads according to `schedule`.
//
//   plower, pupper  In: whole iteration space; out: this thread's range.
//   pupperDist      Out: upper bound of this team's whole chunk.
//   pstride         Out: stride for kmp_sch_static_chunked (pre-set to a
//                   placeholder value otherwise).
//   plastiter       Out: TRUE iff this thread executes the last iteration
//                   of the entire distribute iteration space.
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  // NOTE(review): nteams is only initialized under OMP_40_ENABLED above but
  // is used unconditionally below — presumably this file is always built
  // with OMP_40_ENABLED; confirm before disabling it.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: trip_count/nteams per team, first `extras` teams get one
      // extra iteration.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: ceil(trip_count/nteams)-sized team chunks; overflow of the
      // computed bound is detected and clamped below.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // Only clear plastiter here; it may already be TRUE from the
        // team-level partitioning above and must stay TRUE only for the
        // thread that also runs the team's last iteration.
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      // Round-robin chunks within the team's range.
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
669 
// Compute this team's first chunk of a dist_schedule(static, chunk)
// distribute loop, plus the stride to its subsequent chunks.
//
//   p_lb, p_ub  In: whole iteration space; out: the team's first chunk.
//   p_st        Out: distance between the team's successive chunks
//               (chunk * incr * nteams).
//   p_last      Out: TRUE for the team that executes the loop's last
//               iteration.
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  // NOTE(review): as in __kmp_dist_for_static_init, nteams is used below but
  // only initialized when OMP_40_ENABLED is defined.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  // Teams take chunks round-robin: team_id selects the first chunk, *p_st
  // jumps a team from one of its chunks to the next.
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
782 
783 //------------------------------------------------------------------------------
784 extern "C" {
785 /*!
786 @ingroup WORK_SHARING
787 @param    loc       Source code location
788 @param    gtid      Global thread id of this thread
789 @param    schedtype  Scheduling type
790 @param    plastiter Pointer to the "last iteration" flag
791 @param    plower    Pointer to the lower bound
792 @param    pupper    Pointer to the upper bound
793 @param    pstride   Pointer to the stride
794 @param    incr      Loop increment
795 @param    chunk     The chunk size
796 
Each of the four functions here is identical apart from the argument types.
798 
799 The functions compute the upper and lower bounds and stride to be used for the
800 set of iterations to be executed by the current thread from the statically
801 scheduled loop that is described by the initial values of the bounds, stride,
802 increment and chunk size.
803 
804 @{
805 */
806 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
807                               kmp_int32 *plastiter, kmp_int32 *plower,
808                               kmp_int32 *pupper, kmp_int32 *pstride,
809                               kmp_int32 incr, kmp_int32 chunk) {
810   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
811                                    pupper, pstride, incr, chunk
812 #if OMPT_SUPPORT && OMPT_OPTIONAL
813                                    ,
814                                    OMPT_GET_RETURN_ADDRESS(0)
815 #endif
816                                        );
817 }
818 
819 /*!
820  See @ref __kmpc_for_static_init_4
821  */
822 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
823                                kmp_int32 schedtype, kmp_int32 *plastiter,
824                                kmp_uint32 *plower, kmp_uint32 *pupper,
825                                kmp_int32 *pstride, kmp_int32 incr,
826                                kmp_int32 chunk) {
827   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
828                                     pupper, pstride, incr, chunk
829 #if OMPT_SUPPORT && OMPT_OPTIONAL
830                                     ,
831                                     OMPT_GET_RETURN_ADDRESS(0)
832 #endif
833                                         );
834 }
835 
836 /*!
837  See @ref __kmpc_for_static_init_4
838  */
839 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
840                               kmp_int32 *plastiter, kmp_int64 *plower,
841                               kmp_int64 *pupper, kmp_int64 *pstride,
842                               kmp_int64 incr, kmp_int64 chunk) {
843   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
844                                    pupper, pstride, incr, chunk
845 #if OMPT_SUPPORT && OMPT_OPTIONAL
846                                    ,
847                                    OMPT_GET_RETURN_ADDRESS(0)
848 #endif
849                                        );
850 }
851 
852 /*!
853  See @ref __kmpc_for_static_init_4
854  */
855 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
856                                kmp_int32 schedtype, kmp_int32 *plastiter,
857                                kmp_uint64 *plower, kmp_uint64 *pupper,
858                                kmp_int64 *pstride, kmp_int64 incr,
859                                kmp_int64 chunk) {
860   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
861                                     pupper, pstride, incr, chunk
862 #if OMPT_SUPPORT && OMPT_OPTIONAL
863                                     ,
864                                     OMPT_GET_RETURN_ADDRESS(0)
865 #endif
866                                         );
867 }
868 /*!
869 @}
870 */
871 
872 /*!
873 @ingroup WORK_SHARING
874 @param    loc       Source code location
875 @param    gtid      Global thread id of this thread
876 @param    schedule  Scheduling type for the parallel loop
877 @param    plastiter Pointer to the "last iteration" flag
878 @param    plower    Pointer to the lower bound
879 @param    pupper    Pointer to the upper bound of loop chunk
880 @param    pupperD   Pointer to the upper bound of dist_chunk
881 @param    pstride   Pointer to the stride for parallel loop
882 @param    incr      Loop increment
883 @param    chunk     The chunk size for the parallel loop
884 
Each of the four functions here is identical apart from the argument types.
886 
887 The functions compute the upper and lower bounds and strides to be used for the
888 set of iterations to be executed by the current thread from the statically
889 scheduled loop that is described by the initial values of the bounds, strides,
890 increment and chunks for parallel loop and distribute constructs.
891 
892 @{
893 */
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for signed 32-bit iteration variables.
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
902 
903 /*!
904  See @ref __kmpc_dist_for_static_init_4
905  */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for unsigned 32-bit iteration variables.
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
914 
915 /*!
916  See @ref __kmpc_dist_for_static_init_4
917  */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for signed 64-bit iteration variables.
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
926 
927 /*!
928  See @ref __kmpc_dist_for_static_init_4
929  */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for unsigned 64-bit iteration variables.
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
938 /*!
939 @}
940 */
941 
942 //------------------------------------------------------------------------------
943 // Auxiliary routines for Distribute Parallel Loop construct implementation
944 //    Transfer call to template< type T >
945 //    __kmp_team_static_init( ident_t *loc, int gtid,
946 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
947 
948 /*!
949 @ingroup WORK_SHARING
950 @{
951 @param loc Source location
952 @param gtid Global thread id
953 @param p_last pointer to last iteration flag
954 @param p_lb  pointer to Lower bound
955 @param p_ub  pointer to Upper bound
956 @param p_st  Step (or increment if you prefer)
957 @param incr  Loop increment
958 @param chunk The chunk size to block with
959 
960 The functions compute the upper and lower bounds and stride to be used for the
961 set of iterations to be executed by the current team from the statically
962 scheduled loop that is described by the initial values of the bounds, stride,
963 increment and chunk for the distribute construct as part of composite distribute
964 parallel loop construct. These functions are all identical apart from the types
965 of the arguments.
966 */
967 
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  // Debug-build sanity check: the serial runtime must be initialized first.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for signed 32-bit iteration variables.
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
976 
977 /*!
978  See @ref __kmpc_team_static_init_4
979  */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  // Debug-build sanity check: the serial runtime must be initialized first.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for unsigned 32-bit iteration variables.
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
988 
989 /*!
990  See @ref __kmpc_team_static_init_4
991  */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  // Debug-build sanity check: the serial runtime must be initialized first.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for signed 64-bit iteration variables.
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
1000 
1001 /*!
1002  See @ref __kmpc_team_static_init_4
1003  */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  // Debug-build sanity check: the serial runtime must be initialized first.
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // C ABI entry point: forward to the type-templated worker routine,
  // instantiated for unsigned 64-bit iteration variables.
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
1012 /*!
1013 @}
1014 */
1015 
1016 } // extern "C"
1017