1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 //                     The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 /* Static scheduling initialization.
15 
16   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
17         it may change values between parallel regions.  __kmp_max_nth
18         is the largest value __kmp_nth may take, 1 is the smallest. */
19 
20 #include "kmp.h"
21 #include "kmp_error.h"
22 #include "kmp_i18n.h"
23 #include "kmp_itt.h"
24 #include "kmp_stats.h"
25 #include "kmp_str.h"
26 
27 #if OMPT_SUPPORT
28 #include "ompt-specific.h"
29 #endif
30 
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// Definitions of the printf-style format specifiers consumed by the
// KD_TRACE debug output in this file; one per supported loop-index type.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
41 
// Compute the portion of a statically scheduled workshare (loop, sections,
// or distribute) to be executed by the calling thread.
//
// On entry *plower/*pupper describe the whole iteration space (inclusive
// bounds); on return they are narrowed to this thread's portion and
// *pstride is set for the compiler-generated loop.
//
//   loc        source location info passed by the compiler
//   global_tid global thread id of the caller
//   schedtype  static schedule kind; values above kmp_ord_upper indicate a
//              DISTRIBUTE construct and are converted to the usual kinds
//   plastiter  out: nonzero iff this thread executes the last iteration
//   plower     in/out: lower bound
//   pupper     in/out: upper bound (inclusive)
//   pstride    out: stride to this thread's next chunk (chunked schedules)
//   incr       loop increment (sign gives loop direction; 0 is a user error)
//   chunk      chunk size for chunked schedules (values < 1 treated as 1)
//   codeptr    (OMPT only) return address reported to the tool callback
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_FOR_static);
  KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_type_t ompt_work_type = ompt_work_loop;

  // One-shot guard so the outdated-workshare warning is emitted at most once
  // per process (flipped via compare-and-swap below).
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  // Optional user-enabled consistency checks: workshare nesting and a
  // zero increment are diagnosed here rather than producing silent UB.
  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_COUNT_VALUE(FOR_static_iterations, 0);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // Distribute hands out chunks per team, so use the enclosing (parent)
    // team and this team's master tid as the "thread" id.
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  // Single-thread team: the one thread owns the entire iteration space,
  // same as the serialized case above.
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    // trip_count wrapped to 0 while the bounds differ => range too large
    // for the index type.
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
  KMP_COUNT_VALUE(FOR_static_iterations, trip_count);

  /* compute remaining parameters */
  switch (schedtype) {
  // Block schedule: iteration space divided into (at most) nth contiguous
  // chunks, one per thread.
  case kmp_sch_static: {
    if (trip_count < nth) {
      // Fewer iterations than threads: threads 0..trip_count-1 each get one
      // iteration, the rest get an empty range (lower > upper).
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: first `extras` threads get one extra iteration.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: every thread gets ceil(trip_count/nth) iterations; the
        // last chunk is clipped to the original upper bound below.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          // Clamp on overflow, then clip to the original iteration space.
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  // Chunked schedule: thread tid executes chunks tid, tid+nth, tid+2*nth, ...
  // stepping by *pstride between chunks.
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  return;
}
395 
// Compute chunking for a combined "distribute parallel for" with static
// scheduling inside a teams construct: first the team's distribute chunk,
// then the calling thread's sub-chunk within it.
//
// On return: *plower/*pupper bound the thread's chunk, *pupperDist bounds
// the team's distribute chunk, *pstride is the thread's stride for chunked
// schedules.
//
//   loc        source location info passed by the compiler
//   gtid       global thread id of the caller
//   schedule   static schedule kind for the parallel loop
//   plastiter  out: nonzero iff this thread runs the loop's last iteration
//   plower     in/out: lower bound (whole loop in, thread chunk out)
//   pupper     in/out: upper bound (inclusive)
//   pupperDist out: upper bound of the team's distribute chunk
//   pstride    out: stride between successive chunks of this thread
//   incr       loop increment (nonzero)
//   chunk      chunk size for the chunked schedule (values < 1 treated as 1)
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  // This team's index in the league: its master's tid in the parent team
  // (the parent team has one thread per team, per the assert below).
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: first `extras` teams get one extra iteration.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: every team gets ceil(trip_count/nteams) iterations; clip
      // the last team's chunk to the loop's upper bound below.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    // Divide the team's chunk among the team's nth threads; the logic
    // mirrors __kmp_for_static_init, but *plastiter may only be cleared
    // here (it already reflects whether this TEAM owns the last iteration).
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}
635 
// Compute the first chunk of a distribute loop owned by the calling team.
//
//   loc    source location info passed by the compiler
//   gtid   global thread id of the caller
//   p_last out: nonzero iff this team's chunk sequence ends with the loop's
//          last iteration
//   p_lb   in/out: lower bound (whole loop in, team's first chunk out)
//   p_ub   in/out: upper bound (inclusive)
//   p_st   out: stride from one chunk of this team to its next
//   incr   loop increment (nonzero)
//   chunk  dist_schedule chunk size (values < 1 treated as 1)
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  // This team's index in the league: its master's tid in the parent team
  // (the parent team has one thread per team, per the assert below).
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  // Team team_id takes chunks team_id, team_id+nteams, team_id+2*nteams, ...
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
748 
749 //------------------------------------------------------------------------------
750 extern "C" {
751 /*!
752 @ingroup WORK_SHARING
753 @param    loc       Source code location
754 @param    gtid      Global thread id of this thread
755 @param    schedtype  Scheduling type
756 @param    plastiter Pointer to the "last iteration" flag
757 @param    plower    Pointer to the lower bound
758 @param    pupper    Pointer to the upper bound
759 @param    pstride   Pointer to the stride
760 @param    incr      Loop increment
761 @param    chunk     The chunk size
762 
763 Each of the four functions here are identical apart from the argument types.
764 
765 The functions compute the upper and lower bounds and stride to be used for the
766 set of iterations to be executed by the current thread from the statically
767 scheduled loop that is described by the initial values of the bounds, stride,
768 increment and chunk size.
769 
770 @{
771 */
772 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
773                               kmp_int32 *plastiter, kmp_int32 *plower,
774                               kmp_int32 *pupper, kmp_int32 *pstride,
775                               kmp_int32 incr, kmp_int32 chunk) {
776   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
777                                    pupper, pstride, incr, chunk
778 #if OMPT_SUPPORT && OMPT_OPTIONAL
779                                    ,
780                                    OMPT_GET_RETURN_ADDRESS(0)
781 #endif
782                                        );
783 }
784 
785 /*!
786  See @ref __kmpc_for_static_init_4
787  */
788 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
789                                kmp_int32 schedtype, kmp_int32 *plastiter,
790                                kmp_uint32 *plower, kmp_uint32 *pupper,
791                                kmp_int32 *pstride, kmp_int32 incr,
792                                kmp_int32 chunk) {
793   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
794                                     pupper, pstride, incr, chunk
795 #if OMPT_SUPPORT && OMPT_OPTIONAL
796                                     ,
797                                     OMPT_GET_RETURN_ADDRESS(0)
798 #endif
799                                         );
800 }
801 
802 /*!
803  See @ref __kmpc_for_static_init_4
804  */
805 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
806                               kmp_int32 *plastiter, kmp_int64 *plower,
807                               kmp_int64 *pupper, kmp_int64 *pstride,
808                               kmp_int64 incr, kmp_int64 chunk) {
809   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
810                                    pupper, pstride, incr, chunk
811 #if OMPT_SUPPORT && OMPT_OPTIONAL
812                                    ,
813                                    OMPT_GET_RETURN_ADDRESS(0)
814 #endif
815                                        );
816 }
817 
818 /*!
819  See @ref __kmpc_for_static_init_4
820  */
821 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
822                                kmp_int32 schedtype, kmp_int32 *plastiter,
823                                kmp_uint64 *plower, kmp_uint64 *pupper,
824                                kmp_int64 *pstride, kmp_int64 incr,
825                                kmp_int64 chunk) {
826   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
827                                     pupper, pstride, incr, chunk
828 #if OMPT_SUPPORT && OMPT_OPTIONAL
829                                     ,
830                                     OMPT_GET_RETURN_ADDRESS(0)
831 #endif
832                                         );
833 }
834 /*!
835 @}
836 */
837 
838 /*!
839 @ingroup WORK_SHARING
840 @param    loc       Source code location
841 @param    gtid      Global thread id of this thread
842 @param    schedule  Scheduling type for the parallel loop
843 @param    plastiter Pointer to the "last iteration" flag
844 @param    plower    Pointer to the lower bound
845 @param    pupper    Pointer to the upper bound of loop chunk
846 @param    pupperD   Pointer to the upper bound of dist_chunk
847 @param    pstride   Pointer to the stride for parallel loop
848 @param    incr      Loop increment
849 @param    chunk     The chunk size for the parallel loop
850 
Each of the four functions here is identical apart from the argument types.
852 
853 The functions compute the upper and lower bounds and strides to be used for the
854 set of iterations to be executed by the current thread from the statically
855 scheduled loop that is described by the initial values of the bounds, strides,
856 increment and chunks for parallel loop and distribute constructs.
857 
858 @{
859 */
860 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
861                                    kmp_int32 schedule, kmp_int32 *plastiter,
862                                    kmp_int32 *plower, kmp_int32 *pupper,
863                                    kmp_int32 *pupperD, kmp_int32 *pstride,
864                                    kmp_int32 incr, kmp_int32 chunk) {
865   __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
866                                         pupper, pupperD, pstride, incr, chunk);
867 }
868 
869 /*!
870  See @ref __kmpc_dist_for_static_init_4
871  */
872 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
873                                     kmp_int32 schedule, kmp_int32 *plastiter,
874                                     kmp_uint32 *plower, kmp_uint32 *pupper,
875                                     kmp_uint32 *pupperD, kmp_int32 *pstride,
876                                     kmp_int32 incr, kmp_int32 chunk) {
877   __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
878                                          pupper, pupperD, pstride, incr, chunk);
879 }
880 
881 /*!
882  See @ref __kmpc_dist_for_static_init_4
883  */
884 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
885                                    kmp_int32 schedule, kmp_int32 *plastiter,
886                                    kmp_int64 *plower, kmp_int64 *pupper,
887                                    kmp_int64 *pupperD, kmp_int64 *pstride,
888                                    kmp_int64 incr, kmp_int64 chunk) {
889   __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
890                                         pupper, pupperD, pstride, incr, chunk);
891 }
892 
893 /*!
894  See @ref __kmpc_dist_for_static_init_4
895  */
896 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
897                                     kmp_int32 schedule, kmp_int32 *plastiter,
898                                     kmp_uint64 *plower, kmp_uint64 *pupper,
899                                     kmp_uint64 *pupperD, kmp_int64 *pstride,
900                                     kmp_int64 incr, kmp_int64 chunk) {
901   __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
902                                          pupper, pupperD, pstride, incr, chunk);
903 }
904 /*!
905 @}
906 */
907 
908 //------------------------------------------------------------------------------
909 // Auxiliary routines for Distribute Parallel Loop construct implementation
910 //    Transfer call to template< type T >
911 //    __kmp_team_static_init( ident_t *loc, int gtid,
912 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
913 
914 /*!
915 @ingroup WORK_SHARING
916 @{
917 @param loc Source location
918 @param gtid Global thread id
919 @param p_last pointer to last iteration flag
920 @param p_lb  pointer to Lower bound
921 @param p_ub  pointer to Upper bound
922 @param p_st  Step (or increment if you prefer)
923 @param incr  Loop increment
924 @param chunk The chunk size to block with
925 
926 The functions compute the upper and lower bounds and stride to be used for the
927 set of iterations to be executed by the current team from the statically
928 scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
932 */
933 
934 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
935                                kmp_int32 *p_lb, kmp_int32 *p_ub,
936                                kmp_int32 *p_st, kmp_int32 incr,
937                                kmp_int32 chunk) {
938   KMP_DEBUG_ASSERT(__kmp_init_serial);
939   __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
940                                     chunk);
941 }
942 
943 /*!
944  See @ref __kmpc_team_static_init_4
945  */
946 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
947                                 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
948                                 kmp_int32 *p_st, kmp_int32 incr,
949                                 kmp_int32 chunk) {
950   KMP_DEBUG_ASSERT(__kmp_init_serial);
951   __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
952                                      chunk);
953 }
954 
955 /*!
956  See @ref __kmpc_team_static_init_4
957  */
958 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
959                                kmp_int64 *p_lb, kmp_int64 *p_ub,
960                                kmp_int64 *p_st, kmp_int64 incr,
961                                kmp_int64 chunk) {
962   KMP_DEBUG_ASSERT(__kmp_init_serial);
963   __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
964                                     chunk);
965 }
966 
967 /*!
968  See @ref __kmpc_team_static_init_4
969  */
970 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
971                                 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
972                                 kmp_int64 *p_st, kmp_int64 incr,
973                                 kmp_int64 chunk) {
974   KMP_DEBUG_ASSERT(__kmp_init_serial);
975   __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
976                                      chunk);
977 }
978 /*!
979 @}
980 */
981 
982 } // extern "C"
983