1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 /* Static scheduling initialization.
17 
18   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
19         it may change values between parallel regions.  __kmp_max_nth
20         is the largest value __kmp_nth may take, 1 is the smallest. */
21 
22 #include "kmp.h"
23 #include "kmp_error.h"
24 #include "kmp_i18n.h"
25 #include "kmp_itt.h"
26 #include "kmp_stats.h"
27 #include "kmp_str.h"
28 
29 #if OMPT_SUPPORT
30 #include "ompt-specific.h"
31 #endif
32 
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// printf-style format specifiers for each supported loop-index type; used
// when building KD_TRACE debug messages via __kmp_str_format below.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
//-------------------------------------------------------------------------
#endif
42 
// Compute this thread's portion of a statically scheduled loop.
//
// On entry *plower/*pupper describe the whole iteration space and `incr` its
// step; on exit they are narrowed to the chunk thread `global_tid` must
// execute, *pstride is set (meaningful for chunked schedules), and *plastiter
// is set to TRUE iff this thread runs the sequentially last iteration.
// `schedtype` selects the static schedule variant; values above kmp_ord_upper
// are "distribute" schedules and are mapped back to the usual ones below.
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_FOR_static);
  KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  register kmp_int32 gtid = global_tid;
  register kmp_uint32 tid;
  register kmp_uint32 nth;
  register UT trip_count;
  register kmp_team_t *team;
  register kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_TRACE
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;

  if (ompt_enabled) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_taskinfo(0);
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    const char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      const char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
      ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
          team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif
    KMP_COUNT_VALUE(FOR_static_iterations, 0);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      const char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
      ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
          team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  // A team of a single thread executes the entire iteration space, exactly
  // like the serialized case above.
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      const char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
      ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
          team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
  KMP_COUNT_VALUE(FOR_static_iterations, trip_count);

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    // Fewer iterations than threads: threads 0..trip_count-1 each get exactly
    // one iteration, the remaining threads get an empty range.
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      // Balanced: chunk sizes differ by at most one; the first `extras`
      // threads take the larger chunk.
      if (__kmp_static == kmp_sch_static_balanced) {
        register UT small_chunk = trip_count / nth;
        register UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: all threads take ceil(trip_count/nth); the computed upper
        // bound may overshoot and is clamped back below.
        register T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        register T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    register ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    // Round-robin: thread tid takes chunk tid, tid+nth, tid+2*nth, ...
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    register T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    register UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    const char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
    ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
        team_info->parallel_id, task_info->task_id, team_info->microtask);
  }
#endif

  return;
}
368 
// Compute both the team's chunk of a "distribute" loop and, within that
// chunk, the calling thread's chunk of the nested parallel loop.
//
// On exit *pupperDist holds the upper bound of this team's distribute chunk,
// while *plower/*pupper hold the calling thread's bounds inside that chunk
// according to `schedule` (kmp_sch_static or kmp_sch_static_chunked).
// *plastiter is set iff this thread executes the last iteration of the whole
// loop.  Must be called from within a teams construct (asserted below).
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  register kmp_uint32 tid;
  register kmp_uint32 nth;
  register kmp_uint32 team_id;
  register kmp_uint32 nteams;
  register UT trip_count;
  register kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    const char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  // NOTE(review): nteams is only assigned under OMP_40_ENABLED; the assert
  // below reads it either way — confirm this path requires OMP_40_ENABLED.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: team chunk sizes differ by at most one iteration.
      register UT chunkD = trip_count / nteams;
      register UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: every team takes ceil(trip_count/nteams); the computed upper
      // bound may overshoot and is clamped back below.
      register T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      register T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // only clear *plastiter here: it can only remain set if this thread
        // also runs the last iteration of the team's chunk
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          register UT chunkL = trip_count / nth;
          register UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          register T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          register T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      register ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    const char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}
608 
// Compute the first chunk of a distribute loop for the calling team under
// dist_schedule(static, chunk).
//
// p_lb/p_ub receive this team's first chunk bounds, p_st the stride to the
// team's next chunk, and p_last is set for the team that executes the last
// iteration of the loop.  Must be called from within a teams construct.
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    const char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  // NOTE(review): nteams is only assigned under OMP_40_ENABLED; the assert
  // below reads it either way — confirm this path requires OMP_40_ENABLED.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  // Teams take chunks round-robin: team_id, team_id+nteams, ...
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    const char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
721 
722 //------------------------------------------------------------------------------
723 extern "C" {
724 /*!
725 @ingroup WORK_SHARING
726 @param    loc       Source code location
727 @param    gtid      Global thread id of this thread
728 @param    schedtype  Scheduling type
729 @param    plastiter Pointer to the "last iteration" flag
730 @param    plower    Pointer to the lower bound
731 @param    pupper    Pointer to the upper bound
732 @param    pstride   Pointer to the stride
733 @param    incr      Loop increment
734 @param    chunk     The chunk size
735 
Each of the four functions here is identical apart from the argument types.
737 
738 The functions compute the upper and lower bounds and stride to be used for the
739 set of iterations to be executed by the current thread from the statically
740 scheduled loop that is described by the initial values of the bounds, stride,
741 increment and chunk size.
742 
743 @{
744 */
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  // ABI entry point: forward to the signed 32-bit instantiation.
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk);
}
752 
753 /*!
754  See @ref __kmpc_for_static_init_4
755  */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  // ABI entry point: forward to the unsigned 32-bit instantiation.
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk);
}
764 
765 /*!
766  See @ref __kmpc_for_static_init_4
767  */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  // ABI entry point: forward to the signed 64-bit instantiation.
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk);
}
775 
776 /*!
777  See @ref __kmpc_for_static_init_4
778  */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  // ABI entry point: forward to the unsigned 64-bit instantiation.
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk);
}
787 /*!
788 @}
789 */
790 
791 /*!
792 @ingroup WORK_SHARING
793 @param    loc       Source code location
794 @param    gtid      Global thread id of this thread
795 @param    schedule  Scheduling type for the parallel loop
796 @param    plastiter Pointer to the "last iteration" flag
797 @param    plower    Pointer to the lower bound
798 @param    pupper    Pointer to the upper bound of loop chunk
799 @param    pupperD   Pointer to the upper bound of dist_chunk
800 @param    pstride   Pointer to the stride for parallel loop
801 @param    incr      Loop increment
802 @param    chunk     The chunk size for the parallel loop
803 
804 Each of the four functions here are identical apart from the argument types.
805 
806 The functions compute the upper and lower bounds and strides to be used for the
807 set of iterations to be executed by the current thread from the statically
808 scheduled loop that is described by the initial values of the bounds, strides,
809 increment and chunks for parallel loop and distribute constructs.
810 
811 @{
812 */
813 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
814                                    kmp_int32 schedule, kmp_int32 *plastiter,
815                                    kmp_int32 *plower, kmp_int32 *pupper,
816                                    kmp_int32 *pupperD, kmp_int32 *pstride,
817                                    kmp_int32 incr, kmp_int32 chunk) {
818   __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
819                                         pupper, pupperD, pstride, incr, chunk);
820 }
821 
822 /*!
823  See @ref __kmpc_dist_for_static_init_4
824  */
825 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
826                                     kmp_int32 schedule, kmp_int32 *plastiter,
827                                     kmp_uint32 *plower, kmp_uint32 *pupper,
828                                     kmp_uint32 *pupperD, kmp_int32 *pstride,
829                                     kmp_int32 incr, kmp_int32 chunk) {
830   __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
831                                          pupper, pupperD, pstride, incr, chunk);
832 }
833 
834 /*!
835  See @ref __kmpc_dist_for_static_init_4
836  */
837 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
838                                    kmp_int32 schedule, kmp_int32 *plastiter,
839                                    kmp_int64 *plower, kmp_int64 *pupper,
840                                    kmp_int64 *pupperD, kmp_int64 *pstride,
841                                    kmp_int64 incr, kmp_int64 chunk) {
842   __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
843                                         pupper, pupperD, pstride, incr, chunk);
844 }
845 
846 /*!
847  See @ref __kmpc_dist_for_static_init_4
848  */
849 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
850                                     kmp_int32 schedule, kmp_int32 *plastiter,
851                                     kmp_uint64 *plower, kmp_uint64 *pupper,
852                                     kmp_uint64 *pupperD, kmp_int64 *pstride,
853                                     kmp_int64 incr, kmp_int64 chunk) {
854   __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
855                                          pupper, pupperD, pstride, incr, chunk);
856 }
857 /*!
858 @}
859 */
860 
861 //------------------------------------------------------------------------------
862 // Auxiliary routines for Distribute Parallel Loop construct implementation
863 //    Transfer call to template< type T >
864 //    __kmp_team_static_init( ident_t *loc, int gtid,
865 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
866 
867 /*!
868 @ingroup WORK_SHARING
869 @{
870 @param loc Source location
871 @param gtid Global thread id
872 @param p_last pointer to last iteration flag
873 @param p_lb  pointer to Lower bound
874 @param p_ub  pointer to Upper bound
875 @param p_st  Step (or increment if you prefer)
876 @param incr  Loop increment
877 @param chunk The chunk size to block with
878 
879 The functions compute the upper and lower bounds and stride to be used for the
880 set of iterations to be executed by the current team from the statically
881 scheduled loop that is described by the initial values of the bounds, stride,
882 increment and chunk for the distribute construct as part of composite distribute
883 parallel loop construct. These functions are all identical apart from the types
884 of the arguments.
885 */
886 
887 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
888                                kmp_int32 *p_lb, kmp_int32 *p_ub,
889                                kmp_int32 *p_st, kmp_int32 incr,
890                                kmp_int32 chunk) {
891   KMP_DEBUG_ASSERT(__kmp_init_serial);
892   __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
893                                     chunk);
894 }
895 
896 /*!
897  See @ref __kmpc_team_static_init_4
898  */
899 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
900                                 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
901                                 kmp_int32 *p_st, kmp_int32 incr,
902                                 kmp_int32 chunk) {
903   KMP_DEBUG_ASSERT(__kmp_init_serial);
904   __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
905                                      chunk);
906 }
907 
908 /*!
909  See @ref __kmpc_team_static_init_4
910  */
911 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
912                                kmp_int64 *p_lb, kmp_int64 *p_ub,
913                                kmp_int64 *p_st, kmp_int64 incr,
914                                kmp_int64 chunk) {
915   KMP_DEBUG_ASSERT(__kmp_init_serial);
916   __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
917                                     chunk);
918 }
919 
920 /*!
921  See @ref __kmpc_team_static_init_4
922  */
923 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
924                                 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
925                                 kmp_int64 *p_st, kmp_int64 incr,
926                                 kmp_int64 chunk) {
927   KMP_DEBUG_ASSERT(__kmp_init_serial);
928   __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
929                                      chunk);
930 }
931 /*!
932 @}
933 */
934 
935 } // extern "C"
936