1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 //                     The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 /* Static scheduling initialization.
15 
16   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
17         it may change values between parallel regions.  __kmp_max_nth
18         is the largest value __kmp_nth may take, 1 is the smallest. */
19 
20 #include "kmp.h"
21 #include "kmp_error.h"
22 #include "kmp_i18n.h"
23 #include "kmp_itt.h"
24 #include "kmp_stats.h"
25 #include "kmp_str.h"
26 
27 #if OMPT_SUPPORT
28 #include "ompt-specific.h"
29 #endif
30 
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// NOTE: an explicit specialization of a class-template static data member
// must be introduced with "template <>"; without it these definitions are
// ill-formed C++ and rejected by conforming compilers.
template <> char const *traits_t<int>::spec = "d";
template <> char const *traits_t<unsigned int>::spec = "u";
template <> char const *traits_t<long long>::spec = "lld";
template <> char const *traits_t<unsigned long long>::spec = "llu";
//-------------------------------------------------------------------------
#endif
40 
// Compute the chunk of a statically scheduled worksharing loop that the
// calling thread must execute.  On return *plower/*pupper are narrowed to
// this thread's bounds and *pstride holds the stride between chunks (the
// trip count for unchunked static).  Templated over the loop index type T
// (signed or unsigned, 32- or 64-bit).
//
//   loc        - source location; loc->flags also encodes the workshare kind
//   global_tid - global thread id of the caller
//   schedtype  - static schedule kind; values above kmp_ord_upper denote a
//                "distribute" schedule and are converted to the ordinary one
//   plastiter  - out: TRUE iff this thread executes the last iteration
//   plower     - in: loop lower bound; out: this thread's lower bound
//   pupper     - in: loop upper bound; out: this thread's upper bound
//   pstride    - out: stride between successive chunks
//   incr       - loop increment; must be non-zero
//   chunk      - chunk size for chunked schedules
//   codeptr    - (OMPT only) caller return address for work callbacks
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_FOR_static);
  KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_type_t ompt_work_type = ompt_work_loop;

  // warn at most once per process about outdated workshare ident flags
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // compare-and-swap so only one thread ever emits the warning
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_COUNT_VALUE(FOR_static_iterations, 0);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // distribute partitions among team masters, so use the parent team
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  // Single-thread team: that thread executes the entire iteration space.
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
  KMP_COUNT_VALUE(FOR_static_iterations, trip_count);

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    // Block distribution: each thread receives at most one contiguous chunk.
    if (trip_count < nth) {
      // Fewer iterations than threads: one iteration per low-numbered thread.
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // empty range (lower past upper) so the compiled loop body is skipped
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: chunk sizes differ by at most one iteration; the first
        // 'extras' threads get the larger chunks.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: every thread gets ceil(trip_count/nth) iterations; the last
        // thread's chunk is clipped back to the original upper bound below.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value; // clamp wrapped upper bound
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value; // clamp wrapped upper bound
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    // Cyclic distribution: chunks of 'chunk' iterations handed out
    // round-robin by thread id; *pstride advances to the thread's next chunk.
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): this rounds span up to a multiple of chunk assuming chunk
    // is a power of two -- confirm callers guarantee that.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  return;
}
394 
// Compute, for a combined distribute-parallel-for with static scheduling,
// both the calling team's "distribute" chunk (upper bound in *pupperDist)
// and the calling thread's chunk of the nested parallel loop within it
// (*plower/*pupper/*pstride).  Must be invoked inside a teams construct.
//   loc        - source location
//   gtid       - global thread id of the caller
//   schedule   - schedule of the parallel loop (kmp_sch_static[_chunked])
//   plastiter  - out: TRUE iff this thread executes the loop's last iteration
//   plower     - in: whole loop lower bound; out: thread's lower bound
//   pupper     - in: whole loop upper bound; out: thread's upper bound
//   pupperDist - out: upper bound of this team's distribute chunk
//   pstride    - out: stride for the parallel loop
//   incr       - loop increment; must be non-zero
//   chunk      - chunk size for the parallel loop
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    // At most one iteration per team.
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: team chunk sizes differ by at most one iteration.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: each team gets ceil(trip_count/nteams) iterations; the last
      // team's chunk is clipped back to the original upper bound.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value; // clamp wrapped upper bound
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value; // clamp wrapped upper bound
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    // Split the team's chunk among its threads (mirrors __kmp_for_static_init).
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value; // clamp wrapped upper bound
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value; // clamp wrapped upper bound
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      // Cyclic distribution of fixed-size chunks within the team's range.
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}
634 
// Compute the first chunk of a "distribute" loop assigned to the calling team
// and the stride with which the team advances to its subsequent chunks.
//   loc    - source location
//   gtid   - global thread id of the caller
//   p_last - out: set iff this team will execute the loop's last iteration
//   p_lb   - in: loop lower bound; out: team's first-chunk lower bound
//   p_ub   - in: loop upper bound; out: team's first-chunk upper bound
//   p_st   - out: stride between the team's successive chunks
//   incr   - loop increment; must be non-zero
//   chunk  - dist_schedule chunk size (values < 1 are treated as 1)
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  // Teams take chunks round-robin; this team starts at chunk number team_id.
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb) // underflow?
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
747 
748 //------------------------------------------------------------------------------
749 extern "C" {
750 /*!
751 @ingroup WORK_SHARING
752 @param    loc       Source code location
753 @param    gtid      Global thread id of this thread
754 @param    schedtype  Scheduling type
755 @param    plastiter Pointer to the "last iteration" flag
756 @param    plower    Pointer to the lower bound
757 @param    pupper    Pointer to the upper bound
758 @param    pstride   Pointer to the stride
759 @param    incr      Loop increment
760 @param    chunk     The chunk size
761 
Each of the four functions here is identical apart from the argument types.
763 
764 The functions compute the upper and lower bounds and stride to be used for the
765 set of iterations to be executed by the current thread from the statically
766 scheduled loop that is described by the initial values of the bounds, stride,
767 increment and chunk size.
768 
769 @{
770 */
771 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
772                               kmp_int32 *plastiter, kmp_int32 *plower,
773                               kmp_int32 *pupper, kmp_int32 *pstride,
774                               kmp_int32 incr, kmp_int32 chunk) {
775   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
776                                    pupper, pstride, incr, chunk
777 #if OMPT_SUPPORT && OMPT_OPTIONAL
778                                    ,
779                                    OMPT_GET_RETURN_ADDRESS(0)
780 #endif
781                                        );
782 }
783 
784 /*!
785  See @ref __kmpc_for_static_init_4
786  */
787 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
788                                kmp_int32 schedtype, kmp_int32 *plastiter,
789                                kmp_uint32 *plower, kmp_uint32 *pupper,
790                                kmp_int32 *pstride, kmp_int32 incr,
791                                kmp_int32 chunk) {
792   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
793                                     pupper, pstride, incr, chunk
794 #if OMPT_SUPPORT && OMPT_OPTIONAL
795                                     ,
796                                     OMPT_GET_RETURN_ADDRESS(0)
797 #endif
798                                         );
799 }
800 
801 /*!
802  See @ref __kmpc_for_static_init_4
803  */
804 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
805                               kmp_int32 *plastiter, kmp_int64 *plower,
806                               kmp_int64 *pupper, kmp_int64 *pstride,
807                               kmp_int64 incr, kmp_int64 chunk) {
808   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
809                                    pupper, pstride, incr, chunk
810 #if OMPT_SUPPORT && OMPT_OPTIONAL
811                                    ,
812                                    OMPT_GET_RETURN_ADDRESS(0)
813 #endif
814                                        );
815 }
816 
817 /*!
818  See @ref __kmpc_for_static_init_4
819  */
820 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
821                                kmp_int32 schedtype, kmp_int32 *plastiter,
822                                kmp_uint64 *plower, kmp_uint64 *pupper,
823                                kmp_int64 *pstride, kmp_int64 incr,
824                                kmp_int64 chunk) {
825   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
826                                     pupper, pstride, incr, chunk
827 #if OMPT_SUPPORT && OMPT_OPTIONAL
828                                     ,
829                                     OMPT_GET_RETURN_ADDRESS(0)
830 #endif
831                                         );
832 }
833 /*!
834 @}
835 */
836 
837 /*!
838 @ingroup WORK_SHARING
839 @param    loc       Source code location
840 @param    gtid      Global thread id of this thread
841 @param    schedule  Scheduling type for the parallel loop
842 @param    plastiter Pointer to the "last iteration" flag
843 @param    plower    Pointer to the lower bound
844 @param    pupper    Pointer to the upper bound of loop chunk
845 @param    pupperD   Pointer to the upper bound of dist_chunk
846 @param    pstride   Pointer to the stride for parallel loop
847 @param    incr      Loop increment
848 @param    chunk     The chunk size for the parallel loop
849 
850 Each of the four functions here is identical apart from the argument types.
851 
852 The functions compute the upper and lower bounds and strides to be used for the
853 set of iterations to be executed by the current thread from the statically
854 scheduled loop that is described by the initial values of the bounds, strides,
855 increment and chunks for parallel loop and distribute constructs.
856 
857 @{
858 */
859 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
860                                    kmp_int32 schedule, kmp_int32 *plastiter,
861                                    kmp_int32 *plower, kmp_int32 *pupper,
862                                    kmp_int32 *pupperD, kmp_int32 *pstride,
863                                    kmp_int32 incr, kmp_int32 chunk) {
864   __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
865                                         pupper, pupperD, pstride, incr, chunk);
866 }
867 
868 /*!
869  See @ref __kmpc_dist_for_static_init_4
870  */
871 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
872                                     kmp_int32 schedule, kmp_int32 *plastiter,
873                                     kmp_uint32 *plower, kmp_uint32 *pupper,
874                                     kmp_uint32 *pupperD, kmp_int32 *pstride,
875                                     kmp_int32 incr, kmp_int32 chunk) {
876   __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
877                                          pupper, pupperD, pstride, incr, chunk);
878 }
879 
880 /*!
881  See @ref __kmpc_dist_for_static_init_4
882  */
883 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
884                                    kmp_int32 schedule, kmp_int32 *plastiter,
885                                    kmp_int64 *plower, kmp_int64 *pupper,
886                                    kmp_int64 *pupperD, kmp_int64 *pstride,
887                                    kmp_int64 incr, kmp_int64 chunk) {
888   __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
889                                         pupper, pupperD, pstride, incr, chunk);
890 }
891 
892 /*!
893  See @ref __kmpc_dist_for_static_init_4
894  */
895 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
896                                     kmp_int32 schedule, kmp_int32 *plastiter,
897                                     kmp_uint64 *plower, kmp_uint64 *pupper,
898                                     kmp_uint64 *pupperD, kmp_int64 *pstride,
899                                     kmp_int64 incr, kmp_int64 chunk) {
900   __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
901                                          pupper, pupperD, pstride, incr, chunk);
902 }
903 /*!
904 @}
905 */
906 
907 //------------------------------------------------------------------------------
908 // Auxiliary routines for Distribute Parallel Loop construct implementation
909 //    Transfer call to template< type T >
910 //    __kmp_team_static_init( ident_t *loc, int gtid,
911 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
912 
913 /*!
914 @ingroup WORK_SHARING
915 @{
916 @param loc Source location
917 @param gtid Global thread id
918 @param p_last pointer to last iteration flag
919 @param p_lb  pointer to Lower bound
920 @param p_ub  pointer to Upper bound
921 @param p_st  Step (or increment if you prefer)
922 @param incr  Loop increment
923 @param chunk The chunk size to block with
924 
925 The functions compute the upper and lower bounds and stride to be used for the
926 set of iterations to be executed by the current team from the statically
927 scheduled loop that is described by the initial values of the bounds, stride,
928 increment and chunk for the distribute construct as part of composite distribute
929 parallel loop construct. These functions are all identical apart from the types
930 of the arguments.
931 */
932 
933 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
934                                kmp_int32 *p_lb, kmp_int32 *p_ub,
935                                kmp_int32 *p_st, kmp_int32 incr,
936                                kmp_int32 chunk) {
937   KMP_DEBUG_ASSERT(__kmp_init_serial);
938   __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
939                                     chunk);
940 }
941 
942 /*!
943  See @ref __kmpc_team_static_init_4
944  */
945 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
946                                 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
947                                 kmp_int32 *p_st, kmp_int32 incr,
948                                 kmp_int32 chunk) {
949   KMP_DEBUG_ASSERT(__kmp_init_serial);
950   __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
951                                      chunk);
952 }
953 
954 /*!
955  See @ref __kmpc_team_static_init_4
956  */
957 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
958                                kmp_int64 *p_lb, kmp_int64 *p_ub,
959                                kmp_int64 *p_st, kmp_int64 incr,
960                                kmp_int64 chunk) {
961   KMP_DEBUG_ASSERT(__kmp_init_serial);
962   __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
963                                     chunk);
964 }
965 
966 /*!
967  See @ref __kmpc_team_static_init_4
968  */
969 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
970                                 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
971                                 kmp_int64 *p_st, kmp_int64 incr,
972                                 kmp_int64 chunk) {
973   KMP_DEBUG_ASSERT(__kmp_init_serial);
974   __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
975                                      chunk);
976 }
977 /*!
978 @}
979 */
980 
981 } // extern "C"
982