1 /*
2  * kmp_sched.c -- static scheduling -- iteration initialization
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 /*
17  * Static scheduling initialization.
18  *
19  * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
20  *       it may change values between parallel regions.  __kmp_max_nth
21  *       is the largest value __kmp_nth may take, 1 is the smallest.
22  *
23  */
24 
25 #include "kmp.h"
26 #include "kmp_i18n.h"
27 #include "kmp_str.h"
28 #include "kmp_error.h"
29 #include "kmp_stats.h"
30 #include "kmp_itt.h"
31 
32 #if OMPT_SUPPORT
33 #include "ompt-specific.h"
34 #endif
35 
// template for type limits
//
// i_maxmin< T >::mx / ::mn give the maximum / minimum representable value
// of the integer type T.  Only the four specializations below are used
// (the loop index types supported by the runtime); the primary template is
// never instantiated.
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    // Written as -MAX-1: the literal 0x80000000 does not fit in "int", so it
    // would have type "unsigned int", and converting it back to int is
    // implementation-defined.
    static const int mn = -0x7fffffff - 1;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffu;
    static const unsigned int mn = 0x00000000u;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    // Same reasoning as for int: 0x8000000000000000 does not fit in
    // "long long" (even with an LL suffix it would take an unsigned type),
    // so build the value portably as -MAX-1.
    static const long long mn = -0x7fffffffffffffffLL - 1;
};
template<>
struct i_maxmin< unsigned long long > {
    // ULL suffix: the value does not fit in (signed) long long.
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// Definitions of the printf format-specifier strings used by the KD_TRACE
// debug output below, one per supported loop index type.  traits_t itself
// is declared elsewhere (presumably in kmp.h -- it is not visible in this
// file); only these static members are defined here, and only in debug
// builds.
    char const * traits_t< int >::spec = "d";
    char const * traits_t< unsigned int >::spec = "u";
    char const * traits_t< long long >::spec = "lld";
    char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
72 
/*
 * Compute the calling thread's portion of a statically scheduled loop.
 *
 * On entry, *plower / *pupper describe the whole iteration space and incr /
 * chunk come from the compiler-generated call site.  On exit, *plower and
 * *pupper are the bounds of this thread's chunk, *pstride is the distance to
 * the thread's next chunk (meaningful for chunked schedules), and *plastiter
 * is set to nonzero iff this thread executes the loop's last iteration.
 *
 * T is the loop index type (signed or unsigned, 32 or 64 bit); ST/UT are its
 * signed/unsigned counterparts from traits_t.
 */
template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,        // source location (error reporting / ITT metadata)
    kmp_int32                         global_tid, // global thread id
    kmp_int32                         schedtype,  // kmp_sch_static* or kmp_distribute_static* value
    kmp_int32                        *plastiter,  // out: nonzero iff this thread runs the last iteration
    T                                *plower,     // in: loop lower bound; out: thread's lower bound
    T                                *pupper,     // in: loop upper bound; out: thread's upper bound
    typename traits_t< T >::signed_t *pstride,    // out: stride to this thread's next chunk
    typename traits_t< T >::signed_t  incr,       // loop increment; zero is diagnosed (when checking is on)
    typename traits_t< T >::signed_t  chunk       // chunk size for chunked schedules
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /*  this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        // Every dereference of team_info/task_info below is guarded by the
        // same ompt_enabled test.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        // (the %%%s placeholders are filled with the per-type specifiers
        // defined at the top of the file, e.g. "d" or "llu")
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;
        //   The (disabled) line above would let the compiler bypass an
        //   illegal loop (like for(i=1;i<10;i--)), but it caused
        //   shape2F/h_tests_1.f to fail on a zero-trip loop
        //   (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    // Although there are schedule enumerations above kmp_ord_upper which are not schedules for "distribute",
    // the only ones which are useful are dynamic, so cannot be seen here, since this codepath is only executed
    // for static schedules.
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct: schedule among team masters, so
        // use the parent team and this thread's master tid within it
        schedtype += kmp_sch_static - kmp_distribute_static;      // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    // Single-thread team: same fast path as the serialized case -- the one
    // thread gets the whole iteration space.
    if ( nth == 1 ) {
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        // trip_count wrapped to 0 although the bounds differ => the range
        // does not fit in UT
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                // Fewer iterations than threads: threads tid < trip_count get
                // exactly one iteration each, the rest get an empty range
                // (*plower > *pupper).
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    // Balanced: every thread gets trip_count/nth iterations,
                    // the first (trip_count%nth) threads get one extra.
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    // Greedy: ceil(trip_count/nth)-sized blocks; trailing
                    // threads may get a short or empty block.
                    register T big_chunk_inc_count = ( trip_count/nth +
                                                     ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        // Clamp on overflow, set last-iteration flag, then
                        // clip the final block back to the loop's upper bound.
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            // Round-robin chunks of size "chunk"; *pstride is the distance
            // from one of this thread's chunks to its next.
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
#if OMP_45_ENABLED
    case kmp_sch_static_balanced_chunked:
        {
            register T old_upper = *pupper;
            // round up to make sure the chunk is enough to cover all iterations
            register UT span = (trip_count+nth-1) / nth;

            // perform chunk adjustment (round span up to a multiple of chunk;
            // NOTE(review): the mask trick assumes chunk is a power of two --
            // presumably guaranteed by the caller, verify)
            chunk = (span + chunk - 1) & ~(chunk-1);

            span = chunk * incr;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if ( incr > 0 ) {
              if ( *pupper > old_upper ) *pupper = old_upper;
            } else
              if ( *pupper < old_upper ) *pupper = old_upper;

            if( plastiter != NULL )
                *plastiter = ( tid == ((trip_count - 1)/( UT )chunk) );
            break;
        }
#endif
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata (master thread of an outermost active region only,
    // and only in forkjoin-frames mode 3)
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}
395 
/*
 * Compute loop bounds for a "distribute parallel for" construct.
 *
 * First splits the iteration space statically among the teams of a teams
 * construct (the team's portion is returned through *pupperDist), then
 * splits the team's portion among the team's threads according to
 * "schedule" (the thread's portion is returned through *plower / *pupper).
 * *plastiter is set iff this thread executes the loop's last iteration.
 *
 * T is the loop index type; the routine must only be called from within a
 * teams construct (asserted below).
 */
template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,        // source location for error reporting
    kmp_int32                         gtid,       // global thread id
    kmp_int32                         schedule,   // schedule for the inner (parallel for) loop
    kmp_int32                        *plastiter,  // out: nonzero iff this thread runs the last iteration
    T                                *plower,     // in: loop lower bound; out: thread's lower bound
    T                                *pupper,     // in: loop upper bound; out: thread's upper bound
    T                                *pupperDist, // out: upper bound of this team's portion
    typename traits_t< T >::signed_t *pstride,    // out: stride for chunked schedules
    typename traits_t< T >::signed_t  incr,       // loop increment
    typename traits_t< T >::signed_t  chunk       // chunk size for chunked schedules
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  tid;       // thread id within the team
    register kmp_uint32  nth;       // number of threads in the team
    register kmp_uint32  team_id;   // this team's index among the teams
    register kmp_uint32  nteams;    // number of teams
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t * th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                       *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0       - compile-time check
            //   for(i=10;i<0;--i) // incr < 0       - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    // NOTE(review): nteams is only initialized when OMP_40_ENABLED is set,
    // but it is used unconditionally below -- presumably this routine is
    // never compiled/called without OMP_40_ENABLED; verify.
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr; // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            // Balanced: trip_count/nteams per team, first (trip_count%nteams)
            // teams get one extra iteration.
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            // Greedy: ceil(trip_count/nteams)-sized blocks per team.
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else if ( incr > 1 ) {
            // upper-lower can exceed the limit of signed type
            // (incr > 1 here is equivalent to incr > 0 since incr == 1 was
            // handled above)
            trip_count = (UT)(*pupperDist - *plower) / incr + 1;
        } else {
            trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                // Fewer iterations than threads: one iteration for threads
                // tid < trip_count, nothing for the rest.
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr; // no iterations available
                // Only clear the flag here: it may already be FALSE because
                // this team does not own the global last iteration.
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper;//tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper;//tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
638 
/*
 * Compute the first chunk assigned to this thread's team for a
 * dist_schedule(static, chunk) "distribute" loop.
 */
template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,    // source location for error reporting
    kmp_int32                         gtid,   // global thread id
    kmp_int32                        *p_last, // out: nonzero iff this team runs the last iteration
    T                                *p_lb,   // in: loop lower bound; out: team's chunk lower bound
    T                                *p_ub,   // in: loop upper bound; out: team's chunk upper bound
    typename traits_t< T >::signed_t *p_st,   // out: stride between the team's consecutive chunks
    typename traits_t< T >::signed_t  incr,   // loop increment
    typename traits_t< T >::signed_t  chunk   // requested chunk size (values < 1 are bumped to 1)
) {
    // The routine returns the first chunk distributed to the team and
    // stride for next chunks calculation.
    // Last iteration flag set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0       - compile-time check
            //   for(i=10;i<0;--i) // incr < 0       - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    // NOTE(review): nteams is only initialized when OMP_40_ENABLED is set,
    // but it is used unconditionally below -- presumably this routine is
    // never compiled/called without OMP_40_ENABLED; verify.
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(upper - lower) / incr + 1;
    } else {
        trip_count = (UT)(lower - upper) / (-incr) + 1;
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    // Teams take chunks round-robin: team's first chunk, then every
    // nteams-th chunk after it.
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {   // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}
754 
755 //--------------------------------------------------------------------------------------
756 extern "C" {
757 
758 /*!
759 @ingroup WORK_SHARING
760 @param    loc       Source code location
761 @param    gtid      Global thread id of this thread
762 @param    schedtype  Scheduling type
763 @param    plastiter Pointer to the "last iteration" flag
764 @param    plower    Pointer to the lower bound
765 @param    pupper    Pointer to the upper bound
766 @param    pstride   Pointer to the stride
767 @param    incr      Loop increment
768 @param    chunk     The chunk size
769 
Each of the four functions here is identical apart from the argument types.
771 
772 The functions compute the upper and lower bounds and stride to be used for the set of iterations
773 to be executed by the current thread from the statically scheduled loop that is described by the
774 initial values of the bounds, stride, increment and chunk size.
775 
776 @{
777 */
778 void
779 __kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
780                       kmp_int32 *plower, kmp_int32 *pupper,
781                       kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
782 {
783     __kmp_for_static_init< kmp_int32 >(
784                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
785 }
786 
787 /*!
788  See @ref __kmpc_for_static_init_4
789  */
790 void
791 __kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
792                       kmp_uint32 *plower, kmp_uint32 *pupper,
793                       kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
794 {
795     __kmp_for_static_init< kmp_uint32 >(
796                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
797 }
798 
799 /*!
800  See @ref __kmpc_for_static_init_4
801  */
802 void
803 __kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
804                       kmp_int64 *plower, kmp_int64 *pupper,
805                       kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
806 {
807     __kmp_for_static_init< kmp_int64 >(
808                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
809 }
810 
811 /*!
812  See @ref __kmpc_for_static_init_4
813  */
814 void
815 __kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
816                       kmp_uint64 *plower, kmp_uint64 *pupper,
817                       kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
818 {
819     __kmp_for_static_init< kmp_uint64 >(
820                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
821 }
822 /*!
823 @}
824 */
825 
826 /*!
827 @ingroup WORK_SHARING
828 @param    loc       Source code location
829 @param    gtid      Global thread id of this thread
830 @param    schedule  Scheduling type for the parallel loop
831 @param    plastiter Pointer to the "last iteration" flag
832 @param    plower    Pointer to the lower bound
833 @param    pupper    Pointer to the upper bound of loop chunk
834 @param    pupperD   Pointer to the upper bound of dist_chunk
835 @param    pstride   Pointer to the stride for parallel loop
836 @param    incr      Loop increment
837 @param    chunk     The chunk size for the parallel loop
838 
Each of the four functions here is identical apart from the argument types.
840 
841 The functions compute the upper and lower bounds and strides to be used for the set of iterations
842 to be executed by the current thread from the statically scheduled loop that is described by the
843 initial values of the bounds, strides, increment and chunks for parallel loop and distribute
844 constructs.
845 
846 @{
847 */
848 void
849 __kmpc_dist_for_static_init_4(
850     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
851     kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
852     kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
853 {
854     __kmp_dist_for_static_init< kmp_int32 >(
855         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
856 }
857 
858 /*!
859  See @ref __kmpc_dist_for_static_init_4
860  */
861 void
862 __kmpc_dist_for_static_init_4u(
863     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
864     kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
865     kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
866 {
867     __kmp_dist_for_static_init< kmp_uint32 >(
868         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
869 }
870 
871 /*!
872  See @ref __kmpc_dist_for_static_init_4
873  */
874 void
875 __kmpc_dist_for_static_init_8(
876     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
877     kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
878     kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
879 {
880     __kmp_dist_for_static_init< kmp_int64 >(
881         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
882 }
883 
884 /*!
885  See @ref __kmpc_dist_for_static_init_4
886  */
887 void
888 __kmpc_dist_for_static_init_8u(
889     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
890     kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
891     kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
892 {
893     __kmp_dist_for_static_init< kmp_uint64 >(
894         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
895 }
896 /*!
897 @}
898 */
899 
900 //-----------------------------------------------------------------------------------------
901 // Auxiliary routines for Distribute Parallel Loop construct implementation
902 //    Transfer call to template< type T >
903 //    __kmp_team_static_init( ident_t *loc, int gtid,
904 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
905 
906 /*!
907 @ingroup WORK_SHARING
908 @{
909 @param loc Source location
910 @param gtid Global thread id
911 @param p_last pointer to last iteration flag
912 @param p_lb  pointer to Lower bound
913 @param p_ub  pointer to Upper bound
914 @param p_st  Step (or increment if you prefer)
915 @param incr  Loop increment
916 @param chunk The chunk size to block with
917 
918 The functions compute the upper and lower bounds and stride to be used for the set of iterations
919 to be executed by the current team from the statically scheduled loop that is described by the
920 initial values of the bounds, stride, increment and chunk for the distribute construct as part of
921 composite distribute parallel loop construct.
922 These functions are all identical apart from the types of the arguments.
923 */
924 
925 void
926 __kmpc_team_static_init_4(
927     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
928     kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
929 {
930     KMP_DEBUG_ASSERT( __kmp_init_serial );
931     __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
932 }
933 
934 /*!
935  See @ref __kmpc_team_static_init_4
936  */
937 void
938 __kmpc_team_static_init_4u(
939     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
940     kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
941 {
942     KMP_DEBUG_ASSERT( __kmp_init_serial );
943     __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
944 }
945 
946 /*!
947  See @ref __kmpc_team_static_init_4
948  */
949 void
950 __kmpc_team_static_init_8(
951     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
952     kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
953 {
954     KMP_DEBUG_ASSERT( __kmp_init_serial );
955     __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
956 }
957 
958 /*!
959  See @ref __kmpc_team_static_init_4
960  */
961 void
962 __kmpc_team_static_init_8u(
963     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
964     kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
965 {
966     KMP_DEBUG_ASSERT( __kmp_init_serial );
967     __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
968 }
969 /*!
970 @}
971 */
972 
973 } // extern "C"
974 
975