1 /*
2  * kmp_sched.c -- static scheduling -- iteration initialization
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 /*
17  * Static scheduling initialization.
18  *
19  * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
20  *       it may change values between parallel regions.  __kmp_max_nth
21  *       is the largest value __kmp_nth may take, 1 is the smallest.
22  *
23  */
24 
25 #include "kmp.h"
26 #include "kmp_i18n.h"
27 #include "kmp_str.h"
28 #include "kmp_error.h"
29 #include "kmp_stats.h"
30 #include "kmp_itt.h"
31 
32 #if OMPT_SUPPORT
33 #include "ompt-specific.h"
34 #endif
35 
// Template providing the numeric limits (max/min) of each supported loop
// index type.  The specializations below supply the extreme values used to
// clamp computed chunk bounds when the bound arithmetic overflows T.
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    // Written as an expression: the literal 0x80000000 has unsigned type and
    // its conversion to int is implementation-defined.
    static const int mn = -0x7fffffff - 1;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffu;
    static const unsigned int mn = 0x00000000u;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    // The positive literal 0x8000000000000000 does not fit in long long, so
    // build LLONG_MIN arithmetically instead of converting an unsigned value.
    static const long long mn = -0x7fffffffffffffffLL - 1;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
62 //-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// Definitions of the printf format specifiers used by the KD_TRACE debug
// output in this file, one per supported loop index type ( d, u, lld, llu ).
// The traits_t class template itself is declared elsewhere -- presumably in
// kmp.h; confirm.
    char const * traits_t< int >::spec = "d";
    char const * traits_t< unsigned int >::spec = "u";
    char const * traits_t< long long >::spec = "lld";
    char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
72 
// Compute the iteration subrange of a statically scheduled worksharing loop
// (schedule(static) or schedule(static,chunk)) that thread `global_tid` must
// execute.
//
// On entry *plower/*pupper describe the whole iteration space and incr is the
// loop increment (0 is rejected by the consistency check).  On return:
//   *plower / *pupper -- inclusive bounds of this thread's portion;
//   *pstride          -- stride between successive chunks (used by the
//                        compiler-generated loop for kmp_sch_static_chunked);
//   *plastiter        -- nonzero iff this thread executes the sequentially
//                        last iteration.
// T is the loop index type; the extern "C" wrappers below instantiate all
// signed/unsigned 32/64-bit combinations.
template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_BLOCK (FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /*  this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // Historical note: *plower was once set here to *pupper - incr so the
        // compiler could bypass illegal loops such as for(i=1;i<10;i--), but
        // that caused shape2F/h_tests_1.f to fail on a zero-trip loop
        // (lower=1, upper=0, stride=1) -- JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct: schedule among the team masters of
        // the parent (league) team rather than within this team
        schedtype += kmp_sch_static - kmp_distribute_static;      // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        // Single-thread team: identical handling to the serialized case above
        // (the debug message below intentionally still says "(serial)").
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        if ( incr > 1 ) {  // the check is needed for unsigned division when incr < 0
            trip_count = (*pupper - *plower) / incr + 1;
        } else {
            trip_count = (*plower - *pupper) / ( -incr ) + 1;
        }
    }

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? (trip_count wrapped to 0 on a non-empty range) */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            // Block scheduling: each thread gets one contiguous chunk.
            if ( trip_count < nth ) {
                // Fewer iterations than threads: first trip_count threads get
                // one iteration each; the rest get an empty (lower > upper)
                // range.
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    // Balanced: chunk sizes differ by at most one iteration;
                    // the first `extras` threads get the larger chunk.
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    // Greedy: every thread gets ceil(trip_count/nth)
                    // iterations; trailing threads may get a clipped or empty
                    // range.
                    register T big_chunk_inc_count = ( trip_count/nth +
                                                     ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        // Saturate on overflow, then clip to the original
                        // upper bound.
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            // Round-robin chunks of size `chunk`; *pstride advances a thread
            // from one of its chunks to the next (one full round over nth
            // threads).
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}
370 
// Compute loop bounds for a combined "distribute parallel for" construct:
// first split the iteration space among the teams of the league (the team's
// portion is returned through *pupperDist), then split the team's portion
// among the threads of this team according to `schedule` (returned through
// *plower/*pupper, with *pstride for chunked schedules).  *plastiter is
// cleared unless this thread runs the sequentially last iteration of both
// distributions.
template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t * th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                       *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0       - compile-time check
            //   for(i=10;i<0;--i) // incr < 0       - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    // NOTE(review): if OMP_40_ENABLED is 0, nteams is read below without being
    // initialized -- presumably this routine is only compiled/reached with
    // OMP 4.0 support enabled; confirm.
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr; // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            // Balanced: team chunk sizes differ by at most one iteration.
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            // Greedy: each team gets ceil(trip_count/nteams) iterations;
            // trailing teams may get a clipped or empty range.
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed (saturate on overflow, then clip)
            if( incr > 0 ) {
                if( *pupperDist < *plower )
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else {
            trip_count = (ST)(*pupperDist - *plower) / incr + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            // Block scheduling of the team's chunk among this team's threads
            // (same algorithm as in __kmp_for_static_init).
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr; // no iterations available
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper;//tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper;//tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            // Round-robin chunks of size `chunk` within the team's portion.
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
607 
// Compute the first chunk assigned to this team for a
// dist_schedule(static,chunk) "distribute" construct, plus the stride used to
// advance to the team's subsequent chunks.
template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and
    // stride for next chunks calculation.
    // Last iteration flag set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0       - compile-time check
            //   for(i=10;i<0;--i) // incr < 0       - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    // NOTE(review): if OMP_40_ENABLED is 0, nteams is read below without being
    // initialized -- presumably this routine is only compiled/reached with
    // OMP 4.0 support enabled; confirm.
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else {
        trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed (saturate on overflow, then clip to the
    // loop's original upper bound)
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {   // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}
720 
721 //--------------------------------------------------------------------------------------
722 extern "C" {
723 
724 /*!
725 @ingroup WORK_SHARING
726 @param    loc       Source code location
727 @param    gtid      Global thread id of this thread
728 @param    schedtype  Scheduling type
729 @param    plastiter Pointer to the "last iteration" flag
730 @param    plower    Pointer to the lower bound
731 @param    pupper    Pointer to the upper bound
732 @param    pstride   Pointer to the stride
733 @param    incr      Loop increment
734 @param    chunk     The chunk size
735 
Each of the four functions here is identical apart from the argument types.
737 
738 The functions compute the upper and lower bounds and stride to be used for the set of iterations
739 to be executed by the current thread from the statically scheduled loop that is described by the
740 initial values of the bounds, stride, increment and chunk size.
741 
742 @{
743 */
744 void
745 __kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
746                       kmp_int32 *plower, kmp_int32 *pupper,
747                       kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
748 {
749     __kmp_for_static_init< kmp_int32 >(
750                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
751 }
752 
753 /*!
754  See @ref __kmpc_for_static_init_4
755  */
756 void
757 __kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
758                       kmp_uint32 *plower, kmp_uint32 *pupper,
759                       kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
760 {
761     __kmp_for_static_init< kmp_uint32 >(
762                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
763 }
764 
765 /*!
766  See @ref __kmpc_for_static_init_4
767  */
768 void
769 __kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
770                       kmp_int64 *plower, kmp_int64 *pupper,
771                       kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
772 {
773     __kmp_for_static_init< kmp_int64 >(
774                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
775 }
776 
777 /*!
778  See @ref __kmpc_for_static_init_4
779  */
780 void
781 __kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
782                       kmp_uint64 *plower, kmp_uint64 *pupper,
783                       kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
784 {
785     __kmp_for_static_init< kmp_uint64 >(
786                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
787 }
788 /*!
789 @}
790 */
791 
792 /*!
793 @ingroup WORK_SHARING
794 @param    loc       Source code location
795 @param    gtid      Global thread id of this thread
796 @param    schedule  Scheduling type for the parallel loop
797 @param    plastiter Pointer to the "last iteration" flag
798 @param    plower    Pointer to the lower bound
799 @param    pupper    Pointer to the upper bound of loop chunk
800 @param    pupperD   Pointer to the upper bound of dist_chunk
801 @param    pstride   Pointer to the stride for parallel loop
802 @param    incr      Loop increment
803 @param    chunk     The chunk size for the parallel loop
804 
Each of the four functions here is identical apart from the argument types.
806 
807 The functions compute the upper and lower bounds and strides to be used for the set of iterations
808 to be executed by the current thread from the statically scheduled loop that is described by the
809 initial values of the bounds, strides, increment and chunks for parallel loop and distribute
810 constructs.
811 
812 @{
813 */
814 void
815 __kmpc_dist_for_static_init_4(
816     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
817     kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
818     kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
819 {
820     __kmp_dist_for_static_init< kmp_int32 >(
821         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
822 }
823 
824 /*!
825  See @ref __kmpc_dist_for_static_init_4
826  */
827 void
828 __kmpc_dist_for_static_init_4u(
829     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
830     kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
831     kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
832 {
833     __kmp_dist_for_static_init< kmp_uint32 >(
834         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
835 }
836 
837 /*!
838  See @ref __kmpc_dist_for_static_init_4
839  */
840 void
841 __kmpc_dist_for_static_init_8(
842     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
843     kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
844     kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
845 {
846     __kmp_dist_for_static_init< kmp_int64 >(
847         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
848 }
849 
850 /*!
851  See @ref __kmpc_dist_for_static_init_4
852  */
853 void
854 __kmpc_dist_for_static_init_8u(
855     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
856     kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
857     kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
858 {
859     __kmp_dist_for_static_init< kmp_uint64 >(
860         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
861 }
862 /*!
863 @}
864 */
865 
866 //-----------------------------------------------------------------------------------------
867 // Auxiliary routines for Distribute Parallel Loop construct implementation
868 //    Transfer call to template< type T >
869 //    __kmp_team_static_init( ident_t *loc, int gtid,
870 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
871 
872 /*!
873 @ingroup WORK_SHARING
874 @{
875 @param loc Source location
876 @param gtid Global thread id
877 @param p_last pointer to last iteration flag
878 @param p_lb  pointer to Lower bound
879 @param p_ub  pointer to Upper bound
880 @param p_st  Step (or increment if you prefer)
881 @param incr  Loop increment
882 @param chunk The chunk size to block with
883 
884 The functions compute the upper and lower bounds and stride to be used for the set of iterations
885 to be executed by the current team from the statically scheduled loop that is described by the
886 initial values of the bounds, stride, increment and chunk for the distribute construct as part of
887 composite distribute parallel loop construct.
888 These functions are all identical apart from the types of the arguments.
889 */
890 
891 void
892 __kmpc_team_static_init_4(
893     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
894     kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
895 {
896     KMP_DEBUG_ASSERT( __kmp_init_serial );
897     __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
898 }
899 
900 /*!
901  See @ref __kmpc_team_static_init_4
902  */
903 void
904 __kmpc_team_static_init_4u(
905     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
906     kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
907 {
908     KMP_DEBUG_ASSERT( __kmp_init_serial );
909     __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
910 }
911 
912 /*!
913  See @ref __kmpc_team_static_init_4
914  */
915 void
916 __kmpc_team_static_init_8(
917     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
918     kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
919 {
920     KMP_DEBUG_ASSERT( __kmp_init_serial );
921     __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
922 }
923 
924 /*!
925  See @ref __kmpc_team_static_init_4
926  */
927 void
928 __kmpc_team_static_init_8u(
929     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
930     kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
931 {
932     KMP_DEBUG_ASSERT( __kmp_init_serial );
933     __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
934 }
935 /*!
936 @}
937 */
938 
939 } // extern "C"
940 
941