1 /*
2  * kmp_sched.c -- static scheduling -- iteration initialization
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 /*
17  * Static scheduling initialization.
18  *
19  * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
20  *       it may change values between parallel regions.  __kmp_max_nth
21  *       is the largest value __kmp_nth may take, 1 is the smallest.
22  *
23  */
24 
25 #include "kmp.h"
26 #include "kmp_i18n.h"
27 #include "kmp_str.h"
28 #include "kmp_error.h"
29 #include "kmp_stats.h"
30 #include "kmp_itt.h"
31 
32 #if OMPT_SUPPORT
33 #include "ompt-specific.h"
34 #endif
35 
// Type limits: i_maxmin< T >::mx is the largest and i_maxmin< T >::mn the
// smallest value of loop index type T.  The primary template only declares the
// members; values exist for the four explicit specializations below.
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    // Spelled as -max - 1: with a 32-bit int the literal 0x80000000 has an
    // unsigned type and does not fit in int.
    static const int mn = -0x7fffffff - 1;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffu;
    static const unsigned int mn = 0x00000000u;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    // Same reasoning as for int: 0x8000000000000000 is not a long long value.
    static const long long mn = -0x7fffffffffffffffLL - 1;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
62 //-------------------------------------------------------------------------
#ifdef KMP_DEBUG
// Conversion specifiers ( d, u, lld, llu ) for each supported loop index type.
// The debug traces in this file splice them into their format strings via
// __kmp_str_format before printing values of type T.
const char * traits_t< int >::spec                = "d";
const char * traits_t< unsigned int >::spec       = "u";
const char * traits_t< long long >::spec          = "lld";
const char * traits_t< unsigned long long >::spec = "llu";
#endif
72 
73 template< typename T >
74 static void
75 __kmp_for_static_init(
76     ident_t                          *loc,
77     kmp_int32                         global_tid,
78     kmp_int32                         schedtype,
79     kmp_int32                        *plastiter,
80     T                                *plower,
81     T                                *pupper,
82     typename traits_t< T >::signed_t *pstride,
83     typename traits_t< T >::signed_t  incr,
84     typename traits_t< T >::signed_t  chunk
85 ) {
86     KMP_COUNT_BLOCK(OMP_FOR_static);
87     typedef typename traits_t< T >::unsigned_t  UT;
88     typedef typename traits_t< T >::signed_t    ST;
89     /*  this all has to be changed back to TID and such.. */
90     register kmp_int32   gtid = global_tid;
91     register kmp_uint32  tid;
92     register kmp_uint32  nth;
93     register UT          trip_count;
94     register kmp_team_t *team;
95 
96 #if OMPT_SUPPORT && OMPT_TRACE
97     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
98     ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
99 #endif
100 
101     KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
102     KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
103     #ifdef KMP_DEBUG
104     {
105         const char * buff;
106         // create format specifiers before the debug output
107         buff = __kmp_str_format(
108             "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
109             " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
110             traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
111             traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
112         KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
113             *plower, *pupper, *pstride, incr, chunk ) );
114         __kmp_str_free( &buff );
115     }
116     #endif
117 
118     if ( __kmp_env_consistency_check ) {
119         __kmp_push_workshare( global_tid, ct_pdo, loc );
120         if ( incr == 0 ) {
121             __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
122         }
123     }
124     /* special handling for zero-trip loops */
125     if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
126         if( plastiter != NULL )
127             *plastiter = FALSE;
128         /* leave pupper and plower set to entire iteration space */
129         *pstride = incr;   /* value should never be used */
130 	//        *plower = *pupper - incr;   // let compiler bypass the illegal loop (like for(i=1;i<10;i--))  THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\
131 	  upper=0,stride=1) - JPH June 23, 2009.
132         #ifdef KMP_DEBUG
133         {
134             const char * buff;
135             // create format specifiers before the debug output
136             buff = __kmp_str_format(
137                 "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
138                 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
139             KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
140             __kmp_str_free( &buff );
141         }
142         #endif
143         KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
144 
145 #if OMPT_SUPPORT && OMPT_TRACE
146         if ((ompt_status == ompt_status_track_callback) &&
147             ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
148             ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
149                 team_info->parallel_id, task_info->task_id,
150                 team_info->microtask);
151         }
152 #endif
153         return;
154     }
155 
156     #if OMP_40_ENABLED
157     if ( schedtype > kmp_ord_upper ) {
158         // we are in DISTRIBUTE construct
159         schedtype += kmp_sch_static - kmp_distribute_static;      // AC: convert to usual schedule type
160         tid  = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid;
161         team = __kmp_threads[ gtid ]->th.th_team->t.t_parent;
162     } else
163     #endif
164     {
165         tid  = __kmp_tid_from_gtid( global_tid );
166         team = __kmp_threads[ gtid ]->th.th_team;
167     }
168 
169     /* determine if "for" loop is an active worksharing construct */
170     if ( team -> t.t_serialized ) {
171         /* serialized parallel, each thread executes whole iteration space */
172         if( plastiter != NULL )
173             *plastiter = TRUE;
174         /* leave pupper and plower set to entire iteration space */
175         *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
176 
177         #ifdef KMP_DEBUG
178         {
179             const char * buff;
180             // create format specifiers before the debug output
181             buff = __kmp_str_format(
182                 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
183                 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
184             KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
185             __kmp_str_free( &buff );
186         }
187         #endif
188         KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
189 
190 #if OMPT_SUPPORT && OMPT_TRACE
191         if ((ompt_status == ompt_status_track_callback) &&
192             ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
193             ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
194                 team_info->parallel_id, task_info->task_id,
195                 team_info->microtask);
196         }
197 #endif
198         return;
199     }
200     nth = team->t.t_nproc;
201     if ( nth == 1 ) {
202         if( plastiter != NULL )
203             *plastiter = TRUE;
204         *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
205         #ifdef KMP_DEBUG
206         {
207             const char * buff;
208             // create format specifiers before the debug output
209             buff = __kmp_str_format(
210                 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
211                 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
212             KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
213             __kmp_str_free( &buff );
214         }
215         #endif
216         KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
217 
218 #if OMPT_SUPPORT && OMPT_TRACE
219         if ((ompt_status == ompt_status_track_callback) &&
220             ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
221             ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
222                 team_info->parallel_id, task_info->task_id,
223                 team_info->microtask);
224         }
225 #endif
226         return;
227     }
228 
229     /* compute trip count */
230     if ( incr == 1 ) {
231         trip_count = *pupper - *plower + 1;
232     } else if (incr == -1) {
233         trip_count = *plower - *pupper + 1;
234     } else {
235         if ( incr > 1 ) {  // the check is needed for unsigned division when incr < 0
236             trip_count = (*pupper - *plower) / incr + 1;
237         } else {
238             trip_count = (*plower - *pupper) / ( -incr ) + 1;
239         }
240     }
241 
242     if ( __kmp_env_consistency_check ) {
243         /* tripcount overflow? */
244         if ( trip_count == 0 && *pupper != *plower ) {
245             __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
246         }
247     }
248 
249     /* compute remaining parameters */
250     switch ( schedtype ) {
251     case kmp_sch_static:
252         {
253             if ( trip_count < nth ) {
254                 KMP_DEBUG_ASSERT(
255                     __kmp_static == kmp_sch_static_greedy || \
256                     __kmp_static == kmp_sch_static_balanced
257                 ); // Unknown static scheduling type.
258                 if ( tid < trip_count ) {
259                     *pupper = *plower = *plower + tid * incr;
260                 } else {
261                     *plower = *pupper + incr;
262                 }
263                 if( plastiter != NULL )
264                     *plastiter = ( tid == trip_count - 1 );
265             } else {
266                 if ( __kmp_static == kmp_sch_static_balanced ) {
267                     register UT small_chunk = trip_count / nth;
268                     register UT extras = trip_count % nth;
269                     *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
270                     *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
271                     if( plastiter != NULL )
272                         *plastiter = ( tid == nth - 1 );
273                 } else {
274                     register T big_chunk_inc_count = ( trip_count/nth +
275                                                      ( ( trip_count % nth ) ? 1 : 0) ) * incr;
276                     register T old_upper = *pupper;
277 
278                     KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
279                         // Unknown static scheduling type.
280 
281                     *plower += tid * big_chunk_inc_count;
282                     *pupper = *plower + big_chunk_inc_count - incr;
283                     if ( incr > 0 ) {
284                         if( *pupper < *plower )
285                             *pupper = i_maxmin< T >::mx;
286                         if( plastiter != NULL )
287                             *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
288                         if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
289                     } else {
290                         if( *pupper > *plower )
291                             *pupper = i_maxmin< T >::mn;
292                         if( plastiter != NULL )
293                             *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
294                         if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
295                     }
296                 }
297             }
298             break;
299         }
300     case kmp_sch_static_chunked:
301         {
302             register ST span;
303             if ( chunk < 1 ) {
304                 chunk = 1;
305             }
306             span = chunk * incr;
307             *pstride = span * nth;
308             *plower = *plower + (span * tid);
309             *pupper = *plower + span - incr;
310             if( plastiter != NULL )
311                 *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
312             break;
313         }
314     default:
315         KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
316         break;
317     }
318 
319 #if USE_ITT_BUILD
320     // Report loop metadata
321     if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
322         kmp_uint64 cur_chunk = chunk;
323         // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
324         if ( schedtype == kmp_sch_static ) {
325             cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
326         }
327         // 0 - "static" schedule
328         __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
329     }
330 #endif
331     #ifdef KMP_DEBUG
332     {
333         const char * buff;
334         // create format specifiers before the debug output
335         buff = __kmp_str_format(
336             "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
337             traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
338         KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
339         __kmp_str_free( &buff );
340     }
341     #endif
342     KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
343 
344 #if OMPT_SUPPORT && OMPT_TRACE
345     if ((ompt_status == ompt_status_track_callback) &&
346         ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
347         ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
348             team_info->parallel_id, task_info->task_id, team_info->microtask);
349     }
350 #endif
351 
352     return;
353 }
354 
355 template< typename T >
356 static void
357 __kmp_dist_for_static_init(
358     ident_t                          *loc,
359     kmp_int32                         gtid,
360     kmp_int32                         schedule,
361     kmp_int32                        *plastiter,
362     T                                *plower,
363     T                                *pupper,
364     T                                *pupperDist,
365     typename traits_t< T >::signed_t *pstride,
366     typename traits_t< T >::signed_t  incr,
367     typename traits_t< T >::signed_t  chunk
368 ) {
369     KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
370     typedef typename traits_t< T >::unsigned_t  UT;
371     typedef typename traits_t< T >::signed_t    ST;
372     register kmp_uint32  tid;
373     register kmp_uint32  nth;
374     register kmp_uint32  team_id;
375     register kmp_uint32  nteams;
376     register UT          trip_count;
377     register kmp_team_t *team;
378     kmp_info_t * th;
379 
380     KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
381     KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
382     #ifdef KMP_DEBUG
383     {
384         const char * buff;
385         // create format specifiers before the debug output
386         buff = __kmp_str_format(
387             "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
388             "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
389             traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
390             traits_t< ST >::spec, traits_t< T >::spec );
391         KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
392                        *plower, *pupper, incr, chunk ) );
393         __kmp_str_free( &buff );
394     }
395     #endif
396 
397     if( __kmp_env_consistency_check ) {
398         __kmp_push_workshare( gtid, ct_pdo, loc );
399         if( incr == 0 ) {
400             __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
401         }
402         if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
403             // The loop is illegal.
404             // Some zero-trip loops maintained by compiler, e.g.:
405             //   for(i=10;i<0;++i) // lower >= upper - run-time check
406             //   for(i=0;i>10;--i) // lower <= upper - run-time check
407             //   for(i=0;i>10;++i) // incr > 0       - compile-time check
408             //   for(i=10;i<0;--i) // incr < 0       - compile-time check
409             // Compiler does not check the following illegal loops:
410             //   for(i=0;i<10;i+=incr) // where incr<0
411             //   for(i=10;i>0;i-=incr) // where incr<0
412             __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
413         }
414     }
415     tid = __kmp_tid_from_gtid( gtid );
416     th = __kmp_threads[gtid];
417     KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
418     nth = th->th.th_team_nproc;
419     team = th->th.th_team;
420     #if OMP_40_ENABLED
421     nteams = th->th.th_teams_size.nteams;
422     #endif
423     team_id = team->t.t_master_tid;
424     KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
425 
426     // compute global trip count
427     if( incr == 1 ) {
428         trip_count = *pupper - *plower + 1;
429     } else if(incr == -1) {
430         trip_count = *plower - *pupper + 1;
431     } else {
432         trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
433     }
434     *pstride = *pupper - *plower;  // just in case (can be unused)
435     if( trip_count <= nteams ) {
436         KMP_DEBUG_ASSERT(
437             __kmp_static == kmp_sch_static_greedy || \
438             __kmp_static == kmp_sch_static_balanced
439         ); // Unknown static scheduling type.
440         // only masters of some teams get single iteration, other threads get nothing
441         if( team_id < trip_count && tid == 0 ) {
442             *pupper = *pupperDist = *plower = *plower + team_id * incr;
443         } else {
444             *pupperDist = *pupper;
445             *plower = *pupper + incr; // compiler should skip loop body
446         }
447         if( plastiter != NULL )
448             *plastiter = ( tid == 0 && team_id == trip_count - 1 );
449     } else {
450         // Get the team's chunk first (each team gets at most one chunk)
451         if( __kmp_static == kmp_sch_static_balanced ) {
452             register UT chunkD = trip_count / nteams;
453             register UT extras = trip_count % nteams;
454             *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
455             *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
456             if( plastiter != NULL )
457                 *plastiter = ( team_id == nteams - 1 );
458         } else {
459             register T chunk_inc_count =
460                 ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
461             register T upper = *pupper;
462             KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
463                 // Unknown static scheduling type.
464             *plower += team_id * chunk_inc_count;
465             *pupperDist = *plower + chunk_inc_count - incr;
466             // Check/correct bounds if needed
467             if( incr > 0 ) {
468                 if( *pupperDist < *plower )
469                     *pupperDist = i_maxmin< T >::mx;
470                 if( plastiter != NULL )
471                     *plastiter = *plower <= upper && *pupperDist > upper - incr;
472                 if( *pupperDist > upper )
473                     *pupperDist = upper; // tracker C73258
474                 if( *plower > *pupperDist ) {
475                     *pupper = *pupperDist;  // no iterations available for the team
476                     goto end;
477                 }
478             } else {
479                 if( *pupperDist > *plower )
480                     *pupperDist = i_maxmin< T >::mn;
481                 if( plastiter != NULL )
482                     *plastiter = *plower >= upper && *pupperDist < upper - incr;
483                 if( *pupperDist < upper )
484                     *pupperDist = upper; // tracker C73258
485                 if( *plower < *pupperDist ) {
486                     *pupper = *pupperDist;  // no iterations available for the team
487                     goto end;
488                 }
489             }
490         }
491         // Get the parallel loop chunk now (for thread)
492         // compute trip count for team's chunk
493         if( incr == 1 ) {
494             trip_count = *pupperDist - *plower + 1;
495         } else if(incr == -1) {
496             trip_count = *plower - *pupperDist + 1;
497         } else {
498             trip_count = (ST)(*pupperDist - *plower) / incr + 1;
499         }
500         KMP_DEBUG_ASSERT( trip_count );
501         switch( schedule ) {
502         case kmp_sch_static:
503         {
504             if( trip_count <= nth ) {
505                 KMP_DEBUG_ASSERT(
506                     __kmp_static == kmp_sch_static_greedy || \
507                     __kmp_static == kmp_sch_static_balanced
508                 ); // Unknown static scheduling type.
509                 if( tid < trip_count )
510                     *pupper = *plower = *plower + tid * incr;
511                 else
512                     *plower = *pupper + incr; // no iterations available
513                 if( plastiter != NULL )
514                     if( *plastiter != 0 && !( tid == trip_count - 1 ) )
515                         *plastiter = 0;
516             } else {
517                 if( __kmp_static == kmp_sch_static_balanced ) {
518                     register UT chunkL = trip_count / nth;
519                     register UT extras = trip_count % nth;
520                     *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
521                     *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
522                     if( plastiter != NULL )
523                         if( *plastiter != 0 && !( tid == nth - 1 ) )
524                             *plastiter = 0;
525                 } else {
526                     register T chunk_inc_count =
527                         ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
528                     register T upper = *pupperDist;
529                     KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
530                         // Unknown static scheduling type.
531                     *plower += tid * chunk_inc_count;
532                     *pupper = *plower + chunk_inc_count - incr;
533                     if( incr > 0 ) {
534                         if( *pupper < *plower )
535                             *pupper = i_maxmin< T >::mx;
536                         if( plastiter != NULL )
537                             if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
538                                 *plastiter = 0;
539                         if( *pupper > upper )
540                             *pupper = upper;//tracker C73258
541                     } else {
542                         if( *pupper > *plower )
543                             *pupper = i_maxmin< T >::mn;
544                         if( plastiter != NULL )
545                             if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
546                                 *plastiter = 0;
547                         if( *pupper < upper )
548                             *pupper = upper;//tracker C73258
549                     }
550                 }
551             }
552             break;
553         }
554         case kmp_sch_static_chunked:
555         {
556             register ST span;
557             if( chunk < 1 )
558                 chunk = 1;
559             span = chunk * incr;
560             *pstride = span * nth;
561             *plower = *plower + (span * tid);
562             *pupper = *plower + span - incr;
563             if( plastiter != NULL )
564                 if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
565                     *plastiter = 0;
566             break;
567         }
568         default:
569             KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
570             break;
571         }
572     }
573     end:;
574     #ifdef KMP_DEBUG
575     {
576         const char * buff;
577         // create format specifiers before the debug output
578         buff = __kmp_str_format(
579             "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
580             "stride=%%%s signed?<%s>\n",
581             traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
582             traits_t< ST >::spec, traits_t< T >::spec );
583         KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
584         __kmp_str_free( &buff );
585     }
586     #endif
587     KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
588     return;
589 }
590 
591 template< typename T >
592 static void
593 __kmp_team_static_init(
594     ident_t                          *loc,
595     kmp_int32                         gtid,
596     kmp_int32                        *p_last,
597     T                                *p_lb,
598     T                                *p_ub,
599     typename traits_t< T >::signed_t *p_st,
600     typename traits_t< T >::signed_t  incr,
601     typename traits_t< T >::signed_t  chunk
602 ) {
603     // The routine returns the first chunk distributed to the team and
604     // stride for next chunks calculation.
605     // Last iteration flag set for the team that will execute
606     // the last iteration of the loop.
607     // The routine is called for dist_schedue(static,chunk) only.
608     typedef typename traits_t< T >::unsigned_t  UT;
609     typedef typename traits_t< T >::signed_t    ST;
610     kmp_uint32  team_id;
611     kmp_uint32  nteams;
612     UT          trip_count;
613     T           lower;
614     T           upper;
615     ST          span;
616     kmp_team_t *team;
617     kmp_info_t *th;
618 
619     KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
620     KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
621     #ifdef KMP_DEBUG
622     {
623         const char * buff;
624         // create format specifiers before the debug output
625         buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
626             "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
627             traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
628             traits_t< ST >::spec, traits_t< T >::spec );
629         KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
630         __kmp_str_free( &buff );
631     }
632     #endif
633 
634     lower = *p_lb;
635     upper = *p_ub;
636     if( __kmp_env_consistency_check ) {
637         if( incr == 0 ) {
638             __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
639         }
640         if( incr > 0 ? (upper < lower) : (lower < upper) ) {
641             // The loop is illegal.
642             // Some zero-trip loops maintained by compiler, e.g.:
643             //   for(i=10;i<0;++i) // lower >= upper - run-time check
644             //   for(i=0;i>10;--i) // lower <= upper - run-time check
645             //   for(i=0;i>10;++i) // incr > 0       - compile-time check
646             //   for(i=10;i<0;--i) // incr < 0       - compile-time check
647             // Compiler does not check the following illegal loops:
648             //   for(i=0;i<10;i+=incr) // where incr<0
649             //   for(i=10;i>0;i-=incr) // where incr<0
650             __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
651         }
652     }
653     th = __kmp_threads[gtid];
654     KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
655     team = th->th.th_team;
656     #if OMP_40_ENABLED
657     nteams = th->th.th_teams_size.nteams;
658     #endif
659     team_id = team->t.t_master_tid;
660     KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
661 
662     // compute trip count
663     if( incr == 1 ) {
664         trip_count = upper - lower + 1;
665     } else if(incr == -1) {
666         trip_count = lower - upper + 1;
667     } else {
668         trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
669     }
670     if( chunk < 1 )
671         chunk = 1;
672     span = chunk * incr;
673     *p_st = span * nteams;
674     *p_lb = lower + (span * team_id);
675     *p_ub = *p_lb + span - incr;
676     if ( p_last != NULL )
677         *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
678     // Correct upper bound if needed
679     if( incr > 0 ) {
680         if( *p_ub < *p_lb ) // overflow?
681             *p_ub = i_maxmin< T >::mx;
682         if( *p_ub > upper )
683             *p_ub = upper; // tracker C73258
684     } else {   // incr < 0
685         if( *p_ub > *p_lb )
686             *p_ub = i_maxmin< T >::mn;
687         if( *p_ub < upper )
688             *p_ub = upper; // tracker C73258
689     }
690     #ifdef KMP_DEBUG
691     {
692         const char * buff;
693         // create format specifiers before the debug output
694         buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
695             "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
696             traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
697             traits_t< ST >::spec );
698         KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
699         __kmp_str_free( &buff );
700     }
701     #endif
702 }
703 
704 //--------------------------------------------------------------------------------------
705 extern "C" {
706 
707 /*!
708 @ingroup WORK_SHARING
709 @param    loc       Source code location
710 @param    gtid      Global thread id of this thread
711 @param    schedtype  Scheduling type
712 @param    plastiter Pointer to the "last iteration" flag
713 @param    plower    Pointer to the lower bound
714 @param    pupper    Pointer to the upper bound
715 @param    pstride   Pointer to the stride
716 @param    incr      Loop increment
717 @param    chunk     The chunk size
718 
719 Each of the four functions here is identical apart from the argument types.
720 
721 The functions compute the upper and lower bounds and stride to be used for the set of iterations
722 to be executed by the current thread from the statically scheduled loop that is described by the
723 initial values of the bounds, stride, increment and chunk size.
724 
725 @{
726 */
727 void
728 __kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
729                       kmp_int32 *plower, kmp_int32 *pupper,
730                       kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
731 {
732     __kmp_for_static_init< kmp_int32 >(
733                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
734 }
735 
736 /*!
737  See @ref __kmpc_for_static_init_4
738  */
739 void
740 __kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
741                       kmp_uint32 *plower, kmp_uint32 *pupper,
742                       kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
743 {
744     __kmp_for_static_init< kmp_uint32 >(
745                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
746 }
747 
748 /*!
749  See @ref __kmpc_for_static_init_4
750  */
751 void
752 __kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
753                       kmp_int64 *plower, kmp_int64 *pupper,
754                       kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
755 {
756     __kmp_for_static_init< kmp_int64 >(
757                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
758 }
759 
760 /*!
761  See @ref __kmpc_for_static_init_4
762  */
763 void
764 __kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
765                       kmp_uint64 *plower, kmp_uint64 *pupper,
766                       kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
767 {
768     __kmp_for_static_init< kmp_uint64 >(
769                       loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
770 }
771 /*!
772 @}
773 */
774 
775 /*!
776 @ingroup WORK_SHARING
777 @param    loc       Source code location
778 @param    gtid      Global thread id of this thread
@param    schedule  Scheduling type
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound of loop chunk
@param    pupperD   Pointer to the upper bound of dist_chunk
@param    pstride   Pointer to the stride
@param    incr      Loop increment
@param    chunk     The chunk size
790 
Each of the four functions here is identical apart from the argument types.
792 
793 The functions compute the upper and lower bounds and strides to be used for the set of iterations
794 to be executed by the current thread from the statically scheduled loop that is described by the
795 initial values of the bounds, strides, increment and chunks for parallel loop and distribute
796 constructs.
797 
798 @{
799 */
800 void
801 __kmpc_dist_for_static_init_4(
802     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
803     kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
804     kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
805 {
806     __kmp_dist_for_static_init< kmp_int32 >(
807         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
808 }
809 
810 /*!
811  See @ref __kmpc_dist_for_static_init_4
812  */
813 void
814 __kmpc_dist_for_static_init_4u(
815     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
816     kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
817     kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
818 {
819     __kmp_dist_for_static_init< kmp_uint32 >(
820         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
821 }
822 
823 /*!
824  See @ref __kmpc_dist_for_static_init_4
825  */
826 void
827 __kmpc_dist_for_static_init_8(
828     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
829     kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
830     kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
831 {
832     __kmp_dist_for_static_init< kmp_int64 >(
833         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
834 }
835 
836 /*!
837  See @ref __kmpc_dist_for_static_init_4
838  */
839 void
840 __kmpc_dist_for_static_init_8u(
841     ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
842     kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
843     kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
844 {
845     __kmp_dist_for_static_init< kmp_uint64 >(
846         loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
847 }
848 /*!
849 @}
850 */
851 
852 //-----------------------------------------------------------------------------------------
853 // Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< typename T >
855 //    __kmp_team_static_init( ident_t *loc, int gtid,
856 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
857 
858 /*!
859 @ingroup WORK_SHARING
860 @{
861 @param loc Source location
862 @param gtid Global thread id
863 @param p_last pointer to last iteration flag
864 @param p_lb  pointer to Lower bound
865 @param p_ub  pointer to Upper bound
@param p_st  pointer to Stride
867 @param incr  Loop increment
868 @param chunk The chunk size to block with
869 
870 The functions compute the upper and lower bounds and stride to be used for the set of iterations
871 to be executed by the current team from the statically scheduled loop that is described by the
872 initial values of the bounds, stride, increment and chunk for the distribute construct as part of
873 composite distribute parallel loop construct.
874 These functions are all identical apart from the types of the arguments.
875 */
876 
877 void
878 __kmpc_team_static_init_4(
879     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
880     kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
881 {
882     KMP_DEBUG_ASSERT( __kmp_init_serial );
883     __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
884 }
885 
886 /*!
887  See @ref __kmpc_team_static_init_4
888  */
889 void
890 __kmpc_team_static_init_4u(
891     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
892     kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
893 {
894     KMP_DEBUG_ASSERT( __kmp_init_serial );
895     __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
896 }
897 
898 /*!
899  See @ref __kmpc_team_static_init_4
900  */
901 void
902 __kmpc_team_static_init_8(
903     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
904     kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
905 {
906     KMP_DEBUG_ASSERT( __kmp_init_serial );
907     __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
908 }
909 
910 /*!
911  See @ref __kmpc_team_static_init_4
912  */
913 void
914 __kmpc_team_static_init_8u(
915     ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
916     kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
917 {
918     KMP_DEBUG_ASSERT( __kmp_init_serial );
919     __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
920 }
921 /*!
922 @}
923 */
924 
925 } // extern "C"
926 
927