1 /* 2 * kmp_sched.c -- static scheduling -- iteration initialization 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 /* 17 * Static scheduling initialization. 18 * 19 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however 20 * it may change values between parallel regions. __kmp_max_nth 21 * is the largest value __kmp_nth may take, 1 is the smallest. 22 * 23 */ 24 25 #include "kmp.h" 26 #include "kmp_i18n.h" 27 #include "kmp_str.h" 28 #include "kmp_error.h" 29 #include "kmp_stats.h" 30 #include "kmp_itt.h" 31 32 #if OMPT_SUPPORT 33 #include "ompt-specific.h" 34 #endif 35 36 // template for type limits 37 template< typename T > 38 struct i_maxmin { 39 static const T mx; 40 static const T mn; 41 }; 42 template<> 43 struct i_maxmin< int > { 44 static const int mx = 0x7fffffff; 45 static const int mn = 0x80000000; 46 }; 47 template<> 48 struct i_maxmin< unsigned int > { 49 static const unsigned int mx = 0xffffffff; 50 static const unsigned int mn = 0x00000000; 51 }; 52 template<> 53 struct i_maxmin< long long > { 54 static const long long mx = 0x7fffffffffffffffLL; 55 static const long long mn = 0x8000000000000000LL; 56 }; 57 template<> 58 struct i_maxmin< unsigned long long > { 59 static const unsigned long long mx = 0xffffffffffffffffLL; 60 static const unsigned long long mn = 0x0000000000000000LL; 61 }; 62 //------------------------------------------------------------------------- 63 #ifdef KMP_DEBUG 64 //------------------------------------------------------------------------- 65 // template for debug prints specification ( d, u, lld, llu ) 66 char const * traits_t< int >::spec = "d"; 67 char const * traits_t< unsigned int >::spec = "u"; 68 char const * traits_t< long long >::spec = "lld"; 69 char const * traits_t< unsigned long long >::spec = "llu"; 70 //------------------------------------------------------------------------- 71 #endif 72 73 template< typename T > 74 static void 75 __kmp_for_static_init( 76 ident_t *loc, 77 kmp_int32 global_tid, 78 kmp_int32 schedtype, 79 kmp_int32 *plastiter, 80 T *plower, 81 T *pupper, 82 typename traits_t< T >::signed_t *pstride, 83 typename traits_t< T >::signed_t incr, 84 typename traits_t< T >::signed_t chunk 85 ) { 86 KMP_COUNT_BLOCK(OMP_FOR_static); 87 typedef typename traits_t< T >::unsigned_t UT; 88 typedef typename traits_t< T >::signed_t ST; 89 /* this all has to be changed back to TID and such.. */ 90 register kmp_int32 gtid = global_tid; 91 register kmp_uint32 tid; 92 register kmp_uint32 nth; 93 register UT trip_count; 94 register kmp_team_t *team; 95 96 #if OMPT_SUPPORT && OMPT_TRACE 97 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 98 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 99 #endif 100 101 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride ); 102 KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid)); 103 #ifdef KMP_DEBUG 104 { 105 const char * buff; 106 // create format specifiers before the debug output 107 buff = __kmp_str_format( 108 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \ 109 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", 110 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, 111 traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec ); 112 KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter, 113 *plower, *pupper, *pstride, incr, chunk ) ); 114 __kmp_str_free( &buff ); 115 } 116 #endif 117 118 if ( __kmp_env_consistency_check ) { 119 __kmp_push_workshare( global_tid, ct_pdo, loc ); 120 if ( incr == 0 ) { 121 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); 122 } 123 } 124 /* special handling for zero-trip loops */ 125 if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { 126 if( plastiter != NULL ) 127 *plastiter = FALSE; 128 /* leave pupper and plower set to entire iteration space */ 129 *pstride = incr; /* value should never be used */ 130 // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\ 131 upper=0,stride=1) - JPH June 23, 2009. 132 #ifdef KMP_DEBUG 133 { 134 const char * buff; 135 // create format specifiers before the debug output 136 buff = __kmp_str_format( 137 "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n", 138 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); 139 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) ); 140 __kmp_str_free( &buff ); 141 } 142 #endif 143 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); 144 145 #if OMPT_SUPPORT && OMPT_TRACE 146 if ((ompt_status == ompt_status_track_callback) && 147 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 148 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 149 team_info->parallel_id, task_info->task_id, 150 team_info->microtask); 151 } 152 #endif 153 return; 154 } 155 156 #if OMP_40_ENABLED 157 if ( schedtype > kmp_ord_upper ) { 158 // we are in DISTRIBUTE construct 159 schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type 160 tid = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid; 161 team = __kmp_threads[ gtid ]->th.th_team->t.t_parent; 162 } else 163 #endif 164 { 165 tid = __kmp_tid_from_gtid( global_tid ); 166 team = __kmp_threads[ gtid ]->th.th_team; 167 } 168 169 /* determine if "for" loop is an active worksharing construct */ 170 if ( team -> t.t_serialized ) { 171 /* serialized parallel, each thread executes whole iteration space */ 172 if( plastiter != NULL ) 173 *plastiter = TRUE; 174 /* leave pupper and plower set to entire iteration space */ 175 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); 176 177 #ifdef KMP_DEBUG 178 { 179 const char * buff; 180 // create format specifiers before the debug output 181 buff = __kmp_str_format( 182 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", 183 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); 184 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); 185 __kmp_str_free( &buff ); 186 } 187 #endif 188 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); 189 190 #if OMPT_SUPPORT && OMPT_TRACE 191 if ((ompt_status == ompt_status_track_callback) && 192 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 193 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 194 team_info->parallel_id, task_info->task_id, 195 team_info->microtask); 196 } 197 #endif 198 return; 199 } 200 nth = team->t.t_nproc; 201 if ( nth == 1 ) { 202 if( plastiter != NULL ) 203 *plastiter = TRUE; 204 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); 205 #ifdef KMP_DEBUG 206 { 207 const char * buff; 208 // create format specifiers before the debug output 209 buff = __kmp_str_format( 210 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", 211 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); 212 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); 213 __kmp_str_free( &buff ); 214 } 215 #endif 216 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); 217 218 #if OMPT_SUPPORT && OMPT_TRACE 219 if ((ompt_status == ompt_status_track_callback) && 220 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 221 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 222 team_info->parallel_id, task_info->task_id, 223 team_info->microtask); 224 } 225 #endif 226 return; 227 } 228 229 /* compute trip count */ 230 if ( incr == 1 ) { 231 trip_count = *pupper - *plower + 1; 232 } else if (incr == -1) { 233 trip_count = *plower - *pupper + 1; 234 } else { 235 if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0 236 trip_count = (*pupper - *plower) / incr + 1; 237 } else { 238 trip_count = (*plower - *pupper) / ( -incr ) + 1; 239 } 240 } 241 242 if ( __kmp_env_consistency_check ) { 243 /* tripcount overflow? */ 244 if ( trip_count == 0 && *pupper != *plower ) { 245 __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc ); 246 } 247 } 248 249 /* compute remaining parameters */ 250 switch ( schedtype ) { 251 case kmp_sch_static: 252 { 253 if ( trip_count < nth ) { 254 KMP_DEBUG_ASSERT( 255 __kmp_static == kmp_sch_static_greedy || \ 256 __kmp_static == kmp_sch_static_balanced 257 ); // Unknown static scheduling type. 258 if ( tid < trip_count ) { 259 *pupper = *plower = *plower + tid * incr; 260 } else { 261 *plower = *pupper + incr; 262 } 263 if( plastiter != NULL ) 264 *plastiter = ( tid == trip_count - 1 ); 265 } else { 266 if ( __kmp_static == kmp_sch_static_balanced ) { 267 register UT small_chunk = trip_count / nth; 268 register UT extras = trip_count % nth; 269 *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) ); 270 *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr ); 271 if( plastiter != NULL ) 272 *plastiter = ( tid == nth - 1 ); 273 } else { 274 register T big_chunk_inc_count = ( trip_count/nth + 275 ( ( trip_count % nth ) ? 1 : 0) ) * incr; 276 register T old_upper = *pupper; 277 278 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); 279 // Unknown static scheduling type. 280 281 *plower += tid * big_chunk_inc_count; 282 *pupper = *plower + big_chunk_inc_count - incr; 283 if ( incr > 0 ) { 284 if( *pupper < *plower ) 285 *pupper = i_maxmin< T >::mx; 286 if( plastiter != NULL ) 287 *plastiter = *plower <= old_upper && *pupper > old_upper - incr; 288 if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258 289 } else { 290 if( *pupper > *plower ) 291 *pupper = i_maxmin< T >::mn; 292 if( plastiter != NULL ) 293 *plastiter = *plower >= old_upper && *pupper < old_upper - incr; 294 if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258 295 } 296 } 297 } 298 break; 299 } 300 case kmp_sch_static_chunked: 301 { 302 register ST span; 303 if ( chunk < 1 ) { 304 chunk = 1; 305 } 306 span = chunk * incr; 307 *pstride = span * nth; 308 *plower = *plower + (span * tid); 309 *pupper = *plower + span - incr; 310 if( plastiter != NULL ) 311 *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth); 312 break; 313 } 314 default: 315 KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" ); 316 break; 317 } 318 319 #if USE_ITT_BUILD 320 // Report loop metadata 321 if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) { 322 kmp_uint64 cur_chunk = chunk; 323 // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked 324 if ( schedtype == kmp_sch_static ) { 325 cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0); 326 } 327 // 0 - "static" schedule 328 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); 329 } 330 #endif 331 #ifdef KMP_DEBUG 332 { 333 const char * buff; 334 // create format specifiers before the debug output 335 buff = __kmp_str_format( 336 "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n", 337 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); 338 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); 339 __kmp_str_free( &buff ); 340 } 341 #endif 342 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); 343 344 #if OMPT_SUPPORT && OMPT_TRACE 345 if ((ompt_status == ompt_status_track_callback) && 346 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 347 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 348 team_info->parallel_id, task_info->task_id, team_info->microtask); 349 } 350 #endif 351 352 return; 353 } 354 355 template< typename T > 356 static void 357 __kmp_dist_for_static_init( 358 ident_t *loc, 359 kmp_int32 gtid, 360 kmp_int32 schedule, 361 kmp_int32 *plastiter, 362 T *plower, 363 T *pupper, 364 T *pupperDist, 365 typename traits_t< T >::signed_t *pstride, 366 typename traits_t< T >::signed_t incr, 367 typename traits_t< T >::signed_t chunk 368 ) { 369 KMP_COUNT_BLOCK(OMP_DISTR_FOR_static); 370 typedef typename traits_t< T >::unsigned_t UT; 371 typedef typename traits_t< T >::signed_t ST; 372 register kmp_uint32 tid; 373 register kmp_uint32 nth; 374 register kmp_uint32 team_id; 375 register kmp_uint32 nteams; 376 register UT trip_count; 377 register kmp_team_t *team; 378 kmp_info_t * th; 379 380 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride ); 381 KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); 382 #ifdef KMP_DEBUG 383 { 384 const char * buff; 385 // create format specifiers before the debug output 386 buff = __kmp_str_format( 387 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\ 388 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", 389 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, 390 traits_t< ST >::spec, traits_t< T >::spec ); 391 KD_TRACE(100, ( buff, gtid, schedule, *plastiter, 392 *plower, *pupper, incr, chunk ) ); 393 __kmp_str_free( &buff ); 394 } 395 #endif 396 397 if( __kmp_env_consistency_check ) { 398 __kmp_push_workshare( gtid, ct_pdo, loc ); 399 if( incr == 0 ) { 400 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); 401 } 402 if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { 403 // The loop is illegal. 404 // Some zero-trip loops maintained by compiler, e.g.: 405 // for(i=10;i<0;++i) // lower >= upper - run-time check 406 // for(i=0;i>10;--i) // lower <= upper - run-time check 407 // for(i=0;i>10;++i) // incr > 0 - compile-time check 408 // for(i=10;i<0;--i) // incr < 0 - compile-time check 409 // Compiler does not check the following illegal loops: 410 // for(i=0;i<10;i+=incr) // where incr<0 411 // for(i=10;i>0;i-=incr) // where incr<0 412 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); 413 } 414 } 415 tid = __kmp_tid_from_gtid( gtid ); 416 th = __kmp_threads[gtid]; 417 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct 418 nth = th->th.th_team_nproc; 419 team = th->th.th_team; 420 #if OMP_40_ENABLED 421 nteams = th->th.th_teams_size.nteams; 422 #endif 423 team_id = team->t.t_master_tid; 424 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); 425 426 // compute global trip count 427 if( incr == 1 ) { 428 trip_count = *pupper - *plower + 1; 429 } else if(incr == -1) { 430 trip_count = *plower - *pupper + 1; 431 } else { 432 trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case 433 } 434 *pstride = *pupper - *plower; // just in case (can be unused) 435 if( trip_count <= nteams ) { 436 KMP_DEBUG_ASSERT( 437 __kmp_static == kmp_sch_static_greedy || \ 438 __kmp_static == kmp_sch_static_balanced 439 ); // Unknown static scheduling type. 440 // only masters of some teams get single iteration, other threads get nothing 441 if( team_id < trip_count && tid == 0 ) { 442 *pupper = *pupperDist = *plower = *plower + team_id * incr; 443 } else { 444 *pupperDist = *pupper; 445 *plower = *pupper + incr; // compiler should skip loop body 446 } 447 if( plastiter != NULL ) 448 *plastiter = ( tid == 0 && team_id == trip_count - 1 ); 449 } else { 450 // Get the team's chunk first (each team gets at most one chunk) 451 if( __kmp_static == kmp_sch_static_balanced ) { 452 register UT chunkD = trip_count / nteams; 453 register UT extras = trip_count % nteams; 454 *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) ); 455 *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr ); 456 if( plastiter != NULL ) 457 *plastiter = ( team_id == nteams - 1 ); 458 } else { 459 register T chunk_inc_count = 460 ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr; 461 register T upper = *pupper; 462 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); 463 // Unknown static scheduling type. 464 *plower += team_id * chunk_inc_count; 465 *pupperDist = *plower + chunk_inc_count - incr; 466 // Check/correct bounds if needed 467 if( incr > 0 ) { 468 if( *pupperDist < *plower ) 469 *pupperDist = i_maxmin< T >::mx; 470 if( plastiter != NULL ) 471 *plastiter = *plower <= upper && *pupperDist > upper - incr; 472 if( *pupperDist > upper ) 473 *pupperDist = upper; // tracker C73258 474 if( *plower > *pupperDist ) { 475 *pupper = *pupperDist; // no iterations available for the team 476 goto end; 477 } 478 } else { 479 if( *pupperDist > *plower ) 480 *pupperDist = i_maxmin< T >::mn; 481 if( plastiter != NULL ) 482 *plastiter = *plower >= upper && *pupperDist < upper - incr; 483 if( *pupperDist < upper ) 484 *pupperDist = upper; // tracker C73258 485 if( *plower < *pupperDist ) { 486 *pupper = *pupperDist; // no iterations available for the team 487 goto end; 488 } 489 } 490 } 491 // Get the parallel loop chunk now (for thread) 492 // compute trip count for team's chunk 493 if( incr == 1 ) { 494 trip_count = *pupperDist - *plower + 1; 495 } else if(incr == -1) { 496 trip_count = *plower - *pupperDist + 1; 497 } else { 498 trip_count = (ST)(*pupperDist - *plower) / incr + 1; 499 } 500 KMP_DEBUG_ASSERT( trip_count ); 501 switch( schedule ) { 502 case kmp_sch_static: 503 { 504 if( trip_count <= nth ) { 505 KMP_DEBUG_ASSERT( 506 __kmp_static == kmp_sch_static_greedy || \ 507 __kmp_static == kmp_sch_static_balanced 508 ); // Unknown static scheduling type. 509 if( tid < trip_count ) 510 *pupper = *plower = *plower + tid * incr; 511 else 512 *plower = *pupper + incr; // no iterations available 513 if( plastiter != NULL ) 514 if( *plastiter != 0 && !( tid == trip_count - 1 ) ) 515 *plastiter = 0; 516 } else { 517 if( __kmp_static == kmp_sch_static_balanced ) { 518 register UT chunkL = trip_count / nth; 519 register UT extras = trip_count % nth; 520 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); 521 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); 522 if( plastiter != NULL ) 523 if( *plastiter != 0 && !( tid == nth - 1 ) ) 524 *plastiter = 0; 525 } else { 526 register T chunk_inc_count = 527 ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr; 528 register T upper = *pupperDist; 529 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); 530 // Unknown static scheduling type. 531 *plower += tid * chunk_inc_count; 532 *pupper = *plower + chunk_inc_count - incr; 533 if( incr > 0 ) { 534 if( *pupper < *plower ) 535 *pupper = i_maxmin< T >::mx; 536 if( plastiter != NULL ) 537 if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) ) 538 *plastiter = 0; 539 if( *pupper > upper ) 540 *pupper = upper;//tracker C73258 541 } else { 542 if( *pupper > *plower ) 543 *pupper = i_maxmin< T >::mn; 544 if( plastiter != NULL ) 545 if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) ) 546 *plastiter = 0; 547 if( *pupper < upper ) 548 *pupper = upper;//tracker C73258 549 } 550 } 551 } 552 break; 553 } 554 case kmp_sch_static_chunked: 555 { 556 register ST span; 557 if( chunk < 1 ) 558 chunk = 1; 559 span = chunk * incr; 560 *pstride = span * nth; 561 *plower = *plower + (span * tid); 562 *pupper = *plower + span - incr; 563 if( plastiter != NULL ) 564 if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) ) 565 *plastiter = 0; 566 break; 567 } 568 default: 569 KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" ); 570 break; 571 } 572 } 573 end:; 574 #ifdef KMP_DEBUG 575 { 576 const char * buff; 577 // create format specifiers before the debug output 578 buff = __kmp_str_format( 579 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\ 580 "stride=%%%s signed?<%s>\n", 581 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec, 582 traits_t< ST >::spec, traits_t< T >::spec ); 583 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) ); 584 __kmp_str_free( &buff ); 585 } 586 #endif 587 KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) ); 588 return; 589 } 590 591 template< typename T > 592 static void 593 __kmp_team_static_init( 594 ident_t *loc, 595 kmp_int32 gtid, 596 kmp_int32 *p_last, 597 T *p_lb, 598 T *p_ub, 599 typename traits_t< T >::signed_t *p_st, 600 typename traits_t< T >::signed_t incr, 601 typename traits_t< T >::signed_t chunk 602 ) { 603 // The routine returns the first chunk distributed to the team and 604 // stride for next chunks calculation. 605 // Last iteration flag set for the team that will execute 606 // the last iteration of the loop. 607 // The routine is called for dist_schedue(static,chunk) only. 608 typedef typename traits_t< T >::unsigned_t UT; 609 typedef typename traits_t< T >::signed_t ST; 610 kmp_uint32 team_id; 611 kmp_uint32 nteams; 612 UT trip_count; 613 T lower; 614 T upper; 615 ST span; 616 kmp_team_t *team; 617 kmp_info_t *th; 618 619 KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st ); 620 KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid)); 621 #ifdef KMP_DEBUG 622 { 623 const char * buff; 624 // create format specifiers before the debug output 625 buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\ 626 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", 627 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, 628 traits_t< ST >::spec, traits_t< T >::spec ); 629 KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); 630 __kmp_str_free( &buff ); 631 } 632 #endif 633 634 lower = *p_lb; 635 upper = *p_ub; 636 if( __kmp_env_consistency_check ) { 637 if( incr == 0 ) { 638 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); 639 } 640 if( incr > 0 ? (upper < lower) : (lower < upper) ) { 641 // The loop is illegal. 642 // Some zero-trip loops maintained by compiler, e.g.: 643 // for(i=10;i<0;++i) // lower >= upper - run-time check 644 // for(i=0;i>10;--i) // lower <= upper - run-time check 645 // for(i=0;i>10;++i) // incr > 0 - compile-time check 646 // for(i=10;i<0;--i) // incr < 0 - compile-time check 647 // Compiler does not check the following illegal loops: 648 // for(i=0;i<10;i+=incr) // where incr<0 649 // for(i=10;i>0;i-=incr) // where incr<0 650 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); 651 } 652 } 653 th = __kmp_threads[gtid]; 654 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct 655 team = th->th.th_team; 656 #if OMP_40_ENABLED 657 nteams = th->th.th_teams_size.nteams; 658 #endif 659 team_id = team->t.t_master_tid; 660 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); 661 662 // compute trip count 663 if( incr == 1 ) { 664 trip_count = upper - lower + 1; 665 } else if(incr == -1) { 666 trip_count = lower - upper + 1; 667 } else { 668 trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case 669 } 670 if( chunk < 1 ) 671 chunk = 1; 672 span = chunk * incr; 673 *p_st = span * nteams; 674 *p_lb = lower + (span * team_id); 675 *p_ub = *p_lb + span - incr; 676 if ( p_last != NULL ) 677 *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams); 678 // Correct upper bound if needed 679 if( incr > 0 ) { 680 if( *p_ub < *p_lb ) // overflow? 681 *p_ub = i_maxmin< T >::mx; 682 if( *p_ub > upper ) 683 *p_ub = upper; // tracker C73258 684 } else { // incr < 0 685 if( *p_ub > *p_lb ) 686 *p_ub = i_maxmin< T >::mn; 687 if( *p_ub < upper ) 688 *p_ub = upper; // tracker C73258 689 } 690 #ifdef KMP_DEBUG 691 { 692 const char * buff; 693 // create format specifiers before the debug output 694 buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\ 695 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", 696 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, 697 traits_t< ST >::spec ); 698 KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); 699 __kmp_str_free( &buff ); 700 } 701 #endif 702 } 703 704 //-------------------------------------------------------------------------------------- 705 extern "C" { 706 707 /*! 708 @ingroup WORK_SHARING 709 @param loc Source code location 710 @param gtid Global thread id of this thread 711 @param schedtype Scheduling type 712 @param plastiter Pointer to the "last iteration" flag 713 @param plower Pointer to the lower bound 714 @param pupper Pointer to the upper bound 715 @param pstride Pointer to the stride 716 @param incr Loop increment 717 @param chunk The chunk size 718 719 Each of the four functions here are identical apart from the argument types. 720 721 The functions compute the upper and lower bounds and stride to be used for the set of iterations 722 to be executed by the current thread from the statically scheduled loop that is described by the 723 initial values of the bounds, stride, increment and chunk size. 724 725 @{ 726 */ 727 void 728 __kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, 729 kmp_int32 *plower, kmp_int32 *pupper, 730 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) 731 { 732 __kmp_for_static_init< kmp_int32 >( 733 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); 734 } 735 736 /*! 737 See @ref __kmpc_for_static_init_4 738 */ 739 void 740 __kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, 741 kmp_uint32 *plower, kmp_uint32 *pupper, 742 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) 743 { 744 __kmp_for_static_init< kmp_uint32 >( 745 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); 746 } 747 748 /*! 749 See @ref __kmpc_for_static_init_4 750 */ 751 void 752 __kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, 753 kmp_int64 *plower, kmp_int64 *pupper, 754 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) 755 { 756 __kmp_for_static_init< kmp_int64 >( 757 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); 758 } 759 760 /*! 761 See @ref __kmpc_for_static_init_4 762 */ 763 void 764 __kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, 765 kmp_uint64 *plower, kmp_uint64 *pupper, 766 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) 767 { 768 __kmp_for_static_init< kmp_uint64 >( 769 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); 770 } 771 /*! 772 @} 773 */ 774 775 /*! 776 @ingroup WORK_SHARING 777 @param loc Source code location 778 @param gtid Global thread id of this thread 779 @param scheduleD Scheduling type for the distribute 780 @param scheduleL Scheduling type for the parallel loop 781 @param plastiter Pointer to the "last iteration" flag 782 @param plower Pointer to the lower bound 783 @param pupper Pointer to the upper bound of loop chunk 784 @param pupperD Pointer to the upper bound of dist_chunk 785 @param pstrideD Pointer to the stride for distribute 786 @param pstrideL Pointer to the stride for parallel loop 787 @param incr Loop increment 788 @param chunkD The chunk size for the distribute 789 @param chunkL The chunk size for the parallel loop 790 791 Each of the four functions here are identical apart from the argument types. 792 793 The functions compute the upper and lower bounds and strides to be used for the set of iterations 794 to be executed by the current thread from the statically scheduled loop that is described by the 795 initial values of the bounds, strides, increment and chunks for parallel loop and distribute 796 constructs. 797 798 @{ 799 */ 800 void 801 __kmpc_dist_for_static_init_4( 802 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, 803 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, 804 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) 805 { 806 __kmp_dist_for_static_init< kmp_int32 >( 807 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); 808 } 809 810 /*! 811 See @ref __kmpc_dist_for_static_init_4 812 */ 813 void 814 __kmpc_dist_for_static_init_4u( 815 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, 816 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, 817 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) 818 { 819 __kmp_dist_for_static_init< kmp_uint32 >( 820 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); 821 } 822 823 /*! 824 See @ref __kmpc_dist_for_static_init_4 825 */ 826 void 827 __kmpc_dist_for_static_init_8( 828 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, 829 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, 830 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) 831 { 832 __kmp_dist_for_static_init< kmp_int64 >( 833 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); 834 } 835 836 /*! 837 See @ref __kmpc_dist_for_static_init_4 838 */ 839 void 840 __kmpc_dist_for_static_init_8u( 841 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, 842 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, 843 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) 844 { 845 __kmp_dist_for_static_init< kmp_uint64 >( 846 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); 847 } 848 /*! 849 @} 850 */ 851 852 //----------------------------------------------------------------------------------------- 853 // Auxiliary routines for Distribute Parallel Loop construct implementation 854 // Transfer call to template< type T > 855 // __kmp_team_static_init( ident_t *loc, int gtid, 856 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) 857 858 /*! 859 @ingroup WORK_SHARING 860 @{ 861 @param loc Source location 862 @param gtid Global thread id 863 @param p_last pointer to last iteration flag 864 @param p_lb pointer to Lower bound 865 @param p_ub pointer to Upper bound 866 @param p_st Step (or increment if you prefer) 867 @param incr Loop increment 868 @param chunk The chunk size to block with 869 870 The functions compute the upper and lower bounds and stride to be used for the set of iterations 871 to be executed by the current team from the statically scheduled loop that is described by the 872 initial values of the bounds, stride, increment and chunk for the distribute construct as part of 873 composite distribute parallel loop construct. 874 These functions are all identical apart from the types of the arguments. 875 */ 876 877 void 878 __kmpc_team_static_init_4( 879 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 880 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) 881 { 882 KMP_DEBUG_ASSERT( __kmp_init_serial ); 883 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); 884 } 885 886 /*! 887 See @ref __kmpc_team_static_init_4 888 */ 889 void 890 __kmpc_team_static_init_4u( 891 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 892 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) 893 { 894 KMP_DEBUG_ASSERT( __kmp_init_serial ); 895 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); 896 } 897 898 /*! 899 See @ref __kmpc_team_static_init_4 900 */ 901 void 902 __kmpc_team_static_init_8( 903 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 904 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) 905 { 906 KMP_DEBUG_ASSERT( __kmp_init_serial ); 907 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); 908 } 909 910 /*! 911 See @ref __kmpc_team_static_init_4 912 */ 913 void 914 __kmpc_team_static_init_8u( 915 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 916 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) 917 { 918 KMP_DEBUG_ASSERT( __kmp_init_serial ); 919 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); 920 } 921 /*! 922 @} 923 */ 924 925 } // extern "C" 926 927