/*
 * kmp_sched.c -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_str.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_itt.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// template for type limits
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffLL;
    static const unsigned long long mn = 0x0000000000000000LL;
};
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const * traits_t< int >::spec = "d";
char const * traits_t< unsigned int >::spec = "u";
char const * traits_t< long long >::spec = "lld";
char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
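// Note (illustrative, not part of the original source): i_maxmin< T > plays
// the role of std::numeric_limits< T >::max() / min() for the four
// instantiated types; a hand-rolled template is presumably kept here to avoid
// a dependency on <limits>. The schedulers below use it to clamp a computed
// upper bound after chunk arithmetic that may wrap around, e.g. (sketch):
//
//     if ( incr > 0 && *pupper < *plower )  // addition wrapped past the max
//         *pupper = i_maxmin< T >::mx;      // clamp, then trim to the old upper bound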
template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_BLOCK (FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;   // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        // THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP
        // (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;   // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team->t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        if ( incr > 1 ) {  // the check is needed for unsigned division when incr < 0
            trip_count = (*pupper - *plower) / incr + 1;
        } else {
            trip_count = (*plower - *pupper) / ( -incr ) + 1;
        }
    }

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
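    // Worked example (illustrative only): for the loop
    //     for ( i = 5; i <= 20; i += 4 )   // lower=5, upper=20, incr=4
    // the branch above computes trip_count = (20 - 5) / 4 + 1 = 4,
    // i.e. iterations i = 5, 9, 13, 17. Because UT is unsigned, the division
    // is only safe when the numerator is non-negative, which is why incr > 0
    // and incr < 0 are handled by separate expressions.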
    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    register T big_chunk_inc_count = ( trip_count/nth +
                                                     ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}
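// Worked example (illustrative only) of the two kmp_sch_static variants above,
// for trip_count = 10 and nth = 4 threads:
//   - kmp_sch_static_balanced: small_chunk = 2, extras = 2, so the threads
//     get 3, 3, 2, 2 iterations respectively (sizes differ by at most one).
//   - kmp_sch_static_greedy: big_chunk = ceil(10/4) = 3, so the threads get
//     3, 3, 3, 1 iterations; the last thread's chunk is trimmed against
//     old_upper, and the i_maxmin clamp guards the *plower + chunk arithmetic
//     against wrap-around near the ends of the type's range.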
template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                        *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0      - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0      - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr;  // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else {
            trip_count = (ST)(*pupperDist - *plower) / incr + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr;  // no iterations available
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper; // tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
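// Worked example (illustrative only) of the two-level split performed above,
// for trip_count = 16, nteams = 2, nth = 2, incr = 1, dist_schedule(static):
//   1. Team chunk:   team 0 gets iterations 0..7, team 1 gets 8..15
//                    (*pupperDist bounds the team's chunk).
//   2. Thread chunk: within team 0, thread 0 gets 0..3 and thread 1 gets
//                    4..7 (*pupper bounds the thread's chunk).
// *plastiter ends up set only for the thread that executes iteration 15,
// i.e. the last thread of the last team.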
template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and
    // the stride for calculating the next chunks.
    // The last iteration flag is set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0      - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0      - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else {
        trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {   // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}

//--------------------------------------------------------------------------------------
extern "C" {

/*!
@ingroup WORK_SHARING
@param loc       Source code location
@param gtid      Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower    Pointer to the lower bound
@param pupper    Pointer to the upper bound
@param pstride   Pointer to the stride
@param incr      Loop increment
@param chunk     The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk size.

@{
*/
void
__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int32 *plower, kmp_int32 *pupper,
                          kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_int32 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
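// Usage sketch (illustrative only, not part of the runtime): for a loop such as
//     #pragma omp for schedule(static)
//     for ( int i = 0; i < n; ++i ) body(i);
// a compiler targeting this entry point would typically emit, per thread,
// something like:
//
//     kmp_int32 lower = 0, upper = n - 1, stride = 1, lastiter = 0;
//     __kmpc_for_static_init_4( &loc, gtid, kmp_sch_static, &lastiter,
//                               &lower, &upper, &stride, 1 /*incr*/, 0 /*chunk*/ );
//     for ( kmp_int32 i = lower; i <= upper; ++i ) body(i);
//     __kmpc_for_static_fini( &loc, gtid );
//
// i.e. the routine rewrites lower/upper in place to the calling thread's
// share of the iteration space.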
/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint32 *plower, kmp_uint32 *pupper,
                           kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_uint32 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int64 *plower, kmp_int64 *pupper,
                          kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_int64 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint64 *plower, kmp_uint64 *pupper,
                           kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_uint64 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc       Source code location
@param gtid      Global thread id of this thread
@param schedule  Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower    Pointer to the lower bound
@param pupper    Pointer to the upper bound of loop chunk
@param pupperD   Pointer to the upper bound of dist_chunk
@param pstride   Pointer to the stride for parallel loop
@param incr      Loop increment
@param chunk     The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, strides, increment and chunks for parallel loop and distribute
constructs.

@{
*/
void
__kmpc_dist_for_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_int32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_uint32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_int64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_uint64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}
/*!
@}
*/
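// Usage sketch (illustrative only, not part of the runtime): for a composite
//     #pragma omp distribute parallel for dist_schedule(static) schedule(static)
// loop over i = 0 .. n-1, a compiler would typically call one of the routines
// above once per thread, e.g.:
//
//     kmp_int32 lo = 0, up = n - 1, upD = n - 1, st = 1, last = 0;
//     __kmpc_dist_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
//                                    &lo, &up, &upD, &st, 1 /*incr*/, 0 /*chunk*/ );
//     for ( kmp_int32 i = lo; i <= up; ++i ) body(i);
//
// upD receives the upper bound of the whole team's (distribute) chunk, while
// lo/up are narrowed to the calling thread's share of that chunk.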
//-----------------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc    Source location
@param gtid   Global thread id
@param p_last pointer to last iteration flag
@param p_lb   pointer to Lower bound
@param p_ub   pointer to Upper bound
@param p_st   Step (or increment if you prefer)
@param incr   Loop increment
@param chunk  The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current team from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk for the distribute construct as part of
composite distribute parallel loop construct.
These functions are all identical apart from the types of the arguments.
*/

void
__kmpc_team_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}
/*!
@}
*/

} // extern "C"
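// Usage sketch (illustrative only, not part of the runtime): for
//     #pragma omp distribute dist_schedule(static, chunk)
// over i = 0 .. n-1, a compiler could drive the team's chunks as follows:
//
//     kmp_int32 lb = 0, ub = n - 1, st = 1, last = 0, upper = n - 1;
//     __kmpc_team_static_init_4( &loc, gtid, &last, &lb, &ub, &st,
//                                1 /*incr*/, chunk );
//     // lb..ub is now the team's first chunk; st is the distance to the
//     // team's next chunk (chunk * nteams), so the driver loop strides by st.
//     for ( ; lb <= upper; lb += st, ub += st ) {
//         kmp_int32 hi = ub < upper ? ub : upper;  // clamp the final chunk
//         for ( kmp_int32 i = lb; i <= hi; ++i ) body(i);
//     }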