/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_str.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_itt.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// template for type limits
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffLL;
    static const unsigned long long mn = 0x0000000000000000LL;
};
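// Illustrative note (not part of the original file): these specializations
// mirror std::numeric_limits. Assuming <limits> were acceptable in this
// translation unit, the equivalence could be asserted, e.g.:
//
//   #include <limits>
//   static_assert( i_maxmin< int >::mx == std::numeric_limits< int >::max(),
//                  "mx must match numeric_limits" );
//   static_assert( i_maxmin< unsigned long long >::mx ==
//                  std::numeric_limits< unsigned long long >::max(),
//                  "mx must match numeric_limits" );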
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
    char const * traits_t< int >::spec = "d";
    char const * traits_t< unsigned int >::spec = "u";
    char const * traits_t< long long >::spec = "lld";
    char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif

template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;  // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        //                            // THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A
        //                            // ZERO-TRIP LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    // Although there are schedule enumerations above kmp_ord_upper which are not
    // schedules for "distribute", the only useful ones are dynamic, and they cannot
    // be seen here, since this code path is only executed for static schedules.
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;   // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }
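    // Worked example (illustrative): for the loop "for (i = 0; i < 10; i += 3)"
    // the compiler passes *plower = 0, *pupper = 9, incr = 3, so the branch
    // above computes trip_count = (9 - 0) / 3 + 1 = 4, i.e. i = 0, 3, 6, 9.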

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    register T big_chunk_inc_count =
                        ( trip_count/nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
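    // Worked example for the kmp_sch_static case above (illustrative):
    // trip_count = 10, nth = 4, incr = 1.
    //   kmp_sch_static_balanced: small_chunk = 2, extras = 2, so threads get
    //   [0,2] [3,5] [6,7] [8,9] and tid 3 reports the last iteration.
    //   kmp_sch_static_greedy: big_chunk = ceil(10/4) = 3, so threads get
    //   [0,2] [3,5] [6,8] [9,11]; tid 3's upper bound is clipped back to 9.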
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
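    // Worked example for kmp_sch_static_chunked above (illustrative):
    // trip_count = 10, nth = 2, chunk = 2, incr = 1 gives span = 2 and
    // *pstride = 4. tid 0 starts at chunk [0,1], tid 1 at [2,3], and each
    // thread advances by *pstride to its later chunks ([4,5], [8,9] for
    // tid 0; [6,7] for tid 1). The last chunk index is (10 - 1) / 2 = 4,
    // and 4 % 2 == 0, so tid 0 owns the last iteration.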
#if OMP_45_ENABLED
    case kmp_sch_static_balanced_chunked:
        {
            register T old_upper = *pupper;
            // round up to make sure the chunk is enough to cover all iterations
            register UT span = (trip_count+nth-1) / nth;

            // perform chunk adjustment
            chunk = (span + chunk - 1) & ~(chunk-1);

            span = chunk * incr;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if ( incr > 0 ) {
                if ( *pupper > old_upper ) *pupper = old_upper;
            } else
                if ( *pupper < old_upper ) *pupper = old_upper;

            if( plastiter != NULL )
                *plastiter = ( tid == ((trip_count - 1)/( UT )chunk) );
            break;
        }
#endif
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }
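    // Worked example for kmp_sch_static_balanced_chunked above (illustrative):
    // trip_count = 20, nth = 3, chunk = 4, incr = 1. The per-thread span is
    // ceil(20/3) = 7, which the mask rounds up to the next multiple of chunk:
    // (7 + 4 - 1) & ~(4 - 1) = 8. Threads then cover [0,7] [8,15] [16,23],
    // with the last range clipped to [16,19]. Note the "& ~(chunk-1)"
    // rounding is exact only when chunk is a power of two, which appears to
    // be the expected case here (e.g. a simd width).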

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}

template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t * th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                        *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops are maintained by the compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0      - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0      - compile-time check
            // The compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only the masters of some teams get a single iteration; other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr;   // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else if ( incr > 0 ) {
            // upper-lower can exceed the limit of signed type
            trip_count = (UT)(*pupperDist - *plower) / incr + 1;
        } else {
            trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr; // no iterations available
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper; // tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
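
// Worked example (illustrative): a teams construct with nteams = 2 and
// nth = 2 threads per team running a loop with trip_count = 10, incr = 1,
// and balanced scheduling at both levels. The distribute step above gives
// team 0 the range [0,4] and team 1 the range [5,9] (*pupperDist). Within
// team 0, the parallel step then splits its 5 iterations as [0,2] for tid 0
// and [3,4] for tid 1 (*pupper).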

template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and
    // the stride for calculating subsequent chunks.
    // The last iteration flag is set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops are maintained by the compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0      - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0      - compile-time check
            // The compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(upper - lower) / incr + 1;
    } else {
        trip_count = (UT)(lower - upper) / (-incr) + 1;
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {   // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}

//--------------------------------------------------------------------------------------
extern "C" {

/*!
@ingroup WORK_SHARING
@param loc       Source code location
@param gtid      Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower    Pointer to the lower bound
@param pupper    Pointer to the upper bound
@param pstride   Pointer to the stride
@param incr      Loop increment
@param chunk     The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk size.

@{
*/
void
__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int32 *plower, kmp_int32 *pupper,
                          kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_int32 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
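
/*
 * Illustrative sketch (not part of the runtime): roughly what a compiler
 * emits for "#pragma omp for schedule(static)" over i = 0..N-1. The exact
 * codegen, the captured loc, body(), and the pairing with
 * __kmpc_for_static_fini are assumptions based on the usual kmpc calling
 * convention.
 *
 *   kmp_int32 last = 0, lb = 0, ub = N - 1, st = 1;
 *   __kmpc_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
 *                             &lb, &ub, &st, 1, 1 );
 *   for ( kmp_int32 i = lb; i <= ub; ++i )
 *       body( i );
 *   __kmpc_for_static_fini( &loc, gtid );
 */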
690 // Some zero-trip loops maintained by compiler, e.g.: 691 // for(i=10;i<0;++i) // lower >= upper - run-time check 692 // for(i=0;i>10;--i) // lower <= upper - run-time check 693 // for(i=0;i>10;++i) // incr > 0 - compile-time check 694 // for(i=10;i<0;--i) // incr < 0 - compile-time check 695 // Compiler does not check the following illegal loops: 696 // for(i=0;i<10;i+=incr) // where incr<0 697 // for(i=10;i>0;i-=incr) // where incr<0 698 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); 699 } 700 } 701 th = __kmp_threads[gtid]; 702 team = th->th.th_team; 703 #if OMP_40_ENABLED 704 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct 705 nteams = th->th.th_teams_size.nteams; 706 #endif 707 team_id = team->t.t_master_tid; 708 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); 709 710 // compute trip count 711 if( incr == 1 ) { 712 trip_count = upper - lower + 1; 713 } else if(incr == -1) { 714 trip_count = lower - upper + 1; 715 } else if ( incr > 0 ) { 716 // upper-lower can exceed the limit of signed type 717 trip_count = (UT)(upper - lower) / incr + 1; 718 } else { 719 trip_count = (UT)(lower - upper) / (-incr) + 1; 720 } 721 if( chunk < 1 ) 722 chunk = 1; 723 span = chunk * incr; 724 *p_st = span * nteams; 725 *p_lb = lower + (span * team_id); 726 *p_ub = *p_lb + span - incr; 727 if ( p_last != NULL ) 728 *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams); 729 // Correct upper bound if needed 730 if( incr > 0 ) { 731 if( *p_ub < *p_lb ) // overflow? 732 *p_ub = i_maxmin< T >::mx; 733 if( *p_ub > upper ) 734 *p_ub = upper; // tracker C73258 735 } else { // incr < 0 736 if( *p_ub > *p_lb ) 737 *p_ub = i_maxmin< T >::mn; 738 if( *p_ub < upper ) 739 *p_ub = upper; // tracker C73258 740 } 741 #ifdef KMP_DEBUG 742 { 743 const char * buff; 744 // create format specifiers before the debug output 745 buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\ 746 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", 747 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, 748 traits_t< ST >::spec ); 749 KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); 750 __kmp_str_free( &buff ); 751 } 752 #endif 753 } 754 755 //-------------------------------------------------------------------------------------- 756 extern "C" { 757 758 /*! 759 @ingroup WORK_SHARING 760 @param loc Source code location 761 @param gtid Global thread id of this thread 762 @param schedtype Scheduling type 763 @param plastiter Pointer to the "last iteration" flag 764 @param plower Pointer to the lower bound 765 @param pupper Pointer to the upper bound 766 @param pstride Pointer to the stride 767 @param incr Loop increment 768 @param chunk The chunk size 769 770 Each of the four functions here are identical apart from the argument types. 771 772 The functions compute the upper and lower bounds and stride to be used for the set of iterations 773 to be executed by the current thread from the statically scheduled loop that is described by the 774 initial values of the bounds, stride, increment and chunk size. 775 776 @{ 777 */ 778 void 779 __kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, 780 kmp_int32 *plower, kmp_int32 *pupper, 781 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) 782 { 783 __kmp_for_static_init< kmp_int32 >( 784 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); 785 } 786 787 /*! 

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void
__kmpc_dist_for_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_uint32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void
__kmpc_dist_for_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_int64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void
__kmpc_dist_for_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_uint64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}
/*!
@}
*/

//-----------------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc    Source location
@param gtid   Global thread id
@param p_last Pointer to the "last iteration" flag
@param p_lb   Pointer to the lower bound
@param p_ub   Pointer to the upper bound
@param p_st   Step (or increment if you prefer)
@param incr   Loop increment
@param chunk  The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current team from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk for the distribute construct as part of
a composite distribute parallel loop construct.
These functions are all identical apart from the types of the arguments.
*/

void
__kmpc_team_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}
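
/*
 * Illustrative sketch (not part of the runtime): for
 * "dist_schedule(static, chunk)" this routine returns the team's first
 * chunk and the stride between chunks, so a compiler could iterate over
 * the team's chunks roughly as follows. The outer-loop shape is an
 * assumption.
 *
 *   kmp_int32 last = 0, lb = 0, ub = N - 1, st = 1;
 *   kmp_int32 upper = ub;                        // global upper bound
 *   __kmpc_team_static_init_4( &loc, gtid, &last, &lb, &ub, &st, 1, chunk );
 *   for ( ; lb <= upper; lb += st, ub += st ) {  // walk the team's chunks
 *       kmp_int32 ub0 = ub < upper ? ub : upper;
 *       // ... run the parallel loop over [lb, ub0] ...
 *   }
 */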

/*!
See @ref __kmpc_team_static_init_4
*/
void
__kmpc_team_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
See @ref __kmpc_team_static_init_4
*/
void
__kmpc_team_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
See @ref __kmpc_team_static_init_4
*/
void
__kmpc_team_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}
/*!
@}
*/

} // extern "C"