/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// Definitions of the per-type printf specifiers declared by traits_t;
// used to build debug format strings at runtime.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
//-------------------------------------------------------------------------
#endif

// Compute the iteration sub-range (lower/upper bounds and stride) that the
// calling thread executes for a statically scheduled loop.
//
//   loc        - source location info (also carries workshare-type flags)
//   global_tid - global thread id of the caller
//   schedtype  - static schedule kind (may be a "distribute" kind, converted
//                below)
//   plastiter  - out: nonzero if this thread runs the last iteration
//   plower     - in/out: loop lower bound, adjusted to this thread's chunk
//   pupper     - in/out: loop upper bound, adjusted to this thread's chunk
//   pstride    - out: stride to advance between chunks
//   incr       - loop increment (must be nonzero)
//   chunk      - chunk size for chunked schedules
//   codeptr    - (OMPT only) return address for tool callbacks
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_FOR_static);
  KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_type_t ompt_work_type = ompt_work_loop;

  // Warn at most once per process about an outdated compiler that did not
  // set a workshare-type flag in loc->flags.
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_COUNT_VALUE(FOR_static_iterations, 0);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct: partition among teams, so use the
    // parent team and this thread's position (master tid) within it
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    // Single-thread team: same as the serialized case above.
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
  KMP_COUNT_VALUE(FOR_static_iterations, trip_count);

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      // Fewer iterations than threads: each of the first trip_count threads
      // gets exactly one iteration; the rest get an empty range.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: the first `extras` threads get one extra iteration.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: all threads get ceil(trip_count/nth) iterations; the upper
        // bound of the final chunk is clipped back to the loop's bound.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          // *pupper < *plower means the addition wrapped: clamp to type max.
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): the mask form assumes chunk is a power of two here —
    // confirm against the callers that use this schedule kind.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  return;
}

// Compute the iteration sub-ranges for a combined "distribute parallel for":
// first the team's chunk of the whole iteration space (returned via
// pupperDist), then this thread's chunk within the team's chunk (returned via
// plower/pupper/pstride). Called from the __kmpc_dist_for_static_init_*
// entry points; requires being inside a teams construct.
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: the first `extras` teams get one extra iteration.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: every team gets ceil(trip_count/nteams) iterations.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        // *pupperDist < *plower means the addition wrapped: clamp to max.
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // Only clear the last-iteration flag here; it may already have been
        // set to 0 by the team-level distribution above.
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  // Snapshot the incoming bounds; p_lb/p_ub are overwritten below.
  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  // Normalize a non-positive chunk to 1 before computing the span.
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype  Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb  pointer to Lower bound
@param p_ub  pointer to Upper bound
@param p_st  Step (or increment if you prefer)
@param incr  Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of composite distribute
parallel loop construct. These functions are all identical apart from the types
of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"