/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
         it may change values between parallel regions.  __kmp_max_nth
         is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
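  // The offset arithmetic below assumes the sched_type enumeration in kmp.h
  // lays the distribute values out parallel to the plain static ones (chunked
  // immediately before non-chunked in both groups), so adding
  // (kmp_sch_static - kmp_distribute_static) maps kmp_distribute_static to
  // kmp_sch_static and kmp_distribute_static_chunked to
  // kmp_sch_static_chunked.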
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

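  // Worked example for the trip-count formula above (illustration only): an
  // inclusive range lower=0, upper=9 with incr=2 covers 0,2,4,6,8, i.e.
  // (9 - 0) / 2 + 1 = 5 iterations; lower=9, upper=0 with incr=-2 likewise
  // gives (9 - 0) / 2 + 1 = 5 iterations (9,7,5,3,1).
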
  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
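  // For the chunked schedule below, chunks of `chunk` iterations are dealt out
  // round-robin: thread tid owns chunks tid, tid + nth, tid + 2 * nth, ...
  // The routine returns the bounds of the thread's first chunk in
  // *plower/*pupper and sets *pstride to the offset a compiler-generated outer
  // loop would add to advance to the thread's next chunk.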
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
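      // Greedy split across teams: each team takes ceil(trip_count / nteams)
      // consecutive iterations. For example, trip_count = 10 and nteams = 3
      // give chunk_inc_count = 4 * incr, so the teams cover 4, 4 and 2
      // iterations (the last team's bound is clamped just below).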
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
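          // Same greedy split as the team-level code above, now over the nth
          // threads of the team: each thread takes ceil(trip_count / nth)
          // consecutive iterations of the team's chunk.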
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

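// On return from __kmp_dist_for_static_init, *plower/*pupperDist bound the
// team's distribute chunk while *plower/*pupper bound the calling thread's
// share of that chunk; compiler-generated code would typically run the inner
// parallel loop over [*plower, *pupper] and use *pupperDist only to bound the
// enclosing distribute iteration.
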
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedtype Scheduling type
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound
@param    pstride   Pointer to the stride
@param    incr      Loop increment
@param    chunk     The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

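// Illustrative sketch of how a compiler typically consumes this entry point
// when lowering "#pragma omp for schedule(static)" over i = 0..N-1; the
// outlined form, the variable names and the clamp are hypothetical and
// compiler-specific, not part of this file:
//
//   kmp_int32 last = 0, lb = 0, ub = N - 1, st = 1;
//   __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lb, &ub, &st,
//                            /*incr=*/1, /*chunk=*/1);
//   if (ub > N - 1)
//     ub = N - 1; // defensive clamp to the original upper bound
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(loc, gtid);
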
/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedule  Scheduling type for the parallel loop
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound of loop chunk
@param    pupperD   Pointer to the upper bound of dist_chunk
@param    pstride   Pointer to the stride for parallel loop
@param    incr      Loop increment
@param    chunk     The chunk size for the parallel loop

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//   Transfer call to template< type T >
//   __kmp_team_static_init( ident_t *loc, int gtid,
//       int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
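
// Illustrative only (names hypothetical, the exact lowering is
// compiler-specific): for dist_schedule(static, chunk) over i = 0..N-1 the
// team's encountering thread can obtain its first distribute chunk as
//
//   kmp_int32 last = 0, lb = 0, ub = N - 1, st = 1;
//   __kmpc_team_static_init_4(loc, gtid, &last, &lb, &ub, &st, /*incr=*/1,
//                             /*chunk=*/chunk);
//   // lb/ub now bound this team's first chunk, st is the distance to the
//   // team's next chunk, and last is set for the team owning the final chunk.
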
/*!
@}
*/

} // extern "C"