1 /* 2 * kmp_sched.cpp -- static scheduling -- iteration initialization 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 /* Static scheduling initialization. 14 15 NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however 16 it may change values between parallel regions. __kmp_max_nth 17 is the largest value __kmp_nth may take, 1 is the smallest. */ 18 19 #include "kmp.h" 20 #include "kmp_error.h" 21 #include "kmp_i18n.h" 22 #include "kmp_itt.h" 23 #include "kmp_stats.h" 24 #include "kmp_str.h" 25 26 #if OMPT_SUPPORT 27 #include "ompt-specific.h" 28 #endif 29 30 #ifdef KMP_DEBUG 31 //------------------------------------------------------------------------- 32 // template for debug prints specification ( d, u, lld, llu ) 33 char const *traits_t<int>::spec = "d"; 34 char const *traits_t<unsigned int>::spec = "u"; 35 char const *traits_t<long long>::spec = "lld"; 36 char const *traits_t<unsigned long long>::spec = "llu"; 37 char const *traits_t<long>::spec = "ld"; 38 //------------------------------------------------------------------------- 39 #endif 40 41 #if KMP_STATS_ENABLED 42 #define KMP_STATS_LOOP_END(stat) \ 43 { \ 44 kmp_int64 t; \ 45 kmp_int64 u = (kmp_int64)(*pupper); \ 46 kmp_int64 l = (kmp_int64)(*plower); \ 47 kmp_int64 i = (kmp_int64)incr; \ 48 if (i == 1) { \ 49 t = u - l + 1; \ 50 } else if (i == -1) { \ 51 t = l - u + 1; \ 52 } else if (i > 0) { \ 53 t = (u - l) / i + 1; \ 54 } else { \ 55 t = (l - u) / (-i) + 1; \ 56 } \ 57 KMP_COUNT_VALUE(stat, t); \ 58 KMP_POP_PARTITIONED_TIMER(); \ 59 } 60 #else 61 #define KMP_STATS_LOOP_END(stat) /* Nothing */ 62 #endif 63 64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; 65 static inline void check_loc(ident_t *&loc) { 66 if (loc == NULL) 67 loc = &loc_stub; // may need to report location info to ittnotify 68 } 69 70 template <typename T> 71 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid, 72 kmp_int32 schedtype, kmp_int32 *plastiter, 73 T *plower, T *pupper, 74 typename traits_t<T>::signed_t *pstride, 75 typename traits_t<T>::signed_t incr, 76 typename traits_t<T>::signed_t chunk 77 #if OMPT_SUPPORT && OMPT_OPTIONAL 78 , 79 void *codeptr 80 #endif 81 ) { 82 KMP_COUNT_BLOCK(OMP_LOOP_STATIC); 83 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static); 84 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling); 85 86 typedef typename traits_t<T>::unsigned_t UT; 87 typedef typename traits_t<T>::signed_t ST; 88 /* this all has to be changed back to TID and such.. */ 89 kmp_int32 gtid = global_tid; 90 kmp_uint32 tid; 91 kmp_uint32 nth; 92 UT trip_count; 93 kmp_team_t *team; 94 __kmp_assert_valid_gtid(gtid); 95 kmp_info_t *th = __kmp_threads[gtid]; 96 97 #if OMPT_SUPPORT && OMPT_OPTIONAL 98 ompt_team_info_t *team_info = NULL; 99 ompt_task_info_t *task_info = NULL; 100 ompt_work_t ompt_work_type = ompt_work_loop; 101 102 static kmp_int8 warn = 0; 103 104 if (ompt_enabled.ompt_callback_work) { 105 // Only fully initialize variables needed by OMPT if OMPT is enabled. 106 team_info = __ompt_get_teaminfo(0, NULL); 107 task_info = __ompt_get_task_info_object(0); 108 // Determine workshare type 109 if (loc != NULL) { 110 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 111 ompt_work_type = ompt_work_loop; 112 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 113 ompt_work_type = ompt_work_sections; 114 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 115 ompt_work_type = ompt_work_distribute; 116 } else { 117 kmp_int8 bool_res = 118 KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1); 119 if (bool_res) 120 KMP_WARNING(OmptOutdatedWorkshare); 121 } 122 KMP_DEBUG_ASSERT(ompt_work_type); 123 } 124 } 125 #endif 126 127 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride); 128 KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid)); 129 #ifdef KMP_DEBUG 130 { 131 char *buff; 132 // create format specifiers before the debug output 133 buff = __kmp_str_format( 134 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," 135 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", 136 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec, 137 traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec); 138 KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper, 139 *pstride, incr, chunk)); 140 __kmp_str_free(&buff); 141 } 142 #endif 143 144 if (__kmp_env_consistency_check) { 145 __kmp_push_workshare(global_tid, ct_pdo, loc); 146 if (incr == 0) { 147 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, 148 loc); 149 } 150 } 151 /* special handling for zero-trip loops */ 152 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { 153 if (plastiter != NULL) 154 *plastiter = FALSE; 155 /* leave pupper and plower set to entire iteration space */ 156 *pstride = incr; /* value should never be used */ 157 // *plower = *pupper - incr; 158 // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) 159 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE 160 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009. 161 #ifdef KMP_DEBUG 162 { 163 char *buff; 164 // create format specifiers before the debug output 165 buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d " 166 "lower=%%%s upper=%%%s stride = %%%s " 167 "signed?<%s>, loc = %%s\n", 168 traits_t<T>::spec, traits_t<T>::spec, 169 traits_t<ST>::spec, traits_t<T>::spec); 170 KD_TRACE(100, 171 (buff, *plastiter, *plower, *pupper, *pstride, loc->psource)); 172 __kmp_str_free(&buff); 173 } 174 #endif 175 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 176 177 #if OMPT_SUPPORT && OMPT_OPTIONAL 178 if (ompt_enabled.ompt_callback_work) { 179 ompt_callbacks.ompt_callback(ompt_callback_work)( 180 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), 181 &(task_info->task_data), 0, codeptr); 182 } 183 #endif 184 KMP_STATS_LOOP_END(OMP_loop_static_iterations); 185 return; 186 } 187 188 // Although there are schedule enumerations above kmp_ord_upper which are not 189 // schedules for "distribute", the only ones which are useful are dynamic, so 190 // cannot be seen here, since this codepath is only executed for static 191 // schedules. 192 if (schedtype > kmp_ord_upper) { 193 // we are in DISTRIBUTE construct 194 schedtype += kmp_sch_static - 195 kmp_distribute_static; // AC: convert to usual schedule type 196 tid = th->th.th_team->t.t_master_tid; 197 team = th->th.th_team->t.t_parent; 198 } else { 199 tid = __kmp_tid_from_gtid(global_tid); 200 team = th->th.th_team; 201 } 202 203 /* determine if "for" loop is an active worksharing construct */ 204 if (team->t.t_serialized) { 205 /* serialized parallel, each thread executes whole iteration space */ 206 if (plastiter != NULL) 207 *plastiter = TRUE; 208 /* leave pupper and plower set to entire iteration space */ 209 *pstride = 210 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); 211 212 #ifdef KMP_DEBUG 213 { 214 char *buff; 215 // create format specifiers before the debug output 216 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " 217 "lower=%%%s upper=%%%s stride = %%%s\n", 218 traits_t<T>::spec, traits_t<T>::spec, 219 traits_t<ST>::spec); 220 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); 221 __kmp_str_free(&buff); 222 } 223 #endif 224 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 225 226 #if OMPT_SUPPORT && OMPT_OPTIONAL 227 if (ompt_enabled.ompt_callback_work) { 228 ompt_callbacks.ompt_callback(ompt_callback_work)( 229 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), 230 &(task_info->task_data), *pstride, codeptr); 231 } 232 #endif 233 KMP_STATS_LOOP_END(OMP_loop_static_iterations); 234 return; 235 } 236 nth = team->t.t_nproc; 237 if (nth == 1) { 238 if (plastiter != NULL) 239 *plastiter = TRUE; 240 *pstride = 241 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); 242 #ifdef KMP_DEBUG 243 { 244 char *buff; 245 // create format specifiers before the debug output 246 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " 247 "lower=%%%s upper=%%%s stride = %%%s\n", 248 traits_t<T>::spec, traits_t<T>::spec, 249 traits_t<ST>::spec); 250 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); 251 __kmp_str_free(&buff); 252 } 253 #endif 254 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 255 256 #if OMPT_SUPPORT && OMPT_OPTIONAL 257 if (ompt_enabled.ompt_callback_work) { 258 ompt_callbacks.ompt_callback(ompt_callback_work)( 259 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), 260 &(task_info->task_data), *pstride, codeptr); 261 } 262 #endif 263 KMP_STATS_LOOP_END(OMP_loop_static_iterations); 264 return; 265 } 266 267 /* compute trip count */ 268 if (incr == 1) { 269 trip_count = *pupper - *plower + 1; 270 } else if (incr == -1) { 271 trip_count = *plower - *pupper + 1; 272 } else if (incr > 0) { 273 // upper-lower can exceed the limit of signed type 274 trip_count = (UT)(*pupper - *plower) / incr + 1; 275 } else { 276 trip_count = (UT)(*plower - *pupper) / (-incr) + 1; 277 } 278 279 #if KMP_STATS_ENABLED 280 if (KMP_MASTER_GTID(gtid)) { 281 KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count); 282 } 283 #endif 284 285 if (__kmp_env_consistency_check) { 286 /* tripcount overflow? */ 287 if (trip_count == 0 && *pupper != *plower) { 288 __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, 289 loc); 290 } 291 } 292 293 /* compute remaining parameters */ 294 switch (schedtype) { 295 case kmp_sch_static: { 296 if (trip_count < nth) { 297 KMP_DEBUG_ASSERT( 298 __kmp_static == kmp_sch_static_greedy || 299 __kmp_static == 300 kmp_sch_static_balanced); // Unknown static scheduling type. 301 if (tid < trip_count) { 302 *pupper = *plower = *plower + tid * incr; 303 } else { 304 // set bounds so non-active threads execute no iterations 305 *plower = *pupper + (incr > 0 ? 1 : -1); 306 } 307 if (plastiter != NULL) 308 *plastiter = (tid == trip_count - 1); 309 } else { 310 if (__kmp_static == kmp_sch_static_balanced) { 311 UT small_chunk = trip_count / nth; 312 UT extras = trip_count % nth; 313 *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras)); 314 *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr); 315 if (plastiter != NULL) 316 *plastiter = (tid == nth - 1); 317 } else { 318 T big_chunk_inc_count = 319 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; 320 T old_upper = *pupper; 321 322 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); 323 // Unknown static scheduling type. 324 325 *plower += tid * big_chunk_inc_count; 326 *pupper = *plower + big_chunk_inc_count - incr; 327 if (incr > 0) { 328 if (*pupper < *plower) 329 *pupper = traits_t<T>::max_value; 330 if (plastiter != NULL) 331 *plastiter = *plower <= old_upper && *pupper > old_upper - incr; 332 if (*pupper > old_upper) 333 *pupper = old_upper; // tracker C73258 334 } else { 335 if (*pupper > *plower) 336 *pupper = traits_t<T>::min_value; 337 if (plastiter != NULL) 338 *plastiter = *plower >= old_upper && *pupper < old_upper - incr; 339 if (*pupper < old_upper) 340 *pupper = old_upper; // tracker C73258 341 } 342 } 343 } 344 *pstride = trip_count; 345 break; 346 } 347 case kmp_sch_static_chunked: { 348 ST span; 349 UT nchunks; 350 if (chunk < 1) 351 chunk = 1; 352 else if ((UT)chunk > trip_count) 353 chunk = trip_count; 354 nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0); 355 span = chunk * incr; 356 if (nchunks < nth) { 357 *pstride = span * nchunks; 358 if (tid < nchunks) { 359 *plower = *plower + (span * tid); 360 *pupper = *plower + span - incr; 361 } else { 362 *plower = *pupper + (incr > 0 ? 1 : -1); 363 } 364 } else { 365 *pstride = span * nth; 366 *plower = *plower + (span * tid); 367 *pupper = *plower + span - incr; 368 } 369 if (plastiter != NULL) 370 *plastiter = (tid == (nchunks - 1) % nth); 371 break; 372 } 373 case kmp_sch_static_balanced_chunked: { 374 T old_upper = *pupper; 375 // round up to make sure the chunk is enough to cover all iterations 376 UT span = (trip_count + nth - 1) / nth; 377 378 // perform chunk adjustment 379 chunk = (span + chunk - 1) & ~(chunk - 1); 380 381 span = chunk * incr; 382 *plower = *plower + (span * tid); 383 *pupper = *plower + span - incr; 384 if (incr > 0) { 385 if (*pupper > old_upper) 386 *pupper = old_upper; 387 } else if (*pupper < old_upper) 388 *pupper = old_upper; 389 390 if (plastiter != NULL) 391 *plastiter = (tid == ((trip_count - 1) / (UT)chunk)); 392 break; 393 } 394 default: 395 KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type"); 396 break; 397 } 398 399 #if USE_ITT_BUILD 400 // Report loop metadata 401 if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && 402 __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL && 403 team->t.t_active_level == 1) { 404 kmp_uint64 cur_chunk = chunk; 405 check_loc(loc); 406 // Calculate chunk in case it was not specified; it is specified for 407 // kmp_sch_static_chunked 408 if (schedtype == kmp_sch_static) { 409 cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0); 410 } 411 // 0 - "static" schedule 412 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); 413 } 414 #endif 415 #ifdef KMP_DEBUG 416 { 417 char *buff; 418 // create format specifiers before the debug output 419 buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s " 420 "upper=%%%s stride = %%%s signed?<%s>\n", 421 traits_t<T>::spec, traits_t<T>::spec, 422 traits_t<ST>::spec, traits_t<T>::spec); 423 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); 424 __kmp_str_free(&buff); 425 } 426 #endif 427 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 428 429 #if OMPT_SUPPORT && OMPT_OPTIONAL 430 if (ompt_enabled.ompt_callback_work) { 431 ompt_callbacks.ompt_callback(ompt_callback_work)( 432 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), 433 &(task_info->task_data), trip_count, codeptr); 434 } 435 #endif 436 437 KMP_STATS_LOOP_END(OMP_loop_static_iterations); 438 return; 439 } 440 441 template <typename T> 442 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid, 443 kmp_int32 schedule, kmp_int32 *plastiter, 444 T *plower, T *pupper, T *pupperDist, 445 typename traits_t<T>::signed_t *pstride, 446 typename traits_t<T>::signed_t incr, 447 typename traits_t<T>::signed_t chunk) { 448 KMP_COUNT_BLOCK(OMP_DISTRIBUTE); 449 KMP_PUSH_PARTITIONED_TIMER(OMP_distribute); 450 KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling); 451 typedef typename traits_t<T>::unsigned_t UT; 452 typedef typename traits_t<T>::signed_t ST; 453 kmp_uint32 tid; 454 kmp_uint32 nth; 455 kmp_uint32 team_id; 456 kmp_uint32 nteams; 457 UT trip_count; 458 kmp_team_t *team; 459 kmp_info_t *th; 460 461 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride); 462 KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); 463 __kmp_assert_valid_gtid(gtid); 464 #ifdef KMP_DEBUG 465 { 466 char *buff; 467 // create format specifiers before the debug output 468 buff = __kmp_str_format( 469 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d " 470 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", 471 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec, 472 traits_t<ST>::spec, traits_t<T>::spec); 473 KD_TRACE(100, 474 (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk)); 475 __kmp_str_free(&buff); 476 } 477 #endif 478 479 if (__kmp_env_consistency_check) { 480 __kmp_push_workshare(gtid, ct_pdo, loc); 481 if (incr == 0) { 482 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, 483 loc); 484 } 485 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { 486 // The loop is illegal. 487 // Some zero-trip loops maintained by compiler, e.g.: 488 // for(i=10;i<0;++i) // lower >= upper - run-time check 489 // for(i=0;i>10;--i) // lower <= upper - run-time check 490 // for(i=0;i>10;++i) // incr > 0 - compile-time check 491 // for(i=10;i<0;--i) // incr < 0 - compile-time check 492 // Compiler does not check the following illegal loops: 493 // for(i=0;i<10;i+=incr) // where incr<0 494 // for(i=10;i>0;i-=incr) // where incr<0 495 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); 496 } 497 } 498 tid = __kmp_tid_from_gtid(gtid); 499 th = __kmp_threads[gtid]; 500 nth = th->th.th_team_nproc; 501 team = th->th.th_team; 502 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct 503 nteams = th->th.th_teams_size.nteams; 504 team_id = team->t.t_master_tid; 505 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc); 506 507 // compute global trip count 508 if (incr == 1) { 509 trip_count = *pupper - *plower + 1; 510 } else if (incr == -1) { 511 trip_count = *plower - *pupper + 1; 512 } else if (incr > 0) { 513 // upper-lower can exceed the limit of signed type 514 trip_count = (UT)(*pupper - *plower) / incr + 1; 515 } else { 516 trip_count = (UT)(*plower - *pupper) / (-incr) + 1; 517 } 518 519 *pstride = *pupper - *plower; // just in case (can be unused) 520 if (trip_count <= nteams) { 521 KMP_DEBUG_ASSERT( 522 __kmp_static == kmp_sch_static_greedy || 523 __kmp_static == 524 kmp_sch_static_balanced); // Unknown static scheduling type. 525 // only primary threads of some teams get single iteration, other threads 526 // get nothing 527 if (team_id < trip_count && tid == 0) { 528 *pupper = *pupperDist = *plower = *plower + team_id * incr; 529 } else { 530 *pupperDist = *pupper; 531 *plower = *pupper + incr; // compiler should skip loop body 532 } 533 if (plastiter != NULL) 534 *plastiter = (tid == 0 && team_id == trip_count - 1); 535 } else { 536 // Get the team's chunk first (each team gets at most one chunk) 537 if (__kmp_static == kmp_sch_static_balanced) { 538 UT chunkD = trip_count / nteams; 539 UT extras = trip_count % nteams; 540 *plower += 541 incr * (team_id * chunkD + (team_id < extras ? team_id : extras)); 542 *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr); 543 if (plastiter != NULL) 544 *plastiter = (team_id == nteams - 1); 545 } else { 546 T chunk_inc_count = 547 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; 548 T upper = *pupper; 549 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); 550 // Unknown static scheduling type. 551 *plower += team_id * chunk_inc_count; 552 *pupperDist = *plower + chunk_inc_count - incr; 553 // Check/correct bounds if needed 554 if (incr > 0) { 555 if (*pupperDist < *plower) 556 *pupperDist = traits_t<T>::max_value; 557 if (plastiter != NULL) 558 *plastiter = *plower <= upper && *pupperDist > upper - incr; 559 if (*pupperDist > upper) 560 *pupperDist = upper; // tracker C73258 561 if (*plower > *pupperDist) { 562 *pupper = *pupperDist; // no iterations available for the team 563 goto end; 564 } 565 } else { 566 if (*pupperDist > *plower) 567 *pupperDist = traits_t<T>::min_value; 568 if (plastiter != NULL) 569 *plastiter = *plower >= upper && *pupperDist < upper - incr; 570 if (*pupperDist < upper) 571 *pupperDist = upper; // tracker C73258 572 if (*plower < *pupperDist) { 573 *pupper = *pupperDist; // no iterations available for the team 574 goto end; 575 } 576 } 577 } 578 // Get the parallel loop chunk now (for thread) 579 // compute trip count for team's chunk 580 if (incr == 1) { 581 trip_count = *pupperDist - *plower + 1; 582 } else if (incr == -1) { 583 trip_count = *plower - *pupperDist + 1; 584 } else if (incr > 1) { 585 // upper-lower can exceed the limit of signed type 586 trip_count = (UT)(*pupperDist - *plower) / incr + 1; 587 } else { 588 trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; 589 } 590 KMP_DEBUG_ASSERT(trip_count); 591 switch (schedule) { 592 case kmp_sch_static: { 593 if (trip_count <= nth) { 594 KMP_DEBUG_ASSERT( 595 __kmp_static == kmp_sch_static_greedy || 596 __kmp_static == 597 kmp_sch_static_balanced); // Unknown static scheduling type. 598 if (tid < trip_count) 599 *pupper = *plower = *plower + tid * incr; 600 else 601 *plower = *pupper + incr; // no iterations available 602 if (plastiter != NULL) 603 if (*plastiter != 0 && !(tid == trip_count - 1)) 604 *plastiter = 0; 605 } else { 606 if (__kmp_static == kmp_sch_static_balanced) { 607 UT chunkL = trip_count / nth; 608 UT extras = trip_count % nth; 609 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); 610 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); 611 if (plastiter != NULL) 612 if (*plastiter != 0 && !(tid == nth - 1)) 613 *plastiter = 0; 614 } else { 615 T chunk_inc_count = 616 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; 617 T upper = *pupperDist; 618 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); 619 // Unknown static scheduling type. 620 *plower += tid * chunk_inc_count; 621 *pupper = *plower + chunk_inc_count - incr; 622 if (incr > 0) { 623 if (*pupper < *plower) 624 *pupper = traits_t<T>::max_value; 625 if (plastiter != NULL) 626 if (*plastiter != 0 && 627 !(*plower <= upper && *pupper > upper - incr)) 628 *plastiter = 0; 629 if (*pupper > upper) 630 *pupper = upper; // tracker C73258 631 } else { 632 if (*pupper > *plower) 633 *pupper = traits_t<T>::min_value; 634 if (plastiter != NULL) 635 if (*plastiter != 0 && 636 !(*plower >= upper && *pupper < upper - incr)) 637 *plastiter = 0; 638 if (*pupper < upper) 639 *pupper = upper; // tracker C73258 640 } 641 } 642 } 643 break; 644 } 645 case kmp_sch_static_chunked: { 646 ST span; 647 if (chunk < 1) 648 chunk = 1; 649 span = chunk * incr; 650 *pstride = span * nth; 651 *plower = *plower + (span * tid); 652 *pupper = *plower + span - incr; 653 if (plastiter != NULL) 654 if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth)) 655 *plastiter = 0; 656 break; 657 } 658 default: 659 KMP_ASSERT2(0, 660 "__kmpc_dist_for_static_init: unknown loop scheduling type"); 661 break; 662 } 663 } 664 end:; 665 #ifdef KMP_DEBUG 666 { 667 char *buff; 668 // create format specifiers before the debug output 669 buff = __kmp_str_format( 670 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s " 671 "stride=%%%s signed?<%s>\n", 672 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec, 673 traits_t<ST>::spec, traits_t<T>::spec); 674 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride)); 675 __kmp_str_free(&buff); 676 } 677 #endif 678 KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid)); 679 KMP_STATS_LOOP_END(OMP_distribute_iterations); 680 return; 681 } 682 683 template <typename T> 684 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid, 685 kmp_int32 *p_last, T *p_lb, T *p_ub, 686 typename traits_t<T>::signed_t *p_st, 687 typename traits_t<T>::signed_t incr, 688 typename traits_t<T>::signed_t chunk) { 689 // The routine returns the first chunk distributed to the team and 690 // stride for next chunks calculation. 691 // Last iteration flag set for the team that will execute 692 // the last iteration of the loop. 693 // The routine is called for dist_schedule(static,chunk) only. 694 typedef typename traits_t<T>::unsigned_t UT; 695 typedef typename traits_t<T>::signed_t ST; 696 kmp_uint32 team_id; 697 kmp_uint32 nteams; 698 UT trip_count; 699 T lower; 700 T upper; 701 ST span; 702 kmp_team_t *team; 703 kmp_info_t *th; 704 705 KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st); 706 KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid)); 707 __kmp_assert_valid_gtid(gtid); 708 #ifdef KMP_DEBUG 709 { 710 char *buff; 711 // create format specifiers before the debug output 712 buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d " 713 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", 714 traits_t<T>::spec, traits_t<T>::spec, 715 traits_t<ST>::spec, traits_t<ST>::spec, 716 traits_t<T>::spec); 717 KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk)); 718 __kmp_str_free(&buff); 719 } 720 #endif 721 722 lower = *p_lb; 723 upper = *p_ub; 724 if (__kmp_env_consistency_check) { 725 if (incr == 0) { 726 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, 727 loc); 728 } 729 if (incr > 0 ? (upper < lower) : (lower < upper)) { 730 // The loop is illegal. 731 // Some zero-trip loops maintained by compiler, e.g.: 732 // for(i=10;i<0;++i) // lower >= upper - run-time check 733 // for(i=0;i>10;--i) // lower <= upper - run-time check 734 // for(i=0;i>10;++i) // incr > 0 - compile-time check 735 // for(i=10;i<0;--i) // incr < 0 - compile-time check 736 // Compiler does not check the following illegal loops: 737 // for(i=0;i<10;i+=incr) // where incr<0 738 // for(i=10;i>0;i-=incr) // where incr<0 739 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); 740 } 741 } 742 th = __kmp_threads[gtid]; 743 team = th->th.th_team; 744 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct 745 nteams = th->th.th_teams_size.nteams; 746 team_id = team->t.t_master_tid; 747 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc); 748 749 // compute trip count 750 if (incr == 1) { 751 trip_count = upper - lower + 1; 752 } else if (incr == -1) { 753 trip_count = lower - upper + 1; 754 } else if (incr > 0) { 755 // upper-lower can exceed the limit of signed type 756 trip_count = (UT)(upper - lower) / incr + 1; 757 } else { 758 trip_count = (UT)(lower - upper) / (-incr) + 1; 759 } 760 if (chunk < 1) 761 chunk = 1; 762 span = chunk * incr; 763 *p_st = span * nteams; 764 *p_lb = lower + (span * team_id); 765 *p_ub = *p_lb + span - incr; 766 if (p_last != NULL) 767 *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams); 768 // Correct upper bound if needed 769 if (incr > 0) { 770 if (*p_ub < *p_lb) // overflow? 771 *p_ub = traits_t<T>::max_value; 772 if (*p_ub > upper) 773 *p_ub = upper; // tracker C73258 774 } else { // incr < 0 775 if (*p_ub > *p_lb) 776 *p_ub = traits_t<T>::min_value; 777 if (*p_ub < upper) 778 *p_ub = upper; // tracker C73258 779 } 780 #ifdef KMP_DEBUG 781 { 782 char *buff; 783 // create format specifiers before the debug output 784 buff = 785 __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d " 786 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", 787 traits_t<T>::spec, traits_t<T>::spec, 788 traits_t<ST>::spec, traits_t<ST>::spec); 789 KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk)); 790 __kmp_str_free(&buff); 791 } 792 #endif 793 } 794 795 //------------------------------------------------------------------------------ 796 extern "C" { 797 /*! 798 @ingroup WORK_SHARING 799 @param loc Source code location 800 @param gtid Global thread id of this thread 801 @param schedtype Scheduling type 802 @param plastiter Pointer to the "last iteration" flag 803 @param plower Pointer to the lower bound 804 @param pupper Pointer to the upper bound 805 @param pstride Pointer to the stride 806 @param incr Loop increment 807 @param chunk The chunk size 808 809 Each of the four functions here are identical apart from the argument types. 810 811 The functions compute the upper and lower bounds and stride to be used for the 812 set of iterations to be executed by the current thread from the statically 813 scheduled loop that is described by the initial values of the bounds, stride, 814 increment and chunk size. 815 816 @{ 817 */ 818 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, 819 kmp_int32 *plastiter, kmp_int32 *plower, 820 kmp_int32 *pupper, kmp_int32 *pstride, 821 kmp_int32 incr, kmp_int32 chunk) { 822 __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower, 823 pupper, pstride, incr, chunk 824 #if OMPT_SUPPORT && OMPT_OPTIONAL 825 , 826 OMPT_GET_RETURN_ADDRESS(0) 827 #endif 828 ); 829 } 830 831 /*! 832 See @ref __kmpc_for_static_init_4 833 */ 834 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, 835 kmp_int32 schedtype, kmp_int32 *plastiter, 836 kmp_uint32 *plower, kmp_uint32 *pupper, 837 kmp_int32 *pstride, kmp_int32 incr, 838 kmp_int32 chunk) { 839 __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower, 840 pupper, pstride, incr, chunk 841 #if OMPT_SUPPORT && OMPT_OPTIONAL 842 , 843 OMPT_GET_RETURN_ADDRESS(0) 844 #endif 845 ); 846 } 847 848 /*! 849 See @ref __kmpc_for_static_init_4 850 */ 851 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, 852 kmp_int32 *plastiter, kmp_int64 *plower, 853 kmp_int64 *pupper, kmp_int64 *pstride, 854 kmp_int64 incr, kmp_int64 chunk) { 855 __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower, 856 pupper, pstride, incr, chunk 857 #if OMPT_SUPPORT && OMPT_OPTIONAL 858 , 859 OMPT_GET_RETURN_ADDRESS(0) 860 #endif 861 ); 862 } 863 864 /*! 865 See @ref __kmpc_for_static_init_4 866 */ 867 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, 868 kmp_int32 schedtype, kmp_int32 *plastiter, 869 kmp_uint64 *plower, kmp_uint64 *pupper, 870 kmp_int64 *pstride, kmp_int64 incr, 871 kmp_int64 chunk) { 872 __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower, 873 pupper, pstride, incr, chunk 874 #if OMPT_SUPPORT && OMPT_OPTIONAL 875 , 876 OMPT_GET_RETURN_ADDRESS(0) 877 #endif 878 ); 879 } 880 /*! 881 @} 882 */ 883 884 /*! 885 @ingroup WORK_SHARING 886 @param loc Source code location 887 @param gtid Global thread id of this thread 888 @param schedule Scheduling type for the parallel loop 889 @param plastiter Pointer to the "last iteration" flag 890 @param plower Pointer to the lower bound 891 @param pupper Pointer to the upper bound of loop chunk 892 @param pupperD Pointer to the upper bound of dist_chunk 893 @param pstride Pointer to the stride for parallel loop 894 @param incr Loop increment 895 @param chunk The chunk size for the parallel loop 896 897 Each of the four functions here are identical apart from the argument types. 898 899 The functions compute the upper and lower bounds and strides to be used for the 900 set of iterations to be executed by the current thread from the statically 901 scheduled loop that is described by the initial values of the bounds, strides, 902 increment and chunks for parallel loop and distribute constructs. 903 904 @{ 905 */ 906 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, 907 kmp_int32 schedule, kmp_int32 *plastiter, 908 kmp_int32 *plower, kmp_int32 *pupper, 909 kmp_int32 *pupperD, kmp_int32 *pstride, 910 kmp_int32 incr, kmp_int32 chunk) { 911 __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower, 912 pupper, pupperD, pstride, incr, chunk); 913 } 914 915 /*! 916 See @ref __kmpc_dist_for_static_init_4 917 */ 918 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, 919 kmp_int32 schedule, kmp_int32 *plastiter, 920 kmp_uint32 *plower, kmp_uint32 *pupper, 921 kmp_uint32 *pupperD, kmp_int32 *pstride, 922 kmp_int32 incr, kmp_int32 chunk) { 923 __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower, 924 pupper, pupperD, pstride, incr, chunk); 925 } 926 927 /*! 928 See @ref __kmpc_dist_for_static_init_4 929 */ 930 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, 931 kmp_int32 schedule, kmp_int32 *plastiter, 932 kmp_int64 *plower, kmp_int64 *pupper, 933 kmp_int64 *pupperD, kmp_int64 *pstride, 934 kmp_int64 incr, kmp_int64 chunk) { 935 __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower, 936 pupper, pupperD, pstride, incr, chunk); 937 } 938 939 /*! 940 See @ref __kmpc_dist_for_static_init_4 941 */ 942 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, 943 kmp_int32 schedule, kmp_int32 *plastiter, 944 kmp_uint64 *plower, kmp_uint64 *pupper, 945 kmp_uint64 *pupperD, kmp_int64 *pstride, 946 kmp_int64 incr, kmp_int64 chunk) { 947 __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower, 948 pupper, pupperD, pstride, incr, chunk); 949 } 950 /*! 951 @} 952 */ 953 954 //------------------------------------------------------------------------------ 955 // Auxiliary routines for Distribute Parallel Loop construct implementation 956 // Transfer call to template< type T > 957 // __kmp_team_static_init( ident_t *loc, int gtid, 958 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) 959 960 /*! 961 @ingroup WORK_SHARING 962 @{ 963 @param loc Source location 964 @param gtid Global thread id 965 @param p_last pointer to last iteration flag 966 @param p_lb pointer to Lower bound 967 @param p_ub pointer to Upper bound 968 @param p_st Step (or increment if you prefer) 969 @param incr Loop increment 970 @param chunk The chunk size to block with 971 972 The functions compute the upper and lower bounds and stride to be used for the 973 set of iterations to be executed by the current team from the statically 974 scheduled loop that is described by the initial values of the bounds, stride, 975 increment and chunk for the distribute construct as part of composite distribute 976 parallel loop construct. These functions are all identical apart from the types 977 of the arguments. 978 */ 979 980 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 981 kmp_int32 *p_lb, kmp_int32 *p_ub, 982 kmp_int32 *p_st, kmp_int32 incr, 983 kmp_int32 chunk) { 984 KMP_DEBUG_ASSERT(__kmp_init_serial); 985 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 986 chunk); 987 } 988 989 /*! 990 See @ref __kmpc_team_static_init_4 991 */ 992 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 993 kmp_uint32 *p_lb, kmp_uint32 *p_ub, 994 kmp_int32 *p_st, kmp_int32 incr, 995 kmp_int32 chunk) { 996 KMP_DEBUG_ASSERT(__kmp_init_serial); 997 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 998 chunk); 999 } 1000 1001 /*! 1002 See @ref __kmpc_team_static_init_4 1003 */ 1004 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 1005 kmp_int64 *p_lb, kmp_int64 *p_ub, 1006 kmp_int64 *p_st, kmp_int64 incr, 1007 kmp_int64 chunk) { 1008 KMP_DEBUG_ASSERT(__kmp_init_serial); 1009 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 1010 chunk); 1011 } 1012 1013 /*! 1014 See @ref __kmpc_team_static_init_4 1015 */ 1016 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 1017 kmp_uint64 *p_lb, kmp_uint64 *p_ub, 1018 kmp_int64 *p_st, kmp_int64 incr, 1019 kmp_int64 chunk) { 1020 KMP_DEBUG_ASSERT(__kmp_init_serial); 1021 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 1022 chunk); 1023 } 1024 /*! 1025 @} 1026 */ 1027 1028 } // extern "C" 1029