/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
         it may change values between parallel regions. __kmp_max_nth
         is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
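  // The conversion below relies on kmp_distribute_static_chunked and
  // kmp_distribute_static having the same relative order in the sched_type
  // enum as kmp_sch_static_chunked and kmp_sch_static, so
  // kmp_distribute_static maps to kmp_sch_static and
  // kmp_distribute_static_chunked maps to kmp_sch_static_chunked, reusing
  // the ordinary static code paths.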
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
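  // Example: lower=0, upper=9, incr=2 gives trip_count = (9 - 0) / 2 + 1 = 5
  // (iterations {0,2,4,6,8}); lower=9, upper=0, incr=-3 gives
  // trip_count = (9 - 0) / 3 + 1 = 4 (iterations {9,6,3,0}).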

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
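  // Example for kmp_sch_static above (incr=1, lower=0): trip_count=10, nth=4
  // under balanced scheduling gives small_chunk=2, extras=2, so tids 0..3 get
  // 3, 3, 2, 2 iterations ([0,2], [3,5], [6,7], [8,9]); under greedy
  // scheduling each tid gets a block of ceil(10/4)=3 iterations and tid 3's
  // upper bound is clipped back to old_upper.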
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
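  // Example for kmp_sch_static_chunked above (incr=1, chunk=2, nth=3,
  // lower=0): tid 0 gets [0,1], tid 1 gets [2,3], tid 2 gets [4,5], and each
  // thread advances by *pstride=6 to its next chunk ({6,7} for tid 0, etc.).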
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);
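    // The bit trick above rounds span up to a multiple of chunk; it is exact
    // only when chunk is a power of two, which is the expected case here
    // (this schedule carries the simd width as the chunk).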

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr<0
      //   for(i=10;i>0;i-=incr)  // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
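  // Example: trip_count=100, nteams=4 under balanced scheduling gives each
  // team a 25-iteration dist chunk below; the switch on 'schedule' further
  // down then subdivides that chunk among the team's nth threads.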
  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the masters of some teams get a single iteration; other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr<0
      //   for(i=10;i>0;i-=incr)  // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
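  // Example: lower=0, upper=9, incr=1, chunk=4, nteams=2 gives span=4 and
  // *p_st=8; team 0 starts at [0,3] and team 1 at [4,7]. With trip_count=10,
  // (10-1)/4 % 2 == 0, so team 0 owns the last iteration (it reaches [8,9]
  // once the compiler-generated chunk loop adds the stride and clips to the
  // original upper bound).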
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.
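
As a sketch (illustrative only; the exact sequence a compiler emits varies),
a loop such as

@code
#pragma omp for schedule(static)
for (kmp_int32 i = lo; i <= hi; ++i)
  body(i);
@endcode

is typically lowered to something like

@code
kmp_int32 lower = lo, upper = hi, stride = 1, last = 0;
__kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower, &upper,
                         &stride, 1, 1);
for (kmp_int32 i = lower; i <= upper; ++i)
  body(i);
__kmpc_for_static_fini(&loc, gtid);
@endcode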

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                              kmp_int32 schedtype, kmp_int32 *plastiter,
                              kmp_int32 *plower, kmp_int32 *pupper,
                              kmp_int32 *pstride, kmp_int32 incr,
                              kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                              kmp_int32 schedtype, kmp_int32 *plastiter,
                              kmp_int64 *plower, kmp_int64 *pupper,
                              kmp_int64 *pstride, kmp_int64 incr,
                              kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter,
                                         plower, pupper, pupperD, pstride,
                                         incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter,
                                         plower, pupper, pupperD, pstride,
                                         incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//   Transfer call to template< type T >
//   __kmp_team_static_init( ident_t *loc, int gtid,
//       int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                kmp_int32 *p_last, kmp_uint32 *p_lb,
                                kmp_uint32 *p_ub, kmp_int32 *p_st,
                                kmp_int32 incr, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                kmp_int32 *p_last, kmp_uint64 *p_lb,
                                kmp_uint64 *p_ub, kmp_int64 *p_st,
                                kmp_int64 incr, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"
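
// A sketch of the chunk loop a compiler might emit for a composite
// "distribute parallel for" with dist_schedule(static, chunk), driving
// __kmpc_team_static_init_4 (illustrative only; real codegen differs in
// detail and runs the parallel worksharing inside each chunk):
//
//   kmp_int32 last = 0, lb = lo, ub = hi, st = 1;
//   __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, chunk);
//   for (; lb <= hi; lb += st, ub += st) { // stride jumps over other teams
//     kmp_int32 chunk_ub = (ub <= hi ? ub : hi);
//     for (kmp_int32 i = lb; i <= chunk_ub; ++i)
//       body(i); // body is shared among the team's threads
//   }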