/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside any dispatch loop; however,
         it may change between parallel regions. __kmp_max_nth is the
         largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_FOR_static);
  KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_type_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_COUNT_VALUE(FOR_static_iterations, 0);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only useful ones are dynamic, so they
  // cannot be seen here, since this code path is executed only for static
  // schedules.
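  // Illustrative note (hedged; assumes the usual sched_type enum layout in
  // kmp.h, where the kmp_distribute_* values mirror the kmp_sch_static*
  // values): the adjustment below maps kmp_distribute_static to
  // kmp_sch_static and kmp_distribute_static_chunked to
  // kmp_sch_static_chunked.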
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
  KMP_COUNT_VALUE(FOR_static_iterations, trip_count);

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
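      // Fewer iterations than threads: the first trip_count threads each
      // take exactly one iteration; the rest get an empty range.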
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the masters of some teams get a single iteration; all other
    // threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
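      // Greedy split across teams: every team takes a contiguous block of
      // ceil(trip_count / nteams) iterations; the last team's bounds are
      // clipped below.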
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
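          // Greedy split again, now across the threads of the team: each
          // thread takes a contiguous block of ceil(trip_count / nth)
          // iterations of the team's chunk; bounds are clipped below.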
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.
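
A minimal usage sketch (hedged illustration: in practice the compiler emits
this call; @c loc, @c gtid and @c body() are assumed to exist in the caller,
and 34 is assumed to be the value of kmp_sch_static):
@code
kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
__kmpc_for_static_init_4(&loc, gtid, /*kmp_sch_static*/ 34, &last, &lb, &ub,
                         &st, /*incr*/ 1, /*chunk*/ 0);
for (kmp_int32 i = lb; i <= ub; ++i)
  body(i); // each thread runs only its share of [0, 99]
__kmpc_for_static_fini(&loc, gtid);
@endcode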

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunk sizes for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
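
// Hedged usage note for the __kmpc_dist_for_static_init_* entry points: on
// return, [*plower, *pupperD] is the chunk assigned to the whole team by the
// distribute construct, while [*plower, *pupper] is the current thread's
// portion of that chunk, so the generated parallel loop iterates over
// [*plower, *pupper] with increment incr.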

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the "last iteration" flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
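
// Hedged usage note: each __kmpc_team_static_init_* call returns the team's
// first chunk in [*p_lb, *p_ub]; *p_st is the distance to the team's next
// chunk (i.e., that chunk starts at *p_lb + *p_st), matching
// dist_schedule(static, chunk).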

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"