1 /* 2 * kmp_sched.cpp -- static scheduling -- iteration initialization 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 /* Static scheduling initialization. 17 18 NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however 19 it may change values between parallel regions. __kmp_max_nth 20 is the largest value __kmp_nth may take, 1 is the smallest. */ 21 22 #include "kmp.h" 23 #include "kmp_error.h" 24 #include "kmp_i18n.h" 25 #include "kmp_itt.h" 26 #include "kmp_stats.h" 27 #include "kmp_str.h" 28 29 #if OMPT_SUPPORT 30 #include "ompt-specific.h" 31 #endif 32 33 #ifdef KMP_DEBUG 34 //------------------------------------------------------------------------- 35 // template for debug prints specification ( d, u, lld, llu ) 36 char const *traits_t<int>::spec = "d"; 37 char const *traits_t<unsigned int>::spec = "u"; 38 char const *traits_t<long long>::spec = "lld"; 39 char const *traits_t<unsigned long long>::spec = "llu"; 40 //------------------------------------------------------------------------- 41 #endif 42 43 template <typename T> 44 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid, 45 kmp_int32 schedtype, kmp_int32 *plastiter, 46 T *plower, T *pupper, 47 typename traits_t<T>::signed_t *pstride, 48 typename traits_t<T>::signed_t incr, 49 typename traits_t<T>::signed_t chunk) { 50 KMP_COUNT_BLOCK(OMP_FOR_static); 51 KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling); 52 53 typedef typename traits_t<T>::unsigned_t UT; 54 typedef typename traits_t<T>::signed_t ST; 55 /* this all has to be changed back to TID and such.. */ 56 register kmp_int32 gtid = global_tid; 57 register kmp_uint32 tid; 58 register kmp_uint32 nth; 59 register UT trip_count; 60 register kmp_team_t *team; 61 register kmp_info_t *th = __kmp_threads[gtid]; 62 63 #if OMPT_SUPPORT && OMPT_TRACE 64 ompt_team_info_t *team_info = NULL; 65 ompt_task_info_t *task_info = NULL; 66 67 if (ompt_enabled) { 68 // Only fully initialize variables needed by OMPT if OMPT is enabled. 69 team_info = __ompt_get_teaminfo(0, NULL); 70 task_info = __ompt_get_taskinfo(0); 71 } 72 #endif 73 74 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride); 75 KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid)); 76 #ifdef KMP_DEBUG 77 { 78 const char *buff; 79 // create format specifiers before the debug output 80 buff = __kmp_str_format( 81 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," 82 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", 83 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec, 84 traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec); 85 KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper, 86 *pstride, incr, chunk)); 87 __kmp_str_free(&buff); 88 } 89 #endif 90 91 if (__kmp_env_consistency_check) { 92 __kmp_push_workshare(global_tid, ct_pdo, loc); 93 if (incr == 0) { 94 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, 95 loc); 96 } 97 } 98 /* special handling for zero-trip loops */ 99 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { 100 if (plastiter != NULL) 101 *plastiter = FALSE; 102 /* leave pupper and plower set to entire iteration space */ 103 *pstride = incr; /* value should never be used */ 104 // *plower = *pupper - incr; 105 // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) 106 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE 107 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009. 108 #ifdef KMP_DEBUG 109 { 110 const char *buff; 111 // create format specifiers before the debug output 112 buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d " 113 "lower=%%%s upper=%%%s stride = %%%s " 114 "signed?<%s>, loc = %%s\n", 115 traits_t<T>::spec, traits_t<T>::spec, 116 traits_t<ST>::spec, traits_t<T>::spec); 117 KD_TRACE(100, 118 (buff, *plastiter, *plower, *pupper, *pstride, loc->psource)); 119 __kmp_str_free(&buff); 120 } 121 #endif 122 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 123 124 #if OMPT_SUPPORT && OMPT_TRACE 125 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 126 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 127 team_info->parallel_id, task_info->task_id, team_info->microtask); 128 } 129 #endif 130 KMP_COUNT_VALUE(FOR_static_iterations, 0); 131 return; 132 } 133 134 #if OMP_40_ENABLED 135 // Although there are schedule enumerations above kmp_ord_upper which are not 136 // schedules for "distribute", the only ones which are useful are dynamic, so 137 // cannot be seen here, since this codepath is only executed for static 138 // schedules. 139 if (schedtype > kmp_ord_upper) { 140 // we are in DISTRIBUTE construct 141 schedtype += kmp_sch_static - 142 kmp_distribute_static; // AC: convert to usual schedule type 143 tid = th->th.th_team->t.t_master_tid; 144 team = th->th.th_team->t.t_parent; 145 } else 146 #endif 147 { 148 tid = __kmp_tid_from_gtid(global_tid); 149 team = th->th.th_team; 150 } 151 152 /* determine if "for" loop is an active worksharing construct */ 153 if (team->t.t_serialized) { 154 /* serialized parallel, each thread executes whole iteration space */ 155 if (plastiter != NULL) 156 *plastiter = TRUE; 157 /* leave pupper and plower set to entire iteration space */ 158 *pstride = 159 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); 160 161 #ifdef KMP_DEBUG 162 { 163 const char *buff; 164 // create format specifiers before the debug output 165 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " 166 "lower=%%%s upper=%%%s stride = %%%s\n", 167 traits_t<T>::spec, traits_t<T>::spec, 168 traits_t<ST>::spec); 169 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); 170 __kmp_str_free(&buff); 171 } 172 #endif 173 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 174 175 #if OMPT_SUPPORT && OMPT_TRACE 176 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 177 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 178 team_info->parallel_id, task_info->task_id, team_info->microtask); 179 } 180 #endif 181 return; 182 } 183 nth = team->t.t_nproc; 184 if (nth == 1) { 185 if (plastiter != NULL) 186 *plastiter = TRUE; 187 *pstride = 188 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); 189 #ifdef KMP_DEBUG 190 { 191 const char *buff; 192 // create format specifiers before the debug output 193 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " 194 "lower=%%%s upper=%%%s stride = %%%s\n", 195 traits_t<T>::spec, traits_t<T>::spec, 196 traits_t<ST>::spec); 197 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); 198 __kmp_str_free(&buff); 199 } 200 #endif 201 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 202 203 #if OMPT_SUPPORT && OMPT_TRACE 204 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 205 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 206 team_info->parallel_id, task_info->task_id, team_info->microtask); 207 } 208 #endif 209 return; 210 } 211 212 /* compute trip count */ 213 if (incr == 1) { 214 trip_count = *pupper - *plower + 1; 215 } else if (incr == -1) { 216 trip_count = *plower - *pupper + 1; 217 } else if (incr > 0) { 218 // upper-lower can exceed the limit of signed type 219 trip_count = (UT)(*pupper - *plower) / incr + 1; 220 } else { 221 trip_count = (UT)(*plower - *pupper) / (-incr) + 1; 222 } 223 224 if (__kmp_env_consistency_check) { 225 /* tripcount overflow? */ 226 if (trip_count == 0 && *pupper != *plower) { 227 __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, 228 loc); 229 } 230 } 231 KMP_COUNT_VALUE(FOR_static_iterations, trip_count); 232 233 /* compute remaining parameters */ 234 switch (schedtype) { 235 case kmp_sch_static: { 236 if (trip_count < nth) { 237 KMP_DEBUG_ASSERT( 238 __kmp_static == kmp_sch_static_greedy || 239 __kmp_static == 240 kmp_sch_static_balanced); // Unknown static scheduling type. 241 if (tid < trip_count) { 242 *pupper = *plower = *plower + tid * incr; 243 } else { 244 *plower = *pupper + incr; 245 } 246 if (plastiter != NULL) 247 *plastiter = (tid == trip_count - 1); 248 } else { 249 if (__kmp_static == kmp_sch_static_balanced) { 250 register UT small_chunk = trip_count / nth; 251 register UT extras = trip_count % nth; 252 *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras)); 253 *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr); 254 if (plastiter != NULL) 255 *plastiter = (tid == nth - 1); 256 } else { 257 register T big_chunk_inc_count = 258 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; 259 register T old_upper = *pupper; 260 261 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); 262 // Unknown static scheduling type. 263 264 *plower += tid * big_chunk_inc_count; 265 *pupper = *plower + big_chunk_inc_count - incr; 266 if (incr > 0) { 267 if (*pupper < *plower) 268 *pupper = traits_t<T>::max_value; 269 if (plastiter != NULL) 270 *plastiter = *plower <= old_upper && *pupper > old_upper - incr; 271 if (*pupper > old_upper) 272 *pupper = old_upper; // tracker C73258 273 } else { 274 if (*pupper > *plower) 275 *pupper = traits_t<T>::min_value; 276 if (plastiter != NULL) 277 *plastiter = *plower >= old_upper && *pupper < old_upper - incr; 278 if (*pupper < old_upper) 279 *pupper = old_upper; // tracker C73258 280 } 281 } 282 } 283 *pstride = trip_count; 284 break; 285 } 286 case kmp_sch_static_chunked: { 287 register ST span; 288 if (chunk < 1) { 289 chunk = 1; 290 } 291 span = chunk * incr; 292 *pstride = span * nth; 293 *plower = *plower + (span * tid); 294 *pupper = *plower + span - incr; 295 if (plastiter != NULL) 296 *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth); 297 break; 298 } 299 #if OMP_45_ENABLED 300 case kmp_sch_static_balanced_chunked: { 301 register T old_upper = *pupper; 302 // round up to make sure the chunk is enough to cover all iterations 303 register UT span = (trip_count + nth - 1) / nth; 304 305 // perform chunk adjustment 306 chunk = (span + chunk - 1) & ~(chunk - 1); 307 308 span = chunk * incr; 309 *plower = *plower + (span * tid); 310 *pupper = *plower + span - incr; 311 if (incr > 0) { 312 if (*pupper > old_upper) 313 *pupper = old_upper; 314 } else if (*pupper < old_upper) 315 *pupper = old_upper; 316 317 if (plastiter != NULL) 318 *plastiter = (tid == ((trip_count - 1) / (UT)chunk)); 319 break; 320 } 321 #endif 322 default: 323 KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type"); 324 break; 325 } 326 327 #if USE_ITT_BUILD 328 // Report loop metadata 329 if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && 330 __kmp_forkjoin_frames_mode == 3 && 331 #if OMP_40_ENABLED 332 th->th.th_teams_microtask == NULL && 333 #endif 334 team->t.t_active_level == 1) { 335 kmp_uint64 cur_chunk = chunk; 336 // Calculate chunk in case it was not specified; it is specified for 337 // kmp_sch_static_chunked 338 if (schedtype == kmp_sch_static) { 339 cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0); 340 } 341 // 0 - "static" schedule 342 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); 343 } 344 #endif 345 #ifdef KMP_DEBUG 346 { 347 const char *buff; 348 // create format specifiers before the debug output 349 buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s " 350 "upper=%%%s stride = %%%s signed?<%s>\n", 351 traits_t<T>::spec, traits_t<T>::spec, 352 traits_t<ST>::spec, traits_t<T>::spec); 353 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); 354 __kmp_str_free(&buff); 355 } 356 #endif 357 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); 358 359 #if OMPT_SUPPORT && OMPT_TRACE 360 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { 361 ompt_callbacks.ompt_callback(ompt_event_loop_begin)( 362 team_info->parallel_id, task_info->task_id, team_info->microtask); 363 } 364 #endif 365 366 return; 367 } 368 369 template <typename T> 370 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid, 371 kmp_int32 schedule, kmp_int32 *plastiter, 372 T *plower, T *pupper, T *pupperDist, 373 typename traits_t<T>::signed_t *pstride, 374 typename traits_t<T>::signed_t incr, 375 typename traits_t<T>::signed_t chunk) { 376 KMP_COUNT_BLOCK(OMP_DISTRIBUTE); 377 typedef typename traits_t<T>::unsigned_t UT; 378 typedef typename traits_t<T>::signed_t ST; 379 register kmp_uint32 tid; 380 register kmp_uint32 nth; 381 register kmp_uint32 team_id; 382 register kmp_uint32 nteams; 383 register UT trip_count; 384 register kmp_team_t *team; 385 kmp_info_t *th; 386 387 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride); 388 KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); 389 #ifdef KMP_DEBUG 390 { 391 const char *buff; 392 // create format specifiers before the debug output 393 buff = __kmp_str_format( 394 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d " 395 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", 396 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec, 397 traits_t<ST>::spec, traits_t<T>::spec); 398 KD_TRACE(100, 399 (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk)); 400 __kmp_str_free(&buff); 401 } 402 #endif 403 404 if (__kmp_env_consistency_check) { 405 __kmp_push_workshare(gtid, ct_pdo, loc); 406 if (incr == 0) { 407 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, 408 loc); 409 } 410 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { 411 // The loop is illegal. 412 // Some zero-trip loops maintained by compiler, e.g.: 413 // for(i=10;i<0;++i) // lower >= upper - run-time check 414 // for(i=0;i>10;--i) // lower <= upper - run-time check 415 // for(i=0;i>10;++i) // incr > 0 - compile-time check 416 // for(i=10;i<0;--i) // incr < 0 - compile-time check 417 // Compiler does not check the following illegal loops: 418 // for(i=0;i<10;i+=incr) // where incr<0 419 // for(i=10;i>0;i-=incr) // where incr<0 420 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); 421 } 422 } 423 tid = __kmp_tid_from_gtid(gtid); 424 th = __kmp_threads[gtid]; 425 nth = th->th.th_team_nproc; 426 team = th->th.th_team; 427 #if OMP_40_ENABLED 428 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct 429 nteams = th->th.th_teams_size.nteams; 430 #endif 431 team_id = team->t.t_master_tid; 432 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); 433 434 // compute global trip count 435 if (incr == 1) { 436 trip_count = *pupper - *plower + 1; 437 } else if (incr == -1) { 438 trip_count = *plower - *pupper + 1; 439 } else if (incr > 0) { 440 // upper-lower can exceed the limit of signed type 441 trip_count = (UT)(*pupper - *plower) / incr + 1; 442 } else { 443 trip_count = (UT)(*plower - *pupper) / (-incr) + 1; 444 } 445 446 *pstride = *pupper - *plower; // just in case (can be unused) 447 if (trip_count <= nteams) { 448 KMP_DEBUG_ASSERT( 449 __kmp_static == kmp_sch_static_greedy || 450 __kmp_static == 451 kmp_sch_static_balanced); // Unknown static scheduling type. 452 // only masters of some teams get single iteration, other threads get 453 // nothing 454 if (team_id < trip_count && tid == 0) { 455 *pupper = *pupperDist = *plower = *plower + team_id * incr; 456 } else { 457 *pupperDist = *pupper; 458 *plower = *pupper + incr; // compiler should skip loop body 459 } 460 if (plastiter != NULL) 461 *plastiter = (tid == 0 && team_id == trip_count - 1); 462 } else { 463 // Get the team's chunk first (each team gets at most one chunk) 464 if (__kmp_static == kmp_sch_static_balanced) { 465 register UT chunkD = trip_count / nteams; 466 register UT extras = trip_count % nteams; 467 *plower += 468 incr * (team_id * chunkD + (team_id < extras ? team_id : extras)); 469 *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr); 470 if (plastiter != NULL) 471 *plastiter = (team_id == nteams - 1); 472 } else { 473 register T chunk_inc_count = 474 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; 475 register T upper = *pupper; 476 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); 477 // Unknown static scheduling type. 478 *plower += team_id * chunk_inc_count; 479 *pupperDist = *plower + chunk_inc_count - incr; 480 // Check/correct bounds if needed 481 if (incr > 0) { 482 if (*pupperDist < *plower) 483 *pupperDist = traits_t<T>::max_value; 484 if (plastiter != NULL) 485 *plastiter = *plower <= upper && *pupperDist > upper - incr; 486 if (*pupperDist > upper) 487 *pupperDist = upper; // tracker C73258 488 if (*plower > *pupperDist) { 489 *pupper = *pupperDist; // no iterations available for the team 490 goto end; 491 } 492 } else { 493 if (*pupperDist > *plower) 494 *pupperDist = traits_t<T>::min_value; 495 if (plastiter != NULL) 496 *plastiter = *plower >= upper && *pupperDist < upper - incr; 497 if (*pupperDist < upper) 498 *pupperDist = upper; // tracker C73258 499 if (*plower < *pupperDist) { 500 *pupper = *pupperDist; // no iterations available for the team 501 goto end; 502 } 503 } 504 } 505 // Get the parallel loop chunk now (for thread) 506 // compute trip count for team's chunk 507 if (incr == 1) { 508 trip_count = *pupperDist - *plower + 1; 509 } else if (incr == -1) { 510 trip_count = *plower - *pupperDist + 1; 511 } else if (incr > 1) { 512 // upper-lower can exceed the limit of signed type 513 trip_count = (UT)(*pupperDist - *plower) / incr + 1; 514 } else { 515 trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; 516 } 517 KMP_DEBUG_ASSERT(trip_count); 518 switch (schedule) { 519 case kmp_sch_static: { 520 if (trip_count <= nth) { 521 KMP_DEBUG_ASSERT( 522 __kmp_static == kmp_sch_static_greedy || 523 __kmp_static == 524 kmp_sch_static_balanced); // Unknown static scheduling type. 525 if (tid < trip_count) 526 *pupper = *plower = *plower + tid * incr; 527 else 528 *plower = *pupper + incr; // no iterations available 529 if (plastiter != NULL) 530 if (*plastiter != 0 && !(tid == trip_count - 1)) 531 *plastiter = 0; 532 } else { 533 if (__kmp_static == kmp_sch_static_balanced) { 534 register UT chunkL = trip_count / nth; 535 register UT extras = trip_count % nth; 536 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); 537 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); 538 if (plastiter != NULL) 539 if (*plastiter != 0 && !(tid == nth - 1)) 540 *plastiter = 0; 541 } else { 542 register T chunk_inc_count = 543 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; 544 register T upper = *pupperDist; 545 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); 546 // Unknown static scheduling type. 547 *plower += tid * chunk_inc_count; 548 *pupper = *plower + chunk_inc_count - incr; 549 if (incr > 0) { 550 if (*pupper < *plower) 551 *pupper = traits_t<T>::max_value; 552 if (plastiter != NULL) 553 if (*plastiter != 0 && 554 !(*plower <= upper && *pupper > upper - incr)) 555 *plastiter = 0; 556 if (*pupper > upper) 557 *pupper = upper; // tracker C73258 558 } else { 559 if (*pupper > *plower) 560 *pupper = traits_t<T>::min_value; 561 if (plastiter != NULL) 562 if (*plastiter != 0 && 563 !(*plower >= upper && *pupper < upper - incr)) 564 *plastiter = 0; 565 if (*pupper < upper) 566 *pupper = upper; // tracker C73258 567 } 568 } 569 } 570 break; 571 } 572 case kmp_sch_static_chunked: { 573 register ST span; 574 if (chunk < 1) 575 chunk = 1; 576 span = chunk * incr; 577 *pstride = span * nth; 578 *plower = *plower + (span * tid); 579 *pupper = *plower + span - incr; 580 if (plastiter != NULL) 581 if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth)) 582 *plastiter = 0; 583 break; 584 } 585 default: 586 KMP_ASSERT2(0, 587 "__kmpc_dist_for_static_init: unknown loop scheduling type"); 588 break; 589 } 590 } 591 end:; 592 #ifdef KMP_DEBUG 593 { 594 const char *buff; 595 // create format specifiers before the debug output 596 buff = __kmp_str_format( 597 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s " 598 "stride=%%%s signed?<%s>\n", 599 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec, 600 traits_t<ST>::spec, traits_t<T>::spec); 601 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride)); 602 __kmp_str_free(&buff); 603 } 604 #endif 605 KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid)); 606 return; 607 } 608 609 template <typename T> 610 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid, 611 kmp_int32 *p_last, T *p_lb, T *p_ub, 612 typename traits_t<T>::signed_t *p_st, 613 typename traits_t<T>::signed_t incr, 614 typename traits_t<T>::signed_t chunk) { 615 // The routine returns the first chunk distributed to the team and 616 // stride for next chunks calculation. 617 // Last iteration flag set for the team that will execute 618 // the last iteration of the loop. 619 // The routine is called for dist_schedue(static,chunk) only. 620 typedef typename traits_t<T>::unsigned_t UT; 621 typedef typename traits_t<T>::signed_t ST; 622 kmp_uint32 team_id; 623 kmp_uint32 nteams; 624 UT trip_count; 625 T lower; 626 T upper; 627 ST span; 628 kmp_team_t *team; 629 kmp_info_t *th; 630 631 KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st); 632 KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid)); 633 #ifdef KMP_DEBUG 634 { 635 const char *buff; 636 // create format specifiers before the debug output 637 buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d " 638 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", 639 traits_t<T>::spec, traits_t<T>::spec, 640 traits_t<ST>::spec, traits_t<ST>::spec, 641 traits_t<T>::spec); 642 KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk)); 643 __kmp_str_free(&buff); 644 } 645 #endif 646 647 lower = *p_lb; 648 upper = *p_ub; 649 if (__kmp_env_consistency_check) { 650 if (incr == 0) { 651 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, 652 loc); 653 } 654 if (incr > 0 ? (upper < lower) : (lower < upper)) { 655 // The loop is illegal. 656 // Some zero-trip loops maintained by compiler, e.g.: 657 // for(i=10;i<0;++i) // lower >= upper - run-time check 658 // for(i=0;i>10;--i) // lower <= upper - run-time check 659 // for(i=0;i>10;++i) // incr > 0 - compile-time check 660 // for(i=10;i<0;--i) // incr < 0 - compile-time check 661 // Compiler does not check the following illegal loops: 662 // for(i=0;i<10;i+=incr) // where incr<0 663 // for(i=10;i>0;i-=incr) // where incr<0 664 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); 665 } 666 } 667 th = __kmp_threads[gtid]; 668 team = th->th.th_team; 669 #if OMP_40_ENABLED 670 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct 671 nteams = th->th.th_teams_size.nteams; 672 #endif 673 team_id = team->t.t_master_tid; 674 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); 675 676 // compute trip count 677 if (incr == 1) { 678 trip_count = upper - lower + 1; 679 } else if (incr == -1) { 680 trip_count = lower - upper + 1; 681 } else if (incr > 0) { 682 // upper-lower can exceed the limit of signed type 683 trip_count = (UT)(upper - lower) / incr + 1; 684 } else { 685 trip_count = (UT)(lower - upper) / (-incr) + 1; 686 } 687 if (chunk < 1) 688 chunk = 1; 689 span = chunk * incr; 690 *p_st = span * nteams; 691 *p_lb = lower + (span * team_id); 692 *p_ub = *p_lb + span - incr; 693 if (p_last != NULL) 694 *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams); 695 // Correct upper bound if needed 696 if (incr > 0) { 697 if (*p_ub < *p_lb) // overflow? 698 *p_ub = traits_t<T>::max_value; 699 if (*p_ub > upper) 700 *p_ub = upper; // tracker C73258 701 } else { // incr < 0 702 if (*p_ub > *p_lb) 703 *p_ub = traits_t<T>::min_value; 704 if (*p_ub < upper) 705 *p_ub = upper; // tracker C73258 706 } 707 #ifdef KMP_DEBUG 708 { 709 const char *buff; 710 // create format specifiers before the debug output 711 buff = 712 __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d " 713 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", 714 traits_t<T>::spec, traits_t<T>::spec, 715 traits_t<ST>::spec, traits_t<ST>::spec); 716 KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk)); 717 __kmp_str_free(&buff); 718 } 719 #endif 720 } 721 722 //------------------------------------------------------------------------------ 723 extern "C" { 724 /*! 725 @ingroup WORK_SHARING 726 @param loc Source code location 727 @param gtid Global thread id of this thread 728 @param schedtype Scheduling type 729 @param plastiter Pointer to the "last iteration" flag 730 @param plower Pointer to the lower bound 731 @param pupper Pointer to the upper bound 732 @param pstride Pointer to the stride 733 @param incr Loop increment 734 @param chunk The chunk size 735 736 Each of the four functions here are identical apart from the argument types. 737 738 The functions compute the upper and lower bounds and stride to be used for the 739 set of iterations to be executed by the current thread from the statically 740 scheduled loop that is described by the initial values of the bounds, stride, 741 increment and chunk size. 742 743 @{ 744 */ 745 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, 746 kmp_int32 *plastiter, kmp_int32 *plower, 747 kmp_int32 *pupper, kmp_int32 *pstride, 748 kmp_int32 incr, kmp_int32 chunk) { 749 __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower, 750 pupper, pstride, incr, chunk); 751 } 752 753 /*! 754 See @ref __kmpc_for_static_init_4 755 */ 756 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, 757 kmp_int32 schedtype, kmp_int32 *plastiter, 758 kmp_uint32 *plower, kmp_uint32 *pupper, 759 kmp_int32 *pstride, kmp_int32 incr, 760 kmp_int32 chunk) { 761 __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower, 762 pupper, pstride, incr, chunk); 763 } 764 765 /*! 766 See @ref __kmpc_for_static_init_4 767 */ 768 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, 769 kmp_int32 *plastiter, kmp_int64 *plower, 770 kmp_int64 *pupper, kmp_int64 *pstride, 771 kmp_int64 incr, kmp_int64 chunk) { 772 __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower, 773 pupper, pstride, incr, chunk); 774 } 775 776 /*! 777 See @ref __kmpc_for_static_init_4 778 */ 779 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, 780 kmp_int32 schedtype, kmp_int32 *plastiter, 781 kmp_uint64 *plower, kmp_uint64 *pupper, 782 kmp_int64 *pstride, kmp_int64 incr, 783 kmp_int64 chunk) { 784 __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower, 785 pupper, pstride, incr, chunk); 786 } 787 /*! 788 @} 789 */ 790 791 /*! 792 @ingroup WORK_SHARING 793 @param loc Source code location 794 @param gtid Global thread id of this thread 795 @param schedule Scheduling type for the parallel loop 796 @param plastiter Pointer to the "last iteration" flag 797 @param plower Pointer to the lower bound 798 @param pupper Pointer to the upper bound of loop chunk 799 @param pupperD Pointer to the upper bound of dist_chunk 800 @param pstride Pointer to the stride for parallel loop 801 @param incr Loop increment 802 @param chunk The chunk size for the parallel loop 803 804 Each of the four functions here are identical apart from the argument types. 805 806 The functions compute the upper and lower bounds and strides to be used for the 807 set of iterations to be executed by the current thread from the statically 808 scheduled loop that is described by the initial values of the bounds, strides, 809 increment and chunks for parallel loop and distribute constructs. 810 811 @{ 812 */ 813 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, 814 kmp_int32 schedule, kmp_int32 *plastiter, 815 kmp_int32 *plower, kmp_int32 *pupper, 816 kmp_int32 *pupperD, kmp_int32 *pstride, 817 kmp_int32 incr, kmp_int32 chunk) { 818 __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower, 819 pupper, pupperD, pstride, incr, chunk); 820 } 821 822 /*! 823 See @ref __kmpc_dist_for_static_init_4 824 */ 825 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, 826 kmp_int32 schedule, kmp_int32 *plastiter, 827 kmp_uint32 *plower, kmp_uint32 *pupper, 828 kmp_uint32 *pupperD, kmp_int32 *pstride, 829 kmp_int32 incr, kmp_int32 chunk) { 830 __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower, 831 pupper, pupperD, pstride, incr, chunk); 832 } 833 834 /*! 835 See @ref __kmpc_dist_for_static_init_4 836 */ 837 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, 838 kmp_int32 schedule, kmp_int32 *plastiter, 839 kmp_int64 *plower, kmp_int64 *pupper, 840 kmp_int64 *pupperD, kmp_int64 *pstride, 841 kmp_int64 incr, kmp_int64 chunk) { 842 __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower, 843 pupper, pupperD, pstride, incr, chunk); 844 } 845 846 /*! 847 See @ref __kmpc_dist_for_static_init_4 848 */ 849 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, 850 kmp_int32 schedule, kmp_int32 *plastiter, 851 kmp_uint64 *plower, kmp_uint64 *pupper, 852 kmp_uint64 *pupperD, kmp_int64 *pstride, 853 kmp_int64 incr, kmp_int64 chunk) { 854 __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower, 855 pupper, pupperD, pstride, incr, chunk); 856 } 857 /*! 858 @} 859 */ 860 861 //------------------------------------------------------------------------------ 862 // Auxiliary routines for Distribute Parallel Loop construct implementation 863 // Transfer call to template< type T > 864 // __kmp_team_static_init( ident_t *loc, int gtid, 865 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) 866 867 /*! 868 @ingroup WORK_SHARING 869 @{ 870 @param loc Source location 871 @param gtid Global thread id 872 @param p_last pointer to last iteration flag 873 @param p_lb pointer to Lower bound 874 @param p_ub pointer to Upper bound 875 @param p_st Step (or increment if you prefer) 876 @param incr Loop increment 877 @param chunk The chunk size to block with 878 879 The functions compute the upper and lower bounds and stride to be used for the 880 set of iterations to be executed by the current team from the statically 881 scheduled loop that is described by the initial values of the bounds, stride, 882 increment and chunk for the distribute construct as part of composite distribute 883 parallel loop construct. These functions are all identical apart from the types 884 of the arguments. 885 */ 886 887 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 888 kmp_int32 *p_lb, kmp_int32 *p_ub, 889 kmp_int32 *p_st, kmp_int32 incr, 890 kmp_int32 chunk) { 891 KMP_DEBUG_ASSERT(__kmp_init_serial); 892 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 893 chunk); 894 } 895 896 /*! 897 See @ref __kmpc_team_static_init_4 898 */ 899 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 900 kmp_uint32 *p_lb, kmp_uint32 *p_ub, 901 kmp_int32 *p_st, kmp_int32 incr, 902 kmp_int32 chunk) { 903 KMP_DEBUG_ASSERT(__kmp_init_serial); 904 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 905 chunk); 906 } 907 908 /*! 909 See @ref __kmpc_team_static_init_4 910 */ 911 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 912 kmp_int64 *p_lb, kmp_int64 *p_ub, 913 kmp_int64 *p_st, kmp_int64 incr, 914 kmp_int64 chunk) { 915 KMP_DEBUG_ASSERT(__kmp_init_serial); 916 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 917 chunk); 918 } 919 920 /*! 921 See @ref __kmpc_team_static_init_4 922 */ 923 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 924 kmp_uint64 *p_lb, kmp_uint64 *p_ub, 925 kmp_int64 *p_st, kmp_int64 incr, 926 kmp_int64 chunk) { 927 KMP_DEBUG_ASSERT(__kmp_init_serial); 928 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 929 chunk); 930 } 931 /*! 932 @} 933 */ 934 935 } // extern "C" 936